Loading emri_data/scripts/data_preprocess_dataframe.py +16 −5 Original line number Diff line number Diff line import numpy as np from pathlib import Path import pandas as pd from scipy.interpolate import NearestNDInterpolator data_directory = '../schwarz_data/{}' Loading @@ -27,11 +28,16 @@ for intrinsic_set in intrinsics: grid_out[num,:9] = out snr_here = flat_snr_grid[num] if snr_here == 0: snr_here += 1e-6 grid_out[num,9] = snr_here num += 1 to_fix = flat_snr_grid == 0 subset = grid_out[np.where(~to_fix)] print(subset.shape) interpolator = NearestNDInterpolator(subset[:,:-1],subset[:,-1]) for fixnum in np.where(to_fix): grid_out[fixnum,9] = interpolator(grid_out[fixnum,:9]) save_dir = '../schwarz_data/{}' Path('../schwarz_data/').mkdir(parents=True, exist_ok=True) Loading @@ -40,7 +46,7 @@ df_out = pd.DataFrame(grid_out, columns=cols) df_out.to_csv(data_directory.format('grid_dataframe.csv'), index=False) # Samples snr_list = np.load(data_directory.format('samp_snrs.npy')) snr_list = np.load(data_directory.format('samp_snrs.npy')).flatten() inds_to_keep = ~np.isnan(snr_list) intrinsics = np.load(data_directory.format('samp_intrinsics.npy')) Loading @@ -56,11 +62,16 @@ for i in range(snr_list.size): here[8] = intrinsics[i,5] snr_here = snr_list[i] if snr_here == 0: snr_here += 1e-6 samp_out[i,:9] = here samp_out[i,9] = snr_here samp_out = samp_out[inds_to_keep,:] to_fix = snr_list[inds_to_keep] == 0 subset = samp_out[np.where(~to_fix)] interpolator = NearestNDInterpolator(subset[:,:-1],subset[:,-1]) for fixnum in np.where(to_fix): samp_out[fixnum,9] = interpolator(samp_out[fixnum,:9]) samp_df_out = pd.DataFrame(samp_out, columns=cols) samp_df_out.to_csv(data_directory.format('samp_dataframe.csv'), index=False) emri_data/scripts/existing_dataframe_fixzeronans.py 0 → 100644 +51 −0 Original line number Diff line number Diff line import numpy as np from pathlib import Path import pandas as pd from scipy.interpolate import NearestNDInterpolator data_directory = 
'../schwarz_negY_fix/{}' save_dir = data_directory[:-3] Path(save_dir).mkdir(parents=True, exist_ok=True) # Grid try: grid_df = pd.read_csv(data_directory.format('grid_dataframe.csv')) to_fix = np.isnan(grid_df['SNR'].to_numpy()) grid_df = grid_df.iloc[~to_fix,:] to_fix = grid_df['SNR'].to_numpy() == 0 if to_fix.size > 0: subset = grid_df.iloc[np.where(~to_fix)[0],:] interpolator = NearestNDInterpolator(subset.loc[:, grid_df.columns != 'SNR'].to_numpy(),subset['SNR'].to_numpy()) for fixnum in np.where(to_fix)[0]: temp = interpolator(grid_df.loc[fixnum,grid_df.columns != 'SNR'].to_numpy()) grid_df.at[fixnum,'SNR'] = temp cols = ['logM','logq','a','p0','e','Y0','thetaS','phiS','thetaK','t','SNR'] grid_df.to_csv(data_directory.format('grid_dataframe_fixed.csv'), index=False) except FileNotFoundError: print('Grid data not found. Continuing...') pass except: raise # Samples try: samp_df = pd.read_csv(data_directory.format('samp_dataframe.csv')) to_fix = np.isnan(samp_df['SNR'].to_numpy()) samp_df = samp_df.iloc[~to_fix,:] to_fix = samp_df['SNR'].to_numpy() == 0 if to_fix.size > 0: subset = samp_df.iloc[np.where(~to_fix)[0],:] interpolator = NearestNDInterpolator(subset.loc[:, samp_df.columns != 'SNR'].to_numpy(),subset['SNR'].to_numpy()) for fixnum in np.where(to_fix)[0]: temp = interpolator(samp_df.loc[fixnum,samp_df.columns != 'SNR'].to_numpy()) samp_df.at[fixnum,'SNR'] = temp samp_df.to_csv(data_directory.format('samp_dataframe_fixed.csv'), index=False) except FileNotFoundError: print('Sample data not found. Continuing...') except: raise No newline at end of file Loading
emri_data/scripts/data_preprocess_dataframe.py +16 −5 Original line number Diff line number Diff line import numpy as np from pathlib import Path import pandas as pd from scipy.interpolate import NearestNDInterpolator data_directory = '../schwarz_data/{}' Loading @@ -27,11 +28,16 @@ for intrinsic_set in intrinsics: grid_out[num,:9] = out snr_here = flat_snr_grid[num] if snr_here == 0: snr_here += 1e-6 grid_out[num,9] = snr_here num += 1 to_fix = flat_snr_grid == 0 subset = grid_out[np.where(~to_fix)] print(subset.shape) interpolator = NearestNDInterpolator(subset[:,:-1],subset[:,-1]) for fixnum in np.where(to_fix): grid_out[fixnum,9] = interpolator(grid_out[fixnum,:9]) save_dir = '../schwarz_data/{}' Path('../schwarz_data/').mkdir(parents=True, exist_ok=True) Loading @@ -40,7 +46,7 @@ df_out = pd.DataFrame(grid_out, columns=cols) df_out.to_csv(data_directory.format('grid_dataframe.csv'), index=False) # Samples snr_list = np.load(data_directory.format('samp_snrs.npy')) snr_list = np.load(data_directory.format('samp_snrs.npy')).flatten() inds_to_keep = ~np.isnan(snr_list) intrinsics = np.load(data_directory.format('samp_intrinsics.npy')) Loading @@ -56,11 +62,16 @@ for i in range(snr_list.size): here[8] = intrinsics[i,5] snr_here = snr_list[i] if snr_here == 0: snr_here += 1e-6 samp_out[i,:9] = here samp_out[i,9] = snr_here samp_out = samp_out[inds_to_keep,:] to_fix = snr_list[inds_to_keep] == 0 subset = samp_out[np.where(~to_fix)] interpolator = NearestNDInterpolator(subset[:,:-1],subset[:,-1]) for fixnum in np.where(to_fix): samp_out[fixnum,9] = interpolator(samp_out[fixnum,:9]) samp_df_out = pd.DataFrame(samp_out, columns=cols) samp_df_out.to_csv(data_directory.format('samp_dataframe.csv'), index=False)
# emri_data/scripts/existing_dataframe_fixzeronans.py
"""Clean the SNR column of already-written grid/sample dataframes.

For each input CSV this script drops rows whose SNR is NaN, replaces SNR
values that are exactly zero with the SNR of the nearest row (in the space
spanned by all other columns) that has a non-zero SNR, and writes the result
to a ``*_fixed.csv`` file next to the input.
"""
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.interpolate import NearestNDInterpolator

data_directory = '../schwarz_negY_fix/{}'
# Strip the trailing '/{}' placeholder to get the directory itself.
save_dir = data_directory[:-3]


def fix_zero_snr(df, snr_col='SNR'):
    """Return a copy of *df* without NaN-SNR rows and with zero SNRs filled.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``snr_col``; every other column is used as a
        coordinate for the nearest-neighbour lookup.
    snr_col : str, optional
        Name of the SNR column (default ``'SNR'``).

    Returns
    -------
    pandas.DataFrame
        New frame (the input is not modified) with the same columns in the
        same order.  Rows with NaN SNR are removed; rows with SNR == 0 take
        the SNR of the nearest row whose SNR is non-zero.

    Raises
    ------
    ValueError
        If zeros need fixing but no row has a non-zero SNR to borrow from.
    """
    cleaned = df.loc[df[snr_col].notna()].copy()

    # Work on plain arrays so that everything below is positional.  Mixing
    # positions from np.where with label-based .loc/.at breaks as soon as
    # dropping NaN rows leaves gaps in the index.
    snr = cleaned[snr_col].to_numpy(dtype=float, copy=True)
    is_zero = snr == 0
    if not is_zero.any():
        return cleaned
    if is_zero.all():
        raise ValueError(
            'All {} values are zero; nothing to interpolate from.'.format(snr_col)
        )

    coords = cleaned.drop(columns=snr_col).to_numpy(dtype=float)
    interpolator = NearestNDInterpolator(coords[~is_zero], snr[~is_zero])
    # One vectorised query for every zero row instead of a Python loop.
    snr[is_zero] = interpolator(coords[is_zero])
    cleaned[snr_col] = snr
    return cleaned


def process_file(in_name, out_name, missing_message):
    """Fix one CSV under ``data_directory`` and write the cleaned copy.

    Parameters
    ----------
    in_name : str
        Input file name, substituted into ``data_directory``.
    out_name : str
        Output file name, substituted into ``data_directory``.
    missing_message : str
        Text printed when the input file does not exist.

    Returns
    -------
    bool
        ``True`` if the file was processed, ``False`` if it was missing.
    """
    # Only the read is guarded: a missing input is expected and skipped,
    # any other failure should surface normally.
    try:
        df = pd.read_csv(data_directory.format(in_name))
    except FileNotFoundError:
        print(missing_message)
        return False

    fix_zero_snr(df).to_csv(data_directory.format(out_name), index=False)
    return True


def main():
    """Create the output directory and process the grid and sample tables."""
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Grid
    process_file(
        'grid_dataframe.csv',
        'grid_dataframe_fixed.csv',
        'Grid data not found. Continuing...',
    )

    # Samples
    process_file(
        'samp_dataframe.csv',
        'samp_dataframe_fixed.csv',
        'Sample data not found. Continuing...',
    )


if __name__ == '__main__':
    main()