Commit 90fa2e7c authored by Christian Chapman-Bird's avatar Christian Chapman-Bird
Browse files

general updates

parent 76d9ab5f
Loading
Loading
Loading
Loading
+12 −6
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ from pathlib import Path


class LinearModel(nn.Module):
    def __init__(self, in_features, out_features, neurons, n_layers, activation, name, initialisation=xavier_uniform_):
    def __init__(self, in_features, out_features, neurons, n_layers, activation, name, out_activation=None, initialisation=xavier_uniform_, use_dropout=False,drop_p=0.25, use_bn=False):
        super().__init__()
        self.initial = initialisation
        self.name = name
@@ -15,8 +15,16 @@ class LinearModel(nn.Module):
        layers = [nn.Linear(in_features, neurons[0]), activation()]
        for i in range(n_layers - 1):
            layers.append(nn.Linear(neurons[i], neurons[i + 1]))

            if use_dropout:
                layers.append(nn.Dropout(drop_p))  
            layers.append(activation())
            if use_bn:
                layers.append(nn.BatchNorm1d(num_features=neurons[i+1]))
                
        layers.append(nn.Linear(neurons[-1], out_features))
        if out_activation is not None:
            layers.append(out_activation())
        
        self.layers = nn.Sequential(*layers)
        self.layers.apply(self.init_weights)
@@ -29,15 +37,13 @@ class LinearModel(nn.Module):
            self.initial(m.weight)


def create_mlp(input_features, output_features, neurons, layers, activation, model_name, init=xavier_uniform_, device=None):
    if layers < 2:
        raise RuntimeError('You need at least two layers for an MLP.')
def create_mlp(input_features, output_features, neurons, layers, activation, model_name, out_activation=None, init=xavier_uniform_, device=None, use_dropout=False,drop_p=0.25, use_bn=False):
    if isinstance(neurons, list):
        if len(neurons) != layers:
            raise RuntimeError('Length of neuron vector does not equal number of hidden layers.')
    else:
        neurons = [neurons, ]
    model = LinearModel(input_features, output_features, neurons, layers, activation, model_name, initialisation=init)
    model = LinearModel(input_features, output_features, neurons, layers, activation, model_name, initialisation=init, use_dropout=use_dropout,drop_p=drop_p,use_bn=use_bn)

    Path(get_script_path()+f'/../models/{model_name}/').mkdir(parents=True, exist_ok=True)
    pickle.dump(model, open(get_script_path()+f'/../models/{model_name}/function.pickle', "wb"), pickle.HIGHEST_PROTOCOL)  # save blank model
+42 −15
Original line number Diff line number Diff line
@@ -6,7 +6,11 @@ from sys import stdout
from pathlib import Path


def model_train_test(data, model, device, n_epochs, n_batches, loss_function, learning_rate, verbose=False, return_losses=False):
def model_train_test(data, model, device, n_epochs, n_batches, loss_function, optimizer, verbose=False, return_losses=False, update_every=None, n_test_batches=None, save_best=False, scheduler=None):
    
    if n_test_batches is None:
        n_test_batches = n_batches
    
    xtrain, ytrain, xtest, ytest = data
    model.to(device)

@@ -24,8 +28,6 @@ def model_train_test(data, model, device, n_epochs, n_batches, loss_function, le
    ytrainsize = len(ytrain)
    ytestsize = len(ytest)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.97)  # to settle into the loss 'valley'
    train_losses = []
    test_losses = []
    rate = []
@@ -34,6 +36,7 @@ def model_train_test(data, model, device, n_epochs, n_batches, loss_function, le
    datasets = {"train": [xtrain, ytrain], "test": [xtest, ytest]}

    cutoff_LR = n_epochs - 50
    lowest_loss = 1e5
    for epoch in range(n_epochs):  # 5 epochs at maximum
        # Print epoch
        for phase in ['train','test']:
@@ -50,7 +53,7 @@ def model_train_test(data, model, device, n_epochs, n_batches, loss_function, le
                inputs = inputs[shuffled_inds]
                targets = targets[shuffled_inds]

                targets = targets.reshape((targets.shape[0], 1))
                #targets = targets.reshape((targets.shape[0], 1))

                for i in range(n_batches):
                    for param in model.parameters():
@@ -59,6 +62,8 @@ def model_train_test(data, model, device, n_epochs, n_batches, loss_function, le
                    loss = loss_function(outputs, targets[i * ytrainsize // n_batches: (i+1)*ytrainsize // n_batches])
                    loss.backward()
                    optimizer.step()
                    if scheduler is not None:
                        scheduler.step()
                    current_loss += loss.item()

                train_losses.append(current_loss / n_batches)
@@ -72,25 +77,47 @@ def model_train_test(data, model, device, n_epochs, n_batches, loss_function, le
                    inputs = inputs[shuffled_inds]
                    targets = targets[shuffled_inds]

                    targets = targets.reshape((targets.shape[0], 1))
#                     targets = targets.reshape((targets.shape[0], 1))

                    for i in range(n_batches):
                    for i in range(n_test_batches):
                        outputs = model(inputs[i * ytestsize // n_batches: (i+1)*ytestsize // n_batches])
                        loss = loss_function(outputs, targets[i * ytestsize // n_batches: (i+1)*ytestsize // n_batches])
                        current_loss += loss.item()

                    test_losses.append(current_loss / n_batches)
                    test_losses.append(current_loss / n_test_batches)
        if test_losses[-1] < lowest_loss:
            lowest_loss = test_losses[-1]
            if save_best:
                torch.save(model.state_dict(),path+'/../models/'+name+'/model.pth')
                
        if epoch >= cutoff_LR:
            scheduler.step()
            rate.append(scheduler.get_last_lr()[0])
        else:
            rate.append(learning_rate)
#         if epoch >= cutoff_LR:
#             scheduler.step()
#             rate.append(scheduler.get_last_lr()[0])
#         else:
#             rate.append(learning_rate)
        if verbose:
            stdout.write(f'\rEpoch: {epoch} | Train loss: {train_losses[-1]:.3e} | Test loss: {test_losses[-1]:.3e} ')
        if update_every is not None:
            if epoch % update_every == 0:
                epochs = np.arange(epoch+1)
                plt.semilogy(epochs, train_losses, label='Train')
                plt.semilogy(epochs, test_losses, label='Test')
                plt.legend()
                plt.xlabel('Epochs')
                plt.ylabel('Loss')
                plt.title('Train and Test Loss Across Train Epochs')
                plt.savefig(path+'/../models/'+name+'/losses.png')
                #plt.show()
                plt.close()
                
                if not save_best:
                    torch.save(model.state_dict(),path+'/../models/'+name+'/model.pth')

        
    if verbose:
        print('\nTraining complete - saving.')
    
    if not save_best:
        torch.save(model.state_dict(),path+'/../models/'+name+'/model.pth')

    epochs = np.arange(n_epochs)
+4 −4
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ def run_on_dataset(model, test_data, distances=None, n_batches=1, device=None, y

    Returns:
        tuple containing:
            output_data (double): root-mean-square-error between the truth and the model output for this dataset.
            output_data (double): Neural network output for this dataset.
            total_time (double): total time taken by the model to process the input dataset.
                                 Only if `runtime` is set to True.
            per_point (double): mean time taken by the model per sample in the input dataset.
@@ -57,7 +57,7 @@ def run_on_dataset(model, test_data, distances=None, n_batches=1, device=None, y
        per_point = (et - st) / ydata.size

    output = np.concatenate(out)
    out_unnorm = unnorm(output, ref_mean=ymeanstd[0], ref_std=ymeanstd[1]).flatten()
    out_unnorm = unnorm(output, ref_mean=ymeanstd[0], ref_std=ymeanstd[1])

    if y_transform_fn is not None:
        out_unnorm = y_transform_fn(out_unnorm)
@@ -65,7 +65,7 @@ def run_on_dataset(model, test_data, distances=None, n_batches=1, device=None, y
    if distances is None:
        distances = np.ones(xdata.shape[0]) * 0.5
    
    out_unnorm *= (0.5/distances)
    out_unnorm *= (0.5/distances)[:,None]
    
    outputs = (out_unnorm,)

@@ -77,7 +77,7 @@ def run_on_dataset(model, test_data, distances=None, n_batches=1, device=None, y

def compute_rmse(comparison_sets):
    """
    Get the root-mean-square-error difference between two sets of data.
    Get the root-mean-square-error difference between two sets of ordered data.
    Args:
        comparison_sets (2-tuple): Tuple of the list of true values, followed by the list of predicted values.
    Returns:
+64 −0
Original line number Diff line number Diff line
import numpy as np
from pathlib import Path
from sys import stdout
import pandas as pd

outdir = '../emri_data/prograde/{}'
Path('../emri_data/prograde/').mkdir(parents=True, exist_ok=True)

injection_params = np.zeros(14)

cols = ['logM', 'logq', 'a', 'p0', 'e', 'Y0', 'thetaS', 'phiS', 'thetaK', 't', 'SCHW_SNR','KERR_SNR']

dataframe = pd.read_csv(outdir.format('samp_dataframe.csv'))
place = 0

total = int(1e4)  # we can stop any time before this, though.
per_batch = 100
batches = total // per_batch
print(dataframe)
while place < batches:
    stdout.write(
        f'\rWaveform sets generated: {place:d} out of {batches:d}, or {100 * place / (batches - 1):.2f}% done. ')
    stdout.flush()
    
    i = 0
    this_chunk = np.zeros((per_batch, 12))

    while i < per_batch:
        stdout.write(
            f'\rWaveform sets generated: {place + 1:d} out of {batches:d}, or {100 * place / (batches - 1):.2f}% done. Set progress: {100 * i / (per_batch - 1):.2f}%')
        stdout.flush()

        # update injection parameters
        injection_params[0] = 10 ** np.random.uniform(np.log10(8e4), np.log10(5e7))
        injection_params[1] = injection_params[0] / (10 ** np.random.uniform(np.log10(5e4), np.log10(5e7)))
        if injection_params[1] < 0.1:
            continue
        injection_params[2] = np.random.uniform(0.01, 0.9999)
        injection_params[4] = np.random.uniform(0.08, 0.5)
        injection_params[5] = np.random.uniform(0.5,0.99)
        injection_params[6] = 0.5  # fiducial value (we will rescale for the others)
        tplunge = 0
        injection_params[7] = np.arccos(np.random.uniform(-1,1))#np.random.uniform(0, np.pi)
        injection_params[9] = np.arccos(np.random.uniform(-1,1))#np.random.uniform(0, np.pi)
        injection_params[8] = np.random.uniform(0, 2 * np.pi)
        
        injection_params[3] = 0

        this_chunk[i, 0] = np.log10(injection_params[0])
        this_chunk[i, 1] = np.log10(injection_params[0] / injection_params[1])
        this_chunk[i, 2:6] = injection_params[2:6]
        this_chunk[i, 6:9] = injection_params[7:10]
        this_chunk[i, 9] = tplunge
        i += 1
    
    # 10, 11 are zeros already so don't need to do anything
    
    # cache our progress
    little_dataframe = pd.DataFrame(this_chunk, columns=cols)
    dataframe = dataframe.append(little_dataframe, ignore_index=True)

    dataframe.to_csv(outdir.format('samp_dataframe.csv'), index=False)

    place += 1
+163 −0
Original line number Diff line number Diff line
import cupy
import numpy as np
from few.waveform import GenerateEMRIWaveform, EMRIInspiral
from utility import snr, brentq_p_at_t
from pathlib import Path
from sys import stdout
import cupy as cp
from scipy.constants import year
import time
import pandas as pd

mpool = cupy.get_default_memory_pool()
mpool2 = cupy.get_default_pinned_memory_pool()

outdir = '../emri_data/schwarz_negY_fix/{}'
Path('../emri_data/schwarz_negY_fix/').mkdir(parents=True, exist_ok=True)

use_gpu = True
use_schwarz_separatrix = False  # set to False for negative Y, as Kerr separatrix exceeds Schwarz in this case

kerr = GenerateEMRIWaveform(
    "Pn5AAKWaveform",
    sum_kwargs=dict(pad_output=True),
    inspiral_kwargs={"max_init_len": int(1e6), "enforce_schwarz_sep": use_schwarz_separatrix},
    use_gpu=use_gpu,
    return_list=True,
)

kerr_not_list = GenerateEMRIWaveform(
    "Pn5AAKWaveform",
    sum_kwargs=dict(pad_output=True),
    inspiral_kwargs={"max_init_len": int(1e6), "enforce_schwarz_sep": use_schwarz_separatrix},
    use_gpu=use_gpu,
    return_list=False,
)

# Max waveform duration and sample spacing
T = 4  # years
Tsec = T * year
dt = 15.0

# for SNR and waveform calculations
inner_product_kwargs = dict(dt=dt, PSD="cornish_lisa_psd", use_gpu=True)
waveform_kwargs = {"T": T, "dt": dt}

# define our injection set
nM = 6  # 7
nq = 5  # 5
na = 4  # 4
ne = 4  # 4
ny = 4  # 4
nTheta = 4  # 5
nPhi = 5  # 8
nTimes = 5  # 8
nDists = 10  # dont change

Mvals = np.logspace(np.log10(8e4), np.log10(5e7), nM)
qvals = np.logspace(np.log10(5e4), np.log10(5e7), nq)
avals = np.linspace(0.5, 0.95, na)
evals = np.linspace(0.08, 0.5, ne)
Y0vals = np.linspace(0.5, 0.99, ny)

dVals = np.linspace(0, 5, nDists + 1)[1:]
dist = dVals[1]  # our dummy distance

tplVals = np.linspace(4, 0, nTimes + 1)[:-1]  # we can fill in the last value here
thetaVals = np.linspace(0, np.pi, nTheta + 1)[:-1]  # we can fill in the last value here
phiVals = np.linspace(0, 2 * np.pi, nPhi + 1)[:-1]  # we can fill in the last value here

# initialise trajectory module
traj = EMRIInspiral(func="pn5",inspiral_kwargs={"max_init_len": int(1e9)}, enforce_schwarz_sep=use_schwarz_separatrix)

traj_inds = np.array([0, 1, 2, 4, 5, 11, 12, 13]).astype(np.int32)  # parameter indices needed for trajectory calc

totforms = nM * nq * na * ne * ny
num_wform_samples = int(T * year / dt)

inp_angles = []
for num1 in thetaVals:
    for num2 in thetaVals:
        for num3 in phiVals:
            inp_angles.append([num1, num2, num3])


# np.save(outdir.format('angular_param_perms.npy'),arr=np.array(inp_angles))


def wform_snr_at_t(wform, plunge_time):
    ind_to_cut = int((1 - plunge_time / 4) * num_wform_samples)
    cut_waveform = wform.copy()[ind_to_cut:]  # truncate waveforms (after a copy operation)
    snrval = snr([cp.array(cut_waveform.real), cp.array(cut_waveform.imag)],
                 **inner_product_kwargs).get()  # need to send waveform to gpu for SNR calc
    return snrval


cols = ['logM', 'logq', 'a', 'p0', 'e', 'Y0','verdict']
try:  # pick up where we left off.
    dataframe = pd.read_csv(outdir.format('grid_dataframe.csv'))
    place = np.load(outdir.format('grid_position.npy')).item()
    
except:
    dataframe = pd.DataFrame(columns=cols)
    place = -1

'''
We now iterate through the grid dimensions. To divide things up, we process the extrinsic parameters in a block before saving to the array.
This is a nice balance between caching data (in case of error) and not spending too long appending to the dataframe.
'''

injection_params = np.zeros(14)
for i in range(nM):
    for j in range(nq):
        for k in range(na):
            for l in range(ne):
                for m in range(ny):
                    num = i * nq * na * ne * ny + j * na * ne * ny + k * ne * ny + l * ny + m
                    if num <= place:
                        continue
                    stdout.write(
                        f'\rWaveform sets generated: {num + 1:d} out of {totforms:d}, or {100 * num / (totforms - 1):.2f}% done.')
                    stdout.flush()

                    # update injection parameters
                    injection_params[0] = Mvals[i]
                    injection_params[1] = injection_params[0] / qvals[j]
                    injection_params[2] = avals[k]
                    injection_params[4] = evals[l]
                    injection_params[5] = Y0vals[m]
                    injection_params[6] = dist

                    root = brentq_p_at_t(traj, T, traj_args=np.take(injection_params, traj_inds).tolist(),
                                         traj_kwargs={'max_init_len': int(1e9)},
                                         kerr_separatrix=not use_schwarz_separatrix,
                                         xtol=1e-6)  # find initial semi-latus rectum for T year orbit
                    injection_params[3] = root

                    out_list = []
                    wave_time = 0
                    process_time = 0

                    this_chunk = np.zeros(7)
                    this_chunk[:, 0] = np.log10(injection_params[0])
                    this_chunk[:, 1] = np.log10(qvals[j])
                    this_chunk[:, 2:6] = injection_params[2:6]

                    wave_start = time.perf_counter()
                    check_sig = kerr_not_list(*injection_params, **waveform_kwargs).get()
                    wave_end = time.perf_counter()
                    wave_time += wave_end - wave_start

                    wavegen_times.append(wave_time)
                    stdout.write(f' done. Processing...')
                    stdout.flush()

                    little_dataframe = pd.DataFrame(this_chunk, columns=cols)
                    dataframe = dataframe.append(little_dataframe, ignore_index=True)
                    process_times.append(process_time)

                    # cache our progress
                    dataframe.to_csv(outdir.format('grid_dataframe.csv'), index=False)
                    np.save(outdir.format('grid_waveform_times.npy'), arr=np.array(wavegen_times))
                    np.save(outdir.format('grid_processing_times.npy'), arr=np.array(process_times))
                    np.save(outdir.format('grid_position.npy'), arr=np.array(num))
Loading