diff --git a/cvae.py b/cvae.py
index fde656fc561954ae1c668cb4e961abf1d1d7089d..6d89d91834b25cef4478dd6c9abd3819c96ae36a 100644
--- a/cvae.py
+++ b/cvae.py
@@ -51,7 +51,9 @@ class CVAE(nn.Module):
         self.decoder_conv, self.decoder_lin = self.create_network("d", self.input_dim, self.par_dim, append_dim=self.latent_dim, fc_layers = self.fc_layers, conv_layers = self.conv_layers)
 
     def fix_qencoder(self,grad = True):
-
+        """
+        fix the q encoder weights so that they are not updated in training
+        """
         for param in self.qencoder_conv.parameters():
             param.requires_grad_(grad)
@@ -64,6 +66,13 @@ class CVAE(nn.Module):
             param.requires_grad_(grad)
 
     def create_network(self, name, input_dim, output_dim, append_dim=0, mean=True, variance=True, fc_layers=[], conv_layers=[]):
+        """
+        create a convolutional neural network
+        conv_layers: list
+            format - [[num_filters_1, filter_size_1], [num_filters_2, filter_size_2], ...]
+        fc_layers: list
+            format - [num_neurons_1, num_neurons_2, ...]
+        """
         conv_network = nn.Sequential()
         lin_network = nn.Sequential()
         layer_out_sizes = []
@@ -72,21 +81,26 @@ class CVAE(nn.Module):
         inchannels = 1
         insize = self.input_dim
         for i in range(num_conv):
+            # pad by half the filter width
             padding = int(conv_layers[i][1]/2.)
+            # add convolutional, activation and maxpooling layers
             conv_network.add_module("r_conv{}".format(i),module = nn.Conv1d(inchannels, conv_layers[i][0], conv_layers[i][1], stride = self.stride,padding=padding))
             conv_network.add_module("act_r_conv{}".format(i), module = self.activation)
             conv_network.add_module("pool_r_conv{}".format(i),module = self.maxpool)
+            # calculate the output size of the conv layer
             outsize = int(self.conv_out_size(insize, padding, 1, conv_layers[i][1], self.stride)/self.maxpool_size)
             layer_out_sizes.append((conv_layers[i][0],outsize))
             insize = outsize
             inchannels = conv_layers[i][0]
 
+        # calculate the input size to the fc layers
         lin_input_size = np.prod(layer_out_sizes[-1]) if num_conv > 0 else self.input_dim
         if append_dim:
             lin_input_size += append_dim
         layer_size = int(lin_input_size)
         # hidden layers
         for i in range(num_fc):
+            # add fc layer and activation
             lin_network.add_module("r_lin{}".format(i),module=nn.Linear(layer_size, fc_layers[i]))
             lin_network.add_module("act_r_lin{}".format(i),module=self.activation)
             layer_size = fc_layers[i]
@@ -160,7 +174,7 @@ class CVAE(nn.Module):
         return mu_par, mu_q, log_var_q, mu_r, log_var_r
 
 def test_latent(model, y, par, num_samples):
-    """generating samples when testing the network """
+    """generate samples when testing the network; returns the latent samples"""
    num_data = y.size(0)
    x_samples = []
    # encode the data into latent space with r1(z,y)
diff --git a/run_cvae.py b/run_cvae.py
index e088f184c749fc25cfdfceb2fa2b71f32a34ac27..bced86d3fef4ede0b3cb2963cd6aeab2b8629fad 100755
--- a/run_cvae.py
+++ b/run_cvae.py
@@ -75,7 +75,6 @@ def train_batch(epoch, model, optimizer, device, batch, labels, ramp=1.0, train
 
     # get the KL loss funtion
     kl_loss_b = KL(mu_q,log_var_q,mu_r,log_var_r)
-    #kl_loss_b = KL(mu_q,log_var_q,mu_r,log_var_r)
 
     # get the L loss (evaluate gaussian at true parameter)
     L_loss_b = log_lik(labels, mu_x)
@@ -118,6 +117,7 @@ def train(model, device, epochs, train_iterator, learning_rate, validation_itera
         elif epoch>ramp_end:
             ramp = 1.0
         #adjust_learning_rate(learning_rate, optimizer, epoch)
 
+        # training
         for local_batch, local_labels in train_iterator:
             # Transfer to GPU
@@ -126,6 +126,7 @@ def train(model, device, epochs, train_iterator, learning_rate, validation_itera
             train_losses.append(train_loss)
             kl_losses.append(kl_loss)
             lik_losses.append(lik_loss)
 
+        # validation
         for val_batch, val_labels in validation_iterator:
             # Transfer to GPU
@@ -141,6 +142,7 @@ def train(model, device, epochs, train_iterator, learning_rate, validation_itera
             loss_plot(save_dir, train_losses, kl_losses, lik_losses, val_losses, val_kl_losses, val_lik_losses)
 
         if do_test or epoch == epochs - 1:
+            # test plots
             lat_fig = latent_plot(pars, local_labels.cpu().numpy())
             lat_fig.savefig('{}/latent_epoch{}.png'.format(save_dir,epoch))
             samples,tr,zr_sample,zq_sample = run(model,test_iterator,num_samples,device=device)
@@ -172,7 +174,10 @@ def run(model,test_it,num_samples = 500,device="cpu"):
     return samples,truths.cpu().numpy(), zr_samples,zq_samples
 
 def latent_samp_fig(zr_sample, zq_sample, truths):
-
+    """
+    plot latent space samples as histograms
+    """
+    # get the shape of the latent space
     zrshape = np.shape(zr_sample)
     if len(zrshape) == 2:
         num_latent = 1
@@ -186,6 +191,7 @@ def latent_samp_fig(zr_sample, zq_sample, truths):
     for inj in rint:
         color = "C0" if truths[inj][0] == 0 else "C1"
         if num_latent == 1:
+            # histogram 1000 random samples from the latent space
             randint = np.random.randint(0,len(zr_sample[inj]),size = 1000)
             hstr = ax[0].hist(zr_sample[inj][randint,0],bins=30,color=color,histtype = "step")
             hstq = ax[1].hist(zq_sample[inj][randint,0],bins=30,color=color,histtype="step")
@@ -357,26 +363,31 @@ def run_cvae_lin(device,do_train=True,do_test=True,
                  learning_rate=1e-4,train_epochs=500, num_test=1000, num_samples=1000,noise_var=0.1,
                  length=25,batch=256,num_train=1e4, datatype="line",dropout=0.0,basedir=None,test_dir = None,ramp_start=-1,ramp_end=-1):
+
+    # load the training and validation data
     train_data,validation_data = load_train_data(datatype = datatype, length=length, noise_var = noise_var, num_train=num_train, num_val = int(0.1*num_train))
 
     train_iterator = DataLoader(train_data, batch_size=batch, shuffle=True)
     validation_iterator = DataLoader(validation_data, batch_size=batch, shuffle=True)
 
-    # precompute the true odds
+    # load the precomputed true odds
     with open(os.path.join(test_dir, "modelclass.pkl"),"rb") as f:
         test_data = pickle.load(f)
+    # convert data to one-hot encoding !! this is temporary, will be removed when nested sampling is rerun !!
     for ind in range(len(test_data.data)):
         temp_onehot = np.zeros(2)
         temp_onehot[int(test_data.data[ind][1])] = 1
         test_data.data[ind] = test_data.data[ind][0], temp_onehot
+
     test_iterator = DataLoader(test_data, batch_size=num_test, shuffle=False) # single batch
-
-    # precompute the true odds
+
+    # load the precomputed odds
     with open(os.path.join(test_dir, "nest_odds.pkl"),"rb") as f:
         nest_odds = pickle.load(f)
 
     true_mod = np.array([float(np.array(i[1]).argmax()) for i in test_data.data])
 
+    # define the fully connected and convolutional layers
     fc_layers = [num_fc_neurons for i in range(num_fc_layers)]
     conv_layers = [(num_conv_filters, conv_size) for i in range(num_conv_layers)]
@@ -402,6 +413,7 @@ def run_cvae_lin(device,do_train=True,do_test=True,
     # train the network
     if do_train:
         losses, kl_loss, l_loss, val_loss, val_kl_loss, val_l_loss= train(model, device, train_epochs, train_iterator, learning_rate, validation_iterator, test_iterator, nest_odds, true_mod, num_samples, save_dir,ramp_start=ramp_start,ramp_end=ramp_end)
+        # output the loss plot
         loss_plot(save_dir, losses, kl_loss, l_loss, val_loss, val_kl_loss, val_l_loss) # make the loss plot
         torch.save(model, os.path.join(save_dir,"model.pt")) # save the model
     else:
@@ -416,7 +428,9 @@ def run_cvae_lin(device,do_train=True,do_test=True,
     print('{}: saved results to {}'.format(time.asctime(),save_dir))
 
 def optimize_hyperparameters(save_root, datatype, num_train, length, noise_var, train_epochs, num_samples, device, test_dir, num_iterations, run_type = "leakyrelu_kl"):
-
+    """
+    optimise the network hyperparameters using a Gaussian process
+    """
     xs = np.linspace(0,1.4,length)
     dropout=0.0
     stride = 1
@@ -556,7 +570,9 @@ def optimize_hyperparameters(save_root, datatype, num_train, length, noise_var,
 
 def brute_search(device,length=None,noise_var=None,train_epochs=None,datatype=None,num_train=None,ramp_start=-1,ramp_end=-1):
-
+    """
+    optimise the network using a brute-force search
+    """
     # data parameters
     if num_train is None:
         num_train = int(1e4)
@@ -580,8 +596,8 @@ def brute_search(device,length=None,noise_var=None,train_epochs=None,datatype=No
     num_parameters = 2
 
     # dnn network
-    num_hidden = [0]#[0,1,2]
-    num_neurons = [8]#[8,16,32]
+    num_hidden = [1]#[0,1,2]
+    num_neurons = [16]#[8,16,32]
     dropout = [0.0]
 
     conv_size = [2]#[3,5]
@@ -662,7 +678,7 @@ if __name__ == "__main__":
     ramp_start = 100
     ramp_end = 1000
 
-    num_parameters = 1
+    num_parameters = 2
     num_iterations = 20
     root_dir = '/home/joseph.bayley/public_html/cvae_odds/cvae/'
     test_dir = "/home/joseph.bayley/data/cvae_odds/test/"
@@ -684,5 +700,34 @@ if __name__ == "__main__":
                              num_iterations = num_iterations,
                              ramp_start=ramp_start,
                              ramp_end=ramp_end)
-    """
+    brute_search(device=device,length=ln,noise_var=nv,train_epochs=t_ep,datatype=datatype,num_train=num_train,ramp_start=ramp_start,ramp_end=ramp_end)
+    """
+
+    run_cvae_lin(device=device,
+                 latent_dim=1,
+                 par_dim=num_parameters,
+                 num_fc_layers=0,
+                 num_fc_neurons=8,
+                 num_conv_layers=2,
+                 num_conv_filters=8,
+                 conv_size=2,
+                 maxpool_size=1,
+                 stride=1,
+                 learning_rate=1e-5,
+                 train_epochs=t_ep,
+                 length=ln,
+                 batch=256,
+                 num_train=num_train,
+                 num_test=num_test,
+                 datatype=datatype,
+                 do_train=True,
+                 do_test=True,
+                 dropout=0.0,
+                 noise_var=nv,
+                 num_samples=10000,
+                 basedir=root_dir,
+                 test_dir = test_dir,
+                 ramp_start = ramp_start,
+                 ramp_end=ramp_end)
+
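
A note on the KL term used in `train_batch`: `KL(mu_q, log_var_q, mu_r, log_var_r)` is defined outside this diff, but for two diagonal Gaussians q and r it has a standard closed form. A minimal sketch of what such a helper looks like; the sum-over-latents / mean-over-batch reduction is an assumption, not taken from the patch:

```python
import torch

def KL(mu_q, log_var_q, mu_r, log_var_r):
    """Closed-form KL(q || r) between diagonal Gaussians (sketch only)."""
    var_q = torch.exp(log_var_q)
    var_r = torch.exp(log_var_r)
    # per-dimension KL( N(mu_q, var_q) || N(mu_r, var_r) )
    kl = 0.5 * (log_var_r - log_var_q + (var_q + (mu_q - mu_r) ** 2) / var_r - 1.0)
    # assumed reduction: sum over latent dimensions, mean over the batch
    return kl.sum(dim=-1).mean()

# unit variances and a unit mean shift give KL = 0.5 per dimension
mu_q = torch.zeros(4, 1)
log_var_q = torch.zeros(4, 1)
mu_r = torch.ones(4, 1)
log_var_r = torch.zeros(4, 1)
print(KL(mu_q, log_var_q, mu_r, log_var_r))  # tensor(0.5000)
```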
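
The new `create_network` docstring documents the `conv_layers`/`fc_layers` formats, and the loop divides `conv_out_size(...)` by `maxpool_size` to track shapes into the fully connected stack. Here is a standalone check of that bookkeeping using the standard Conv1d output-length formula; the argument order `(insize, padding, dilation, kernel, stride)` is inferred from the call site, and the example layer spec mirrors the `run_cvae_lin` call added in `__main__`:

```python
import numpy as np

def conv_out_size(insize, padding, dilation, kernel, stride):
    # standard Conv1d output-length formula (as in the PyTorch docs)
    return int((insize + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1)

# conv_layers format: [[num_filters_1, filter_size_1], ...]
conv_layers = [[8, 2], [8, 2]]           # num_conv_layers=2, num_conv_filters=8, conv_size=2
insize, stride, maxpool_size = 25, 1, 1  # length=25, stride=1, maxpool_size=1 as in __main__
layer_out_sizes = []
for num_filters, filter_size in conv_layers:
    padding = int(filter_size / 2.)      # pad by half the filter width
    outsize = int(conv_out_size(insize, padding, 1, filter_size, stride) / maxpool_size)
    layer_out_sizes.append((num_filters, outsize))
    insize = outsize

# flattened size feeding the first fc layer: np.prod of (channels, length)
print(layer_out_sizes, np.prod(layer_out_sizes[-1]))
```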
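
On the ramp logic in `train`: only the `elif epoch > ramp_end: ramp = 1.0` branch is visible in this diff, with `ramp_start = 100` and `ramp_end = 1000` set in `__main__`. A plausible reading of the full schedule, assuming linear annealing of the KL weight between the two epochs (the interpolation and the disabled case are assumptions):

```python
def kl_ramp(epoch, ramp_start, ramp_end):
    """Assumed linear KL annealing weight; only the epoch > ramp_end
    branch appears in the patch itself."""
    if ramp_start < 0 or ramp_end < 0:
        return 1.0                # ramping disabled (the -1 defaults)
    if epoch < ramp_start:
        return 0.0                # KL term switched off early in training
    elif epoch > ramp_end:
        return 1.0                # full KL weight, as in the visible branch
    return (epoch - ramp_start) / (ramp_end - ramp_start)

assert kl_ramp(550, 100, 1000) == 0.5   # halfway through the ramp
```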
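
The docstring added to `optimize_hyperparameters` says the hyperparameters are optimised with a Gaussian process, but the body is outside this diff. If it follows the usual Bayesian-optimisation pattern, it would look roughly like the sketch below; `gp_minimize` from scikit-optimize, the search-space names, and the `train_and_validate` stand-in are all illustrative assumptions rather than the repository's actual code:

```python
import math
from skopt import gp_minimize
from skopt.space import Integer, Real

# illustrative search space; dimension names and ranges are assumptions
space = [Integer(0, 2, name="num_fc_layers"),
         Integer(8, 32, name="num_fc_neurons"),
         Real(1e-5, 1e-3, prior="log-uniform", name="learning_rate")]

def train_and_validate(num_fc_layers, num_fc_neurons, learning_rate):
    # stand-in for a real training run (run_cvae_lin / train); a smooth
    # toy objective so the sketch runs end to end
    return ((num_fc_layers - 1) ** 2
            + (num_fc_neurons - 16) ** 2 / 256.0
            + abs(math.log10(learning_rate) + 4))

def objective(params):
    num_fc_layers, num_fc_neurons, learning_rate = params
    return train_and_validate(num_fc_layers, num_fc_neurons, learning_rate)

# num_iterations = 20 in __main__
result = gp_minimize(objective, space, n_calls=20, random_state=0)
print(result.x, result.fun)
```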
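
`brute_search` keeps each hyperparameter as a list (`num_hidden = [1]`, `num_neurons = [16]`, ..., with the wider grids left in the trailing comments), which points to an exhaustive sweep over their Cartesian product. A minimal sketch of that pattern; the loop body inside `brute_search` is not shown in this diff, so the per-configuration training call is only indicated:

```python
from itertools import product

# grids as in brute_search; the commented values are the wider sweep
num_hidden = [1]     # [0, 1, 2]
num_neurons = [16]   # [8, 16, 32]
dropout = [0.0]
conv_size = [2]      # [3, 5]

for nh, nn, dr, cs in product(num_hidden, num_neurons, dropout, conv_size):
    # one training run per configuration, e.g. via run_cvae_lin(...)
    print("num_hidden={}, num_neurons={}, dropout={}, conv_size={}".format(nh, nn, dr, cs))
```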