Commit cd1ebac3 authored by Christian Chapman-Bird's avatar Christian Chapman-Bird

Main commit

parent 60c10176
**/_*
import torch
import torch.nn as nn
from torch.nn.init import xavier_uniform_
from MLtools.utilities import get_script_path
class LinearModel(nn.Module):
def __init__(self, in_features, out_features, neurons, n_layers, activation, name):
super().__init__()
layers = [nn.Linear(in_features, neurons[0]), activation()]
for i in range(n_layers - 1):
layers.append(nn.Linear(neurons[i], neurons[i + 1]))
layers.append(activation())
layers.append(nn.Linear(neurons[-1], out_features))
self.layers = nn.Sequential(*layers)
self.layers.apply(self.init_weights)
self.name = name
def forward(self, x):
return self.layers(x)
def init_weights(self, m):
if isinstance(m, nn.Linear):
xavier_uniform_(m.weight)
def create_mlp(input_features, output_features, neurons, layers, activation, model_name, device=None):
if layers < 2:
raise RuntimeError('You need at least two layers for an MLP.')
    if isinstance(neurons, list):
        if len(neurons) != layers:
            raise RuntimeError('Length of neuron vector does not equal number of hidden layers.')
    else:
        # a single integer is broadcast to the same width for every hidden layer
        neurons = [neurons] * layers
    model = LinearModel(input_features, output_features, neurons, layers, activation, model_name)
    hypers = [input_features, output_features, neurons, layers]
    with open(get_script_path() + '/' + model_name + '_hypers.txt', "w") as f:
        f.write('\t'.join(str(h) for h in hypers))  # join requires strings
if device is not None:
model = model.to(device)
return model
def load_mlp(model_name, activation):
    from ast import literal_eval  # the neuron list is stored as its repr in the hypers file
    with open(get_script_path() + '/' + model_name + '_hypers.txt', "r") as f:
        pms = f.readline().split('\t')
    in_features, out_features, layers = int(pms[0]), int(pms[1]), int(pms[3])
    neurons = literal_eval(pms[2])
    model = LinearModel(in_features, out_features, neurons, layers, activation, model_name)
    return model
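# A minimal usage sketch (not part of the original module; the widths, layer count, model name
# and device below are placeholders for illustration only):
#
#     model = create_mlp(input_features=9, output_features=1, neurons=[128, 64, 32],
#                        layers=3, activation=nn.Tanh, model_name='example', device='cuda:0')
#     ... train, save the weights with torch.save(model.state_dict(), ...) ...
#     model = load_mlp('example', nn.Tanh)
#     model.load_state_dict(torch.load(get_script_path() + '/example_model.pth'))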
import torch
from torch import nn
from torch.nn.init import xavier_uniform_
import numpy as np
import matplotlib.pyplot as plt
from EMRI_DET.MLtools.utilities import norm, norm_inputs, unnorm_inputs, unnorm, get_script_path
from EMRI_DET.MLtools.nn.model_creation import create_mlp
from sys import stdout
def model_train_test(data, model, device, n_epochs, n_batches, loss_function, learning_rate, verbose=False):
xtrain, ytrain, xtest, ytest = data
name = model.name
path = get_script_path()
np.save(path+'/'+name+'_xdata_mean_std.npy',np.array([xtrain.mean(axis=0), xtrain.std(axis=0)]))
np.save(path+'/'+name+'_ydata_mean_std.npy',np.array([ytrain.mean(), ytrain.std()]))
xtest = torch.from_numpy(norm_inputs(xtest, xtrain)).to(device).float()
ytest = torch.from_numpy(norm(ytest, ytrain)).to(device).float()
xtrain = torch.from_numpy(norm_inputs(xtrain, xtrain)).to(device).float()
ytrain = torch.from_numpy(norm(ytrain, ytrain)).to(device).float()
ytrainsize = len(ytrain)
ytestsize = len(ytest)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.97) # to settle into the loss 'valley'
train_losses = []
test_losses = []
rate = []
# Run the training loop
datasets = {"train": [xtrain, ytrain], "test": [xtest, ytest]}
cutoff_LR = n_epochs - 50
    for epoch in range(n_epochs):
# Print epoch
for phase in ['train','test']:
if phase == 'train':
model.train(True)
shuffled_inds = torch.randperm(ytrainsize)
# Set current loss value
current_loss = 0.0
# Iterate over the DataLoader for training data
# Get and prepare inputs
inputs, targets = datasets[phase]
inputs = inputs[shuffled_inds]
targets = targets[shuffled_inds]
targets = targets.reshape((targets.shape[0], 1))
for i in range(n_batches):
for param in model.parameters():
param.grad = None
outputs = model(inputs[i * ytrainsize // n_batches:(i+1)*ytrainsize // n_batches])
loss = torch.sqrt(loss_function(outputs, targets[i * ytrainsize // n_batches: (i+1)*ytrainsize // n_batches]))
loss.backward()
optimizer.step()
current_loss += loss.item()
train_losses.append(current_loss / n_batches)
else:
with torch.no_grad():
model.train(False)
shuffled_inds = torch.randperm(ytestsize)
current_loss = 0.0
inputs, targets = datasets[phase]
inputs = inputs[shuffled_inds]
targets = targets[shuffled_inds]
targets = targets.reshape((targets.shape[0], 1))
for i in range(n_batches):
outputs = model(inputs[i * ytestsize // n_batches: (i+1)*ytestsize // n_batches])
loss = torch.sqrt(loss_function(outputs, targets[i * ytestsize // n_batches: (i+1)*ytestsize // n_batches]))
current_loss += loss.item()
test_losses.append(current_loss / n_batches)
if epoch >= cutoff_LR:
scheduler.step()
rate.append(scheduler.get_last_lr()[0])
else:
rate.append(learning_rate)
if verbose:
stdout.write(f'\rTrain loss:{train_losses[-1]} | Test loss:{test_losses[-1]} | ')
if verbose:
print('\nTraining complete - saving.')
torch.save(model.state_dict(),path+'/'+name+'_model.pth')
epochs = np.arange(n_epochs)
plt.semilogy(epochs, train_losses, label='Train')
plt.semilogy(epochs, test_losses, label='Test')
plt.legend()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Train and Test Loss Across Train Epochs')
plt.savefig(path+'/'+name+'_losses.png')
#plt.show()
plt.close()
import os
import sys
def get_script_path():
return os.path.dirname(os.path.realpath(sys.argv[0]))
def norm(dataframe, ref_dataframe=None, ref_mean=None, ref_std=None):
if ref_dataframe is not None:
df_norm = (dataframe - ref_dataframe.mean())/ref_dataframe.std()
elif ref_mean is not None and ref_std is not None:
df_norm = (dataframe - ref_mean)/ref_std
else:
raise RuntimeError("Either a reference dataset or a reference mean + std must be supplied.")
return df_norm
def norm_inputs(dataframe, ref_dataframe=None, ref_mean=None, ref_std=None):
if ref_dataframe is not None:
df_norm = (dataframe - ref_dataframe.mean(axis=0)) / ref_dataframe.std(axis=0)
elif ref_mean is not None and ref_std is not None:
df_norm = (dataframe - ref_mean) / ref_std
else:
raise RuntimeError("Either a reference dataset or a reference mean + std must be supplied.")
return df_norm
def unnorm(dataframe_norm, ref_dataframe=None, ref_mean=None, ref_std=None):
if ref_dataframe is not None:
dataframe = (dataframe_norm * ref_dataframe.std()) + ref_dataframe.mean()
elif ref_mean is not None and ref_std is not None:
dataframe = (dataframe_norm * ref_std) + ref_mean
else:
raise RuntimeError("Either a reference dataset or a reference mean + std must be supplied.")
return dataframe
def unnorm_inputs(dataframe_norm, ref_dataframe=None, ref_mean=None, ref_std=None):
if ref_dataframe is not None:
dataframe = (dataframe_norm * ref_dataframe.std(axis=0)) + ref_dataframe.mean(axis=0)
elif ref_mean is not None and ref_std is not None:
dataframe = (dataframe_norm * ref_std) + ref_mean
else:
raise RuntimeError("Either a reference dataset or a reference mean + std must be supplied.")
return dataframe
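# A minimal sketch (not part of the original module) of re-using the mean/std arrays that the
# training script saves via np.save(... '_xdata_mean_std.npy', ...); the file name is a placeholder:
#
#     stats = np.load('name_xdata_mean_std.npy')   # row 0: per-feature mean, row 1: per-feature std
#     x_norm = norm_inputs(x_new, ref_mean=stats[0], ref_std=stats[1])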
%% Cell type:code id:e186ab86 tags:
``` python
import os
import torch
import torch.nn as nn
from torch.nn.init import xavier_uniform_
import matplotlib.pyplot as plt
from scipy.interpolate import RegularGridInterpolator
from cupyx.scipy.ndimage import map_coordinates
import cupy as cp
import numpy as np
from data_generation import func_out
```
%% Cell type:markdown id:0afef590 tags:
# Interpolators vs. NNs: where do they meet up?
So far we have observed that, for a dataset of $M=10^6$ points, a neural network trained on uniform samples from the prior range outperforms classic interpolation schemes operating over an $n$-dimensional grid (where $n=9$ is the number of parameters, i.e. the dimensionality of the prior space). We expect that if we decrease $n$ while keeping $M$ constant, this will eventually reverse and the interpolators will outperform the network, even if the network is retrained on the new, lower-dimensional space.
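To see why the balance should shift with $n$: holding the total budget at $M$ points, a regular grid only affords $N \approx M^{1/n}$ nodes per dimension (the same calculation as `np.round(np.exp(np.log(M)/n))` used in `get_data` below), so the grid becomes very coarse in high dimensions. A quick sketch:
``` python
import numpy as np

M = 1e6
for n in range(2, 10):
    # grid nodes per dimension when an n-dimensional grid holds ~M points in total
    N = int(np.round(M ** (1 / n)))
    print(n, N)
```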
%% Cell type:code id:976dcc72 tags:
``` python
def cartesian_product_recursive(arrays, out=None):
arrays = [np.asarray(x) for x in arrays]
dtype = arrays[0].dtype
n = np.prod([x.size for x in arrays])
if out is None:
out = np.zeros([n, len(arrays)], dtype=dtype)
m = n // arrays[0].size
out[:, 0] = np.repeat(arrays[0], m)
if arrays[1:]:
cartesian_product_recursive(arrays[1:], out=out[0:m, 1:])
for j in range(1, arrays[0].size):
out[j * m : (j + 1) * m, 1:] = out[0:m, 1:]
return out
def get_data(M, n, sample_ranges):
# For a given M and n, we set a grid size to keep the number of samples ~constant.
# We output these as 'grid' set and a 'sample' set.
# Unused dimensions are taken to be constant-valued, with a value between the min and max.
    N = np.round(np.exp(np.log(M)/n)).astype(np.int64)  # number of grid nodes per dimension, so that N**n ~ M
ranges_kept = sample_ranges[:n,:]
fill_samps = np.mean(sample_ranges[n:,:],axis=1).tolist() # list for input
iterators = [np.linspace(nums[0],nums[1],N) for nums in ranges_kept]
combinations = cartesian_product_recursive(iterators)
xgrid = np.array(combinations)
ygrid = np.zeros(xgrid.shape[0])
for i, combo in enumerate(combinations):
ygrid[i] = func_out(*combo.tolist(),*fill_samps)
xsamp = np.zeros((M,n))
for i in range(n):
xsamp[:,i] = np.random.uniform(ranges_kept[i,0], ranges_kept[i,1], M)
ysamp = np.zeros(M)
for i,row in enumerate(xsamp):
ysamp[i] = func_out(*row.tolist(), *fill_samps)
xtest = np.zeros((M,n))
for i in range(n):
xtest[:,i] = np.random.uniform(ranges_kept[i,0], ranges_kept[i,1], M)
ytest = np.zeros(M)
for i,row in enumerate(xtest):
ytest[i] = func_out(*row.tolist(), *fill_samps)
return xgrid, ygrid, xsamp, ysamp, xtest, ytest, N
```
%% Cell type:code id:9583b6ea tags:
``` python
#Our prior space (params are M,q,a,e,Y,thetaS,phiS,thetaK,t)
sample_ranges=np.array([
[np.log10(8e4),np.log10(5e7)],
[np.log10(5e4),np.log10(5e7)],
[0.01,0.99],
[0.01,0.5],
[0.5,0.99],
[0,np.pi],
[0,2*np.pi],
[0,np.pi],
[0.5,4],
])
#xg, yg, xs, ys = get_data(int(1e6),9, sample_ranges)
```
%% Cell type:code id:15de4530 tags:
``` python
# now we define our network model.
class MLP(nn.Module):
def __init__(self, n_inputs):
super().__init__()
self.layers = nn.Sequential(
nn.Linear(n_inputs, 128),
nn.Tanh(),
nn.Linear(128, 64),
nn.Tanh(),
nn.Linear(64, 32),
nn.Tanh(),
nn.Linear(32, 16),
nn.Tanh(),
nn.Linear(16, 1),
)
self.layers.apply(self.init_weights)
def forward(self, x):
return self.layers(x)
def init_weights(self, m):
if isinstance(m, nn.Linear):
xavier_uniform_(m.weight)
def norm(dataframe, ref_dataframe):
    """Z-score normalise the input. The first argument is the dataframe to be normalised; the second is the
    reference dataframe whose mean and std define the normalisation, e.g. df['distance_x_SNR']."""
    df_norm = (dataframe - ref_dataframe.mean()) / ref_dataframe.std()
    return df_norm
def norm_inputs(dataframe, ref_dataframe):
df_norm = (dataframe - ref_dataframe.mean(axis=0))/ref_dataframe.std(axis=0)
return df_norm
def unnorm(dataframe_norm, ref_dataframe):
    """Undo the Z-score normalisation.
    The first argument is the normalised data (e.g. the model's output label column); the second is the
    corresponding original data, whose mean and std are needed, e.g. df['distance_x_SNR']."""
    dataframe = (dataframe_norm * ref_dataframe.std()) + ref_dataframe.mean()
    return dataframe
def unnorm_inputs(dataframe_norm, ref_dataframe):
    dataframe = (dataframe_norm * ref_dataframe.std(axis=0)) + ref_dataframe.mean(axis=0)
    return dataframe
```
%% Cell type:code id:d166b8d9 tags:
``` python
# we define our network train and test functions
def prepare_model(mlp,xtrain,ytrain,xtest,ytest):
# model training. note that xtrain,ytrain correspond to the SAMPLES, while xtest,ytest are the GRID
device = "cuda:0"
this_size = xtest.shape[1]
xtest = torch.from_numpy(norm_inputs(xtest, xtrain)).to(device).float()
ytest = torch.from_numpy(norm(ytest, ytrain)).to(device).float()
xtrain = torch.from_numpy(norm_inputs(xtrain, xtrain)).to(device).float()
ytrain = torch.from_numpy(norm(ytrain, ytrain)).to(device).float()
ytrainsize = len(ytrain)
ytestsize = len(ytest)
loss_function = nn.MSELoss()
LR = 1e-3
optimizer = torch.optim.Adam(mlp.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
train_losses = []
test_losses = []
rate = []
# Run the training loop
datasets = {"train": [xtrain, ytrain], "test": [xtest, ytest]}
num_epochs = 5000
nbatch_train = 50
nbatch_test = 1
cutoff_LR = 4900
for epoch in range(num_epochs):
print(f'Starting epoch {epoch + 1}')
for phase in ['train','test']:
if phase == 'train':
mlp.train(True)
shuffled_inds = torch.randperm(ytrainsize)
# Set current loss value
current_loss = 0.0
# Iterate over the DataLoader for training data
# Get and prepare inputs
inputs, targets = datasets[phase]
inputs = inputs[shuffled_inds]
targets = targets[shuffled_inds]
targets = targets.reshape((targets.shape[0], 1))
for i in range(nbatch_train):
for param in mlp.parameters():
param.grad = None
outputs = mlp(inputs[i * ytrainsize // nbatch_train: (i+1)*ytrainsize // nbatch_train])
loss = torch.sqrt(loss_function(outputs, targets[i * ytrainsize // nbatch_train: (i+1)*ytrainsize // nbatch_train]))
loss.backward()
optimizer.step()
current_loss += loss.item()
train_losses.append(current_loss / nbatch_train)
else:
with torch.no_grad():
mlp.train(False)
shuffled_inds = torch.randperm(ytestsize)
current_loss = 0.0
inputs, targets = datasets[phase]
inputs = inputs[shuffled_inds]
targets = targets[shuffled_inds]
targets = targets.reshape((targets.shape[0], 1))
for i in range(nbatch_test):
outputs = mlp(inputs[i * ytestsize // nbatch_test: (i+1)*ytestsize // nbatch_test])
loss = torch.sqrt(loss_function(outputs, targets[i * ytestsize // nbatch_test: (i+1)*ytestsize // nbatch_test]))
current_loss += loss.item()
test_losses.append(current_loss / nbatch_test)
if epoch >= cutoff_LR:
scheduler.step()
rate.append(scheduler.get_last_lr()[0])
else:
rate.append(LR)
print(f'Train loss: {train_losses[-1]}, Test loss: {test_losses[-1]}')
torch.save(mlp.state_dict(),f'./downscale_data/{this_size}_model.pth')
return mlp
```
%% Cell type:code id:f2206ce9 tags:
``` python
#for n in np.arange(9,1,-1):
for n in np.arange(9,1,-1):
print('Grid dimension: ',n)
xgrid, ygrid, xsamp, ysamp, xtest, ytest, N = get_data(int(1e6), n, sample_ranges)
np.save(f'./downscale_data/{n}_xtest.npy',arr=xtest)
np.save(f'./downscale_data/{n}_ytest.npy',arr=ytest)
xdata_in = ()
for i in range(xgrid.shape[1]):
xdata_in += (np.unique(xgrid[:,i]),)
yresh = (N,)
for i in range(n-1):
yresh += (N,)
y_in = ygrid.reshape(yresh)
interp = RegularGridInterpolator(xdata_in, y_in, method='linear',bounds_error=False)
nn_interp = RegularGridInterpolator(xdata_in, y_in, method='nearest',bounds_error=False)
new_values = interp(xtest)
new_values2 = nn_interp(xtest)
print('Linear + NN done')
np.save(f'./downscale_data/{n}_linear_out.npy',arr=new_values)
np.save(f'./downscale_data/{n}_nearest_out.npy',arr=new_values2)
xtest_in = []
norms = [listy[1] - listy[0] for listy in sample_ranges]
for i in range(ytest.size):
sub = []
for j in range(n):
sub.append((N-1)*(xtest[i, j] - sample_ranges[j][0])/norms[j])
xtest_in.append(sub)
xtest_in = np.asarray(xtest_in).T
map_values = map_coordinates(cp.array(y_in), cp.array(xtest_in), prefilter=True).get()
print('map_coords done')
np.save(f'./downscale_data/{n}_map_out.npy',arr=map_values)
device = 'cuda:0'
mlp = MLP(n).to(device)
#mlp = prepare_model(mlp,xsamp,np.log(ysamp),xgrid,np.log(ygrid)) # train
mlp.load_state_dict(torch.load(f'./downscale_data/{n}_model.pth'))
mlp.eval()
xsamp = np.load(f'./downscale_data/{n}_xsamp.npy')
ysamp = np.log(np.load(f'./downscale_data/{n}_ysamp.npy'))
test_input = torch.Tensor(xtest)
normed_input = norm_inputs(test_input, xsamp).float().to(device)
with torch.no_grad():
out = []
for i in range(20):
output = mlp(normed_input[i * ytest.size // 20 : (i+1)*ytest.size // 20])
out.append(output.detach().cpu().numpy())
output = np.concatenate(out)
out_unnorm = np.exp(unnorm(output, ysamp))
out_truth = out_unnorm.flatten()
np.save(f'./downscale_data/{n}_network_out.npy',arr=out_truth)
```
%% Output
Grid dimension: 9
Linear + NN done
map_coords done
OMP: Warning #96: Cannot form a team with 44 threads, using 42 instead.
OMP: Hint Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS), KMP_TEAMS_THREAD_LIMIT, and OMP_THREAD_LIMIT (if any are set).
Grid dimension: 8
Linear + NN done
map_coords done
Grid dimension: 7
Linear + NN done
map_coords done
Grid dimension: 6
Linear + NN done
map_coords done
Grid dimension: 5
Linear + NN done
map_coords done
Grid dimension: 4
Linear + NN done
map_coords done
Grid dimension: 3
Linear + NN done
map_coords done
Grid dimension: 2
Linear + NN done
map_coords done
%% Cell type:code id:f4481771 tags:
``` python
```
%% Cell type:code id:dd1f6d19 tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
```
%% Cell type:code id:8aced990 tags:
``` python
data_dir = './downscale_data/{}'
```
%% Cell type:code id:abc2e918 tags:
``` python
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(20,10), facecolor="w")
ax = ax.flatten()
i = 0
for n in range(2,10):
ydata = np.load(f'./downscale_data/{n}_ytest.npy')
lin = np.load(f'./downscale_data/{n}_linear_out.npy')
near = np.load(f'./downscale_data/{n}_nearest_out.npy')
mcoord = np.load(f'./downscale_data/{n}_map_out.npy')
network = np.load(f'./downscale_data/{n}_network_out.npy')
# ax[i].hist(np.log10(abs(lin-ydata)), bins='auto', label='Linear Interp.',histtype='step',density=True, color="blue")
# ax[i].hist(np.log10(abs(near-ydata)), bins='auto', label='Nearest Neighbour',histtype='step',density=True, color="orange")
# if n != 2:
# ax[i].hist(np.log10(abs(mcoord-ydata)), bins='auto', label='n-Cubic Spine interp.',histtype='step',density=True, color="green")
# else:
# temp = abs(mcoord-ydata)
# ax[i].hist(np.log10(temp[temp != 0]), bins='auto', label='n-Cubic Spine interp.',histtype='step',density=True, color="green")
# ax[i].hist(np.log10(abs(network-ydata)), bins='auto',label='NN', histtype='step', density=True, color="red")
if n > 5:
ax[i].hist(np.log10(near/ydata), bins='auto', label='Nearest Neighbour',histtype='step',density=False, color='orange')
if n > 2:
temp = mcoord/ydata
        ax[i].hist(np.log10(temp[temp != 0]), bins='auto', label='n-Cubic Spline interp.', histtype='step', density=False, color='green')
ax[i].hist(np.log10(lin/ydata), bins='auto', label='Linear Interp.',histtype='step',density=False, color='blue')
ax[i].hist(np.log10(network/ydata), bins='auto',label='NN', histtype='step', density=False, color='red')
ax[i].legend(loc='upper left')
ax[i].set_title(f'{n} dimensional parameter space')
i += 1
fig.suptitle('Network vs. Interpolators - log10(ratio)',fontsize=24)
#plt.xlabel('log10(|diff|) between truth and interpolated')
# plt.title('Neural Network (NN) performance vs. selected interpolation methods')
plt.show()
```
%% Output
%% Cell type:code id:8566ea0d tags:
``` python
sorty = np.argsort(ydata)
plt.plot(ydata[sorty],alpha=0.5)
plt.plot(network[sorty],alpha=0.5)
plt.plot(lin[sorty],alpha=0.5)
plt.plot(mcoord[sorty],alpha=0.5)
```
%% Output
[<matplotlib.lines.Line2D at 0x7fa2c29570d0>]
%% Cell type:code id:62eb37fb tags:
``` python
for n in range(2,10):
print(n,np.round(10**(6/n)))
```
%% Output
2 1000.0
3 100.0
4 32.0
5 16.0
6 10.0
7 7.0
8 6.0
9 5.0
%% Cell type:code id:0e249162 tags:
``` python
```
import numpy as np
from sys import stdout
"""
Generating mock data to test out function approximation methods.
Parameter space is 9-dimensional.
M: peaks around 5e5
q: linear then plateau
a: linear-ish increase (decrease at high M)
e: linear decrease
Y0: linear increase
qS, qK: (sin(x)+1.25)**2
phiS: (sin(2x)+1.25)**2
tpl: log increase
We don't really care about the overall normalisation of this distribution, but it should be of order 10 with
roughly +- 1 order of magnitude of variation across most of the space. We can fix it such that the M peak
corresponds to an SNR of ~200, and that should sort it out.
"""
def fM(lM):
return 10 * np.exp(-2 * (lM - 6) ** 2) + 0.1
def fq(lq):
#lq = np.log10(q)
return 3 / (lq - 4.1)
def fa(a):
return 0.2*a + 0.9
def fe(e):
return 1.1 - 0.2 * e
def fY(Y):
return Y
def fTh(th, phase):
return 0.4*(np.sin(2 * np.pi * th + phase)) ** 2 + 0.6
def fPh(ph, phase):
return 0.4*(np.sin(2 * np.pi * 2 * ph + phase) + 1) ** 2 + 0.6
def ft(t):
return t**0.5/4**0.5
def func_out(M, q, a, e, Y, qS, phiS, qK, tpl):
# return fM(M) + fq(q) + fa(a) + fe(e) + fY(Y) + fTh(qS, np.pi/3) + fTh(qK, np.pi/4) + fPh(phiS, 3*np.pi/8) + ft(tpl)
return fM(M) * fq(q) * fa(a) * fe(e) * fY(Y) * fTh(qS, np.pi / 3) * fTh(qK, np.pi / 4) * fPh(phiS,
3 * np.pi / 8) * ft(
tpl)
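# A minimal sanity-check sketch (not part of the original module): evaluate func_out at the mass
# peak lM = 6 and at mid-prior values for the remaining parameters; the test point is chosen
# purely for illustration.
#
#     print(func_out(6.0, 6.0, 0.5, 0.25, 0.75, np.pi / 2, np.pi, np.pi / 2, 2.0))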
def grid_sampling(grid_dims, ranges):
total = np.prod(grid_dims)
print(f'Total number of points: {total:.3e}')
def cartesian_product_recursive(arrays, out=None):
arrays = [np.asarray(x) for x in arrays]
dtype = arrays[0].dtype
n = np.prod([x.size for x in arrays])
if out is None:
out = np.zeros([n, len(arrays)], dtype=dtype)
m = n // arrays[0].size
out[:, 0] = np.repeat(arrays[0], m)
if arrays[1:]:
cartesian_product_recursive(arrays[1:], out=out[0:m, 1:])
for j in range(1, arrays[0].size):
out[j * m: (j + 1) * m, 1:] = out[0:m, 1:]
return out
def get_data(grid_dims, sample_ranges):
# get grid data, sampled data and some test data
print('Generating grid data')
iterators = [np.linspace(nums[0], nums[1], grid_dims[i]) for i,nums in enumerate(sample_ranges)]
total = np.prod(grid_dims)
combinations = cartesian_product_recursive(iterators)
xgrid = np.array(combinations)
ygrid = np.zeros(xgrid.shape[0])
for i, combo in enumerate(combinations):
ygrid[i] = func_out(*combo.tolist())
print('Generating sample data')
xsamp = np.zeros((total, len(grid_dims)))
for i in range(len(grid_dims)):
xsamp[:, i] = np.random.uniform(sample_ranges[i, 0], sample_ranges[i, 1], total)
ysamp = np.zeros(total)
for i, row in enumerate(xsamp):
ysamp[i] = func_out(*row.tolist())
print('Generating test data')
xtest = np.zeros((total, len(grid_dims)))
for i in range(len(grid_dims)):
xtest[:, i] = np.random.uniform(sample_ranges[i, 0], sample_ranges[i, 1], total)
ytest = np.zeros(total)
for i, row in enumerate(xtest):
ytest[i] = func_out(*row.tolist())
return xgrid, ygrid, xsamp, ysamp, xtest, ytest
if __name__ == '__main__':
# nM = 12
# nq = 8
# na = 7
# ne = 7
# nY = 7
# nTh = 7
# nPh = 8
# nt = 8
nM = 8
nq = 5
na = 4
ne = 4
nY = 4
nTh = 5
nPh = 8
nt = 5
# Our prior space (params are M,q,a,e,Y,thetaS,phiS,thetaK,t)
sample_ranges = np.array([
[np.log10(8e4), np.log10(5e7)],
[np.log10(5e4), np.log10(5e7)],
[0.01, 0.99],
[0.01, 0.5],
[0.5, 0.99],
[0, np.pi],
[0, 2 * np.pi],
[0, np.pi],
[0.5, 4],
])
xgrid, ygrid, xsamp, ysamp, xtest, ytest = get_data([nM,nq,na,ne,nY,nTh,nPh,nPh,nt],sample_ranges)
fp = '../_data/{}'
np.save(fp.format('xgrid.npy'),xgrid)
np.save(fp.format('ygrid.npy'),ygrid)
np.save(fp.format('xsamp.npy'),xsamp)
np.save(fp.format('ysamp.npy'),ysamp)
np.save(fp.format('xtest.npy'),xtest)
np.save(fp.format('ytest.npy'),ytest)
print('Data generation complete.')
import torch
from torch import nn
from torch.nn.init import xavier_uniform_
import numpy as np
import matplotlib.pyplot as plt
from MLtools.utilities import norm, norm_inputs, unnorm, unnorm_inputs
from MLtools.nn.model_creation import create_mlp
if __name__ == '__main__':
device = "cuda:0"
xtrain = np.load('./xdata_pert.npy')
ytrain = np.log(np.load('./ydata_pert.npy'))
xtest = np.load('./xtest.npy')
ytest = np.log(np.load('./ytest.npy'))
np.save('pert_xdata_mean_std.npy',np.array([xtrain.mean(axis=0), xtrain.std(axis=0)]))
np.save('pert_ydata_mean_std.npy',np.array([ytrain.mean(), ytrain.std()]))
xtest = torch.from_numpy(norm_inputs(xtest, xtrain)).to(device).float()
ytest = torch.from_numpy(norm(ytest, ytrain)).to(device).float()
xtrain = torch.from_numpy(norm_inputs(xtrain, xtrain)).to(device).float()
ytrain = torch.from_numpy(norm(ytrain, ytrain)).to(device).float()
# xgrid = np.load('./xdata.npy')
# ygrid = np.log(np.load('./ydata.npy'))
#
# xtrain = np.load('./xtest.npy')
# ytrain = np.log(np.load('./ytest.npy'))
#
# xtest = np.load('./xtest2.npy')
# ytest = np.log(np.load('./ytest2.npy'))
#
# xtrain2 = 2*(xtrain - np.min(xgrid,axis=0))/np.max(xgrid,axis=0) - 1
# ytrain2 = 2*(ytrain - np.min(ygrid))/np.max(ygrid) - 1
#
# xtest2 = 2 * (xtest - np.min(xgrid, axis=0)) / np.max(xgrid, axis=0) - 1
# ytest2 = 2 * (ytest - np.min(ygrid)) / np.max(ygrid) - 1
#
# xtest = torch.from_numpy(xtest2).to(device).float()
# ytest = torch.from_numpy(ytest2).to(device).float()
#
# xtrain = torch.from_numpy(xtrain2).to(device).float()
# ytrain = torch.from_numpy(ytrain2).to(device).float()
ytrainsize = len(ytrain)
ytestsize = len(ytest)
mlp = MLP().to(device)
loss_function = nn.MSELoss()
LR = 1e-2
optimizer = torch.optim.Adam(mlp.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
train_losses = []
test_losses = []
rate = []
# Run the training loop
datasets = {"train": [xtrain, ytrain], "test": [xtest, ytest]}
num_epochs = 1000
nbatch = 50
cutoff_LR = 600
    for epoch in range(num_epochs):
# Print epoch
print(f'Starting epoch {epoch + 1}')
for phase in ['train','test']:
if phase == 'train':
mlp.train(True)
shuffled_inds = torch.randperm(ytrainsize)
# Set current loss value
current_loss = 0.0
# Iterate over the DataLoader for training data
# Get and prepare inputs
inputs, targets = datasets[phase]
inputs = inputs[shuffled_inds]
targets = targets[shuffled_inds]
targets = targets.reshape((targets.shape[0], 1))
for i in range(nbatch):
for param in mlp.parameters():
param.grad = None
outputs = mlp(inputs[i * ytrainsize // nbatch : (i+1)*ytrainsize // nbatch])
loss = torch.sqrt(loss_function(outputs, targets[i * ytrainsize // nbatch: (i+1)*ytrainsize // nbatch]))
loss.backward()
optimizer.step()
current_loss += loss.item()
train_losses.append(current_loss / nbatch)
else:
with torch.no_grad():
mlp.train(False)
shuffled_inds = torch.randperm(ytestsize)
current_loss = 0.0
inputs, targets = datasets[phase]
inputs = inputs[shuffled_inds]
targets = targets[shuffled_inds]
targets = targets.reshape((targets.shape[0], 1))
for i in range(nbatch):
outputs = mlp(inputs[i * ytestsize // nbatch: (i+1)*ytestsize // nbatch])
loss = torch.sqrt(loss_function(outputs, targets[i * ytestsize // nbatch: (i+1)*ytestsize // nbatch]))
current_loss += loss.item()
test_losses.append(current_loss / nbatch)
if epoch >= cutoff_LR:
scheduler.step()
rate.append(scheduler.get_last_lr()[0])
else:
rate.append(LR)
print(f'Train loss: {train_losses[-1]}, Test loss: {test_losses[-1]}')
torch.save(mlp.state_dict(),'pert_model.pth')
epochs = np.arange(num_epochs)
plt.semilogy(epochs, train_losses, label='Train')
plt.semilogy(epochs, test_losses, label='Test')
plt.legend()
plt.xlabel('Epochs')
plt.ylabel('Root Mean Square Error')
plt.title('Train and Test Loss Across Train Epochs')
plt.savefig('losses.png')
plt.show()
plt.semilogy(epochs, rate)
plt.xlabel('Epochs')
plt.ylabel('Adam learning rate')
plt.savefig('LR.png')
plt.show()