Commit 71a551a9 authored by Christian Chapman-Bird's avatar Christian Chapman-Bird
Browse files

Added support for uniform normalisation

parent 90fa2e7c
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -37,14 +37,14 @@ class LinearModel(nn.Module):
            self.initial(m.weight)


def create_mlp(input_features, output_features, neurons, layers, activation, model_name, out_activation=None, init=xavier_uniform_, device=None, use_dropout=False,drop_p=0.25, use_bn=False):
def create_mlp(input_features, output_features, neurons, layers, activation, model_name, out_activation=None, init=xavier_uniform_, device=None, norm_type='z-score', use_dropout=False,drop_p=0.25, use_bn=False):
    if isinstance(neurons, list):
        if len(neurons) != layers:
            raise RuntimeError('Length of neuron vector does not equal number of hidden layers.')
    else:
        neurons = [neurons, ]
    model = LinearModel(input_features, output_features, neurons, layers, activation, model_name, initialisation=init, use_dropout=use_dropout,drop_p=drop_p,use_bn=use_bn)

    model.norm_type=norm_type
    Path(get_script_path()+f'/../models/{model_name}/').mkdir(parents=True, exist_ok=True)
    pickle.dump(model, open(get_script_path()+f'/../models/{model_name}/function.pickle', "wb"), pickle.HIGHEST_PROTOCOL)  # save blank model

+11 −7
Original line number Diff line number Diff line
@@ -16,14 +16,18 @@ def model_train_test(data, model, device, n_epochs, n_batches, loss_function, op

    name = model.name
    path = get_script_path()
    norm_type = model.norm_type
    Path(get_script_path()+f'/../models/{name}/').mkdir(parents=True, exist_ok=True)
    np.save(path+'/../models/'+name+'/xdata_mean_std.npy',np.array([xtrain.mean(axis=0), xtrain.std(axis=0)]))
    np.save(path+'/../models/'+name+'/ydata_mean_std.npy',np.array([ytrain.mean(), ytrain.std()]))

    xtest = torch.from_numpy(norm_inputs(xtest, ref_dataframe=xtrain)).to(device).float()
    ytest = torch.from_numpy(norm(ytest, ref_dataframe=ytrain)).to(device).float()
    xtrain = torch.from_numpy(norm_inputs(xtrain, ref_dataframe=xtrain)).to(device).float()
    ytrain = torch.from_numpy(norm(ytrain, ref_dataframe=ytrain)).to(device).float()
    # Persist the two reference statistics per dataset so inference code can
    # reload them from xdata_inputs.npy / ydata_inputs.npy and apply the same
    # scaling: (mean, std) for 'z-score', (min, max) for 'uniform'.
    # NOTE: comparisons must use `==`; a single `=` here is a SyntaxError.
    if norm_type == 'z-score':
        np.save(path+'/../models/'+name+'/xdata_inputs.npy',np.array([xtrain.mean(axis=0), xtrain.std(axis=0)]))
        np.save(path+'/../models/'+name+'/ydata_inputs.npy',np.array([ytrain.mean(), ytrain.std()]))
    elif norm_type == 'uniform':
        np.save(path+'/../models/'+name+'/xdata_inputs.npy',np.array([np.min(xtrain,axis=0), np.max(xtrain,axis=0)]))
        np.save(path+'/../models/'+name+'/ydata_inputs.npy',np.array([np.min(ytrain), np.max(ytrain)]))
    else:
        # Fail before training rather than silently saving no statistics.
        raise ValueError("normalisation must be z-score or uniform")
    xtest = torch.from_numpy(norm_inputs(xtest, ref_dataframe=xtrain, norm_type=norm_type)).to(device).float()
    ytest = torch.from_numpy(norm(ytest, ref_dataframe=ytrain, norm_type=norm_type)).to(device).float()
    xtrain = torch.from_numpy(norm_inputs(xtrain, ref_dataframe=xtrain, norm_type=norm_type)).to(device).float()
    ytrain = torch.from_numpy(norm(ytrain, ref_dataframe=ytrain, norm_type=norm_type)).to(device).float()

    ytrainsize = len(ytrain)
    ytestsize = len(ytest)
+87 −24
Original line number Diff line number Diff line
import os
import sys


def get_script_path():
    """Return the base directory that callers prepend to relative paths.

    Despite the name, this is the process's current working directory, not
    the directory containing the running script.
    """
    # Script-relative alternative, currently disabled:
    # os.path.dirname(os.path.realpath(sys.argv[0]))
    base_dir = os.getcwd()
    return base_dir


def _reference_pair(ref_dataframe, ref_inputs, norm_type, axis=None):
    """Return the two reference statistics for the requested normalisation.

    The pair is (mean, std) for 'z-score' and (min, max) for 'uniform'.
    When ``ref_dataframe`` is given it takes priority and the statistics are
    computed from it; otherwise ``ref_inputs[0]`` and ``ref_inputs[1]`` are
    used as-is.

    Raises:
        RuntimeError: if neither ``ref_dataframe`` nor ``ref_inputs`` is given.
        ValueError: if ``norm_type`` is not 'z-score' or 'uniform'.
    """
    if ref_dataframe is None and ref_inputs is None:
        raise RuntimeError("Either a reference dataset or a set of reference inputs must be supplied")
    if norm_type not in ('z-score', 'uniform'):
        raise ValueError("normalisation must be z-score or uniform")
    if ref_dataframe is None:
        return ref_inputs[0], ref_inputs[1]
    # Only forward `axis` when one was requested, so the global variants call
    # the reductions exactly as `.mean()`, `.std()`, `.min()`, `.max()`.
    reduce_kwargs = {} if axis is None else {'axis': axis}
    if norm_type == 'z-score':
        return ref_dataframe.mean(**reduce_kwargs), ref_dataframe.std(**reduce_kwargs)
    return ref_dataframe.min(**reduce_kwargs), ref_dataframe.max(**reduce_kwargs)


def _scale(dataframe, first, second, norm_type):
    """Apply the forward transform given an already-validated ``norm_type``."""
    if norm_type == 'z-score':
        return (dataframe - first) / second
    # Min-max scaling onto [-1, 1]: divide by the range (max - min), not max.
    return 2 * (dataframe - first) / (second - first) - 1


def _unscale(dataframe, first, second, norm_type):
    """Apply the inverse of ``_scale`` given an already-validated ``norm_type``."""
    if norm_type == 'z-score':
        return (dataframe * second) + first
    return 0.5 * (dataframe + 1) * (second - first) + first


def norm(dataframe, ref_dataframe=None, ref_inputs=None, norm_type='z-score'):
    """Normalise ``dataframe`` using statistics taken over a whole reference.

    Args:
        dataframe: array-like supporting element-wise arithmetic.
        ref_dataframe: optional reference data; its overall mean/std
            ('z-score') or min/max ('uniform') are used.
        ref_inputs: optional two-element sequence of precomputed statistics,
            (mean, std) for 'z-score' or (min, max) for 'uniform'. Only used
            when ``ref_dataframe`` is None.
        norm_type: 'z-score' (subtract mean, divide by std) or 'uniform'
            (min-max scale so the reference range maps onto [-1, 1]).

    Returns:
        The normalised data.

    Raises:
        RuntimeError: if neither reference is supplied.
        ValueError: if ``norm_type`` is not recognised.
    """
    first, second = _reference_pair(ref_dataframe, ref_inputs, norm_type)
    return _scale(dataframe, first, second, norm_type)


def norm_inputs(dataframe, ref_dataframe=None, ref_inputs=None, norm_type='z-score'):
    """Normalise ``dataframe`` using statistics taken along axis 0.

    Same contract as ``norm``, except that statistics derived from
    ``ref_dataframe`` are reduced along axis 0 rather than over all elements.

    Raises:
        RuntimeError: if neither reference is supplied.
        ValueError: if ``norm_type`` is not recognised.
    """
    first, second = _reference_pair(ref_dataframe, ref_inputs, norm_type, axis=0)
    return _scale(dataframe, first, second, norm_type)


def unnorm(dataframe, ref_dataframe=None, ref_inputs=None, norm_type='z-score'):
    """Invert ``norm``: map normalised values back to the original scale.

    Args:
        dataframe: normalised array-like data.
        ref_dataframe: optional reference data (un-normalised) from which the
            overall mean/std or min/max are taken.
        ref_inputs: optional two-element sequence of precomputed statistics,
            (mean, std) for 'z-score' or (min, max) for 'uniform'. Only used
            when ``ref_dataframe`` is None.
        norm_type: 'z-score' or 'uniform'; must match the forward transform.

    Returns:
        The data on its original scale.

    Raises:
        RuntimeError: if neither reference is supplied.
        ValueError: if ``norm_type`` is not recognised.
    """
    first, second = _reference_pair(ref_dataframe, ref_inputs, norm_type)
    return _unscale(dataframe, first, second, norm_type)


def unnorm_inputs(dataframe, ref_dataframe=None, ref_inputs=None, norm_type='z-score'):
    """Invert ``norm_inputs``: undo normalisation using axis-0 statistics.

    Same contract as ``unnorm``, except that statistics derived from
    ``ref_dataframe`` are reduced along axis 0.

    Raises:
        RuntimeError: if neither reference is supplied.
        ValueError: if ``norm_type`` is not recognised.
    """
    first, second = _reference_pair(ref_dataframe, ref_inputs, norm_type, axis=0)
    return _unscale(dataframe, first, second, norm_type)


#def unnorm_inputs(dataframe_norm, ref_dataframe,ref_mean=None, ref_std=None, norm_type='z-score'):
#    if ref_dataframe is not None:
#        dataframe = (dataframe_norm * ref_dataframe.std(axis=0)) + ref_dataframe.mean(axis=0)
#    elif ref_mean is not None and ref_std is not None:
#        dataframe = (dataframe_norm * ref_std) + ref_mean
#    else:
#        raise RuntimeError("Either a reference dataset or a reference mean + std must be supplied.")
#    return dataframe
+4 −4
Original line number Diff line number Diff line
@@ -37,11 +37,11 @@ def run_on_dataset(model, test_data, distances=None, n_batches=1, device=None, y

    xdata, ydata = test_data

    xmeanstd = np.load(get_script_path() + f'/../models/{model.name}/xdata_mean_std.npy')
    ymeanstd = np.load(get_script_path() + f'/../models/{model.name}/ydata_mean_std.npy')
    xscalevals = np.load(get_script_path() + f'/../models/{model.name}/xdata_inputs.npy')
    yscalevals = np.load(get_script_path() + f'/../models/{model.name}/ydata_inputs.npy')

    test_input = torch.Tensor(xdata)
    normed_input = norm_inputs(test_input, ref_mean=xmeanstd[0], ref_std=xmeanstd[1]).float().to(device)
    normed_input = norm_inputs(test_input, ref_inputs=xscalevals,norm_type=model.norm_type).float().to(device)

    if runtime:
        st = time.perf_counter()
@@ -57,7 +57,7 @@ def run_on_dataset(model, test_data, distances=None, n_batches=1, device=None, y
        per_point = (et - st) / ydata.size

    output = np.concatenate(out)
    out_unnorm = unnorm(output, ref_mean=ymeanstd[0], ref_std=ymeanstd[1])
    out_unnorm = unnorm(output, ref_inputs=yscalevals,norm_type=model.norm_type)

    if y_transform_fn is not None:
        out_unnorm = y_transform_fn(out_unnorm)