Commit 6930a164 authored by Stefan Hiemer's avatar Stefan Hiemer

Initial commit


.gitignore

0 → 100644
+3 −0
*.csv
*.txt
models/

DisDataExp.dat

0 → 100644
+140 −0
1.7E-5	20500000000	0.635	0.4
1.7E-5	29400000000	1.75	0.7
1.7E-5	40000000000	1.73	0.7
1.7E-5	72000000000	0.981	1.1
1.7E-5	114000000000	1.32	1.5
1.7E-5	122000000000	1.56	1.2
1.7E-5	75600000000	3.23	2
1.7E-5	75200000000	3.59	2.6
1.7E-5	119000000000	3.3	2.2
1.7E-5	136000000000	2.87	3.3
1.7E-5	199000000000	2.7	4.3
1.7E-5	203000000000	5.29	5.1
1.7E-5	224000000000	4.59	5.2
1.7E-5	313000000000	4.4	6.2
1.7E-5	356000000000	5.16	6.3
1.7E-5	480000000000	7.44	6.1
1.7E-5	613000000000	4.82	6.9
1.7E-5	1540000000000	7.63	8.2
6.3E-5	956000	0.999	0
6.3E-5	8950000	0.545	0
6.3E-5	15800000	0.72	0
6.3E-5	16100000	0.545	0
6.3E-5	28000000	0.519	0
6.3E-5	101000000	0.644	0
6.3E-5	1820000000	1.18	0
6.3E-5	2290000000	1.31	0
6.3E-5	2110000000	1.39	0
6.3E-5	3080000000	1.5	0
6.3E-5	5560000000	1.62	0
6.3E-5	18500000000	1.97	0
6.3E-5	1410000000	0.649	0
6.3E-5	1590000000	0.595	0
6.3E-5	1920000000	0.616	0
6.3E-5	2030000000	0.719	0
6.3E-5	2670000000	0.743	0
6.3E-5	3130000000	0.74	0
6.3E-5	4130000000	0.722	0
6.3E-5	5270000000	0.831	0
6.3E-5	6730000000	0.849	0
6.3E-5	8430000000	0.936	0
6.3E-5	17800000000	0.897	0
6.3E-5	14200000000	1.02	0
6.3E-5	76400000000	1.32	0
6.3E-5	79200000000	1.38	0
1E-4	19600000000	1.02	0.5
1E-4	33200000000	1.03	0.7
1E-4	59200000000	1.58	1.5
1E-4	88200000000	2.51	0.0871982863335726
1E-4	287000000000	4.52	0.223486601498623
1E-4	391000000000	5.54	0.309489286113777
1E-4	456000000000	3.86	0.173607979688386
1E-4	492000000000	5.54	0.309489286113777
1E-4	1220000000000	7.92	0.548264701837827
1E-4	3960000000000	15.4	1.58877774954313
1E-4	6250000000000	19.5	2.31785024944096
1E-4	8420000000000	26.1	3.69534344070533
1E-4	12400000000000	28.4	4.229989990906
1E-4	14100000000000	27.8	4.08791323224191
1E-4	28700000000000	31.7	5.04341627088995
1E-4	34900000000000	40	7.31688083083722
1E-4	221000000000000	113	38.5438866283516
1E-4	100000000	0.66	0
1E-4	100000000	0.37	0
1E-4	200000000	0.37	0
1E-4	200000000	0.35	0
1E-4	1500000000	0.34	0
1E-4	100000000	0.38	0
1E-4	2000000000	0.37	0
1E-4	10000000000	0.94	0
6E-4	157000000	59	0
6E-4	157000000	50.3	0
6E-4	1420000000	38.5	0
6E-4	3960000000	28.8	0
6E-4	9340000000	20.7	0
6E-4	17800000000	16.7	0
6E-4	43300000000	16.1	0
400	22000000000	9.73	0.5
400	47000000000	8.78	1
400	120000000000	7.51	1.5
400	140000000000	8.1	1.8
400	150000000000	8.03	1.8
800	22000000000	12.1	0.5
800	47000000000	10	1
800	120000000000	9.1	1.5
800	140000000000	9.32	1.8
4.37E-5	47000000000	1.63	0.5
1.76E-4	47000000000	1.73	0.5
687	47000000000	10.4	0.5
4.28E-5	140000000000	3.19	1.5
1.62E-4	140000000000	3.33	1.5
414	140000000000	9.69	1.5
719	140000000000	12.3	1.5
4.29E-5	4160000000000	2.12	0.7
5.47E-4	4160000000000	2.38	0.7
509	4160000000000	2.93	0.7
799	4160000000000	3.21	0.7
960	4160000000000	3.5	0.7
1160	4160000000000	4.05	0.7
1400	4160000000000	4.44	0.7
850	4960000000000	2.85	0.681818181818182
850	5770000000000	4.1	1.36363636363636
850	8290000000000	6.23	2.72727272727273
850	11900000000000	7.82	4.54545454545455
850	17800000000000	8.88	6.36363636363636
4.55E-4	1880000000000	10	2
0.0439	1880000000000	8.53	2
2120	1880000000000	13.3	2
5830	1880000000000	26.1	2
8970	1880000000000	39	2
3830	6200000000000	6.94	0.4
4160	6200000000000	7.78	0.4
12400	6200000000000	17.9	0.4
18800	6200000000000	23.6	0.4
18200	6200000000000	23.9	0.4
26700	6200000000000	32.7	0.4
3830	6200000000000	6.94	0.4
4.09E-4	6050000000000	6.62	0.1
0.0042	6050000000000	6.28	0.1
0.0441	6050000000000	6.62	0.1
1.99	6050000000000	7.25	0.1
10.1	6050000000000	7.88	0.1
13.1	6050000000000	7.59	0.1
79.5	6050000000000	7.88	0.1
315	6050000000000	13.9	0.1
379	6050000000000	11.4	0.1
1920	6050000000000	23.6	0.1
1920	6050000000000	24	0.1
3450	6050000000000	55.1	0.1
4490	6050000000000	57.9	0.1
4870	6050000000000	59.1	0.1
7010	6050000000000	73.6	0.1
5830	6050000000000	74.2	0.1
7010	6050000000000	74.9	0.1
7300	6050000000000	89.3	0.1
1000000	1500000000000	1290	0
100	6680000000000	15.4	0.1
1000	6680000000000	17.3	0.1
10000	6680000000000	23.7	0.1
100000	6680000000000	75.5	0.1
1000000	6680000000000	274	0.1

DisDataSim.dat

0 → 100644
+194 −0
0.1	22600000	1.38	1.01E-5
0.1	173000000	0.381	1E-5
0.1	583000000	0.281	1E-5
0.1	1980000000	0.305	1E-5
0.1	14700000000	1.08	1E-5
0.1	110000000000	1.19	1E-5
1	142000000	3.86	1E-4
1	1100000000	0.931	1E-4
1	3030000000	0.747	1E-4
1	13900000000	0.882	1E-4
1	107000000000	1.95	1E-4
1	719000000000	3.38	1E-4
1	141000000	3	1.01E-4
1	1030000000	1.06	8.09E-5
1	3030000000	0.747	1E-4
1	13800000000	0.747	1E-4
1	108000000000	1.62	1E-4
1	141000000	3.86	1E-4
1	1550000000	0.943	1E-4
1	2030000000	0.588	1E-4
1	14500000000	1.11	1E-4
1	104000000000	1.51	1E-4
1	733000000000	1.92	1E-4
10	141000000	36.8	5E-4
10	566000000	11.6	5E-4
10	1960000000	3.41	5.01E-4
10	122000000000	3.28	5E-4
10	661000000000	2.82	5E-4
10	4110000000000	6.98	5E-4
10	141000000	29.1	5.01E-4
10	566000000	11.5	5.01E-4
10	1960000000	3.38	5.01E-4
10	25400000000	2.09	3.78E-4
10	106000000000	1.64	5E-4
10	763000000000	1.85	5E-4
10	141000000	36.8	5E-4
10	566000000	11.6	5E-4
10	1960000000	3.41	5.01E-4
10	116000000000	2.18	5E-4
10	659000000000	1.95	5E-4
10	3850000000000	4.08	5.02E-4
100	141000000	657	0.002
100	566000000	151	0.002
100	2520000000	36.8	0.002
100	132000000000	2.45	0.002
100	676000000000	4.03	0.002
100	3940000000000	8.37	0.002
100	25300000000000	9.76	0.00201
100	141000000	490	0.00202
100	566000000	151	0.002
100	2940000000	37.8	0.00189
100	282000000000	4.53	0.002
100	646000000000	4.61	0.002
100	4010000000000	10.3	0.002
100	25000000000000	11.6	0.00201
100	141000000	657	0.002
100	566000000	151	0.002
100	2210000000	36.7	0.002
100	173000000000	3.67	0.002
100	672000000000	4.74	0.002
100	718000000000	4.9	0.002
100	3620000000000	8.21	0.002
100	27000000000000	11.4	0.002
1000	566000000	1710	0.002
1000	15200000000	62.9	0.002
1000	158000000000	8.12	0.002
1000	1150000000000	7.43	0.002
1000	4450000000000	12.5	0.002
1000	25100000000000	20.9	0.002
1000	124000000000000	43.3	0.002
1000	566000000	1710	0.002
1000	4330000000	387	0.002
1000	17700000000	65.7	0.00156
1000	193000000000	9.51	0.002
1000	1040000000000	6.9	0.002
1000	4470000000000	10.5	0.002
1000	24200000000000	19.1	0.00201
1000	101000000000000	45.3	0.002
1000	566000000	1710	0.002
1000	7790000000	381	0.002
1000	39800000000	42.5	0.002
1000	188000000000	8.53	0.002
1000	824000000000	8	0.002
1000	4810000000000	12.3	0.002
1000	23800000000000	14.8	0.002
1000	116000000000000	32	0.00201
1000	566000000	1710	0.002
1000	2010000000	453	0.002
1000	2010000000	453	0.002
1000	14000000000	61.6	0.0018
1000	175000000000	8.37	0.002
1000	809000000000	8.53	0.002
1000	1270000000000	8.74	0.002
1000	4860000000000	12.2	0.002
1000	28600000000000	24.9	0.002
1000	126000000000000	39.2	0.002
10000	6270000000	1540	0.002
10000	51000000000	166	0.00201
10000	75700000000	140	0.00201
10000	108000000000	102	0.00201
10000	1500000000000	19.4	0.002
10000	5400000000000	18	0.002
10000	27800000000000	25.6	0.002
10000	117000000000000	33.6	0.002
10000	711000000000000	84.5	0.00209
10000	6270000000	1540	0.002
10000	19600000000	584	0.002
10000	106000000000	105	0.00201
10000	611000000000	32	0.002
10000	1370000000000	20	0.002
10000	7720000000000	25.4	0.002
10000	24000000000000	19.9	0.00201
10000	110000000000000	48.6	0.00201
10000	712000000000000	90.2	0.00201
10000	6270000000	1540	0.002
10000	18300000000	637	0.002
10000	158000000000	100	0.002
10000	298000000000	55.1	0.002
10000	802000000000	21.4	0.002
10000	4650000000000	16.7	0.002
10000	25400000000000	21.6	0.002
10000	132000000000000	43.7	0.002
10000	746000000000000	71	0.002
10000	13900000000	768	0.00201
10000	109000000000	102	0.00204
10000	232000000000	84.5	0.00194
10000	553000000000	36.7	0.002
10000	746000000000	27.1	0.002
10000	1620000000000	17.4	0.002
10000	5350000000000	20.1	0.002
10000	18100000000000	24.5	0.00201
10000	22900000000000	31.1	0.002
10000	27400000000000	31.1	0.002
10000	107000000000000	42.9	0.002
10000	697000000000000	85.7	0.002
10000	733000000000000	71.4	0.002
100000	60100000000	1770	0.00201
100000	334000000000	878	0.002
100000	978000000000	134	0.00202
100000	6490000000000	55.5	0.002
100000	27900000000000	41.2	0.002
100000	122000000000000	53.1	0.00204
100000	745000000000000	86.5	0.00205
100000	3.76E15	131	0.00204
100000	5.48E15	110	0.00202
100000	55400000000	1980	0.002
100000	109000000000	1010	0.002
100000	275000000000	425	0.002
100000	1690000000000	100	0.00201
100000	12100000000000	54.7	0.002
100000	28200000000000	49.8	0.00202
100000	109000000000000	53.5	0.002
100000	729000000000000	94.3	0.00203
100000	5.52E15	93.5	0.00201
100000	55600000000	1650	0.002
100000	109000000000	959	0.002
100000	954000000000	147	0.00205
100000	5420000000000	53.9	0.002
100000	6720000000000	52.7	0.00201
100000	6720000000000	52.7	0.00201
100000	33200000000000	48.6	0.00202
100000	126000000000000	53.5	0.00204
100000	726000000000000	111	0.00203
100000	3.77E15	136	0.00212
100000	5.39E15	147	0.0019
1000000	1460000000000	992	0.00201
1000000	2320000000000	576	0.002
1000000	11400000000000	234	0.002
1000000	32700000000000	155	0.002
1000000	149000000000000	114	0.002
1000000	774000000000000	127	0.00205
1000000	3.88E15	164	0.00201
1000000	5.65E15	96.8	0.00202
1000000	680000000000	1650	0.00221
1000000	4120000000000	429	0.00228
1000000	28100000000000	165	0.00209
1000000	114000000000000	100	0.0022
1000000	766000000000000	88.2	0.00213
1000000	5.47E15	98.8	0.00225
1000000	1500000000000	935	0.00201
1000000	1990000000000	645	0.002
1000000	2250000000000	645	0.002
1000000	8430000000000	256	0.002
1000000	43500000000000	160	0.002
1000000	155000000000000	126	0.00201
1000000	767000000000000	136	0.00203
1000000	3.89E15	131	0.00222
1000000	5.59E15	109	0.00201
1000000	1.2E16	138	0.00204
250000000	700000000000000	457	0.002
250000000	1.26E15	249	0.002
250000000	2.4E15	234	0.002
250000000	7.65E15	158	0.002
250000000	2.35E16	293	0.002

connect-data.py

0 → 100644
+23 −0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file is used to postprocess the data by Bapst et al. It relies on several
conventions used by Bapst to sample and format his data, so do not use this
to process other data without checking that it behaves as intended.
@author: Victor Bapst et al.
modified by Stefan Hiemer
"""

import pandas as pd
from glob import glob

if __name__ == "__main__":

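    # The training script writes paired files such as
    # "train_random-choice.txt" and "train-origin_random-choice.txt".
    # Splitting the origin file name at "_" and "-" recovers the name of
    # the matching data file ("train-origin_..." -> "train_...").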
    for f in glob("*origin*"):
        origin = pd.read_csv(f, sep=',', header=None)
        origin.columns = ["data source"]
        data = f.split("_")
        data = "_".join([data[0].split("-")[0], data[1]])
        data = pd.read_csv(data, sep=',', header=0)
        print(data)
        # attach the origin labels as an extra column next to the data rows
        print(pd.concat((data, origin), axis=1))
+301 −0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file is used to postprocess the data by Bapst et al. It relies on several
conventions used by Bapst to sample and format his data, so do not use this
to process other data without checking that it behaves as intended.
@author: Victor Bapst et al.
modified by Stefan Hiemer
"""

import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import backend as K

def coeff_determination(y_true, y_pred):
    """Coefficient of determination R^2 = 1 - SS_res/SS_tot as a Keras
    metric; K.epsilon() guards against division by zero."""
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res/(SS_tot + K.epsilon())

def build_perceptron(units, activation):
    """
    Simple multilayer perceptron for regression.

    units: list of int, hidden-layer widths; each entry adds a Dense layer
        followed by BatchNormalization, and a Dense(1) output is appended.
    activation: activation function for the hidden layers.
    """

    model = keras.Sequential()

    for unit in units:
        model.add(Dense(unit, activation=activation))
        model.add(BatchNormalization())

    model.add(Dense(1))

    minimizer = Adam(learning_rate = 0.001,
                     beta_1 = 0.9,  beta_2 = 0.999,
                     amsgrad = False)
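    # these settings are the Keras defaults for Adam, written out explicitly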
    model.compile(loss = 'mse', optimizer = minimizer,
                    metrics = ['mse',coeff_determination])

    return model
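
# Example: build_perceptron([10, 10], "relu") stacks
# Dense(10) -> BatchNorm -> Dense(10) -> BatchNorm -> Dense(1),
# compiled with the Adam optimizer and an MSE loss.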

if __name__ == '__main__':

    # load data
    exp = np.loadtxt('DisDataExp.dat', dtype=float, delimiter='\t')
    sim = np.loadtxt('DisDataSim.dat', dtype=float, delimiter='\t')

    # reorder the columns so that the target (yield stress) comes last:
    # (strain rate, dislocation density, strain, yield stress)
    exp = np.concatenate((exp[:,[0,1,3]], exp[:,2:3]),axis=1)
    sim = np.concatenate((sim[:,[0,1,3]], sim[:,2:3]),axis=1)

    # split into training and test sets (exp: 120 train / 20 test,
    # sim: 174 train / 20 test)
    exp_train, exp_test = train_test_split(exp, train_size=120, random_state=0)
    sim_train, sim_test = train_test_split(sim, train_size=174, random_state=0)

    # unite both datasets
    train = np.concatenate((exp_train,sim_train),axis=0)
    test = np.concatenate((exp_test,sim_test),axis=0)

    # take log10 of strain rate, dislocation density and yield stress
    # (columns 0, 1, 3); the strain column contains exact zeros and is
    # therefore left on a linear scale
    trainlog = train.copy()
    trainlog[:,[0,1,3]] = np.log10(train[:,[0,1,3]])

    testlog = test.copy()
    testlog[:,[0,1,3]] = np.log10(test[:,[0,1,3]])

    # scale to zero mean and unit variance, using statistics estimated on
    # the training set only
    scaler = StandardScaler().fit(trainlog)
    trainlog_scaled = scaler.transform(trainlog)
    testlog_scaled = scaler.transform(testlog)

    # split into features and target
    xtrain = trainlog_scaled[:,:3]
    ytrain = trainlog_scaled[:,3]

    xtest = testlog_scaled[:,:3]
    ytest = testlog_scaled[:,3]
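
    # note: ytrain and ytest hold the standardized log10 yield stress, so
    # the R^2 scores reported below refer to this transformed space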

    # grid-search an elastic net over alpha and l1_ratio, keeping the model
    # with the highest R^2 on the test set
    lin = None
    for alpha in np.arange(0.1,10.1,0.1):
        for l1_ratio in np.arange(0,1.1,0.1):
            lin_regressor = ElasticNet(alpha=alpha,
                                       l1_ratio=l1_ratio,
                                       max_iter=100000,
                                       random_state=0).fit(xtrain,ytrain)

            if lin is None:
                lin = lin_regressor
                r2_train = r2_score(ytrain,lin_regressor.predict(xtrain))
                r2_test = r2_score(ytest,lin_regressor.predict(xtest))

                print()
                print(alpha, l1_ratio, r2_train, r2_test)
                print()

            else:
                if r2_score(ytest,lin_regressor.predict(xtest)) > r2_test:
                    lin = lin_regressor
                    r2_train = r2_score(ytrain,lin_regressor.predict(xtrain))
                    r2_test = r2_score(ytest,lin_regressor.predict(xtest))

                    print()
                    print(alpha, l1_ratio, r2_train, r2_test)
                    print()

    print(lin.get_params())
    print(r2_train, r2_test)

    # grid-search kernel ridge regression (RBF kernel) over alpha and gamma,
    # keeping the model with the highest R^2 on the test set
    kridge = None
    for alpha in np.logspace(-5,5,101):
        for gamma in np.logspace(-5,5,101):
            kridge_regressor = KernelRidge(alpha=alpha,
                                        gamma=gamma,
                                        kernel='rbf').fit(xtrain,ytrain)

            if kridge is None:
                kridge = kridge_regressor
                r2_train = r2_score(ytrain,kridge_regressor.predict(xtrain))
                r2_test = r2_score(ytest,kridge_regressor.predict(xtest))

                print()
                print(alpha, gamma, r2_train, r2_test)
                print()

            else:
                if r2_score(ytest,kridge_regressor.predict(xtest)) > r2_test:
                    kridge = kridge_regressor
                    r2_train = r2_score(ytrain,kridge_regressor.predict(xtrain))
                    r2_test = r2_score(ytest,kridge_regressor.predict(xtest))

                    print()
                    print(alpha, gamma, r2_train, r2_test)
                    print()

    print(kridge.get_params())
    print(r2_train, r2_test)

    # grid-search a decision tree over depth and splitting parameters,
    # keeping the model with the highest R^2 on the test set
    tree = None
    for max_depth in [2,4,8,16,32,64,None]:
        for min_samples_split in [2,4,8,16,32,64]:
            for min_samples_leaf in [2,4,8,16,32,64]:
                tree_regressor = DecisionTreeRegressor(max_depth=max_depth,
                                            min_samples_split=min_samples_split,
                                            min_samples_leaf=min_samples_leaf)\
                                            .fit(xtrain,ytrain)

                if tree is None:
                    tree = tree_regressor
                    r2_train = r2_score(ytrain, tree_regressor.predict(xtrain))
                    r2_test = r2_score(ytest, tree_regressor.predict(xtest))

                    print()
                    print(max_depth, min_samples_split, min_samples_leaf,
                          r2_train, r2_test)
                    print()

                else:
                    if r2_score(ytest,tree_regressor.predict(xtest)) > r2_test:
                        tree = tree_regressor
                        r2_train = r2_score(ytrain,
                                            tree_regressor.predict(xtrain))
                        r2_test = r2_score(ytest,
                                           tree_regressor.predict(xtest))

                        print()
                        print(max_depth, min_samples_split, min_samples_leaf,
                              r2_train, r2_test)
                        print()

    print(tree.get_params())
    print(r2_train, r2_test)

    # grid-search the keras perceptron over layer layouts and activations,
    # keeping the model with the highest R^2 on the test set
    perceptron = None
    for units in [[10,10],[10,10,10],[10,10,10,10]]:
        for activation in ["relu","sigmoid"]:
            model_callback = ModelCheckpoint(filepath = './models/tensorboard-logs/perceptron',
                                             save_weights_only = True,
                                             monitor = 'mse',
                                             mode = 'min',
                                             save_best_only = True,
                                             save_freq = 'epoch')
            tensorboard_callback = TensorBoard(log_dir= './models/tensorboard-logs/perceptron',
                                               histogram_freq = 0,
                                               write_graph = True,
                                               write_images = False,
                                               update_freq = 'epoch',
                                               profile_batch = 2,
                                               embeddings_freq = 0,
                                               embeddings_metadata = None)
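            # both callbacks write to the same directory, so each grid
            # configuration overwrites the checkpoints of the previous one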

            network = build_perceptron(units, activation)

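            # full-batch training: batch_size equals the training-set size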
            network.fit(xtrain, ytrain,
                        epochs = 1000,
                        batch_size = ytrain.shape[0],
                        shuffle = True,
                        validation_data = (xtest, ytest),
                        callbacks = [model_callback, tensorboard_callback])

            if perceptron is None:
                perceptron = network
                r2_train = r2_score(ytrain,perceptron.predict(xtrain))
                r2_test = r2_score(ytest,perceptron.predict(xtest))
                _units = units
                _act = activation
                print()
                print(units, activation, r2_train, r2_test)
                print()

            else:
                if r2_score(ytest,network.predict(xtest)) > r2_test:
                    perceptron = network
                    r2_train = r2_score(ytrain,perceptron.predict(xtrain))
                    r2_test = r2_score(ytest,perceptron.predict(xtest))
                    _units = units
                    _act = activation
                    print()
                    print(units, activation, r2_train, r2_test)
                    print()

    print(_units, _act)
    print(r2_train, r2_test)

    # make final predictions
    train_size = np.shape(ytrain)[0]
    test_size = np.shape(ytest)[0]

    ytrain_lin = lin.predict(xtrain).reshape((train_size,1))
    ytest_lin = lin.predict(xtest).reshape((test_size,1))

    ytrain_kridge = kridge.predict(xtrain).reshape((train_size,1))
    ytest_kridge = kridge.predict(xtest).reshape((test_size,1))

    ytrain_tree = tree.predict(xtrain).reshape((train_size,1))
    ytest_tree = tree.predict(xtest).reshape((test_size,1))

    ytrain_perceptron = perceptron.predict(xtrain).reshape((train_size,1))
    ytest_perceptron = perceptron.predict(xtest).reshape((test_size,1))

    # undo the standardization and the logarithm
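    # StandardScaler standardizes each column independently, so a scaler fit
    # on the target column of trainlog alone reproduces the mean and std the
    # full scaler used for column 3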
    backscaler = StandardScaler().fit(np.reshape(trainlog[:,3],(train_size,1)))

    ytrain_lin = 10**backscaler.inverse_transform(ytrain_lin)
    ytest_lin = 10**backscaler.inverse_transform(ytest_lin)

    ytrain_kridge = 10**backscaler.inverse_transform(ytrain_kridge)
    ytest_kridge = 10**backscaler.inverse_transform(ytest_kridge)

    ytrain_tree = 10**backscaler.inverse_transform(ytrain_tree)
    ytest_tree = 10**backscaler.inverse_transform(ytest_tree)

    ytrain_perceptron = 10**backscaler.inverse_transform(ytrain_perceptron)
    ytest_perceptron = 10**backscaler.inverse_transform(ytest_perceptron)

    # create labels which mark experimental and simulated values
    train_label = np.concatenate((np.full((120,1),'exp'),np.full((174,1),'sim'))
                           ,axis=0)
    test_label = np.concatenate((np.full((20,1),'exp'),np.full((20,1),'sim'))
                           ,axis=0)
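    # the hard-coded counts correspond to the train/test split sizes above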

    # save to files
    np.savetxt("train_random-choice.txt",
               np.concatenate((train,
                               ytrain_lin,
                               ytrain_kridge,
                               ytrain_tree,
                               ytrain_perceptron),axis=1),
               header = 'strain rate,dislocation density,strain,yield stress,elastic net model,kernel ridge regression,decision tree,perceptron',
               delimiter=',')
    np.savetxt("train-origin_random-choice.txt",
               train_label, fmt = '%s')

    np.savetxt("test_random-choice.txt",
               np.concatenate((test,
                               ytest_lin,
                               ytest_kridge,
                               ytest_tree,
                               ytest_perceptron),axis=1),
               header = 'strain rate,dislocation density,strain,yield stress,elastic net model,kernel ridge regression,decision tree,perceptron',
               delimiter=',')
    np.savetxt("test-origin_random-choice.txt",
               test_label, fmt = '%s')