-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathsample.py
More file actions
executable file
·50 lines (46 loc) · 1.26 KB
/
sample.py
File metadata and controls
executable file
·50 lines (46 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
import json
import os

import numpy as np
import pandas as pd
def user_input(x):
    """Write the ``user_input`` file for a single record.

    The file is created (or overwritten) inside the directory named by
    ``x.location`` and holds three newline-terminated values, in order:
    the band gap, the natural logarithm of the band gap, and the band gap
    again.

    Args:
        x: Any object exposing ``location`` (directory path string) and
            ``bandgap`` (number) attributes, e.g. a DataFrame row passed
            in by ``DataFrame.apply(..., axis=1)``.

    Returns:
        None.
    """
    target = os.path.join(x.location, 'user_input')
    # Compute every value before opening the file so that a failure in
    # np.log does not leave a truncated, empty file behind.
    # NOTE(review): the band gap is written on both line 1 and line 3;
    # this mirrors the original output format -- confirm it is intended.
    values = (x.bandgap, np.log(x.bandgap), x.bandgap)
    with open(target, 'w') as f:
        f.write(''.join(str(v) + '\n' for v in values))
# Load every record and keep only the non-metallic ones whose band gap is
# above 0.15.
# (The original also carried a disabled alternative input:
#  d = pd.read_csv('data.csv').)
d = pd.read_json('all.json')
# `== False` is an element-wise Series comparison here, not an identity test.
d = d[(d.metallic == False) & (d.bandgap > 0.15)]  # noqa: E712
# Discard rows whose phase is 'GaAs' while their dopant is anything else.
d = d.drop(d[(d.phase == 'GaAs') & (d.dopant != 'GaAs')].index)
# Emit a `user_input` file for every remaining record (side effect only).
d.apply(user_input, axis=1)

# Split each (phase, dopant) group separately: 80% train, and the remaining
# 20% divided evenly between validation and test.  The pieces are gathered in
# lists and concatenated once, because DataFrame.append was removed in
# pandas 2.0 (and copied the whole accumulated frame on every call).
train_parts = []
val_parts = []
test_parts = []
phase = d['phase'].unique()
for phase_name in phase:
    phase_rows = d[d.phase == phase_name]
    dopants = phase_rows['dopant'].unique()
    for dopant_name in dopants:
        group = phase_rows[phase_rows.dopant == dopant_name]
        # Groups with fewer than 10 records are left out of every split.
        if len(group) < 10:
            continue
        train = group.sample(frac=0.8)
        held_out = group.drop(train.index)
        val = held_out.sample(frac=0.50)
        test = held_out.drop(val.index)
        train_parts.append(train)
        val_parts.append(val)
        test_parts.append(test)

# pd.concat raises on an empty list, so fall back to an empty frame (what the
# original accumulator held) when no group was large enough.
t_total = pd.concat(train_parts) if train_parts else pd.DataFrame()
v_total = pd.concat(val_parts) if val_parts else pd.DataFrame()
test_total = pd.concat(test_parts) if test_parts else pd.DataFrame()
# Write the `location` of every record in each split to its own file, one
# location per line.  The Series is iterated directly because
# Series.as_matrix() was removed in pandas 1.0, and `with` guarantees each
# file is closed even if a write fails.
for split_filename, split_frame in (
    ('train.dat', t_total),
    ('val.dat', v_total),
    ('test.dat', test_total),
):
    with open(split_filename, 'w') as split_file:
        split_file.writelines(loc + '\n' for loc in split_frame.location)