5 from os.path
import isfile, join
9 from utils
import read_config, get_data, get_patch
13 parser = argparse.ArgumentParser(description=
'Makes training data set for EM vs track separation')
14 parser.add_argument(
'-c',
'--config', help=
"JSON with script configuration", default=
'config.json')
15 parser.add_argument(
'-t',
'--type', help=
"Input file format")
16 parser.add_argument(
'-i',
'--input', help=
"Input directory")
17 parser.add_argument(
'-o',
'--output', help=
"Output directory")
18 args = parser.parse_args()
22 print '#'*50,
'\nPrepare data for CNN' 23 if args.type
is None: INPUT_TYPE = config[
'prepare_data_em_track'][
'input_type']
24 else: INPUT_TYPE = args.type
25 if args.input
is None: INPUT_DIR = config[
'prepare_data_em_track'][
'input_dir']
26 else: INPUT_DIR = args.input
27 if args.output
is None: OUTPUT_DIR = config[
'prepare_data_em_track'][
'output_dir']
28 else: OUTPUT_DIR = args.output
30 PATCH_SIZE_W = config[
'prepare_data_em_track'][
'patch_size_w']
31 PATCH_SIZE_D = config[
'prepare_data_em_track'][
'patch_size_d']
33 print 'Using %s as input dir, and %s as output dir' % (INPUT_DIR, OUTPUT_DIR)
34 if INPUT_TYPE ==
'root':
print 'Reading from ROOT file' 35 else:
print 'Reading from TEXT files' 38 doing_nue = config[
'prepare_data_em_track'][
'doing_nue']
39 selected_view_idx = config[
'prepare_data_em_track'][
'selected_view_idx']
40 patch_fraction = config[
'prepare_data_em_track'][
'patch_fraction']
41 empty_fraction = config[
'prepare_data_em_track'][
'empty_fraction']
42 clean_track_fraction = config[
'prepare_data_em_track'][
'clean_track_fraction']
43 muon_track_fraction = config[
'prepare_data_em_track'][
'muon_track_fraction']
44 crop_event = config[
'prepare_data_em_track'][
'crop_event']
46 blur_kernel = np.asarray(config[
'prepare_data_em_track'][
'blur'])
47 white_noise = config[
'prepare_data_em_track'][
'noise']
48 coherent_noise = config[
'prepare_data_em_track'][
'coherent']
50 print 'Using', patch_fraction,
'% of data from view', selected_view_idx
51 print 'Using', muon_track_fraction,
'% of muon points' 52 if doing_nue:
print 'Neutrino mode, will skip more showers.' 54 print 'Blur kernel', blur_kernel,
'noise RMS', white_noise
56 max_capacity = 1700000
57 db = np.zeros((max_capacity, PATCH_SIZE_W, PATCH_SIZE_D), dtype=np.float32)
58 db_y = np.zeros((max_capacity, 4), dtype=np.int32)
60 patch_area = PATCH_SIZE_W * PATCH_SIZE_D
71 rootModule =
'datadump' 73 if INPUT_TYPE ==
"root":
74 fnames = [f
for f
in os.listdir(INPUT_DIR)
if '.root' in f]
76 rootFile = TFile(INPUT_DIR+
'/'+n)
77 keys = [rootModule+
'/'+k.GetName()[:-4]
for k
in rootFile.Get(rootModule).GetListOfKeys()
if '_raw' in k.GetName()]
78 event_list.append((rootFile, keys))
80 keys = [f[:-4]
for f
in os.listdir(INPUT_DIR)
if '.raw' in f]
81 event_list.append((INPUT_DIR, keys))
83 for entry
in event_list:
85 event_names = entry[1]
87 for evname
in event_names:
88 finfo = evname.split(
'_')
90 tpc_idx =
int(finfo[8])
91 view_idx =
int(finfo[10])
93 if view_idx != selected_view_idx:
continue 96 print 'Process event', fcount, evname,
'NO.', evt_no
99 raw, deposit, pdg, tracks, showers =
get_data(folder, evname, PATCH_SIZE_D/2 + 2, crop_event, blur_kernel, white_noise, coherent_noise)
101 print 'Skip empty event...' 104 pdg_michel = ((pdg & 0xF000) == 0x2000)
105 vtx_map = (pdg >> 16)
107 print 'Tracks', np.sum(tracks),
'showers', np.sum(showers),
'michels', np.sum(pdg_michel)
117 for i
in range(raw.shape[0]):
118 for j
in range(raw.shape[1]):
119 is_raw_zero = (raw[i,j] < 0.01)
120 is_michel = (pdg[i,j] & 0xF000 == 0x2000)
121 is_muon = (pdg[i,j] & 0xFFF == 13)
123 is_vtx = (vtx_map[i,j] > 0)
125 x_start = np.max([0, i - PATCH_SIZE_W/2])
126 x_stop = np.min([raw.shape[0], x_start + PATCH_SIZE_W])
128 y_start = np.max([0, j - PATCH_SIZE_D/2])
129 y_stop = np.min([raw.shape[1], y_start + PATCH_SIZE_D])
131 if x_stop - x_start != PATCH_SIZE_W
or y_stop - y_start != PATCH_SIZE_D:
134 is_mu_near_stop =
False 136 pdg_patch = pdg_michel[x_start+2:x_stop-2, y_start+2:y_stop-2]
137 if np.count_nonzero(pdg_patch) > 2:
138 is_mu_near_stop =
True 139 sel_mu_near_stop += 1
141 vtx_patch = vtx_map[x_start+2:x_stop-2, y_start+2:y_stop-2]
142 near_vtx_count = np.count_nonzero(vtx_patch)
144 nuvtx_patch = vtx_patch & 0x4
145 is_near_nu = (np.count_nonzero(nuvtx_patch) > 0)
147 eff_patch_fraction = patch_fraction
148 if near_vtx_count > 0
and eff_patch_fraction < 40:
149 eff_patch_fraction = 40
152 if not(is_michel | is_mu_near_stop | is_vtx | is_near_nu)
and np.random.randint(10000) >
int(100*eff_patch_fraction):
continue 154 track_pixels = np.count_nonzero(tracks[x_start:x_stop, y_start:y_stop])
155 shower_pixels = np.count_nonzero(showers[x_start:x_stop, y_start:y_stop])
157 target = np.zeros(4, dtype=np.int32)
160 if is_raw_zero:
continue 162 if shower_pixels < 8
and near_vtx_count == 0
and not(is_near_nu):
163 if np.random.randint(10000) >
int(100*clean_track_fraction):
continue 164 else: sel_clean_trk += 1
167 if not(is_mu_near_stop)
and np.random.randint(10000) >
int(100*muon_track_fraction):
continue 171 elif showers[i,j] == 1:
173 if doing_nue
and not(is_near_nu):
174 if near_vtx_count == 0
and np.random.randint(100) < 40:
continue 175 if shower_pixels > 0.05*patch_area
and np.random.randint(100) < 50:
continue 176 if shower_pixels > 0.20*patch_area
and np.random.randint(100) < 90:
continue 177 if is_raw_zero:
continue 186 if np.random.randint(10000) <
int(100*empty_fraction):
187 nclose = np.count_nonzero(showers[i-2:i+3, j-2:j+3])
188 nclose += np.count_nonzero(tracks[i-2:i+3, j-2:j+3])
190 npix = shower_pixels + track_pixels
201 if np.count_nonzero(target) == 0:
202 print 'NEED A LABEL IN THE TARGET!!!' 205 if cnt_ind < max_capacity:
206 db[cnt_ind] =
get_patch(raw, i, j, PATCH_SIZE_W, PATCH_SIZE_D)
207 db_y[cnt_ind] = target
210 print 'MAX CAPACITY REACHED!!!' 213 print 'Selected: tracks', sel_trk,
'showers', sel_sh,
'empty', sel_empty,
'/// muons', sel_muon,
'michel', sel_michel,
'clean trk', sel_clean_trk,
'near nu', sel_near_nu
215 print 'Added', cnt_ind,
'tracks:', cnt_trk,
'showers:', cnt_sh,
'michels:', cnt_michel,
'empty:', cnt_void
217 np.save(OUTPUT_DIR+
'/db_view_'+
str(selected_view_idx)+
'_x', db[:cnt_ind])
218 np.save(OUTPUT_DIR+
'/db_view_'+
str(selected_view_idx)+
'_y', db_y[:cnt_ind])
220 if __name__ ==
"__main__":
def get_patch(a, wire, drift, wsize, dsize)
def get_data(folder, fname, drift_margin=0, crop=True, blur=None, white_noise=0, coherent_noise=0)