prepare_patches_em-trk-michel-none.py
Go to the documentation of this file.
1 from ROOT import TFile
2 import numpy as np
3 from sys import argv
4 from os import listdir
5 from os.path import isfile, join
6 import os, json
7 import argparse
8 
9 from utils import read_config, get_data, get_patch
10 
11 def main(argv):
12 
13  parser = argparse.ArgumentParser(description='Makes training data set for EM vs track separation')
14  parser.add_argument('-c', '--config', help="JSON with script configuration", default='config.json')
15  parser.add_argument('-t', '--type', help="Input file format")
16  parser.add_argument('-i', '--input', help="Input directory")
17  parser.add_argument('-o', '--output', help="Output directory")
18  args = parser.parse_args()
19 
20  config = read_config(args.config)
21 
22  print '#'*50,'\nPrepare data for CNN'
23  if args.type is None: INPUT_TYPE = config['prepare_data_em_track']['input_type']
24  else: INPUT_TYPE = args.type
25  if args.input is None: INPUT_DIR = config['prepare_data_em_track']['input_dir']
26  else: INPUT_DIR = args.input
27  if args.output is None: OUTPUT_DIR = config['prepare_data_em_track']['output_dir']
28  else: OUTPUT_DIR = args.output
29 
30  PATCH_SIZE_W = config['prepare_data_em_track']['patch_size_w']
31  PATCH_SIZE_D = config['prepare_data_em_track']['patch_size_d']
32 
33  print 'Using %s as input dir, and %s as output dir' % (INPUT_DIR, OUTPUT_DIR)
34  if INPUT_TYPE == 'root': print 'Reading from ROOT file'
35  else: print 'Reading from TEXT files'
36  print '#'*50
37 
38  doing_nue = config['prepare_data_em_track']['doing_nue'] # set to true for nu_e events (will skip more showers)
39  selected_view_idx = config['prepare_data_em_track']['selected_view_idx'] # set the view id
40  patch_fraction = config['prepare_data_em_track']['patch_fraction'] # percent of used patches
41  empty_fraction = config['prepare_data_em_track']['empty_fraction'] # percent of "empty background" patches
42  clean_track_fraction = config['prepare_data_em_track']['clean_track_fraction'] # percent of selected patches, where only a clean track is present
43  muon_track_fraction = config['prepare_data_em_track']['muon_track_fraction'] # ***** new: preselect muos, they are many *****
44  crop_event = config['prepare_data_em_track']['crop_event'] # use true only if no crop on LArSoft level and not a noise dump
45 
46  blur_kernel = np.asarray(config['prepare_data_em_track']['blur']) # add blur in wire direction with given kernel if it is not empty (only for tests)
47  white_noise = config['prepare_data_em_track']['noise'] # add gauss noise with given sigma if value > 0 (only for tests)
48  coherent_noise = config['prepare_data_em_track']['coherent'] # add coherent (groups of 32 wires) gauss noise with given sigma if value > 0 (only for tests)
49 
50  print 'Using', patch_fraction, '% of data from view', selected_view_idx
51  print 'Using', muon_track_fraction, '% of muon points'
52  if doing_nue: print 'Neutrino mode, will skip more showers.'
53 
54  print 'Blur kernel', blur_kernel, 'noise RMS', white_noise
55 
56  max_capacity = 1700000
57  db = np.zeros((max_capacity, PATCH_SIZE_W, PATCH_SIZE_D), dtype=np.float32)
58  db_y = np.zeros((max_capacity, 4), dtype=np.int32)
59 
60  patch_area = PATCH_SIZE_W * PATCH_SIZE_D
61 
62  cnt_ind = 0
63  cnt_trk = 0
64  cnt_sh = 0
65  cnt_michel = 0
66  cnt_void = 0
67 
68  fcount = 0
69 
70  rootFile = None
71  rootModule = 'datadump'
72  event_list = []
73  if INPUT_TYPE == "root":
74  fnames = [f for f in os.listdir(INPUT_DIR) if '.root' in f]
75  for n in fnames:
76  rootFile = TFile(INPUT_DIR+'/'+n)
77  keys = [rootModule+'/'+k.GetName()[:-4] for k in rootFile.Get(rootModule).GetListOfKeys() if '_raw' in k.GetName()]
78  event_list.append((rootFile, keys))
79  else:
80  keys = [f[:-4] for f in os.listdir(INPUT_DIR) if '.raw' in f] # only main part of file name, without extension
81  event_list.append((INPUT_DIR, keys)) # single entry in the list of txt files
82 
83  for entry in event_list:
84  folder = entry[0]
85  event_names = entry[1]
86 
87  for evname in event_names:
88  finfo = evname.split('_')
89  evt_no = finfo[2]
90  tpc_idx = int(finfo[8])
91  view_idx = int(finfo[10])
92 
93  if view_idx != selected_view_idx: continue
94  fcount += 1
95 
96  print 'Process event', fcount, evname, 'NO.', evt_no
97 
98  # get clipped data, margin depends on patch size in drift direction
99  raw, deposit, pdg, tracks, showers = get_data(folder, evname, PATCH_SIZE_D/2 + 2, crop_event, blur_kernel, white_noise, coherent_noise)
100  if raw is None:
101  print 'Skip empty event...'
102  continue
103 
104  pdg_michel = ((pdg & 0xF000) == 0x2000)
105  vtx_map = (pdg >> 16)
106 
107  print 'Tracks', np.sum(tracks), 'showers', np.sum(showers), 'michels', np.sum(pdg_michel)
108 
109  sel_trk = 0
110  sel_sh = 0
111  sel_muon = 0
112  sel_mu_near_stop = 0
113  sel_michel = 0
114  sel_empty = 0
115  sel_clean_trk = 0
116  sel_near_nu = 0
117  for i in range(raw.shape[0]):
118  for j in range(raw.shape[1]):
119  is_raw_zero = (raw[i,j] < 0.01)
120  is_michel = (pdg[i,j] & 0xF000 == 0x2000) # has michel flag set, wont skip it
121  is_muon = (pdg[i,j] & 0xFFF == 13)
122 
123  is_vtx = (vtx_map[i,j] > 0)
124 
125  x_start = np.max([0, i - PATCH_SIZE_W/2])
126  x_stop = np.min([raw.shape[0], x_start + PATCH_SIZE_W])
127 
128  y_start = np.max([0, j - PATCH_SIZE_D/2])
129  y_stop = np.min([raw.shape[1], y_start + PATCH_SIZE_D])
130 
131  if x_stop - x_start != PATCH_SIZE_W or y_stop - y_start != PATCH_SIZE_D:
132  continue
133 
134  is_mu_near_stop = False
135  if is_muon:
136  pdg_patch = pdg_michel[x_start+2:x_stop-2, y_start+2:y_stop-2]
137  if np.count_nonzero(pdg_patch) > 2:
138  is_mu_near_stop = True
139  sel_mu_near_stop += 1
140 
141  vtx_patch = vtx_map[x_start+2:x_stop-2, y_start+2:y_stop-2]
142  near_vtx_count = np.count_nonzero(vtx_patch)
143 
144  nuvtx_patch = vtx_patch & 0x4 # any nu primary vtx
145  is_near_nu = (np.count_nonzero(nuvtx_patch) > 0)
146 
147  eff_patch_fraction = patch_fraction
148  if near_vtx_count > 0 and eff_patch_fraction < 40:
149  eff_patch_fraction = 40 # min 40% of points near vertices
150 
151  # randomly skip fraction of patches
152  if not(is_michel | is_mu_near_stop | is_vtx | is_near_nu) and np.random.randint(10000) > int(100*eff_patch_fraction): continue
153 
154  track_pixels = np.count_nonzero(tracks[x_start:x_stop, y_start:y_stop])
155  shower_pixels = np.count_nonzero(showers[x_start:x_stop, y_start:y_stop])
156 
157  target = np.zeros(4, dtype=np.int32)
158  if tracks[i,j] == 1:
159  target[0] = 1 # label as a track-like
160  if is_raw_zero: continue
161  # skip fraction of almost-track-only patches
162  if shower_pixels < 8 and near_vtx_count == 0 and not(is_near_nu):
163  if np.random.randint(10000) > int(100*clean_track_fraction): continue
164  else: sel_clean_trk += 1
165  else:
166  if is_muon:
167  if not(is_mu_near_stop) and np.random.randint(10000) > int(100*muon_track_fraction): continue
168  sel_muon += 1
169  cnt_trk += 1
170  sel_trk += 1
171  elif showers[i,j] == 1:
172  target[1] = 1 # label as a em-like
173  if doing_nue and not(is_near_nu): # for nu_e events (lots of showers) skip some fraction of shower patches
174  if near_vtx_count == 0 and np.random.randint(100) < 40: continue # skip 40% of any shower
175  if shower_pixels > 0.05*patch_area and np.random.randint(100) < 50: continue
176  if shower_pixels > 0.20*patch_area and np.random.randint(100) < 90: continue
177  if is_raw_zero: continue
178  if is_michel:
179  target[2] = 1 # additionally label as a michel electron
180  cnt_michel += 1
181  sel_michel += 1
182  cnt_sh += 1
183  sel_sh += 1
184  else: # use small fraction of empty-but-close-to-something patches
185  target[3] = 1 # label an empty pixel
186  if np.random.randint(10000) < int(100*empty_fraction):
187  nclose = np.count_nonzero(showers[i-2:i+3, j-2:j+3])
188  nclose += np.count_nonzero(tracks[i-2:i+3, j-2:j+3])
189  if nclose == 0:
190  npix = shower_pixels + track_pixels
191  if npix > 6:
192  cnt_void += 1
193  sel_empty += 1
194  else: continue # completely empty patch
195  else: continue # too close to trk/shower
196  else: continue # not selected randomly
197 
198  if is_near_nu:
199  sel_near_nu += 1
200 
201  if np.count_nonzero(target) == 0:
202  print 'NEED A LABEL IN THE TARGET!!!'
203  continue
204 
205  if cnt_ind < max_capacity:
206  db[cnt_ind] = get_patch(raw, i, j, PATCH_SIZE_W, PATCH_SIZE_D)
207  db_y[cnt_ind] = target
208  cnt_ind += 1
209  else:
210  print 'MAX CAPACITY REACHED!!!'
211  break
212 
213  print 'Selected: tracks', sel_trk, 'showers', sel_sh, 'empty', sel_empty, '/// muons', sel_muon, 'michel', sel_michel, 'clean trk', sel_clean_trk, 'near nu', sel_near_nu
214 
215  print 'Added', cnt_ind, 'tracks:', cnt_trk, 'showers:', cnt_sh, 'michels:', cnt_michel, 'empty:', cnt_void
216 
217  np.save(OUTPUT_DIR+'/db_view_'+str(selected_view_idx)+'_x', db[:cnt_ind])
218  np.save(OUTPUT_DIR+'/db_view_'+str(selected_view_idx)+'_y', db_y[:cnt_ind])
219 
220 if __name__ == "__main__":
221  main(argv)
222 
def get_patch(a, wire, drift, wsize, dsize)
Definition: utils.py:127
def read_config(cfgname)
Definition: utils.py:192
def get_data(folder, fname, drift_margin=0, crop=True, blur=None, white_noise=0, coherent_noise=0)
Definition: utils.py:33
static QCString str