5 from os.path 
import isfile, join
     9 from utils 
import read_config, get_data, get_patch
    13     parser = argparse.ArgumentParser(description=
'Makes training data set for EM vs track separation')
    14     parser.add_argument(
'-c', 
'--config', help=
"JSON with script configuration", default=
'config.json')
    15     parser.add_argument(
'-t', 
'--type', help=
"Input file format")
    16     parser.add_argument(
'-i', 
'--input', help=
"Input directory")
    17     parser.add_argument(
'-o', 
'--output', help=
"Output directory")
    18     args = parser.parse_args()
    22     print '#'*50,
'\nPrepare data for CNN'    23     if args.type 
is None: INPUT_TYPE   = config[
'prepare_data_em_track'][
'input_type']
    24     else: INPUT_TYPE = args.type
    25     if args.input 
is None: INPUT_DIR   = config[
'prepare_data_em_track'][
'input_dir']
    26     else: INPUT_DIR = args.input
    27     if args.output 
is None: OUTPUT_DIR = config[
'prepare_data_em_track'][
'output_dir']
    28     else: OUTPUT_DIR = args.output
    30     PATCH_SIZE_W = config[
'prepare_data_em_track'][
'patch_size_w']
    31     PATCH_SIZE_D = config[
'prepare_data_em_track'][
'patch_size_d']
    33     print 'Using %s as input dir, and %s as output dir' % (INPUT_DIR, OUTPUT_DIR)
    34     if INPUT_TYPE == 
'root': 
print 'Reading from ROOT file'    35     else: 
print 'Reading from TEXT files'    38     doing_nue = config[
'prepare_data_em_track'][
'doing_nue']                       
    39     selected_view_idx = config[
'prepare_data_em_track'][
'selected_view_idx']       
    40     patch_fraction = config[
'prepare_data_em_track'][
'patch_fraction']             
    41     empty_fraction = config[
'prepare_data_em_track'][
'empty_fraction']             
    42     clean_track_fraction = config[
'prepare_data_em_track'][
'clean_track_fraction'] 
    43     muon_track_fraction = config[
'prepare_data_em_track'][
'muon_track_fraction']   
    44     crop_event = config[
'prepare_data_em_track'][
'crop_event']                     
    46     blur_kernel = np.asarray(config[
'prepare_data_em_track'][
'blur'])              
    47     white_noise = config[
'prepare_data_em_track'][
'noise']                         
    48     coherent_noise = config[
'prepare_data_em_track'][
'coherent']                   
    50     print 'Using', patch_fraction, 
'% of data from view', selected_view_idx
    51     print 'Using', muon_track_fraction, 
'% of muon points'    52     if doing_nue: 
print 'Neutrino mode, will skip more showers.'    54     print 'Blur kernel', blur_kernel, 
'noise RMS', white_noise
    56     max_capacity = 1700000
    57     db = np.zeros((max_capacity, PATCH_SIZE_W, PATCH_SIZE_D), dtype=np.float32)
    58     db_y = np.zeros((max_capacity, 4), dtype=np.int32)
    60     patch_area = PATCH_SIZE_W * PATCH_SIZE_D
    71     rootModule = 
'datadump'    73     if INPUT_TYPE == 
"root":
    74         fnames = [f 
for f 
in os.listdir(INPUT_DIR) 
if '.root' in f]
    76             rootFile = TFile(INPUT_DIR+
'/'+n)
    77             keys = [rootModule+
'/'+k.GetName()[:-4] 
for k 
in rootFile.Get(rootModule).GetListOfKeys() 
if '_raw' in k.GetName()]
    78             event_list.append((rootFile, keys))
    80         keys = [f[:-4] 
for f 
in os.listdir(INPUT_DIR) 
if '.raw' in f] 
    81         event_list.append((INPUT_DIR, keys)) 
    83     for entry 
in event_list:
    85         event_names = entry[1]
    87         for evname 
in event_names:
    88             finfo = evname.split(
'_')
    90             tpc_idx = 
int(finfo[8])
    91             view_idx = 
int(finfo[10])
    93             if view_idx != selected_view_idx: 
continue    96             print 'Process event', fcount, evname, 
'NO.', evt_no
    99             raw, deposit, pdg, tracks, showers = 
get_data(folder, evname, PATCH_SIZE_D/2 + 2, crop_event, blur_kernel, white_noise, coherent_noise)
   101                 print 'Skip empty event...'   104             pdg_michel = ((pdg & 0xF000) == 0x2000)
   105             vtx_map = (pdg >> 16)
   107             print 'Tracks', np.sum(tracks), 
'showers', np.sum(showers), 
'michels', np.sum(pdg_michel)
   117             for i 
in range(raw.shape[0]):
   118                 for j 
in range(raw.shape[1]):
   119                     is_raw_zero = (raw[i,j] < 0.01)
   120                     is_michel = (pdg[i,j] & 0xF000 == 0x2000) 
   121                     is_muon = (pdg[i,j] & 0xFFF == 13)
   123                     is_vtx = (vtx_map[i,j] > 0)
   125                     x_start = np.max([0, i - PATCH_SIZE_W/2])
   126                     x_stop  = np.min([raw.shape[0], x_start + PATCH_SIZE_W])
   128                     y_start = np.max([0, j - PATCH_SIZE_D/2])
   129                     y_stop  = np.min([raw.shape[1], y_start + PATCH_SIZE_D])
   131                     if x_stop - x_start != PATCH_SIZE_W 
or y_stop - y_start != PATCH_SIZE_D:
   134                     is_mu_near_stop = 
False   136                         pdg_patch = pdg_michel[x_start+2:x_stop-2, y_start+2:y_stop-2]
   137                         if np.count_nonzero(pdg_patch) > 2:
   138                             is_mu_near_stop = 
True   139                             sel_mu_near_stop += 1
   141                     vtx_patch = vtx_map[x_start+2:x_stop-2, y_start+2:y_stop-2]
   142                     near_vtx_count = np.count_nonzero(vtx_patch)
   144                     nuvtx_patch = vtx_patch & 0x4 
   145                     is_near_nu = (np.count_nonzero(nuvtx_patch) > 0)
   147                     eff_patch_fraction = patch_fraction
   148                     if near_vtx_count > 0 
and eff_patch_fraction < 40:
   149                         eff_patch_fraction = 40 
   152                     if not(is_michel | is_mu_near_stop | is_vtx | is_near_nu) 
and np.random.randint(10000) > 
int(100*eff_patch_fraction): 
continue   154                     track_pixels = np.count_nonzero(tracks[x_start:x_stop, y_start:y_stop])
   155                     shower_pixels = np.count_nonzero(showers[x_start:x_stop, y_start:y_stop])
   157                     target = np.zeros(4, dtype=np.int32)
   160                         if is_raw_zero: 
continue   162                         if shower_pixels < 8 
and near_vtx_count == 0 
and not(is_near_nu):
   163                             if np.random.randint(10000) > 
int(100*clean_track_fraction): 
continue   164                             else: sel_clean_trk += 1
   167                                 if not(is_mu_near_stop) 
and np.random.randint(10000) > 
int(100*muon_track_fraction): 
continue   171                     elif showers[i,j] == 1:
   173                         if doing_nue 
and not(is_near_nu): 
   174                             if near_vtx_count == 0 
and np.random.randint(100) < 40: 
continue     175                             if shower_pixels > 0.05*patch_area 
and np.random.randint(100) < 50: 
continue   176                             if shower_pixels > 0.20*patch_area 
and np.random.randint(100) < 90: 
continue   177                         if is_raw_zero: 
continue   186                         if np.random.randint(10000) < 
int(100*empty_fraction):
   187                             nclose = np.count_nonzero(showers[i-2:i+3, j-2:j+3])
   188                             nclose += np.count_nonzero(tracks[i-2:i+3, j-2:j+3])
   190                                 npix = shower_pixels + track_pixels
   201                     if np.count_nonzero(target) == 0:
   202                         print 'NEED A LABEL IN THE TARGET!!!'   205                     if cnt_ind < max_capacity:
   206                         db[cnt_ind] = 
get_patch(raw, i, j, PATCH_SIZE_W, PATCH_SIZE_D)
   207                         db_y[cnt_ind] = target
   210                         print 'MAX CAPACITY REACHED!!!'   213             print 'Selected: tracks', sel_trk, 
'showers', sel_sh, 
'empty', sel_empty, 
'/// muons', sel_muon, 
'michel', sel_michel, 
'clean trk', sel_clean_trk, 
'near nu', sel_near_nu
   215     print 'Added', cnt_ind, 
'tracks:', cnt_trk, 
'showers:', cnt_sh, 
'michels:', cnt_michel, 
'empty:', cnt_void
   217     np.save(OUTPUT_DIR+
'/db_view_'+
str(selected_view_idx)+
'_x', db[:cnt_ind])
   218     np.save(OUTPUT_DIR+
'/db_view_'+
str(selected_view_idx)+
'_y', db_y[:cnt_ind])
   220 if __name__ == 
"__main__":
 def get_patch(a, wire, drift, wsize, dsize)
def get_data(folder, fname, drift_margin=0, crop=True, blur=None, white_noise=0, coherent_noise=0)