utils.py
Go to the documentation of this file.
1 from ROOT import TFile
2 from root_numpy import hist2array
3 
4 import numpy as np
5 from os import listdir
6 from os.path import isfile, join
7 import os, json
8 
9 def count_events(folder, key):
10  nevents = 0
11  dlist = [f for f in listdir(folder) if key in f]
12  dlist.sort()
13  for dirname in dlist:
14  flist = [f for f in listdir(folder + '/' + dirname) if '_y.npy' in f]
15  for fname in flist:
16  d = np.load(folder + '/' + dirname + '/' + fname)
17  nevents += d.shape[0]
18  return nevents
19 
20 def get_patch_size(folder):
21  dlist = [f for f in listdir(folder) if 'training' in f]
22  flist = [f for f in listdir(folder + '/' + dlist[0]) if '_x.npy' in f]
23  d = np.load(folder + '/' + dlist[0] + '/' + flist[0])
24  return d.shape[1], d.shape[2]
25 
26 def get_event_bounds(A, drift_margin = 0):
27  # get center with 99% of signal
28  cum = np.cumsum(np.sum(A, axis=0))
29  start_ind = np.max([0, np.where(cum > cum[-1]*0.005)[0][0] - drift_margin])
30  end_ind = np.min([A.shape[1], np.where(cum > cum[-1]*0.995)[0][0] + drift_margin])
31  return start_ind, end_ind
32 
33 def get_data(folder, fname, drift_margin = 0, crop = True, blur = None, white_noise = 0, coherent_noise = 0):
34  print 'Reading', fname
35  try:
36  if isinstance(folder, TFile): # read from ROOT file
37  A_raw = hist2array(folder.Get(fname + '_raw'))
38  A_deposit = hist2array(folder.Get(fname + '_deposit'))
39  A_pdg = hist2array(folder.Get(fname + '_pdg'))
40  else: # read text files
41  A_raw = np.genfromtxt(folder+'/'+fname + '.raw', delimiter=' ', dtype=np.float32)
42  A_deposit = np.genfromtxt(folder+'/'+fname + '.deposit', delimiter=' ', dtype=np.float32)
43  A_pdg = np.genfromtxt(folder+'/'+fname + '.pdg', delimiter=' ', dtype=np.int32)
44  except:
45  print 'Bad event, return empty arrays'
46  return None, None, None, None, None
47 
48  if A_raw.shape[0] < 8 or A_raw.shape[1] < 8: return None, None, None, None, None
49 
50  test_pdg = np.sum(A_pdg)
51  test_dep = np.sum(A_deposit)
52  test_raw = np.sum(A_raw)
53  if test_raw == 0.0 or test_dep == 0.0 or test_pdg == 0: return None, None, None, None, None
54 
55  print test_raw, test_dep, test_pdg
56  #assert np.sum(A_deposit) > 0
57  # get main event body (99% signal)
58  if crop:
59  evt_start_ind, evt_stop_ind = get_event_bounds(A_deposit, drift_margin)
60  A_raw = A_raw[:,evt_start_ind:evt_stop_ind]
61  A_deposit = A_deposit[:,evt_start_ind:evt_stop_ind]
62  A_pdg = A_pdg[:,evt_start_ind:evt_stop_ind]
63  else:
64  evt_start_ind = 0
65  evt_stop_ind = A_raw.shape[1]
66  print evt_start_ind, evt_stop_ind
67 
68  A_raw = applyBlur(A_raw, blur)
69  A_raw = addWhiteNoise(A_raw, white_noise)
70  A_raw = addCoherentNoise(A_raw, coherent_noise)
71 
72  deposit_th_ind = A_deposit < 2.0e-5
73  A_pdg[deposit_th_ind] = 0
74  tracks = A_pdg.copy()
75  showers = A_pdg.copy()
76  tracks[(A_pdg & 0x0FFF) == 11] = 0
77  tracks[tracks > 0] = 1
78  showers[(A_pdg & 0x0FFF) != 11] = 0
79  showers[showers > 0] = 1
80  return A_raw, A_deposit, A_pdg, tracks, showers
81 
82 
83 # MUST give the same result as nnet::DataProviderAlg::applyBlur() in PointIdAlg/PointIdAlg.cxx
84 def applyBlur(a, kernel):
85  if kernel is None or kernel.shape[0] < 2: return a
86 
87  margin_left = kernel.shape[0] >> 1
88  margin_right = kernel.shape[0] - margin_left - 1
89  src = np.copy(a)
90  for w in range(margin_left, a.shape[0] - margin_right):
91  for d in range(a.shape[1]):
92  s = 0.
93  for i in range(kernel.shape[0]):
94  s += kernel[i] * src[w + i - margin_left, d]
95  a[w, d] = s
96  return a
97 
98 # MUST give the same result as nnet::DataProviderAlg::addWhiteNoise() in PointIdAlg/PointIdAlg.cxx
99 # so sigma here should be "effective": divided by ADC scaling (constant, 10) and downsampling factor
100 def addWhiteNoise(a, sigma):
101  if sigma is None or sigma == 0: return a
102 
103  a += np.random.normal(0, sigma, a.shape)
104 
105  return a
106 
107 # MUST give the same result as nnet::DataProviderAlg::addCoherentNoise() in PointIdAlg/PointIdAlg.cxx
108 # so sigma here should be "effective": divided by ADC scaling (constant, 10) and downsampling factor
109 def addCoherentNoise(a, sigma):
110  if sigma is None or sigma == 0: return a
111 
112  a += np.random.normal(0, sigma, a.shape)
113 
114  amps1 = np.random.normal(1, 0.1, a.shape[0]);
115  amps2 = np.random.normal(1, 0.1, 1 + (a.shape[0] >> 5));
116 
117  group_amp = 1
118  for w in range(a.shape[0]):
119  if (w & 31) == 0:
120  noise = np.random.normal(0, sigma, a.shape[1])
121  group_amp = amps2[w >> 5]
122  a[w] += group_amp * amps1[w] * noise
123 
124  return a
125 
126 # MUST give the same result as nnet::PointIdAlg::bufferPatch() in PointIdAlg/PointIdAlg.cxx
127 def get_patch(a, wire, drift, wsize, dsize):
128  halfSizeW = wsize / 2;
129  halfSizeD = dsize / 2;
130 
131  w0 = wire - halfSizeW;
132  w1 = wire + halfSizeW;
133 
134  d0 = drift - halfSizeD;
135  d1 = drift + halfSizeD;
136 
137  patch = np.zeros((wsize, dsize), dtype=np.float32)
138 
139  wpatch = 0
140  for w in range(w0, w1):
141  if w >= 0 and w < a.shape[0]:
142  dpatch = 0
143  for d in range(d0, d1):
144  if d >= 0 and d < a.shape[1]:
145  patch[wpatch,dpatch] = a[w,d];
146  dpatch += 1
147  wpatch += 1
148 
149  return patch
150 
152  max_count = A.shape[0]*A.shape[1] / 4 # rather not more than 25% of plane filled with vertices
153  vtx = np.zeros((max_count, 3), dtype=np.int32)
154  nvtx = 0
155  for i in range(A.shape[0]):
156  for j in range(A.shape[1]):
157  if nvtx >= max_count: break
158  if (A[i,j] & 0xFF000000) > 0:
159  t = A[i,j] >> 24
160  v = np.zeros(3)
161  v[0] = i
162  v[1] = j
163  v[2] = t
164  vtx[nvtx] = v
165  nvtx += 1
166  return vtx[:nvtx]
167 
169  max_count = 10 # 10 vertices per view shoud be enough...
170  vtx = np.zeros((max_count, 3), dtype=np.int32)
171  nvtx = 0
172  for i in range(A.shape[0]):
173  for j in range(A.shape[1]):
174  if nvtx >= max_count: break
175  if (A[i,j] & 0xFF0000) > 0:
176  t = (A[i,j] >> 16) & 0xFF
177  v = np.zeros(3)
178  v[0] = i
179  v[1] = j
180  v[2] = t
181  vtx[nvtx] = v
182  nvtx += 1
183  return vtx[:nvtx]
184 
186  assert len(a) == len(b)
187  rng_state = np.random.get_state()
188  np.random.shuffle(a)
189  np.random.set_state(rng_state)
190  np.random.shuffle(b)
191 
192 def read_config(cfgname):
193  config = None
194  with open(cfgname, 'r') as fin:
195  config = json.loads(fin.read());
196  if config is None:
197  print 'This script requires configuration file: config.json'
198  exit(1)
199  return config
200 
def get_event_bounds(A, drift_margin=0)
Definition: utils.py:26
def get_vertices(A)
Definition: utils.py:151
int open(const char *, int)
Opens a file descriptor.
def get_nu_vertices(A)
Definition: utils.py:168
def addWhiteNoise(a, sigma)
Definition: utils.py:100
def applyBlur(a, kernel)
Definition: utils.py:84
def count_events(folder, key)
Definition: utils.py:9
def get_patch(a, wire, drift, wsize, dsize)
Definition: utils.py:127
def addCoherentNoise(a, sigma)
Definition: utils.py:109
def read_config(cfgname)
Definition: utils.py:192
def shuffle_in_place(a, b)
Definition: utils.py:185
def get_patch_size(folder)
Definition: utils.py:20
def get_data(folder, fname, drift_margin=0, crop=True, blur=None, white_noise=0, coherent_noise=0)
Definition: utils.py:33