Public Member Functions | Public Attributes | Private Member Functions | List of all members
data_generator.DataGenerator Class Reference
Inheritance diagram for data_generator.DataGenerator:

Public Member Functions

def __init__ (self, cells=500, planes=500, views=3, batch_size=32, branches=True, outputs=7, standardize=True, images_path='/', shuffle=True, test_values=[])
 
def generate (self, labels, list_IDs, yield_labels=True)
 
def sparsify1 (self, y)
 
def sparsify2 (self, y)
 
def sparsify3 (self, y)
 
def normalize (self, value, obj)
 
def sparsify5 (self, y)
 
def sparsify7 (self, y)
 

Public Attributes

 cells
 
 planes
 
 views
 
 batch_size
 
 branches
 
 outputs
 
 images_path
 
 standardize
 
 shuffle
 
 test_values
 

Private Member Functions

def __get_exploration_order (self, list_IDs)
 
def __data_generation (self, labels, list_IDs_temp, yield_labels)
 

Detailed Description

Definition at line 13 of file data_generator.py.

Constructor & Destructor Documentation

def data_generator.DataGenerator.__init__ (   self,
  cells = 500,
  planes = 500,
  views = 3,
  batch_size = 32,
  branches = True,
  outputs = 7,
  standardize = True,
  images_path = '/',
  shuffle = True,
  test_values = [] 
)

Definition at line 21 of file data_generator.py.

def __init__(self, cells=500, planes=500, views=3, batch_size=32, branches=True,
             outputs=7, standardize=True, images_path='/', shuffle=True, test_values=None):
    """Store the generator configuration on the instance.

    Args:
        cells: last dimension of each decoded image view.
        planes: middle dimension of each decoded image view.
        views: number of views stored per image file.
        batch_size: number of samples per generated batch.
        branches: if true, batches are a list with one array per view;
            otherwise a single array with the views on the last axis.
        outputs: number of label outputs (selects the sparsify routine).
        standardize: if true, pixels are converted to float32 and divided by 255.
        images_path: root directory that image and info files are read from.
        shuffle: whether the sample order is shuffled on every pass.
        test_values: list that receives one dict per sample when batches are
            generated without labels. Defaults to a new empty list.
    """
    self.cells = cells
    self.planes = planes
    self.views = views
    self.batch_size = batch_size
    self.branches = branches
    self.outputs = outputs
    self.images_path = images_path
    self.standardize = standardize
    self.shuffle = shuffle
    # A `[]` default would be one list shared by every instance created
    # without this argument. A caller-supplied list is stored by reference
    # (not copied) so the caller still sees the entries appended later.
    self.test_values = [] if test_values is None else test_values
33 

Member Function Documentation

def data_generator.DataGenerator.__data_generation (   self,
  labels,
  list_IDs_temp,
  yield_labels 
)
private

Definition at line 85 of file data_generator.py.

def __data_generation(self, labels, list_IDs_temp, yield_labels):
    """Read one batch of images from disk and return it, with labels if requested.

    For each ID the image is read from
    ``<images_path>/<prefix>/images/<ID>.gz``, where ``<prefix>`` is the part
    of the ID before the first '.' with any leading 'a' characters stripped.
    The file holds zlib-compressed uint8 data that is reshaped to
    (views, planes, cells).

    Args:
        labels: mapping from sample ID to its label value(s).
        list_IDs_temp: IDs (strings) of the samples in this batch.
        yield_labels: if true, return labels with the images; if false,
            return only the images and append one record per sample to
            ``self.test_values`` (read from ``<ID>.info``).

    Returns:
        ``(X, y)`` when ``yield_labels`` is true, where ``y`` is the output of
        ``sparsify1`` / ``sparsify5`` / ``sparsify7`` depending on
        ``self.outputs``; otherwise just ``X``. ``X`` is a list of
        ``views`` arrays of shape (batch_size, planes, cells, 1) when
        ``self.branches`` is true, else one array of shape
        (batch_size, planes, cells, views).

    Note:
        The buffers are allocated with ``np.empty`` for ``batch_size`` samples;
        rows beyond ``len(list_IDs_temp)`` are left uninitialized.
    """
    # Initialization
    if self.branches:
        # one single-channel input array per view
        X = [np.empty((self.batch_size, self.planes, self.cells, 1))
             for _ in range(self.views)]
    else:
        # X shouldn't be a list because there is only one branch
        X = np.empty((self.batch_size, self.planes, self.cells, self.views))

    if yield_labels:
        # only include the labels when requested (train, validation)
        if self.outputs == 1:
            y = np.empty((self.batch_size), dtype=int)
        else:
            y = np.empty((self.batch_size, self.outputs), dtype=int)

    # Generate data
    for i, ID in enumerate(list_IDs_temp):
        sample_dir = self.images_path + '/' + ID.split('.')[0].lstrip('a')

        # Decompress image into pixel NumPy tensor.
        # np.frombuffer replaces the deprecated binary mode of np.fromstring;
        # the resulting array is read-only, which is fine because it is only
        # copied from (astype / assignment into X) below.
        with open(sample_dir + '/images/' + ID + '.gz', 'rb') as image_file:
            raw = zlib.decompress(image_file.read())
        pixels = np.frombuffer(raw, dtype=np.uint8).reshape(self.views, self.planes, self.cells)

        if self.standardize:
            pixels = pixels.astype('float32')  # 32-bit precision floating-point pixel image
            pixels /= 255.  # pixel range from 0 to 1

        # Store volume
        if self.branches:
            for view in range(self.views):
                X[view][i, :, :, :] = pixels[view, :, :].reshape(self.planes, self.cells, 1)
        else:
            pixels = np.rollaxis(pixels, 0, 3)  # from 'channels_first' to 'channels_last'
            X[i, :, :, :] = pixels

        # get y value
        y_value = labels[ID]

        if yield_labels:
            # store class/label (train, validation)
            y[i] = y_value
        else:
            # store actual label and energy values (used for the confusion matrix and normalization)
            with open(sample_dir + '/info/' + ID + '.info', 'rb') as info_file:
                energy_values = info_file.readlines()
            self.test_values.append({'y_value': y_value,
                                     'fNuEnergy': float(energy_values[1]),
                                     'fLepEnergy': float(energy_values[2]),
                                     'fRecoNueEnergy': float(energy_values[3]),
                                     'fRecoNumuEnergy': float(energy_values[4]),
                                     'fEventWeight': float(energy_values[5])})

    if yield_labels:
        # return X and Y (train, validation)
        if self.outputs == 1:
            return X, self.sparsify1(y)
        if self.outputs == 5:
            return X, self.sparsify5(y)
        return X, self.sparsify7(y)

    # return X (test, predictions)
    return X
153 
int open(const char *, int)
Opens a file descriptor.
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iteration.
Definition: enumerate.h:69
def __data_generation(self, labels, list_IDs_temp, yield_labels)
def data_generator.DataGenerator.__get_exploration_order (   self,
  list_IDs 
)
private

Definition at line 70 of file data_generator.py.

def __get_exploration_order(self, list_IDs):
    """Return the index order in which `list_IDs` is visited.

    The result is ``0 .. len(list_IDs) - 1`` as a NumPy array, shuffled in
    place with ``np.random.shuffle`` when ``self.shuffle`` equals True.
    """
    order = np.arange(len(list_IDs))

    # Kept as an equality test (not plain truthiness) so that only values
    # equal to True enable shuffling, exactly as before.
    if not (self.shuffle == True):
        return order

    np.random.shuffle(order)
    return order
80 
def __get_exploration_order(self, list_IDs)
def data_generator.DataGenerator.generate (   self,
  labels,
  list_IDs,
  yield_labels = True 
)

Definition at line 37 of file data_generator.py.

def generate(self, labels, list_IDs, yield_labels=True):
    """Yield batches of samples forever.

    Each pass over the data asks for a new exploration order, splits it into
    ``len(list_IDs) // batch_size`` full batches (a trailing partial batch
    is dropped) and yields one batch at a time.

    Args:
        labels: mapping from sample ID to label value(s), forwarded to the
            batch builder.
        list_IDs: sequence of sample IDs to draw batches from.
        yield_labels: if true, yield ``(X, y)`` pairs (train, validation);
            otherwise yield only ``X`` (test, predictions).

    Yields:
        ``(X, y)`` or ``X``, as produced by the batch builder.

    Raises:
        ValueError: on first iteration, if `list_IDs` holds fewer samples
            than one batch. Without this check the loop below would spin
            forever without ever yielding.
    """
    # Infinite loop
    while True:
        # Generate order of exploration of dataset (to make each epoch different)
        indexes = self.__get_exploration_order(list_IDs)

        # number of full batches in this pass
        n_batches = len(indexes) // self.batch_size

        if n_batches == 0:
            raise ValueError(
                'generate() needs at least batch_size=%d samples, got %d'
                % (self.batch_size, len(indexes)))

        for batch in range(n_batches):
            # Find list of IDs for one batch
            start = batch * self.batch_size
            list_IDs_temp = [list_IDs[k] for k in indexes[start:start + self.batch_size]]

            # Generate data
            if yield_labels:
                # Train, validation
                X, y = self.__data_generation(labels, list_IDs_temp, yield_labels)

                yield X, y
            else:
                # Test, predictions
                X = self.__data_generation(labels, list_IDs_temp, yield_labels)

                yield X
63 
def generate(self, labels, list_IDs, yield_labels=True)
def __get_exploration_order(self, list_IDs)
def __data_generation(self, labels, list_IDs_temp, yield_labels)
def data_generator.DataGenerator.normalize (   self,
  value,
  obj 
)

Definition at line 204 of file data_generator.py.

def normalize(self, value, obj):
    """Write `value` into the label row `obj`, in place.

    If `value` is -1, or `obj` has a single element, every element of `obj`
    is set to `value`. Otherwise `obj` is indexed by `value` and that entry
    is set to 1. Nothing is returned.
    """
    as_one_hot = value != -1 and obj.size != 1

    if as_one_hot:
        obj[value] = 1
        return

    obj.fill(value)
209 
def normalize(self, value, obj)
def data_generator.DataGenerator.sparsify1 (   self,
  y 
)

Definition at line 161 of file data_generator.py.

def sparsify1(self, y):
    """Return the labels in `y` as rows of a 13-column binary NumPy array.

    Row i has a 1 in column ``y[i]``; a label of 13 is folded into column 12.
    Labels that match no column produce an all-zero row.
    """
    rows = []

    for i in range(y.shape[0]):
        label = y[i]
        # 13 shares the last column with 12
        hot = 12 if label == 13 else label
        rows.append([1 if hot == column else 0 for column in range(13)])

    return np.array(rows)
164 
def data_generator.DataGenerator.sparsify2 (   self,
  y 
)

Definition at line 165 of file data_generator.py.

def sparsify2(self, y):
    """Split each label in `y` into two 4-column binary rows.

    Returns a two-element list of integer arrays of shape (len(y), 4):
    the first is indexed by ``label // 4`` (CC Numu, CC Nue, CC Nutau),
    the second by ``label % 4`` (CC QE, CC Res, CC DIS, CC Other).
    For label 12 the second row is filled with -1 instead.
    """
    count = y.shape[0]
    flavour = np.zeros((count, 4), dtype=int)
    interaction = np.zeros((count, 4), dtype=int)

    for row in range(count):
        label = y[row]
        flavour[row][label // 4] = 1

        if label == 12:
            # no interaction type for this label: mark the whole row
            interaction[row] = [-1, -1, -1, -1]
        else:
            interaction[row][label % 4] = 1

    return [flavour, interaction]
180 
def data_generator.DataGenerator.sparsify3 (   self,
  y 
)

Definition at line 181 of file data_generator.py.

def sparsify3(self, y):
    """Split each label in `y` into three binary rows, without modifying `y`.

    Returns a three-element list of integer arrays:
      * shape (len(y), 1): 1 when ``label // 13 > 0`` (antineutrino), else 0;
      * shape (len(y), 4): indexed by ``(label % 13) // 4``
        (CC Numu, CC Nue, CC Nutau);
      * shape (len(y), 4): indexed by ``(label % 13) % 4``
        (CC QE, CC Res, CC DIS, CC Other).
    When the reduced label is 12, the first and third rows are filled
    with -1 instead.
    """
    count = y.shape[0]
    res = [np.zeros((count, 1), dtype=int),
           np.zeros((count, 4), dtype=int),
           np.zeros((count, 4), dtype=int)]

    for i in range(count):
        # Work on a local value: reducing y[i] in place would overwrite the
        # caller's label array.
        label = y[i]

        if label // 13 > 0:
            label = label % 13  # from 0 to 12
            res[0][i][0] = 1  # antineutrino

        res[1][i][label // 4] = 1  # CC Numu, CC Nue, CC Nutau
        res[2][i][label % 4] = 1  # CC QE, CC Res, CC DIS, CC Other

        if label == 12:
            res[0][i] = [-1]
            res[2][i] = [-1, -1, -1, -1]

    return res
203 
def data_generator.DataGenerator.sparsify5 (   self,
  y 
)

Definition at line 210 of file data_generator.py.

def sparsify5(self, y):
    """Convert a 2-D label array into one 4-column array per output.

    Builds ``self.outputs`` zero-filled integer arrays of shape (len(y), 4)
    (flavour, fNProton, fNPion, fNPizero, fNNeutron) and fills row i of
    array j from ``y[i][j]`` via ``self.normalize``. Returns them as a list.
    """
    n_samples = y.shape[0]
    heads = [np.zeros((n_samples, 4), dtype=int) for _ in range(self.outputs)]

    for row in range(n_samples):
        for column, head in enumerate(heads):
            # normalize() writes into the row in place
            self.normalize(y[row][column], head[row])

    return heads
222 
def normalize(self, value, obj)
def data_generator.DataGenerator.sparsify7 (   self,
  y 
)

Definition at line 223 of file data_generator.py.

def sparsify7(self, y):
    """Convert a 2-D label array into one array per output.

    Returns a list of ``self.outputs`` integer arrays: the first has shape
    (len(y), 1) (fNuPDG), the rest have shape (len(y), 4) (flavour,
    interaction, fNProton, fNPion, fNPizero, fNNeutron). Row i of array j
    is filled from ``y[i][j]`` via ``self.normalize``.
    """
    n_samples = y.shape[0]

    heads = [np.zeros((n_samples, 4), dtype=int) for _ in range(self.outputs)]
    heads[0] = np.zeros((n_samples, 1), dtype=int)  # fNuPDG is a single column

    for row in range(n_samples):
        for column, head in enumerate(heads):
            # normalize() writes into the row in place
            self.normalize(y[row][column], head[row])

    return heads

Member Data Documentation

data_generator.DataGenerator.batch_size

Definition at line 26 of file data_generator.py.

data_generator.DataGenerator.branches

Definition at line 27 of file data_generator.py.

data_generator.DataGenerator.cells

Definition at line 23 of file data_generator.py.

data_generator.DataGenerator.images_path

Definition at line 29 of file data_generator.py.

data_generator.DataGenerator.outputs

Definition at line 28 of file data_generator.py.

data_generator.DataGenerator.planes

Definition at line 24 of file data_generator.py.

data_generator.DataGenerator.shuffle

Definition at line 31 of file data_generator.py.

data_generator.DataGenerator.standardize

Definition at line 30 of file data_generator.py.

data_generator.DataGenerator.test_values

Definition at line 32 of file data_generator.py.

data_generator.DataGenerator.views

Definition at line 25 of file data_generator.py.


The documentation for this class was generated from the following file: