2 This is the generator module. 6 __author__ =
'Saul Alonso-Monsalve' 7 __email__ =
"saul.alonso.monsalve@cern.ch" 11 from string
import digits
15 'Generates data for Keras' 18 Initialization function of the class 20 def __init__(self, cells=500, planes=500, views=3, batch_size=32, branches=True,
21 outputs=7, standardize=
True, images_path =
'/', shuffle=
True, test_values=[]):
35 Goes through the dataset and outputs one batch at a time. 37 def generate(self, labels, list_IDs, yield_labels=True):
38 'Generates batches of samples' 65 Generates a random order of exploration for a given set of list_IDs. 66 If activated, this feature will shuffle the order in which the examples 67 are fed to the classifier so that batches between epochs do not look alike. 68 Doing so will eventually make our model more robust. 71 'Generates order of exploration' 74 indexes = np.arange(len(list_IDs))
77 np.random.shuffle(indexes)
82 Outputs batches of data and only needs to know about the list of IDs included 83 in batches as well as their corresponding labels. 86 'Generates data of batch_size samples' 93 for view
in range(self.
views):
109 with
open(self.
images_path +
'/' + ID.split(
'.')[0].lstrip(
'a') +
'/images/' + ID +
'.gz',
'rb')
as image_file:
110 pixels = np.fromstring(zlib.decompress(image_file.read()), dtype=np.uint8, sep=
'').reshape(self.
views, self.
planes, self.
cells)
115 pixels = pixels.astype(
'float32')
120 for view
in range(self.
views):
121 X[view][i, :, :, :] = pixels[view, :, :].reshape(self.
planes, self.
cells, 1)
123 pixels = np.rollaxis(pixels, 0, 3)
124 X[i, :, :, :] = pixels
134 with
open(self.
images_path +
'/' + ID.split(
'.')[0].lstrip(
'a') +
'/info/' + ID +
'.info',
'rb')
as info_file:
135 energy_values = info_file.readlines()
136 self.test_values.append({
'y_value':y_value,
137 'fNuEnergy':
float(energy_values[1]),
138 'fLepEnergy':
float(energy_values[2]),
139 'fRecoNueEnergy':
float(energy_values[3]),
140 'fRecoNumuEnergy':
float(energy_values[4]),
141 'fEventWeight':
float(energy_values[5])})
155 Please note that Keras only accepts labels written in a binary form 156 (in a 6-label problem, the third label is written [0 0 1 0 0 0]), 157 which is why we need the sparsify function to perform this task, 158 should y be a list of numerical values. 162 'Returns labels in binary NumPy array' 163 return np.array([[1
if y[i] == j
else 1
if y[i]-1 == j
and j == 12
else 0
for j
in range(13)]
for i
in range(y.shape[0])])
166 'Returns labels in binary NumPy array' 168 res[0] = np.zeros((y.shape[0], 4), dtype=int)
169 res[1] = np.zeros((y.shape[0], 4), dtype=int)
171 for i
in range(y.shape[0]):
173 res[0][i][(y[i] // 4)] = 1
174 res[1][i][(y[i] % 4)] = 1
177 res[1][i] = [-1, -1, -1, -1]
182 'Returns labels in binary NumPy array' 184 res[0] = np.zeros((y.shape[0], 1), dtype=int)
185 res[1] = np.zeros((y.shape[0], 4), dtype=int)
186 res[2] = np.zeros((y.shape[0], 4), dtype=int)
188 for i
in range(y.shape[0]):
189 quotient = y[i] // 13
195 res[1][i][(y[i] // 4)] = 1
196 res[2][i][(y[i] % 4)] = 1
200 res[2][i] = [-1, -1, -1, -1]
205 if value == -1
or obj.size == 1:
211 'Returns labels in binary NumPy array' 214 for i
in range(0,len(res)):
215 res[i] = np.zeros((y.shape[0], 4), dtype=int)
217 for i
in range(y.shape[0]):
218 for j
in range(len(res)):
224 'Returns labels in binary NumPy array' 226 res[0] = np.zeros((y.shape[0], 1), dtype=int)
228 for i
in range(1,len(res)):
229 res[i] = np.zeros((y.shape[0], 4), dtype=int)
231 for i
in range(y.shape[0]):
232 for j
in range(len(res)):
def generate(self, labels, list_IDs, yield_labels=True)
int open(const char *, int)
Opens a file descriptor.
def __init__(self, cells=500, planes=500, views=3, batch_size=32, branches=True, outputs=7, standardize=True, images_path='/', shuffle=True, test_values=[])
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iterations.
def normalize(self, value, obj)
def __get_exploration_order(self, list_IDs)
def __data_generation(self, labels, list_IDs_temp, yield_labels)