2 This is the generator module.     6 __author__ = 
'Saul Alonso-Monsalve'     7 __email__ = 
"saul.alonso.monsalve@cern.ch"    11 from string 
import digits
    15     'Generates data for Keras'    18     Initialization function of the class    20     def __init__(self, cells=500, planes=500, views=3, batch_size=32, branches=True, 
    21                  outputs=7, standardize=
True, images_path = 
'/', shuffle=
True, test_values=[]):
    35     Goes through the dataset and outputs one batch at a time.    37     def generate(self, labels, list_IDs, yield_labels=True):
    38         'Generates batches of samples'    65     Generates a random order of exploration for a given set of list_IDs.     66     If activated, this feature will shuffle the order in which the examples     67     are fed to the classifier so that batches between epochs do not look alike.     68     Doing so will eventually make our model more robust.    71         'Generates order of exploration'    74         indexes = np.arange(len(list_IDs))
    77             np.random.shuffle(indexes)
    82     Outputs batches of data and only needs to know about the list of IDs included     83     in batches as well as their corresponding labels.    86         'Generates data of batch_size samples'     93             for view 
in range(self.
views):
   109             with 
open(self.
images_path + 
'/' + ID.split(
'.')[0].lstrip(
'a') + 
'/images/' + ID + 
'.gz', 
'rb') 
as image_file:
   110                 pixels = np.fromstring(zlib.decompress(image_file.read()), dtype=np.uint8, sep=
'').reshape(self.
views, self.
planes, self.
cells)
   115                 pixels = pixels.astype(
'float32') 
   120                 for view 
in range(self.
views):
   121                     X[view][i, :, :, :] = pixels[view, :, :].reshape(self.
planes, self.
cells, 1)
   123                 pixels = np.rollaxis(pixels, 0, 3) 
   124                 X[i, :, :, :] = pixels
   134                 with 
open(self.
images_path + 
'/' + ID.split(
'.')[0].lstrip(
'a') + 
'/info/' + ID + 
'.info', 
'rb') 
as info_file:
   135                     energy_values = info_file.readlines()               
   136                     self.test_values.append({
'y_value':y_value,
   137                                              'fNuEnergy':
float(energy_values[1]),
   138                                              'fLepEnergy':
float(energy_values[2]),
   139                                              'fRecoNueEnergy': 
float(energy_values[3]), 
   140                                              'fRecoNumuEnergy': 
float(energy_values[4]), 
   141                                              'fEventWeight': 
float(energy_values[5])})
   155     Please note that Keras only accepts labels written in a binary form    156     (in a 6-label problem, the third label is written [0 0 1 0 0 0]),    157     which is why we need the sparsify function to perform this task,    158     should y be a list of numerical values.   162         'Returns labels in binary NumPy array'   163         return np.array([[1 
if y[i] == j 
else 1 
if y[i]-1 == j 
and j == 12 
else 0 
for j 
in range(13)] 
for i 
in range(y.shape[0])])
   166         'Returns labels in binary NumPy array'   168         res[0] = np.zeros((y.shape[0], 4), dtype=int)
   169         res[1] = np.zeros((y.shape[0], 4), dtype=int)
   171         for i 
in range(y.shape[0]):
   173             res[0][i][(y[i] // 4)] = 1 
   174             res[1][i][(y[i] %  4)] = 1 
   177                 res[1][i] = [-1, -1, -1, -1]
   182         'Returns labels in binary NumPy array'   184         res[0] = np.zeros((y.shape[0], 1), dtype=int)
   185         res[1] = np.zeros((y.shape[0], 4), dtype=int)
   186         res[2] = np.zeros((y.shape[0], 4), dtype=int)
   188         for i 
in range(y.shape[0]):
   189             quotient = y[i] // 13
   195             res[1][i][(y[i] // 4)] = 1 
   196             res[2][i][(y[i] %  4)] = 1 
   200                 res[2][i] = [-1, -1, -1, -1]
   205         if value == -1 
or obj.size == 1:
   211         'Returns labels in binary NumPy array'   214         for i 
in range(0,len(res)): 
   215             res[i] = np.zeros((y.shape[0], 4), dtype=int)      
   217         for i 
in range(y.shape[0]):
   218             for j 
in range(len(res)):
   224         'Returns labels in binary NumPy array'   226         res[0] = np.zeros((y.shape[0], 1), dtype=int) 
   228         for i 
in range(1,len(res)): 
   229             res[i] = np.zeros((y.shape[0], 4), dtype=int)      
   231         for i 
in range(y.shape[0]):
   232             for j 
in range(len(res)):
 
def generate(self, labels, list_IDs, yield_labels=True)
 
int open(const char *, int)
Opens a file descriptor. 
 
def __init__(self, cells=500, planes=500, views=3, batch_size=32, branches=True, outputs=7, standardize=True, images_path='/', shuffle=True, test_values=[])
 
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iterations. 
 
def normalize(self, value, obj)
 
def __get_exploration_order(self, list_IDs)
 
def __data_generation(self, labels, list_IDs_temp, yield_labels)