9 #include "boost/program_options/options_description.hpp" 10 #include "boost/program_options/variables_map.hpp" 11 #include "boost/program_options/parsers.hpp" 12 #include "boost/algorithm/string/predicate.hpp" 33 #include <leveldb/db.h> 34 #include <leveldb/write_batch.h> 38 #pragma GCC diagnostic push 39 #pragma GCC diagnostic ignored "-Wsign-compare" 40 #include "caffe/caffe.hpp" 41 #pragma GCC diagnostic pop 47 namespace po = boost::program_options;
153 fLevelDB(0), mdb_env(0), mdb_txn(0) {
157 outputDir=config.fTestOutputDir;
159 outputDir=config.fTrainOutputDir;
161 if (config.fOutputFormat==
"LevelDB"){
162 leveldb::Options fileOptions;
163 fileOptions.error_if_exists = config.fErrorIfExists;
164 fileOptions.create_if_missing = config.fCreateIfMissing;
165 fileOptions.write_buffer_size = config.fWriteBufferSize;
169 if(!leveldb::DB::Open(fileOptions, outputDir, &
fLevelDB).ok()) {
170 std::cout <<
"Problem opening the database: " 176 else if (config.fOutputFormat==
"LMDB") {
177 mkdir(outputDir.c_str(),0777);
179 mdb_env_set_mapsize(
mdb_env, 10737418240);
180 mdb_env_open(
mdb_env, outputDir.c_str(), 0, 0777);
186 std::cout <<
"Unrecognized output format " << config.fOutputFormat <<
std::endl;
201 mdb_data.mv_size=serializeString.size();
202 mdb_data.mv_data=
reinterpret_cast<void*
>(&serializeString[0]);
203 mdb_key.mv_size=serializeKey.size();
204 mdb_key.mv_data=
reinterpret_cast<void*
>(&serializeKey[0]);
206 std::cout<<
"ERROR: Events not loaded correctly" <<
std::endl;
214 TChain chain(config.fTreeName.c_str());
216 if (boost::ends_with(input,
".list")) {
217 std::ifstream list_file(input.c_str());
218 if (!list_file.is_open()) {
219 std::cout <<
"Could not open " << input <<
std::endl;
224 while (list_file>>ifname)
225 chain.Add(ifname.c_str());
229 else if (boost::ends_with(input,
".root")) {
230 chain.Add(input.c_str());
233 chain.SetMakeClass(1);
238 std::vector<float> fPMap_fPEX;
239 std::vector<float> fPMap_fPEY;
240 std::vector<float> fPMap_fPEZ;
242 chain.SetBranchAddress(
"fInt", &fInt);
243 chain.SetBranchAddress(
"fPMap.fNWire", &fPMap_fNWire);
244 chain.SetBranchAddress(
"fPMap.fNTdc", &fPMap_fNTdc);
245 chain.SetBranchAddress(
"fPMap.fPEX", &fPMap_fPEX);
246 chain.SetBranchAddress(
"fPMap.fPEY", &fPMap_fPEY);
247 chain.SetBranchAddress(
"fPMap.fPEZ", &fPMap_fPEZ);
249 unsigned int entries = chain.GetEntries();
250 if(config.fNEvents < entries){
251 entries = config.fNEvents;
254 std::cout <<
"Error: Input tree has no entries." <<
std::endl;
258 std::cout <<
"- Will process " << entries <<
" from the input tree." <<
std::endl;
263 char*
key =
new char[config.fMaxKeyLength];
267 std::srand (
unsigned ( std::time(0) ) );
268 std::vector<unsigned int> shuffled;
269 for (
unsigned int i = 0; i < entries; ++i)
271 shuffled.push_back(i);
274 std::random_shuffle( shuffled.begin(), shuffled.end() );
278 unsigned int blockSize = config.fNTrainPerTest + 1;
280 unsigned int nTest = 1 + entries / blockSize;
282 unsigned int nTrain = entries / blockSize * config.fNTrainPerTest;
284 if (entries % blockSize) nTrain += entries % blockSize - 1;
287 const unsigned int nRegressionFeatures = 2;
291 int** regressionDataTest =
new int*[nTest];
292 for(
unsigned int i = 0; i < nTest; ++i) {regressionDataTest[i] =
new int[nRegressionFeatures];}
293 int** regressionDataTrain =
new int*[nTrain];
294 for(
unsigned int i = 0; i < nTrain; ++i) {regressionDataTrain[i] =
new int[nRegressionFeatures];}
301 const char saveFilePath[] =
"test.h5";
302 const hsize_t ndims = 2;
303 const hsize_t ncols = 3;
305 hid_t
file = H5Fcreate(saveFilePath, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
306 std::cout <<
"- File created" <<
std::endl;
308 hsize_t dims[ndims] = {0, ncols};
309 hsize_t max_dims[ndims] = {H5S_UNLIMITED, ncols};
310 hid_t file_space = H5Screate_simple(ndims, dims, max_dims);
311 std::cout <<
"- Dataspace created" <<
std::endl;
313 hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
314 H5Pset_layout(plist, H5D_CHUNKED);
315 hsize_t chunk_dims[ndims] = {2, ncols};
316 H5Pset_chunk(plist, ndims, chunk_dims);
317 std::cout <<
"- Property list created" <<
std::endl;
320 H5Dcreate(file,
"dset1", H5T_NATIVE_FLOAT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);
321 std::cout <<
"- Dataset 'dset1' created" <<
std::endl;
324 H5Sclose(file_space);
326 if(entries > chain.GetEntries()){
327 entries = chain.GetEntries();
330 for(
unsigned int iEntry = 0; iEntry < entries; ++iEntry)
332 unsigned int entry = shuffled[iEntry];
333 chain.GetEntry(entry);
335 unsigned int nViews = 3;
343 std::vector<unsigned char> pixelArray(nViews * config.fPlaneLimit * config.fTDCLimit,0);
350 datum.set_height(config.fPlaneLimit);
351 datum.set_width(config.fTDCLimit);
353 datum.set_data(pixelArray.data(), nViews * config.fPlaneLimit * config.fTDCLimit);
355 datum.set_label(fInt);
357 datum.SerializeToString(&serializeString);
359 if(iEntry % (blockSize))
361 snprintf(key, config.fMaxKeyLength,
"%08lld", (
long long int)iTrain);
364 TrainDB.
Put(serializeKey,serializeString);
366 regressionDataTrain[iTrain][0] = 1.;
367 regressionDataTrain[iTrain][1] = 1.;
372 float *buffer =
new float[nlines * ncols];
373 float **
b =
new float*[nlines];
374 for (hsize_t i = 0; i < nlines; ++i){
375 b[i] = &buffer[i * ncols];
385 snprintf(key, config.fMaxKeyLength,
"%08lld", (
long long int)iTest);
388 TestDB.
Put(serializeKey,serializeString);
390 regressionDataTest[iTest][0] = 1.;
391 regressionDataTest[iTest][1] = 1.;
394 if(not (iEntry % config.fProgressInterval))
395 std::cout <<
"Fraction complete: " 396 << iEntry / (
float)entries << std::endl;
400 if (config.fWriteRegressionHDF5)
403 H5::FloatType
type(H5::PredType::IEEE_F32LE);
404 std::cout <<
"Writing HDF5 regression output : " 405 << config.fRegressionHDF5NameTest <<
std::endl;
406 H5::H5File h5FileTest(config.fRegressionHDF5NameTest, H5F_ACC_TRUNC );
409 shape[1] = nRegressionFeatures;
410 H5::DataSpace spaceTest(2, shape);
412 H5::DataSet datasetTest = h5FileTest.createDataSet(
"regression",
416 datasetTest.write(regressionDataTest, type);
418 std::cout <<
"Writing HDF5 regression output : " 419 << config.fRegressionHDF5NameTrain <<
std::endl;
420 H5::H5File h5FileTrain(config.fRegressionHDF5NameTrain, H5F_ACC_TRUNC );
422 H5::DataSpace spaceTrain(2, shape);
424 H5::DataSet datasetTrain = h5FileTrain.createDataSet(
"regression",
428 datasetTrain.write(regressionDataTrain, type);
434 for(
unsigned int i = 0; i < nTest; ++i) {
435 delete [] regressionDataTest[i];
437 delete [] regressionDataTest;
438 for(
unsigned int i = 0; i < nTrain; ++i) {
439 delete [] regressionDataTrain[i];
441 delete [] regressionDataTrain;
452 po::options_description
desc(
"Allowed options");
454 (
"help",
"produce help message")
455 (
"config,c", po::value<std::string>(&config)->required(),
456 "configuration file")
457 (
"input,i", po::value<std::string>(&input)->required(),
458 "Input data in ROOT file.");
459 po::variables_map vm;
463 po::store(po::parse_command_line(argc, argv, desc), vm);
469 std::cout <<
"ERROR: " << e.what() <<
std::endl;
474 if (vm.count(
"help")) {
475 std::cout << desc <<
"\n";
504 std::string configPath, inputPath, outputPath, logPath;
505 po::variables_map vm =
getOptions(argc, argv, configPath, inputPath);
510 fill(config, inputPath);
fhicl::ParameterSet getPSet(std::string configPath)
Label is the conversion of fNuEnergy to an int.
Config(const fhicl::ParameterSet &pset)
void fill(const Config &config, std::string input)
def mkdir(path, mode=0o777)
void ConvertChargeVectorsToPixelArray(std::vector< float > &v0pe, std::vector< float > &v1pe, std::vector< float > &v2pe, std::vector< unsigned char > &pix)
int main(int argc, char *argv[])
std::string fRegressionHDF5NameTest
void SetViewReversal(bool reverseX, bool reverseY, bool reverseZ)
Function to set any views that need reversing.
void SetLogScale(bool setLog)
Set the log scale for charge.
void make_ParameterSet(intermediate_table const &tbl, ParameterSet &ps)
unsigned int fMaxKeyLength
std::string fRegressionHDF5NameTrain
Label all interaction types separately.
unsigned int fLabelingMode
typename config_impl< T >::type Config
bool fWriteRegressionHDF5
Flag to control whether or not we write HDF5 regression features.
unsigned int fNTrainPerTest
Number of training examples for each test sample, e.g. 4 for 80/20 split.
Utilities for producing images for the CVN.
int fTDCLimit
Limit the number of TDCs in the output image.
void Put(std::string &serializeKey, std::string &serializeString)
Class containing some utility functions for all things CVN.
unsigned int fNEvents
Limit the number of entries in the tree to consider.
std::string fOutputFormat
std::string fTrainOutputDir
po::variables_map getOptions(int argc, char *argv[], std::string &config, std::string &input)
OutputDB(std::string sample, const Config &config)
std::vector< bool > fReverseViews
Views to reverse.
int fPlaneLimit
Limit the number of wires in the output image.
std::string fTrainingBranchObjectName
leveldb::WriteOptions fWriteOptions
intermediate_table parse_document(std::string const &filename, cet::filepath_maker &maker)
unsigned int fProgressInterval
Number of examples in between progress updates (% complete)
std::string fTestOutputDir
auto const & get(AssnsNode< L, R, D > const &r)
QTextStream & endl(QTextStream &s)
unsigned int fWriteBufferSize
void SetPixelMapSize(unsigned int nWires, unsigned int nTDCs)
Set the input pixel map size.