MixHelper.cc
Go to the documentation of this file.
8 
9 #include <algorithm>
10 #include <cassert>
11 #include <functional>
12 #include <limits>
13 #include <numeric>
14 #include <ostream>
15 #include <random>
16 #include <regex>
17 #include <unordered_set>
18 
19 using namespace std::string_literals;
20 
21 namespace {
// Builds a lookup table from the event entries of a FileIndex: every element
// whose entry type is kEvent contributes an (entry number, event ID) pair;
// all other elements are skipped.
// NOTE(review): listing lines 22 and 25 are absent from this extract --
// presumably the return type and the declaration of `result`; confirm
// against the real file.
23  buildEventIDIndex(art::FileIndex const& fileIndex)
24  {
26  for (auto const& element : fileIndex) {
27  if (element.getEntryType() != art::FileIndex::kEvent)
28  continue;
29  result.emplace(element.entry, element.eventID);
30  }
31  return result;
32  }
33 
// Builds the incoming -> outgoing ProductID translation map from the mix
// operations. Only operations whose branch type is art::InEvent contribute;
// all others are skipped.
// NOTE(review): listing lines 34 and 37 are absent from this extract --
// presumably the return type and the declaration of `transMap`; confirm.
35  buildProductIDTransMap(art::MixOpList const& mixOps)
36  {
38  for (auto const& mixOp : mixOps) {
39  auto const bt = mixOp->branchType();
40  if (bt != art::InEvent)
41  continue;
42  transMap[mixOp->incomingProductID()] = mixOp->outgoingProductID();
43  }
44  return transMap;
45  }
46 
// Function object that maps a file entry number to the value stored for it
// in an EventIDIndex. It keeps only a reference to the index, so the index
// must outlive the lookup object.
47  class EventIDLookup {
48  public:
49  explicit EventIDLookup(art::EventIDIndex const& index) : index_{index} {}
50 
// Looks `entry` up in the index and returns the mapped value.
// NOTE(review): listing line 51 (return type) is absent from this extract.
52  operator()(art::FileIndex::EntryNumber_t const entry) const
53  {
54  auto i = index_.find(entry);
55  if (i == cend(index_)) {
// NOTE(review): listing line 56 is absent; the streamed message below
// suggests it starts a throw expression -- confirm. Without a throw here,
// `i->second` below would dereference the end iterator.
57  << "MixHelper could not find entry number " << entry
58  << " in its own lookup table.\n";
59  }
60  return i->second;
61  }
62 
63  private:
// Non-owning reference to the index supplied at construction.
64  art::EventIDIndex const& index_;
65  };
66 
67  double
68  initCoverageFraction(double fraction)
69  {
70  if (fraction > (1 + std::numeric_limits<double>::epsilon())) {
71  mf::LogWarning("Configuration")
72  << "coverageFraction > 1: treating as a percentage.\n";
73  fraction /= 100.0;
74  }
75  return fraction;
76  }
77 
78 } // namespace
79 
// Constructor taking its settings from `pset` (see the pset.get calls below).
// Defaults visible here: fileNames = {}, compactMissingProducts = false,
// readMode = "sequential", coverageFraction = 1.0, wrapFiles = false,
// seed = -1.
// NOTE(review): listing lines 80, 84 and 95 are absent from this extract
// (start of the signature and two initializers); confirm against the real
// file.
81  std::string const& moduleLabel,
82  ProducesCollector& collector,
83  std::unique_ptr<MixIOPolicy> ioHandle)
85  , collector_{collector}
86  , moduleLabel_{moduleLabel}
87  , filenames_{pset.get<std::vector<std::string>>("fileNames", {})}
88  , compactMissingProducts_{pset.get<bool>("compactMissingProducts", false)}
// Starts at the first configured file name (equals end() when the list is
// empty).
89  , fileIter_{filenames_.begin()}
90  , readMode_{initReadMode_(pset.get<std::string>("readMode", "sequential"))}
// Values above 1 are interpreted as percentages by initCoverageFraction.
91  , coverageFraction_{initCoverageFraction(
92  pset.get<double>("coverageFraction", 1.0))}
93  , canWrapFiles_{pset.get<bool>("wrapFiles", false)}
94  , engine_{initEngine_(pset.get<long>("seed", -1), readMode_)}
96  , ioHandle_{move(ioHandle)}
97 {}
98 
// Constructor taking its settings from a `config` object (see
// config.coverageFraction() below).
// NOTE(review): listing lines 99, 106-107, 109 and 111-113 are absent from
// this extract, so several member initializers are not visible here;
// confirm against the real file.
100  std::string const& moduleLabel,
101  ProducesCollector& collector,
102  std::unique_ptr<MixIOPolicy> ioHandle)
103  : detail::EngineCreator{moduleLabel, art::ScheduleID::first()}
104  , collector_{collector}
105  , moduleLabel_{moduleLabel}
108  , fileIter_{filenames_.begin()}
// Values above 1 are interpreted as percentages by initCoverageFraction.
110  , coverageFraction_{initCoverageFraction(config.coverageFraction())}
114  , ioHandle_{move(ioHandle)}
115 {}
116 
// Streams a textual name for a MixHelper::Mode value and returns the stream.
// NOTE(review): the case labels (listing lines 121, 123, 125, 127, 129) are
// absent from this extract; presumably each string below is paired with the
// like-named enumerator -- confirm.
117 std::ostream&
118 art::operator<<(std::ostream& os, MixHelper::Mode const mode)
119 {
120  switch (mode) {
122  return os << "SEQUENTIAL";
124  return os << "RANDOM_REPLACE";
126  return os << "RANDOM_LIM_REPLACE";
128  return os << "RANDOM_NO_REPLACE";
130  return os << "UNKNOWN";
131  // No default so compiler can warn.
132  }
// Reached only when `mode` matches none of the cases above; nothing is
// written in that situation.
133  return os;
134 }
135 
// Rejects the combination of a secondary file name provider with a non-empty
// fileNames list (see the message below).
// NOTE(review): listing lines 137, 140 and 144 are absent from this extract:
// the signature, presumably the start of a throw expression, and presumably
// the statement that stores the provider -- confirm against the real file.
136 void
138 {
139  if (!filenames_.empty()) {
141  << "Provision of a secondary file name provider is incompatible"
142  << " with a\nnon-empty fileNames parameter to the mix filter.\n";
143  }
145 }
146 
// Returns the already-created engine (*engine_) when one exists and the
// request passes the check in the condition below.
// NOTE(review): the signature (listing lines 147-148), the callee name on
// listing line 151 and the fall-through statement on listing line 156 are
// absent from this extract; confirm against the real file.
149 {
150  if (engine_ &&
152  ServiceHandle<RandomNumberGenerator const>()->defaultEngineKind(),
153  ""s)) {
154  return *engine_;
155  }
157 }
158 
// Engine request with an explicit engine kind and an empty label.
// If an engine already exists and consistentRequest_ accepts the request,
// that engine is reused; otherwise the request is forwarded to
// detail::EngineCreator::createEngine.
// NOTE(review): the start of the signature (listing lines 159-160) is absent
// from this extract.
161  std::string const& kind_of_engine_to_make)
162 {
163  if (engine_ && consistentRequest_(kind_of_engine_to_make, ""s)) {
164  return *engine_;
165  }
166  return detail::EngineCreator::createEngine(seed, kind_of_engine_to_make);
167 }
168 
// Engine request with an explicit engine kind and engine label.
// If an engine already exists and consistentRequest_ accepts the request,
// that engine is reused.
// NOTE(review): the start of the signature (listing lines 169-170) and
// listing line 177 are absent from this extract; line 177 is presumably the
// start of a forwarding call taking the arguments on line 178 -- confirm.
171  std::string const& kind_of_engine_to_make,
172  label_t const& engine_label)
173 {
174  if (engine_ && consistentRequest_(kind_of_engine_to_make, engine_label)) {
175  return *engine_;
176  }
178  seed, kind_of_engine_to_make, engine_label);
179 }
180 
// Chooses which entries of the current secondary file to use for one primary
// event.
//
// On success, fills `enSeq` with `nSecondaries` entry numbers and `eIDseq`
// with the values looked up for them in eventIDIndex_, and returns true.
// Returns false when no secondary file can be opened. Both output sequences
// must be empty on entry (asserted).
//
// NOTE(review): listing lines 194, 200, 202, 222, 228, 235, 253, 258 and 262
// are absent from this extract; the notes below mark where they fall.
181 bool
182 art::MixHelper::generateEventSequence(size_t const nSecondaries,
183  EntryNumberSequence& enSeq,
184  EventIDSequence& eIDseq)
185 {
186  assert(enSeq.empty());
187  assert(eIDseq.empty());
// Open the first file lazily; give up if there is none.
188  if (not ioHandle_->fileOpen() and not openNextFile_()) {
189  return false;
190  }
191 
192  auto const nEventsInFile = ioHandle_->nEventsInFile();
// Decide whether this request would run past what the current file may
// supply. NOTE(review): the condition selecting between the two limits
// (listing line 194) is absent; one limit is the raw event count, the other
// is the event count scaled by coverageFraction_.
193  bool const over_threshold =
195  ((nEventsReadThisFile_ + nSecondaries) > nEventsInFile) :
196  ((nEventsReadThisFile_ + nSecondaries) >
197  (nEventsInFile * coverageFraction_));
198  if (over_threshold) {
199  if (!providerFunc_) {
// NOTE(review): listing line 200 is absent; presumably it updates
// nOpensOverThreshold_. Listing line 202 is also absent; presumably it
// starts the throw expression that the message below belongs to -- confirm.
201  if (nOpensOverThreshold_ > filenames_.size()) {
203  "An error occurred while preparing product-mixing for "
204  "the current event.\n"}
205  << "The number of requested secondaries (" << nSecondaries
206  << ") exceeds the number of events in any\n"
207  << "of the files specified for product mixing. For a read mode of '"
208  << readMode_ << "',\n"
209  << "the framework does not currently allow product-mixing to span "
210  "multiple secondary\n"
211  << "input files for a given event. Please contact artists@fnal.gov "
212  "for more information.\n";
213  }
214  }
// Move on to the next file and retry the whole selection against it.
215  if (openNextFile_()) {
216  return generateEventSequence(nSecondaries, enSeq, eIDseq);
217  } else {
218  return false;
219  }
220  }
221 
// NOTE(review): listing line 222 is absent from this extract.
223  switch (readMode_) {
224  case Mode::SEQUENTIAL:
// Consecutive entries starting at the current read position in the file.
225  enSeq.resize(nSecondaries);
226  std::iota(begin(enSeq), end(enSeq), nEventsReadThisFile_);
227  break;
// NOTE(review): case label on listing line 228 is absent. This branch draws
// nSecondaries values from dist_ (duplicates possible) and sorts them.
229  std::generate_n(
230  std::back_inserter(enSeq), nSecondaries, [this, nEventsInFile] {
231  return dist_.get()->fireInt(nEventsInFile);
232  });
233  std::sort(enSeq.begin(), enSeq.end());
234  break;
// NOTE(review): case label on listing line 235 is absent. This branch keeps
// drawing from dist_ until nSecondaries distinct entries have been collected,
// then sorts them.
236  std::unordered_set<EntryNumberSequence::value_type>
237  entries; // Guaranteed unique.
238  while (entries.size() < nSecondaries) {
239  std::generate_n(
240  std::inserter(entries, entries.begin()),
241  nSecondaries - entries.size(),
242  [this, nEventsInFile] { return dist_.get()->fireInt(nEventsInFile); });
243  }
244  enSeq.assign(cbegin(entries), cend(entries));
245  std::sort(begin(enSeq), end(enSeq));
246  // Since we need to sort at the end anyway, it's unclear whether
247  // unordered_set is faster than set even though inserts are
248  // approximately linear time. Since the complexity of the sort is
249  // NlogN, we'd need a profile run for it all to come out in the
250  // wash.
251  assert(enSeq.size() == nSecondaries); // Should be true by construction.
252  } break;
// NOTE(review): case label on listing line 253 is absent. This branch takes
// the next nSecondaries entries of shuffledSequence_, starting at the current
// read position.
254  auto i = shuffledSequence_.cbegin() + nEventsReadThisFile_;
255  enSeq.assign(i, i + nSecondaries);
256  } break;
257  default:
// NOTE(review): listing line 258 is absent; presumably the start of a throw
// expression -- confirm.
259  << "Unrecognized read mode " << static_cast<int>(readMode_)
260  << ". Contact the art developers.\n";
261  }
// NOTE(review): listing line 262 is absent; the arguments below apply
// EventIDLookup to each element of enSeq and append the results to eIDseq.
263  enSeq, back_inserter(eIDseq), EventIDLookup{eventIDIndex_});
264  return true;
265 }
266 
// Thin forwarder: delegates to ioHandle_->generateEventAuxiliarySequence
// for the given entry numbers and returns its result.
// NOTE(review): the signature (listing lines 267-268) is absent from this
// extract.
269 {
270  return ioHandle_->generateEventAuxiliarySequence(enSeq);
271 }
272 
273 namespace {
274  art::PtrRemapper const nopRemapper{};
275 }
276 
// Reads the selected secondary entries for every registered mix operation,
// mixes them and puts the result into event `e`, then advances the per-file
// and total read counters by the number of event entries used.
//
// Throws Exception(errors::NotFound) when a SubRun or Run entry matching one
// of the event IDs cannot be found in the file index, and
// Exception(errors::LogicError) for an unsupported branch type.
//
// NOTE(review): listing line 278 (start of the signature, which declares
// `eventEntries`) and listing line 316 are absent from this extract.
277 void
279  EventIDSequence const& eIDseq,
280  Event& e)
281 {
282  // Create required info only if we're likely to need it.
283  EntryNumberSequence subRunEntries;
284  EntryNumberSequence runEntries;
285  auto const& fileIndex = ioHandle_->fileIndex();
286  if (haveSubRunMixOps_) {
// One SubRun entry per secondary event, in the same order as eIDseq.
287  subRunEntries.reserve(eIDseq.size());
288  for (auto const& eID : eIDseq) {
289  auto const it = fileIndex.findPosition(eID.subRunID(), true);
290  if (it != std::cend(fileIndex)) {
291  subRunEntries.emplace_back(it->entry);
292  } else {
// NOTE(review): `*fileIter_` is dereferenced for the message. When file
// names come from providerFunc_, fileIter_ is not advanced by
// openNextFile_ and may equal filenames_.end() -- verify this is safe.
293  throw Exception(errors::NotFound, "NO_SUBRUN")
294  << "- Unable to find an entry in the SubRun tree corresponding to "
295  "event ID "
296  << eID << " in secondary mixing input file " << *fileIter_ << ".\n";
297  }
298  }
299  }
300  if (haveRunMixOps_) {
// One Run entry per secondary event, in the same order as eIDseq.
301  runEntries.reserve(eIDseq.size());
302  for (auto const& eID : eIDseq) {
303  auto const it = fileIndex.findPosition(eID.runID(), true);
304  if (it != std::cend(fileIndex)) {
305  runEntries.emplace_back(it->entry);
306  } else {
// NOTE(review): same `*fileIter_` concern as in the SubRun branch above.
307  throw Exception(errors::NotFound, "NO_RUN")
308  << "- Unable to find an entry in the Run tree corresponding to "
309  "event ID "
310  << eID << " in secondary mixing input file " << *fileIter_ << ".\n";
311  }
312  }
313  }
314 
315  // Populate the remapper in case we need to remap any Ptrs.
// NOTE(review): the statement this comment describes (listing line 316) is
// absent from this extract.
317 
318  // Do the branch-wise read, mix and put.
319  for (auto const& op : mixOps_) {
320  switch (op->branchType()) {
321  case InEvent: {
322  auto const inProducts = ioHandle_->readFromFile(*op, eventEntries);
323  op->mixAndPut(e, inProducts, ptrRemapper_);
324  break;
325  }
326  case InSubRun: {
327  auto const inProducts = ioHandle_->readFromFile(*op, subRunEntries);
328  // Ptrs not supported for subrun product mixing.
329  op->mixAndPut(e, inProducts, nopRemapper);
330  break;
331  }
332  case InRun: {
333  auto const inProducts = ioHandle_->readFromFile(*op, runEntries);
334  // Ptrs not supported for run product mixing.
335  op->mixAndPut(e, inProducts, nopRemapper);
336  break;
337  }
338  default:
339  throw Exception(errors::LogicError, "Unsupported BranchType")
340  << "- MixHelper::mixAndPut() attempted to handle unsupported branch "
341  "type "
342  << op->branchType() << ".\n";
343  }
344  }
345 
// Book-keeping used by generateEventSequence's threshold check and by the
// wrap-around warning in openNextFile_.
346  nEventsReadThisFile_ += eventEntries.size();
347  totalEventsRead_ += eventEntries.size();
348 }
349 
// Stores the callable used to obtain the number of events to skip; it is
// invoked from openNextFile_.
// NOTE(review): the signature (listing line 351) is absent from this extract.
350 void
352 {
353  eventsToSkip_ = eventsToSkip;
354 }
355 
// Translates the textual readMode parameter into a Mode value.
//
// The string is tested against the regexes below in order, case-insensitively;
// the index of the first match is converted to Mode. Note that "^seq" accepts
// any string that merely begins with "seq", whereas the other patterns must
// match the whole string.
// NOTE(review): the signature (listing line 357) and listing line 374 are
// absent from this extract; line 374 presumably starts a throw expression
// carrying the message below -- confirm.
356 auto
358 {
359  // These regexes must correspond by index to the valid Mode enumerator
360  // values.
361  static std::regex const robjs[4]{
362  std::regex("^seq", std::regex_constants::icase),
363  std::regex("^random(replace)?$", std::regex_constants::icase),
364  std::regex("^randomlimreplace$", std::regex_constants::icase),
365  std::regex("^randomnoreplace$", std::regex_constants::icase)};
// `i` tracks the index of the regex being tried.
366  int i{0};
367  for (auto const& r : robjs) {
368  if (std::regex_search(mode, r)) {
369  return Mode(i);
370  } else {
371  ++i;
372  }
373  }
375  << "Unrecognized value of readMode parameter: \"" << mode
376  << "\". Valid values are:\n"
377  << " sequential,\n"
378  << " randomReplace (random is accepted for reasons of legacy),\n"
379  << " randomLimReplace,\n"
380  << " randomNoReplace.\n";
381 }
382 
// Selects and opens the next secondary input file.
//
// The file name comes from providerFunc_ when one is registered, otherwise
// from the filenames_ list (optionally wrapping around to the first file).
// Returns false when no further file is available, true once the file has
// been opened and the per-file lookup tables rebuilt.
//
// NOTE(review): listing lines 384, 386, 410, 417 and 419 are absent from this
// extract; the notes below mark where they fall.
383 bool
385 {
// NOTE(review): listing line 386 is absent; presumably the declaration of
// `filename`.
387  if (providerFunc_) {
// An empty name from the provider means there are no more files.
388  filename = providerFunc_();
389  if (filename.empty()) {
390  return false;
391  }
392  } else if (filenames_.empty()) {
393  return false;
394  } else {
// Advance only after a file has been opened, so the first call uses the
// first list entry.
395  if (ioHandle_->fileOpen()) { // Already seen one file.
396  ++fileIter_;
397  }
398  if (fileIter_ == filenames_.end()) {
399  if (canWrapFiles_) {
400  mf::LogWarning("MixingInputWrap")
401  << "Wrapping around to initial input file for mixing after "
402  << totalEventsRead_ << " secondary events read.";
403  fileIter_ = filenames_.begin();
404  } else {
405  return false;
406  }
407  }
408  filename = *fileIter_;
409  }
// NOTE(review): listing line 410 is absent; presumably it assigns the
// per-file read counter from the conditional expression below -- confirm.
411  eventsToSkip_() :
412  0; // Reset for this file.
413  ioHandle_->openAndReadMetaData(filename, mixOps_);
414 
// Rebuild the per-file lookup structures for the newly opened file.
415  eventIDIndex_ = buildEventIDIndex(ioHandle_->fileIndex());
416  auto transMap = buildProductIDTransMap(mixOps_);
// NOTE(review): listing line 417 is absent; presumably it consumes
// `transMap`. Listing line 419 is also absent; presumably it opens the
// conditional block that the closing brace on line 426 ends.
418 
420  // Prepare shuffled event sequence.
421  shuffledSequence_.resize(ioHandle_->nEventsInFile());
422  std::iota(shuffledSequence_.begin(), shuffledSequence_.end(), 0);
// NOTE(review): the shuffle uses a std::mt19937 seeded from
// std::random_device rather than engine_, so the order presumably does not
// follow the configured seed -- confirm this is intended.
423  std::random_device rd;
424  std::mt19937 g{rd()};
425  std::shuffle(shuffledSequence_.begin(), shuffledSequence_.end(), g);
426  }
427 
428  return true;
429 }
430 
// Checks whether an engine request can be served by the engine that already
// exists.
//
// Returns true, after logging an informational message, when the requested
// kind equals the RandomNumberGenerator service's default engine kind and the
// label is empty.
// NOTE(review): listing line 443 is absent from this extract; presumably it
// starts a throw expression carrying the message below, which would make
// every other request an error -- confirm.
431 bool
432 art::MixHelper::consistentRequest_(std::string const& kind_of_engine_to_make,
433  label_t const& engine_label) const
434 {
435  auto const& default_engine_kind =
436  ServiceHandle<RandomNumberGenerator const>()->defaultEngineKind();
437  if (kind_of_engine_to_make == default_engine_kind && engine_label.empty()) {
438  mf::LogInfo{"RANDOM"} << "A random number engine has already been created "
439  "since the read mode is "
440  << readMode_ << '.';
441  return true;
442  }
444  "An error occurred while creating a random number engine "
445  "within a MixFilter detail class.\n"}
446  << "A random number engine with an empty label has already been created "
447  "with an engine type of "
448  << default_engine_kind << ".\n"
449  << "If you would like to use a different engine type, please supply a "
450  "different engine label.\n";
451 }
452 
// Sets up the random engine for read modes that compare greater than
// Mode::SEQUENTIAL. Returns nullptr for the sequential mode.
// Throws Exception(errors::Configuration) when such a mode is selected but
// the RandomNumberGenerator service is not available.
// NOTE(review): the signature (listing lines 453-454) and listing line 459
// are absent from this extract; line 459 is presumably where the engine is
// created and returned -- confirm.
455 {
456  using namespace art;
457  if (readMode > MixHelper::Mode::SEQUENTIAL) {
458  if (ServiceRegistry::isAvailable<RandomNumberGenerator>()) {
460  } else {
461  throw Exception{errors::Configuration, "MixHelper"}
462  << "Random event mixing selected but RandomNumberGenerator service "
463  "not loaded.\n"
464  << "Ensure service is loaded with: \n"
465  << "services.RandomNumberGenerator: {}\n";
466  }
467  }
468  return nullptr;
469 }
470 
// Creates the CLHEP::RandFlat distribution bound to `engine`.
// Returns a null unique_ptr when `engine` is null.
// NOTE(review): the signature line (listing line 472) is absent from this
// extract.
471 std::unique_ptr<CLHEP::RandFlat>
473 {
474  std::unique_ptr<CLHEP::RandFlat> result{nullptr};
475  if (engine) {
// The distribution is constructed from a reference to the engine.
476  result = std::make_unique<CLHEP::RandFlat>(*engine);
477  }
478  return result;
479 }
end
while True: pbar.update(maxval-len(onlies[E][S])) #print iS, "/", len(onlies[E][S]) found = False for...
base_engine_t & createEngine(seed_t seed)
void registerSecondaryFileNameProvider(ProviderFunc_ func)
Definition: MixHelper.cc:137
std::vector< std::string >::const_iterator fileIter_
Definition: MixHelper.h:401
std::vector< EventAuxiliary > EventAuxiliarySequence
Definition: MixTypes.h:28
std::function< size_t()> eventsToSkip_
Definition: MixHelper.h:411
std::vector< EventID > EventIDSequence
Definition: MixTypes.h:26
QList< Entry > entry
decltype(auto) constexpr cend(T &&obj)
ADL-aware version of std::cend.
Definition: StdUtils.h:87
static constexpr double g
Definition: Units.h:144
static QCString result
ProviderFunc_ providerFunc_
Definition: MixHelper.h:398
Mode initReadMode_(std::string const &mode) const
Definition: MixHelper.cc:357
cet::exempt_ptr< base_engine_t > initEngine_(seed_t seed, Mode readMode)
Definition: MixHelper.cc:454
void mixAndPut(EntryNumberSequence const &enSeq, EventIDSequence const &eIDseq, Event &e)
Definition: MixHelper.cc:278
void setEventsToSkipFunction(std::function< size_t()> eventsToSkip)
Definition: MixHelper.cc:351
bool consistentRequest_(std::string const &kind_of_engine_to_make, label_t const &engine_label) const
Definition: MixHelper.cc:432
std::size_t totalEventsRead_
Definition: MixHelper.h:405
std::string string
Definition: nybbler.cc:12
RNGsnapshot::label_t label_t
Definition: EngineCreator.h:37
std::map< ProductID, ProductID > ProductIDTransMap
MixOpList mixOps_
Definition: MixHelper.h:399
bool haveSubRunMixOps_
Definition: MixHelper.h:413
static constexpr ScheduleID first()
Definition: ScheduleID.h:50
unsigned nOpensOverThreshold_
Definition: MixHelper.h:407
EntryNumberSequence shuffledSequence_
Definition: MixHelper.h:412
std::vector< std::string > const filenames_
Definition: MixHelper.h:396
fhicl::Atom< double > coverageFraction
Definition: MixHelper.h:263
string filename
Definition: train.py:213
long long EntryNumber_t
Definition: FileIndex.h:41
base_engine_t & createEngine(seed_t seed)
Definition: MixHelper.cc:148
fhicl::Atom< bool > wrapFiles
Definition: MixHelper.h:265
std::unique_ptr< CLHEP::RandFlat > dist_
Definition: MixHelper.h:410
double const coverageFraction_
Definition: MixHelper.h:403
Mode const readMode_
Definition: MixHelper.h:402
bool generateEventSequence(size_t nSecondaries, EntryNumberSequence &enSeq, EventIDSequence &eIDseq)
Definition: MixHelper.cc:182
std::ostream & operator<<(std::ostream &os, const GroupSelector &gs)
bool const canWrapFiles_
Definition: MixHelper.h:406
const double e
bt
Definition: tracks.py:83
fhicl::Sequence< std::string > filenames
Definition: MixHelper.h:258
ProducesCollector & collector_
Definition: MixHelper.h:394
static Config * config
Definition: config.cpp:1054
bool openNextFile_()
Definition: MixHelper.cc:384
def move(depos, offset)
Definition: depos.py:107
bool compactMissingProducts_
Definition: MixHelper.h:397
void prepareTranslationTables(ProductIDTransMap &transMap)
constexpr exempt_ptr< E > make_exempt_ptr(E *) noexcept
std::unique_ptr< MixIOPolicy > ioHandle_
Definition: MixHelper.h:417
MixHelper(Config const &config, std::string const &moduleLabel, ProducesCollector &collector, std::unique_ptr< MixIOPolicy > ioHandle)
Definition: MixHelper.cc:99
CLHEP::HepRandomEngine base_engine_t
Definition: EngineCreator.h:36
std::string const moduleLabel_
Definition: MixHelper.h:395
fhicl::Atom< seed_t > seed
Definition: MixHelper.h:266
std::vector< FileIndex::EntryNumber_t > EntryNumberSequence
Definition: MixTypes.h:27
auto transform_all(Container &, OutputIt, UnaryOp)
cet::exempt_ptr< base_engine_t > engine_
Definition: MixHelper.h:409
PtrRemapper ptrRemapper_
Definition: MixHelper.h:400
std::size_t nEventsReadThisFile_
Definition: MixHelper.h:404
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
std::function< std::string()> ProviderFunc_
Definition: MixHelper.h:246
std::map< FileIndex::EntryNumber_t, EventID > EventIDIndex
Definition: MixTypes.h:23
Mode readMode() const
Definition: MixHelper.h:424
bool haveRunMixOps_
Definition: MixHelper.h:414
fhicl::Atom< bool > compactMissingProducts
Definition: MixHelper.h:259
EventAuxiliarySequence generateEventAuxiliarySequence(EntryNumberSequence const &)
Definition: MixHelper.cc:268
EventIDIndex eventIDIndex_
Definition: MixHelper.h:415
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
def func()
Definition: docstring.py:7
fhicl::Atom< std::string > readMode
Definition: MixHelper.h:262
decltype(auto) constexpr cbegin(T &&obj)
ADL-aware version of std::cbegin.
Definition: StdUtils.h:82
PtrRemapper getRemapper(Event const &e) const
decltype(auto) constexpr begin(T &&obj)
ADL-aware version of std::begin.
Definition: StdUtils.h:72
std::vector< std::unique_ptr< MixOpBase >> MixOpList
Definition: MixIOPolicy.h:22
std::unique_ptr< CLHEP::RandFlat > initDist_(cet::exempt_ptr< base_engine_t > engine) const
Definition: MixHelper.cc:472
void function(int client, int *resource, int parblock, int *test, int p)
ProdToProdMapBuilder ptpBuilder_
Definition: MixHelper.h:408
static QCString * s
Definition: config.cpp:1042