TimeTracker_service.cc
Go to the documentation of this file.
1 // vim: set sw=2 expandtab :
2 
14 #include "boost/format.hpp"
17 #include "cetlib/HorizontalRule.h"
19 #include "cetlib/sqlite/Ntuple.h"
20 #include "cetlib/sqlite/helpers.h"
22 #include "fhiclcpp/types/Atom.h"
23 #include "fhiclcpp/types/Name.h"
24 #include "fhiclcpp/types/Table.h"
26 #include "tbb/concurrent_unordered_map.h"
27 
28 #include <algorithm>
29 #include <cassert>
30 #include <chrono>
31 #include <iomanip>
32 #include <memory>
33 #include <sstream>
34 #include <string>
35 #include <vector>
36 
37 using namespace std;
38 using namespace cet;
39 using namespace hep::concurrency;
40 
41 using chrono::steady_clock;
42 
43 namespace art {
44 
45  namespace {
46 
47  using ConcurrentKey = std::pair<ScheduleID, std::string>;
48  struct ConcurrentKeyHasher {
49  size_t
50  operator()(ConcurrentKey const& key) const
51  {
52  static std::hash<ScheduleID> schedule_hasher{};
53  static std::hash<std::string> string_hasher{};
54  // A better hash will be desirable if this becomes a bottleneck.
55  return schedule_hasher(key.first) ^ string_hasher(key.second);
56  }
57  };
58 
59  auto
60  key(ScheduleID const sid)
61  {
62  return ConcurrentKey{sid, {}};
63  }
64  auto
65  key(ModuleContext const& mc)
66  {
67  return ConcurrentKey{mc.scheduleID(), mc.moduleLabel()};
68  }
69 
// Callable that returns the current std::chrono::steady_clock reading;
// used as now() throughout this file.  Written as a lambda instead of
// std::bind so that it depends only on <chrono> (no <functional> needed)
// and its return type is plain to see.
auto now = [] { return std::chrono::steady_clock::now(); };
71 
72  struct Statistics {
73  explicit Statistics() = default;
74 
75  explicit Statistics(string const& p,
76  string const& label,
77  string const& type,
78  sqlite3* const db,
79  string const& table,
80  string const& column)
81  : path{p}
82  , mod_label{label}
83  , mod_type{type}
84  , min{sqlite::min(db, table, column)}
85  , mean{sqlite::mean(db, table, column)}
86  , max{sqlite::max(db, table, column)}
87  , median{sqlite::median(db, table, column)}
88  , rms{sqlite::rms(db, table, column)}
89  , n{sqlite::nrows(db, table)}
90  {}
91 
92  string path{};
93  string mod_label{};
94  string mod_type{};
95  double min{-1.};
96  double mean{-1.};
97  double max{-1.};
98  double median{-1.};
99  double rms{-1.};
100  unsigned n{0u};
101  };
102 
103  ostream&
104  operator<<(ostream& os, Statistics const& info)
105  {
106  string label{info.path};
107  if (!info.mod_label.empty()) {
108  label += ':' + info.mod_label;
109  }
110  if (!info.mod_type.empty()) {
111  label += ':' + info.mod_type;
112  }
113  os << label << " " << boost::format(" %=12g ") % info.min
114  << boost::format(" %=12g ") % info.mean
115  << boost::format(" %=12g ") % info.max
116  << boost::format(" %=12g ") % info.median
117  << boost::format(" %=12g ") % info.rms
118  << boost::format(" %=10d ") % info.n;
119  return os;
120  }
121 
122  } // unnamed namespace
123 
// TimeTracker: records how long source reads, whole-event processing and
// individual module calls take (using steady_clock), stores the timings in
// SQLite-backed ntuples and, if printSummary is set, logs a summary table
// at end of job.
// NOTE(review): the embedded listing numbers skip values inside this class
// (e.g. 130 -> 133, 139 -> 142), so some declarations are not shown here.
124  class TimeTracker {
125  public:
126  static constexpr bool service_handle_allowed{false};
127 
// Configuration: printSummary defaults to true; the constructor also reads
// filename() and overwrite() from the dbOutput table.
128  struct Config {
129  fhicl::Atom<bool> printSummary{fhicl::Name{"printSummary"}, true};
130  struct DBoutput {
133  };
134  fhicl::Table<DBoutput> dbOutput{fhicl::Name{"dbOutput"}};
135  };
137  explicit TimeTracker(Parameters const&, ActivityRegistry&);
138 
139  private:
// Start-of-interval timestamps; set from now() by the pre-* callbacks and
// subtracted from now() by the matching post-* callbacks.
142  steady_clock::time_point eventStart;
143  steady_clock::time_point moduleStart;
144  };
// Ntuple row layouts.  A timeModule_t row holds three uint32_t values,
// three strings and one double.
145  template <unsigned SIZE>
147  using timeSource_t =
149  using timeEvent_t =
151  using timeModule_t = cet::sqlite::
152  Ntuple<uint32_t, uint32_t, uint32_t, string, string, string, double>;
153 
// Callbacks; the constructor registers them with the ActivityRegistry.
154  void postSourceConstruction(ModuleDescription const&);
155  void postEndJob();
156  void preEventReading(ScheduleContext);
157  void postEventReading(Event const&, ScheduleContext);
158  void preEventProcessing(Event const&, ScheduleContext);
159  void postEventProcessing(Event const&, ScheduleContext);
// startTime stamps the start of a module call; recordTime stores the elapsed
// time, appending `suffix` to the module type name in the stored row.
160  void startTime(ModuleContext const& mc);
161  void recordTime(ModuleContext const& mc, string const& suffix);
// Formats the summary table and sends it to the message logger.
162  void logToDestination_(Statistics const& evt,
163  vector<Statistics> const& modules);
164 
165  tbb::concurrent_unordered_map<ConcurrentKey,
167  ConcurrentKeyHasher>
169  bool const printSummary_;
170  unique_ptr<cet::sqlite::Connection> const db_;
171  bool const overwriteContents_;
// Module name of the input source, stored by postSourceConstruction.
172  string sourceType_{};
179  };
180 
// Constructs the service: reads the configuration, sets up the column names
// and the TimeSource/TimeEvent/TimeModule tables, and registers the timing
// callbacks with the ActivityRegistry.
// NOTE(review): several initializer lines are absent from this listing
// (gaps in the embedded numbering), so the initializer list is not complete
// as shown.
181  TimeTracker::TimeTracker(Parameters const& config, ActivityRegistry& areg)
182  : printSummary_{config().printSummary()}
184  config().dbOutput().filename())}
185  , overwriteContents_{config().dbOutput().overwrite()}
186  , timeSourceColumnNames_{{"Run", "SubRun", "Event", "Source", "Time"}}
187  , timeEventColumnNames_{{"Run", "SubRun", "Event", "Time"}}
188  , timeModuleColumnNames_{{"Run",
189  "SubRun",
190  "Event",
191  "Path",
192  "ModuleLabel",
193  "ModuleType",
194  "Time"}}
196  "TimeSource",
199  , timeEventTable_{*db_,
200  "TimeEvent",
202  overwriteContents_}
204  "TimeModule",
206  overwriteContents_}
207  {
208  areg.sPostSourceConstruction.watch(this,
210  areg.sPostEndJob.watch(this, &TimeTracker::postEndJob);
211  // Event reading
212  areg.sPreSourceEvent.watch(this, &TimeTracker::preEventReading);
213  areg.sPostSourceEvent.watch(this, &TimeTracker::postEventReading);
214  // Event execution
215  areg.sPreProcessEvent.watch(this, &TimeTracker::preEventProcessing);
216  areg.sPostProcessEvent.watch(this, &TimeTracker::postEventProcessing);
217  // Module execution
// The same startTime/recordTime pair serves both module execution and event
// writing; write timings are told apart by the "(write)" suffix that
// recordTime appends to the module type name.
218  areg.sPreModule.watch(this, &TimeTracker::startTime);
219  areg.sPostModule.watch(
220  [this](auto const& mc) { this->recordTime(mc, ""s); });
221  areg.sPreWriteEvent.watch(this, &TimeTracker::startTime);
222  areg.sPostWriteEvent.watch(
223  [this](auto const& mc) { this->recordTime(mc, "(write)"s); });
224  }
225 
// End-of-job summary (presumably TimeTracker::postEndJob; the signature line
// is missing from this listing).  Does nothing unless printSummary_ is set.
// Otherwise it gathers full-event and per-module statistics from the
// database and passes them to logToDestination_.
226  void
228  {
232  if (!printSummary_) {
233  return;
234  }
235  using namespace cet::sqlite;
// Count the rows in the event and module tables to choose between the
// "nothing processed", "malformed database" and normal cases below.
236  query_result<size_t> rEvents;
237  rEvents << select("count(*)").from(*db_, timeEventTable_.name());
238  query_result<size_t> rModules;
239  rModules << select("count(*)").from(*db_, timeModuleTable_.name());
240  auto const nEventRows = unique_value(rEvents);
241  auto const nModuleRows = unique_value(rModules);
// Both tables empty: print the table with default (empty) statistics.
242  if ((nEventRows == 0) && (nModuleRows == 0)) {
243  logToDestination_(Statistics{}, vector<Statistics>{});
244  return;
245  }
// Module rows without any event row: report the inconsistency and print no
// summary.
246  if (nEventRows == 0 && nModuleRows != 0) {
247  string const errMsg{
248  "Malformed TimeTracker database. The TimeEvent table is empty, but\n"
249  "the TimeModule table is not. This can happen if an exception has\n"
250  "been thrown from a module while processing the first event. Any\n"
251  "saved database file is suspect and should not be used."};
252  mf::LogAbsolute("TimeTracker") << errMsg;
253  return;
254  }
// NOTE(review): the statement below combines its sub-selects with UNION,
// which discards duplicate rows; two contributions with identical
// Run/Subrun/Event/Time values would be summed only once.  Confirm that
// UNION (rather than UNION ALL) is intended.
255  // Gather statistics for full Event
256  // -- Unfortunately, this is not a simple query since the (e.g.)
257  // 'RootOutput(write)' times and the source time are not
258  // recorded in the TimeEvent rows. They must be added in.
259  string const fullEventTime_ddl =
260  "CREATE TABLE temp.fullEventTime AS "
261  "SELECT Run,Subrun,Event,SUM(Time) AS FullEventTime FROM ("
262  " SELECT Run,Subrun,Event,Time FROM TimeEvent"
263  " UNION"
264  " SELECT Run,Subrun,Event,Time FROM TimeModule WHERE ModuleType "
265  "LIKE '%(write)'"
266  " UNION"
267  " SELECT Run,Subrun,Event,Time FROM TimeSource"
268  ") GROUP BY Run,Subrun,Event";
// NOTE(review): this using-directive repeats the one near the top of the
// function and appears redundant.
269  using namespace cet::sqlite;
// The temporary table exists only while evtStats is being computed.
270  exec(*db_, fullEventTime_ddl);
271  Statistics const evtStats{
272  "Full event", "", "", *db_, "temp.fullEventTime", "FullEventTime"};
273  drop_table(*db_, "temp.fullEventTime");
// One statistics entry per distinct (Path, ModuleLabel, ModuleType) found in
// the module table, preceded by an entry for the source.
// NOTE(review): the declaration of `r` is not visible in this listing.
275  r << select_distinct("Path", "ModuleLabel", "ModuleType")
276  .from(*db_, timeModuleTable_.name());
277  vector<Statistics> modStats;
278  modStats.emplace_back(
279  "source", sourceType_ + "(read)", "", *db_, "TimeSource", "Time");
280  for (auto const& row : r) {
281  auto const& [path, mod_label, mod_type] = row;
// NOTE(review): the WHERE clause is built by string concatenation from
// values read back from the TimeModule table; a value containing a single
// quote would break the statement.  Presumably labels never contain one --
// verify.
282  create_table_as("temp.tmpModTable",
283  select("*")
284  .from(*db_, "TimeModule")
285  .where("Path='"s + path + "'"s + " AND ModuleLabel='"s +
286  mod_label + "'"s + " AND ModuleType='"s +
287  mod_type + "'"s));
288  modStats.emplace_back(
289  path, mod_label, mod_type, *db_, "temp.tmpModTable", "Time");
290  drop_table(*db_, "temp.tmpModTable");
291  }
292  logToDestination_(evtStats, modStats);
293  }
294 
// Stores the module name from `md` in sourceType_ (presumably
// TimeTracker::postSourceConstruction; the signature line is missing from
// this listing).
295  void
297  {
298  sourceType_ = md.moduleName();
299  }
300 
// Starts the stopwatch for a source read on the given schedule (presumably
// TimeTracker::preEventReading; the signature line is missing from this
// listing).
300  void
302  {
303  auto& d = data_[key(sc.id())];
// The event ID is set to the invalid value here and replaced with the real
// one once the event has been read.
304  d.eventID = EventID::invalidEvent();
305  d.eventStart = now();
306  }
308 
// Records the event's ID for this schedule and computes the time elapsed
// since the matching eventStart stamp, in seconds as a double (presumably
// TimeTracker::postEventReading; the signature line and the start of the
// statement that uses `t` are missing from this listing).
308  void
310  {
311  auto& d = data_[key(sc.id())];
312  d.eventID = e.id();
313  auto const t = chrono::duration<double>{now() - d.eventStart}.count();
315  d.eventID.run(), d.eventID.subRun(), d.eventID.event(), sourceType_, t);
316  }
318 
319  void
320  TimeTracker::preEventProcessing(Event const& e [[maybe_unused]],
321  ScheduleContext const sc)
322  {
323  auto& d = data_[key(sc.id())];
324  assert(d.eventID == e.id());
325  d.eventStart = now();
326  }
327 
// Computes the time elapsed since eventStart for this schedule, in seconds
// as a double (presumably TimeTracker::postEventProcessing; the signature
// line and the start of the statement that uses `t` are missing from this
// listing).
328  void
330  {
331  auto const& d = data_[key(sc.id())];
332  auto const t = chrono::duration<double>{now() - d.eventStart}.count();
334  d.eventID.run(), d.eventID.subRun(), d.eventID.event(), t);
335  }
336 
// Prepares the per-module entry for a module call: copies the event ID from
// the schedule-level entry and stamps the start time (presumably
// TimeTracker::startTime; the signature line is missing from this listing).
// NOTE(review): data_[key(mc)] is looked up twice; a single reference would
// avoid building the key and hashing it a second time.
337  void
339  {
340  data_[key(mc)].eventID = data_[key(mc.scheduleID())].eventID;
341  data_[key(mc)].moduleStart = now();
342  }
343 
344  void
345  TimeTracker::recordTime(ModuleContext const& mc, string const& suffix)
346  {
347  auto const& d = data_[key(mc)];
348  auto const t = chrono::duration<double>{now() - d.moduleStart}.count();
349  timeModuleTable_.insert(d.eventID.run(),
350  d.eventID.subRun(),
351  d.eventID.event(),
352  mc.pathName(),
353  mc.moduleLabel(),
354  mc.moduleName() + suffix,
355  t);
356  }
357 
// Formats the summary table (header, the full-event row, then one row per
// module entry) and sends it to the message logger.
// NOTE(review): the first line of the signature is missing from this
// listing.
358  void
360  vector<Statistics> const& modules)
361  {
// Width of the identifier column: at least 30, widened to fit the longest
// "path:label:type" identifier among the module entries.
362  size_t width{30};
363  auto identifier_size = [](Statistics const& s) {
364  return s.path.size() + s.mod_label.size() + s.mod_type.size() +
365  2; // Don't forget the two ':'s.
366  };
367  cet::for_all(modules, [&identifier_size, &width](auto const& mod) {
368  width = max(width, identifier_size(mod));
369  });
370  ostringstream msgOss;
// Rule length: the identifier column plus the numeric columns.  5 * 14 + 12
// presumably corresponds to the five 12-wide and one 10-wide fields with
// their surrounding spaces; the extra 4 is not explained here -- confirm.
371  HorizontalRule const rule{width + 4 + 5 * 14 + 12};
372  msgOss << '\n'
373  << rule('=') << '\n'
374  << std::setw(width + 2) << std::left << "TimeTracker printout (sec)"
375  << boost::format(" %=12s ") % "Min"
376  << boost::format(" %=12s ") % "Avg"
377  << boost::format(" %=12s ") % "Max"
378  << boost::format(" %=12s ") % "Median"
379  << boost::format(" %=12s ") % "RMS"
380  << boost::format(" %=10s ") % "nEvts"
381  << "\n";
382  msgOss << rule('=') << '\n';
// A zero row count in the full-event statistics means there is nothing to
// tabulate.
383  if (evt.n == 0u) {
384  msgOss << "[ No processed events ]\n";
385  } else {
386  // N.B. setw(width) applies to the first field in
387  // ostream& operator<<(ostream&, Statistics const&).
388  msgOss << setw(width) << evt << '\n' << rule('-') << '\n';
389  for (auto const& mod : modules) {
390  msgOss << setw(width) << mod << '\n';
391  }
392  }
393  msgOss << rule('=');
394  mf::LogAbsolute("TimeTracker") << msgOss.str();
395  }
396 
397 } // namespace art
398 
void insert(Args const ...)
Definition: Ntuple.h:231
void logToDestination_(Statistics const &evt, vector< Statistics > const &modules)
void preEventReading(ScheduleContext)
T unique_value(query_result< T > const &r)
Definition: query_result.h:94
tbb::concurrent_unordered_map< ConcurrentKey, PerScheduleData, ConcurrentKeyHasher > data_
double rms(sqlite3 *db, std::string const &table_name, std::string const &column_name)
Definition: statistics.cc:40
auto const & pathName() const
Definition: ModuleContext.h:33
T * get() const
Definition: ServiceHandle.h:63
auto select_distinct(T const &...t)
Definition: select.h:154
static bool format(QChar::Decomposition tag, QString &str, int index, int len)
Definition: qstring.cpp:11496
void preEventProcessing(Event const &, ScheduleContext)
auto scheduleID() const
Definition: ModuleContext.h:28
STL namespace.
name_array< 4u > const timeEventColumnNames_
uint size() const
Definition: qcstring.h:201
std::string const & name() const
Definition: Ntuple.h:145
string filename
Definition: train.py:213
void postEventProcessing(Event const &, ScheduleContext)
void postSourceConstruction(ModuleDescription const &)
void create_table_as(std::string const &tablename, SelectStmt const &stmt)
Definition: create_table.h:127
cet::sqlite::name_array< SIZE > name_array
fhicl::Atom< bool > printSummary
std::string const & moduleName() const
const double e
auto const & moduleName() const
Definition: ModuleContext.h:48
void postEventReading(Event const &, ScheduleContext)
def key(type, name=None)
Definition: graph.py:13
timeModule_t timeModuleTable_
#define DECLARE_ART_SERVICE(svc, scope)
static Config * config
Definition: config.cpp:1054
std::void_t< T > n
p
Definition: test.py:223
std::ostream & operator<<(std::ostream &os, Analyzer::Table< T > const &t)
Definition: Analyzer.h:136
static int max(int a, int b)
unique_ptr< cet::sqlite::Connection > const db_
name_array< 7u > const timeModuleColumnNames_
timeSource_t timeSourceTable_
auto select(T const &...t)
Definition: select.h:146
steady_clock::time_point moduleStart
struct sqlite3 sqlite3
Q_EXPORT QTSManip setw(int w)
Definition: qtextstream.h:331
void recordTime(ModuleContext const &mc, string const &suffix)
void drop_table(sqlite3 *db, std::string const &tablename)
Definition: helpers.cc:69
name_array< 5u > const timeSourceColumnNames_
#define DEFINE_ART_SERVICE(svc)
void startTime(ModuleContext const &mc)
T min(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:55
std::vector< std::string > column
auto const & moduleLabel() const
Definition: ModuleContext.h:43
static constexpr EventID invalidEvent() noexcept
Definition: EventID.h:202
std::array< std::string, N > name_array
Definition: column.h:40
auto for_all(FwdCont &, Func)
unsigned nrows(sqlite3 *db, std::string const &tablename)
Definition: helpers.cc:82
TCEvent evt
Definition: DataStructs.cxx:7
void exec(sqlite3 *db, std::string const &ddl)
Definition: exec.cc:5
double mean(sqlite3 *db, std::string const &table_name, std::string const &column_name)
Definition: statistics.cc:16
MaybeLogger_< ELseverityLevel::ELsev_severe, true > LogAbsolute
static QCString * s
Definition: config.cpp:1042
EventID id() const
Definition: Event.cc:34
double median(sqlite3 *db, std::string const &table_name, std::string const &column_name)
Definition: statistics.cc:26
fhicl::Table< DBoutput > dbOutput