MemoryTrackerLinux_service.cc
Go to the documentation of this file.
1 // vim: set sw=2 expandtab :
2 // ======================================================================
3 // MemoryTracker
4 //
5 // This MemoryTracker implementation is supported only for Linux
6 // systems. It relies on the proc file system to record VSize and RSS
7 // information throughout the course of an art process. It inserts
8 // memory information into an in-memory SQLite database, or an
9 // external file if the user provides a non-empty file name.
10 //
11 // Since information that procfs provides is process-specific, the
12 // MemoryTracker does not attempt to provide per-module information in
13 // the context of multi-threading. If more than one thread has been
14 // enabled for the art process, only the maximum RSS and VSize for the
15 // process are reported at the end of the job.
16 // ======================================================================
17 
18 #ifndef __linux__
19 #error "This source file can be built only for Linux platforms."
20 #endif
21 
33 #include "art/Utilities/Globals.h"
38 #include "cetlib/HorizontalRule.h"
41 #include "cetlib/sqlite/Ntuple.h"
42 #include "cetlib/sqlite/select.h"
43 #include "fhiclcpp/types/Atom.h"
47 
48 #include <iomanip>
49 #include <memory>
50 #include <sstream>
51 #include <string>
52 #include <tuple>
53 #include <vector>
54 
55 using namespace std;
56 using namespace string_literals;
57 using namespace cet;
58 
62 
63 namespace art {
64 
65  class MemoryTracker {
66  template <unsigned N>
69  using otherInfo_t =
71  using memEvent_t =
74  uint32_t,
75  uint32_t,
76  uint32_t,
77  string,
78  string,
79  string,
80  double,
81  double>;
83  uint32_t,
84  uint32_t,
85  uint32_t,
86  int,
87  int,
88  int,
89  int,
90  int,
91  int,
92  int>;
94  uint32_t,
95  uint32_t,
96  uint32_t,
97  string,
98  string,
99  string,
100  int,
101  int,
102  int,
103  int,
104  int,
105  int,
106  int>;
107 
108  public:
109  static constexpr bool service_handle_allowed{false};
110 
111  struct Config {
112  template <typename T>
114  using Name = fhicl::Name;
116  template <typename T>
118  struct DBoutput {
119  Atom<string> filename{Name{"filename"}, ""};
120  Atom<bool> overwrite{Name{"overwrite"}, false};
121  };
122  Table<DBoutput> dbOutput{Name{"dbOutput"}};
123  Atom<bool> includeMallocInfo{Name{"includeMallocInfo"}, false};
124  };
125 
128 
129  private:
130  void prePathProcessing(PathContext const& pc);
131  void recordOtherData(ModuleDescription const& md, string const& step);
132  void recordOtherData(ModuleContext const& mc, string const& step);
133  void recordEventData(Event const& e, string const& step);
134  void recordModuleData(ModuleContext const& mc, string const& step);
135  void postEndJob();
136  bool checkMallocConfig_(string const&, bool);
137  void recordPeakUsages_();
138  void flushTables_();
139  void summary_();
140 
141  LinuxProcMgr procInfo_{};
142  string const fileName_;
143  unique_ptr<cet::sqlite::Connection> const db_;
144  bool const overwriteContents_;
145  bool const includeMallocInfo_;
146 
147  // NB: using "current" semantics for the MemoryTracker is valid
148  // since per-module/event information are retrieved only in a
149  // sequential (i.e. single-threaded) context.
150  EventID currentEventID_{EventID::invalidEvent()};
151  name_array<3u> peakUsageColumns_{{"Name", "Value", "Description"}};
152  name_array<5u> otherInfoColumns_{
153  {"Step", "ModuleLabel", "ModuleType", "Vsize", "RSS"}};
154  name_array<6u> eventColumns_{
155  {"Step", "Run", "SubRun", "Event", "Vsize", "RSS"}};
156  name_array<9u> moduleColumns_{{"Step",
157  "Run",
158  "SubRun",
159  "Event",
160  "Path",
161  "ModuleLabel",
162  "ModuleType",
163  "Vsize",
164  "RSS"}};
165  name_array<11u> eventHeapColumns_{{"Step",
166  "Run",
167  "SubRun",
168  "Event",
169  "arena",
170  "ordblks",
171  "keepcost",
172  "hblkhd",
173  "hblks",
174  "uordblks",
175  "fordblks"}};
176  name_array<14u> moduleHeapColumns_{{"Step",
177  "Run",
178  "SubRun",
179  "Event",
180  "Path",
181  "ModuleLabel",
182  "ModuleType",
183  "arena",
184  "ordblks",
185  "keepcost",
186  "hblkhd",
187  "hblks",
188  "uordblks",
189  "fordblks"}};
194  unique_ptr<memEventHeap_t> eventHeapTable_;
195  unique_ptr<memModuleHeap_t> moduleHeapTable_;
196  };
197 
198  MemoryTracker::MemoryTracker(ServiceTable<Config> const& config,
199  ActivityRegistry& iReg)
200  : fileName_{config().dbOutput().filename()}
202  , overwriteContents_{config().dbOutput().overwrite()}
204  config().includeMallocInfo())}
205  // Fix so that a value of 'false' is an error if filename => in-memory db.
206  , peakUsageTable_{*db_, "PeakUsage", peakUsageColumns_, true}
207  // always recompute the peak usage
209  , eventTable_{*db_, "EventInfo", eventColumns_, overwriteContents_}
210  , moduleTable_{*db_, "ModuleInfo", moduleColumns_, overwriteContents_}
212  make_unique<memEventHeap_t>(*db_,
213  "EventMallocInfo",
215  nullptr}
217  make_unique<memModuleHeap_t>(*db_,
218  "ModuleMallocInfo",
220  nullptr}
221  {
222  iReg.sPostEndJob.watch(this, &MemoryTracker::postEndJob);
223  auto const nthreads = Globals::instance()->nthreads();
224  if (nthreads != 1) {
225  mf::LogWarning("MemoryTracker")
226  << "Since " << nthreads
227  << " threads have been configured, only process-level\n"
228  "memory usage will be recorded at the end of the job.";
229  }
230 
231  if (!fileName_.empty() && nthreads == 1u) {
232  iReg.sPreModuleConstruction.watch([this](auto const& md) {
233  this->recordOtherData(md, "PreModuleConstruction");
234  });
235  iReg.sPostModuleConstruction.watch([this](auto const& md) {
236  this->recordOtherData(md, "PostModuleConstruction");
237  });
238  iReg.sPreModuleBeginJob.watch(
239  [this](auto const& md) { this->recordOtherData(md, "PreBeginJob"); });
240  iReg.sPostModuleBeginJob.watch(
241  [this](auto const& md) { this->recordOtherData(md, "PostBeginJob"); });
242  iReg.sPreModuleBeginRun.watch(
243  [this](auto const& mc) { this->recordOtherData(mc, "PreBeginRun"); });
244  iReg.sPostModuleBeginRun.watch(
245  [this](auto const& mc) { this->recordOtherData(mc, "PostBeginRun"); });
246  iReg.sPreModuleBeginSubRun.watch([this](auto const& mc) {
247  this->recordOtherData(mc, "PreBeginSubRun");
248  });
249  iReg.sPostModuleBeginSubRun.watch([this](auto const& mc) {
250  this->recordOtherData(mc, "PostBeginSubRun");
251  });
252  iReg.sPreProcessEvent.watch([this](auto const& e, ScheduleContext) {
253  this->recordEventData(e, "PreProcessEvent");
254  });
255  iReg.sPostProcessEvent.watch([this](auto const& e, ScheduleContext) {
256  this->recordEventData(e, "PostProcessEvent");
257  });
258  iReg.sPreModule.watch([this](auto const& mc) {
259  this->recordModuleData(mc, "PreProcessModule");
260  });
261  iReg.sPostModule.watch([this](auto const& mc) {
262  this->recordModuleData(mc, "PostProcessModule");
263  });
264  iReg.sPreWriteEvent.watch([this](auto const& mc) {
265  this->recordModuleData(mc, "PreWriteEvent");
266  });
267  iReg.sPostWriteEvent.watch([this](auto const& mc) {
268  this->recordModuleData(mc, "PostWriteEvent");
269  });
270  iReg.sPreModuleEndSubRun.watch(
271  [this](auto const& mc) { this->recordOtherData(mc, "PreEndSubRun"); });
272  iReg.sPreModuleEndRun.watch(
273  [this](auto const& mc) { this->recordOtherData(mc, "PreEndRun"); });
274  iReg.sPreModuleEndJob.watch(
275  [this](auto const& md) { this->recordOtherData(md, "PreEndJob"); });
276  iReg.sPostModuleEndSubRun.watch(
277  [this](auto const& mc) { this->recordOtherData(mc, "PostEndSubRun"); });
278  iReg.sPostModuleEndRun.watch(
279  [this](auto const& mc) { this->recordOtherData(mc, "PostEndRun"); });
280  iReg.sPostModuleEndJob.watch(
281  [this](auto const& md) { this->recordOtherData(md, "PostEndJob"); });
282  }
283  }
284 
285  void
287  {
289  }
290 
291  void
293  string const& step)
294  {
295  auto const data = procInfo_.getCurrentData();
296  otherInfoTable_.insert(step,
297  md.moduleLabel(),
298  md.moduleName(),
299  LinuxProcData::getValueInMB<vsize_t>(data),
300  LinuxProcData::getValueInMB<rss_t>(data));
301  }
302 
303  void
305  {
306  currentEventID_ = e.id();
307  auto const currentMemory = procInfo_.getCurrentData();
308  eventTable_.insert(step,
312  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
313  LinuxProcData::getValueInMB<rss_t>(currentMemory));
314  if (includeMallocInfo_) {
315  auto minfo = LinuxMallInfo{}.get();
316  eventHeapTable_->insert(step,
320  minfo.arena,
321  minfo.ordblks,
322  minfo.keepcost,
323  minfo.hblkhd,
324  minfo.hblks,
325  minfo.uordblks,
326  minfo.fordblks);
327  }
328  }
329 
330  void
332  {
333  auto const currentMemory = procInfo_.getCurrentData();
334  moduleTable_.insert(step,
338  mc.pathName(),
339  mc.moduleLabel(),
340  mc.moduleName(),
341  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
342  LinuxProcData::getValueInMB<rss_t>(currentMemory));
343  if (includeMallocInfo_) {
344  auto minfo = LinuxMallInfo{}.get();
345  moduleHeapTable_->insert(step,
349  mc.pathName(),
350  mc.moduleLabel(),
351  mc.moduleName(),
352  minfo.arena,
353  minfo.ordblks,
354  minfo.keepcost,
355  minfo.hblkhd,
356  minfo.hblks,
357  minfo.uordblks,
358  minfo.fordblks);
359  }
360  }
361 
362  void
364  {
366  flushTables_();
367  summary_();
368  }
369 
370  bool
371  MemoryTracker::checkMallocConfig_(string const& dbfilename,
372  bool const include)
373  {
374  if (include && dbfilename.empty()) {
375  string const errmsg =
376  "\n'includeMallocInfo : true' is valid only if a nonempty db filename is specified:\n\n"s +
377  " MemoryTracker: {\n"
378  " includeMallocInfo: true\n"
379  " dbOutput: {\n"
380  " filename: \"your_filename.db\"\n"
381  " }\n"
382  " }\n\n";
383  throw Exception{errors::Configuration} << errmsg;
384  }
385  return include;
386  }
387 
388  void
390  {
392  "VmPeak", procInfo_.getVmPeak(), "Peak virtual memory (MB)");
394  "VmHWM", procInfo_.getVmHWM(), "Peak resident set size (MB)");
395  }
396 
397  void
399  {
401  eventTable_.flush();
404  if (eventHeapTable_) {
405  eventHeapTable_->flush();
406  }
407  if (moduleHeapTable_) {
408  moduleHeapTable_->flush();
409  }
410  }
411 
412  void
414  {
415  using namespace cet::sqlite;
416  using namespace std;
417  query_result<double> rVMax;
418  query_result<double> rRMax;
419  rVMax << select("Value")
420  .from(*db_, peakUsageTable_.name())
421  .where("Name='VmPeak'");
422  rRMax << select("Value")
423  .from(*db_, peakUsageTable_.name())
424  .where("Name='VmHWM'");
425  mf::LogAbsolute log{"MemoryTracker"};
426  HorizontalRule const rule{100};
427  log << '\n' << rule('=') << '\n';
428  log << std::left << "MemoryTracker summary (base-10 MB units used)\n\n";
429  log << " Peak virtual memory usage (VmPeak) : " << unique_value(rVMax)
430  << " MB\n"
431  << " Peak resident set size usage (VmHWM): " << unique_value(rRMax)
432  << " MB\n";
433  if (!(fileName_.empty() || fileName_ == ":memory:")) {
434  log << " Details saved in: '" << fileName_ << "'\n";
435  }
436  log << rule('=');
437  }
438 
439 } // namespace art
440 
void insert(Args const ...)
Definition: Ntuple.h:231
double getVmHWM() const noexcept(false)
Definition: LinuxProcMgr.h:34
name_array< 11u > eventHeapColumns_
T unique_value(query_result< T > const &r)
Definition: query_result.h:94
double getVmPeak() const noexcept(false)
Definition: LinuxProcMgr.h:29
auto const & pathName() const
Definition: ModuleContext.h:33
T * get() const
Definition: ServiceHandle.h:63
std::string string
Definition: nybbler.cc:12
art::LinuxProcData::rss_t rss_t
std::string const & moduleLabel() const
ChannelGroupService::Name Name
cet::sqlite::name_array< N > name_array
STL namespace.
unique_ptr< memEventHeap_t > eventHeapTable_
std::string const & name() const
Definition: Ntuple.h:145
LinuxProcData::proc_tuple getCurrentData() const noexcept(false)
Definition: LinuxProcMgr.cc:69
string filename
Definition: train.py:213
ScheduleID::size_type nthreads() const
Definition: Globals.cc:36
RunNumber_t run() const
Definition: EventID.h:98
void recordEventData(Event const &e, string const &step)
std::string const & moduleName() const
const double e
auto const & moduleName() const
Definition: ModuleContext.h:48
#define DECLARE_ART_SERVICE(svc, scope)
static Config * config
Definition: config.cpp:1054
unique_ptr< cet::sqlite::Connection > const db_
struct mallinfo get() const
Definition: LinuxMallInfo.h:42
void include(std::istream &in, std::string &result)
Definition: include.cc:63
bool checkMallocConfig_(string const &, bool)
auto select(T const &...t)
Definition: select.h:146
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
#define DEFINE_ART_SERVICE(svc)
#define Comment
art::LinuxProcData::vsize_t vsize_t
auto const & moduleDescription() const
Definition: ModuleContext.h:38
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
auto const & moduleLabel() const
Definition: ModuleContext.h:43
std::array< std::string, N > name_array
Definition: column.h:40
EventNumber_t event() const
Definition: EventID.h:116
name_array< 14u > moduleHeapColumns_
void recordModuleData(ModuleContext const &mc, string const &step)
static Globals * instance()
Definition: Globals.cc:17
void recordOtherData(ModuleDescription const &md, string const &step)
unique_ptr< memModuleHeap_t > moduleHeapTable_
static QCString * s
Definition: config.cpp:1042
SubRunNumber_t subRun() const
Definition: EventID.h:110
EventID id() const
Definition: Event.cc:34