TFileMetadataDUNE_service.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 // Name: TFileMetadataDUNE_service.cc.
3 //
4 // Purpose: generate DUNE-specific sam metadata for root Tfiles (histogram or ntuple files).
5 //
6 // FCL parameters: GenerateTFileMetadata: This needs to be set to "true" in the fcl file
7 // to generate metadata (default value: false)
8 // dataTier: Currently this needs to be specified by the user
9 // for ntuples, dataTier = root-tuple;
10 // for histos, dataTier = root-histogram
11 // (default value: root-tuple)
12 // fileFormat: This is currently specified by the user,
13 // the fileFormat for Tfiles is "root" (default value: root)
14 //
15 // Other notes: 1. This service uses the ART's standard file_catalog_metadata service
16 // to extract some of the common (common to both ART and TFile outputs)
17 // job-specific metadata parameters, so it is important to configure that
18 // service in your fcl file.
19 // Put this line in the "services" section of your fcl file:
20 // FileCatalogMetadata: @local::art_file_catalog_mc
21 //
22 // 2. When you call FileCatalogMetadata service in your fcl file, and if
23 // you have (art) root Output section in your fcl file, and if you do not
24 // have "dataTier" specified in that section, then this service will throw
25 // an exception. To avoid this, either remove the entire root Output section
26 // in your fcl file (and remove art stream output from your end_paths) or
27 // include appropriate dataTier information in the section. If you are only
28 // running an analysis job, the best way is to not include any art root Output section.
29 //
30 // 3. This service is exclusively written to work with production (in other
31 // words for jobs submitted through grid). Some of the metadata parameters
32 // (output TFileName, filesize, Project related details) are captured/updated
33 // during and/or after the workflow.
34 //
35 //
36 // Created: 1-Nov-2017, T. Junk
37 // based on the MicroBooNE example by S. Gollapinni
38 //
39 ////////////////////////////////////////////////////////////////////////
40 
41 #include <string>
42 #include <sstream>
43 #include <iomanip>
44 #include <vector>
45 #include <iostream>
46 #include <fstream>
47 #include <algorithm>
54 //#include "art/Utilities/OutputFileInfo.h"
56 #include "art_root_io/RootDB/SQLite3Wrapper.h"
57 #include "art_root_io/RootDB/SQLErrMsg.h"
58 #include "cetlib_except/exception.h"
60 #include "TROOT.h"
61 #include "TFile.h"
62 #include "TTimeStamp.h"
63 #include <ctime>
64 #include <stdio.h>
65 #include <time.h>
66 
67 using namespace std;
68 
69 
70 //--------------------------------------------------------------------
71 
72 // Constructor.
75  fGenerateTFileMetadata(false)
76 {
77  reconfigure(pset);
78 
79  // Register for callbacks.
80 
87 }
88 
89 //--------------------------------------------------------------------
90 // Destructor.
92 {
93 }
94 
95 //--------------------------------------------------------------------
96 // Set service parameters
98 {
99  // Get parameters.
100  fGenerateTFileMetadata = pset.get<bool>("GenerateTFileMetadata", false);
101  fJSONFileName = pset.get<std::string>("JSONFileName");
102 
103  if (!fGenerateTFileMetadata) return;
104 
105  md.fdata_tier = pset.get<std::string>("dataTier","root-tuple");
106  md.ffile_format = pset.get<std::string>("fileFormat","root");
107 }
108 
109 //--------------------------------------------------------------------
110 // PostBeginJob callback.
111 // Insert per-job metadata via TFileMetadata service.
113 {
114  // only generate metadata when this is true
115  if (!fGenerateTFileMetadata) return;
116 
117  // get the start time
118  md.fstart_time = time(0);
119 
120 // Get art metadata service and extract parameters from there
122 
124  artmds->getMetadata(artmd);
125 
126  for(auto const & d : artmd) {
127  mdmap[d.first] = d.second;
128  //std::cout << d.first << " " << d.second << std::endl;
129  }
130 
132 
133 // if a certain parameter/key is not found, assign a placeholder value (a quoted blank, " ") to it
134 
135  if ((it=mdmap.find("application.family"))!=mdmap.end()) {
136  std::cout << "Setting applicationFamily: " << it->second << std::endl;
137  std::get<0>(md.fapplication) = it->second;
138  }
139  else {
140  std::cout << "Setting applicationFamily: empty" << std::endl;
141  std::get<0>(md.fapplication) = "\" \"";
142  }
143 
144  if ((it=mdmap.find("art.process_name"))!=mdmap.end()) {
145  std::cout << "Setting process_name: " << it->second << std::endl;
146  std::get<1>(md.fapplication) = it->second;
147  }
148  else {
149  std::cout << "Setting process_name: empty" << std::endl;
150  std::get<1>(md.fapplication) = "\" \"";
151  }
152 
153  if ((it=mdmap.find("application.version"))!=mdmap.end()) std::get<2>(md.fapplication) = it->second;
154  else std::get<2>(md.fapplication) = "\" \"";
155 
156  if ((it=mdmap.find("application.version"))!=mdmap.end()) std::get<2>(md.fapplication) = it->second;
157  else std::get<2>(md.fapplication) = "\" \"";
158 
159  if ((it=mdmap.find("group"))!=mdmap.end()) md.fgroup = it->second;
160  else md.fgroup = "\" \"";
161 
162  if ((it=mdmap.find("file_type"))!=mdmap.end()) md.ffile_type = it->second;
163  else md.ffile_type = "\" \"";
164 
165  if ((it=mdmap.find("run_type"))!=mdmap.end()) frunType = it->second;
166  else frunType = "\" \"";
167 
168  //std::cout << "Run type: " << frunType << std::endl;
169 
170  //Remove this
171  if ((it=mdmap.find("art.run_type"))!=mdmap.end()) frunType = it->second;
172  else frunType = "\" \"";
173  //std::cout << "Run type: " << frunType << std::endl;
174 
175 }
176 
177 
178 //--------------------------------------------------------------------
179 // PostOpenFile callback.
181 {
182  if (!fGenerateTFileMetadata) return;
183 
184  // save parent input files here
185  md.fParents.insert(fn);
186 
187 }
188 
189 //--------------------------------------------------------------------
190 // PostEvent callback.
192 {
193 
194  if(!fGenerateTFileMetadata) return;
195 
196  art::RunNumber_t run = evt.run();
197  art::SubRunNumber_t subrun = evt.subRun();
198  art::EventNumber_t event = evt.event();
199  art::SubRunID srid = evt.id().subRunID();
200 
201  // save run, subrun and runType information once every subrun
202  if (fSubRunNumbers.count(srid) == 0){
203  fSubRunNumbers.insert(srid);
204  md.fruns.push_back(make_tuple(run, subrun, frunType));
205  }
206 
207  // save the first event
208  if (md.fevent_count == 0) md.ffirst_event = event;
209  md.flast_event = event;
210  // event counter
211  ++md.fevent_count;
212 
214  if (evt.getByLabel("TriggerResults", h) and h->accept()) {
215  // Event passed at least one of the paths
217  }
218 
219 }
220 
221 //--------------------------------------------------------------------
222 // PostCloseOutputFile callback.
224 {
225 
226  if(!fGenerateTFileMetadata) return;
227 
228 
229  // save the first event
230  //if (md.fevent_count == 0) md.ffirst_event = event;
231  //md.flast_event = event;
232  // event counter
233  md.fFileName = output_file.fileName();
234 
235 }
236 
237 
238 //--------------------------------------------------------------------
239 // PostBeginSubRun callback.
241 {
242 
243  if(!fGenerateTFileMetadata) return;
244 
245  art::RunNumber_t run = sr.run();
246  art::SubRunNumber_t subrun = sr.subRun();
247  art::SubRunID srid = sr.id();
248 
249  // save run, subrun and runType information once every subrun
250  if (fSubRunNumbers.count(srid) == 0){
251  fSubRunNumbers.insert(srid);
252  md.fruns.push_back(make_tuple(run, subrun, frunType));
253  }
254 }
255 
256 //--------------------------------------------------------------------
257 // PostCloseFile callback.
259 {
260 
261  // Do nothing if generating TFile metadata is disabled.
262  if(!fGenerateTFileMetadata) return;
263 
264  // get metadata from the FileCatalogMetadataDUNE service, which is filled on its construction
265 
267  md.fMCGenerators = paramhandle->MCGenerators();
268  md.fMCOscillationP = paramhandle->MCOscillationP();
270  md.fMCBeamEnergy = paramhandle->MCBeamEnergy();
271  md.fMCBeamFluxID = paramhandle->MCBeamFluxID();
272  md.fMCName = paramhandle->MCName();
273  md.fMCDetectorType = paramhandle->MCDetectorType();
274  md.fMCNeutrinoFlavors = paramhandle->MCNeutrinoFlavors();
275  md.fMCMassHierarchy = paramhandle->MCMassHierarchy();
276  md.fMCMiscellaneous = paramhandle->MCMiscellaneous();
277  md.fMCGeometryVersion = paramhandle->MCGeometryVersion();
278  md.fMCOverlay = paramhandle->MCOverlay();
279  md.fDataRunMode = paramhandle->DataRunMode();
280  md.fDataDetectorType = paramhandle->DataDetectorType();
281  md.fDataName = paramhandle->DataName();
282  md.fStageName = paramhandle->StageName();
283 
284  //update end time
285  md.fend_time = time(0);
286 
287  // convert start and end times into time format: Year-Month-DayTHours:Minutes:Seconds
288  char endbuf[80], startbuf[80];
289  struct tm tstruct;
290  tstruct = *localtime(&md.fend_time);
291  strftime(endbuf,sizeof(endbuf),"%Y-%m-%dT%H:%M:%S",&tstruct);
292  tstruct = *localtime(&md.fstart_time);
293  strftime(startbuf,sizeof(startbuf),"%Y-%m-%dT%H:%M:%S",&tstruct);
294 
295 // open a json file and write everything from the struct md conforming to the
296  // samweb json format. This json file holds the below information temporarily.
297  // If you submitted a grid job invoking this service, the information from
298  // this file is appended to a final json file and this file will be removed
299 
300  std::ofstream jsonfile;
301  jsonfile.open(fJSONFileName);
302  jsonfile<<"{\n \"application\": {\n \"family\": "<<std::get<0>(md.fapplication)<<",\n \"name\": ";
303  jsonfile<<std::get<1>(md.fapplication)<<",\n \"version\": "<<std::get<2>(md.fapplication)<<"\n },\n ";
304  jsonfile<<"\"data_tier\": \""<<md.fdata_tier<<"\",\n ";
305  jsonfile<<"\"event_count\": "<<md.fnew_event_count<<",\n ";
306  //jsonfile<<"\"fcl.name\": \""<<md.ffcl_name<<"\",\n ";
307  //jsonfile<<"\"fcl.version\": \""<<md.ffcl_version<<"\",\n ";
308  jsonfile<<"\"file_format\": \""<<md.ffile_format<<"\",\n ";
309  jsonfile<<"\"file_type\": "<<md.ffile_type<<",\n ";
310  jsonfile<<"\"first_event\": "<<md.ffirst_event<<",\n ";
311  jsonfile<<"\"group\": "<<md.fgroup<<",\n ";
312  jsonfile<<"\"last_event\": "<<md.flast_event<<",\n ";
313  //if (md.fdataTier != "generated"){
314  unsigned int c=0;
315  jsonfile<<"\"parents\": [\n";
316  for(auto parent : md.fParents) {
317  c++;
318  size_t n = parent.find_last_of('/');
319  size_t f1 = (n == std::string::npos ? 0 : n+1);
320  jsonfile<<" {\n \"file_name\": \""<<parent.substr(f1)<<"\"\n }";
321  if (md.fParents.size()==1 || c==md.fParents.size()) jsonfile<<"\n";
322  else jsonfile<<",\n";
323  }
324  jsonfile<<" ],\n ";
325  //}
326  c=0;
327  jsonfile<<"\"runs\": [\n";
328  for(auto &t : md.fruns){
329  c++;
330  jsonfile<<" [\n "<<std::get<0>(t)<<",\n "<<std::get<1>(t)<<",\n "<<std::get<2>(t)<<"\n ]";
331  if (md.fruns.size()==1 || c==md.fruns.size()) jsonfile<<"\n";
332  else jsonfile<<",\n";
333  }
334  jsonfile<<" ],\n";
335 
336  if (md.fMCGenerators!="") jsonfile << "\"lbne_MC.generators\": \"" << md.fMCGenerators << "\",\n";
337  if (md.fMCOscillationP!="") jsonfile << "\"lbne_MC.oscillationP\": \"" << md.fMCOscillationP << "\",\n";
338  if (md.fMCTriggerListVersion!="") jsonfile << "\"lbne_MC.trigger-list-version\": \"" << md.fMCTriggerListVersion << "\",\n";
339  if (md.fMCBeamEnergy!="") jsonfile << "\"lbne_MC.beam_energy\": \"" << md.fMCBeamEnergy << "\",\n";
340  if (md.fMCBeamFluxID!="") jsonfile << "\"lbne_MC.beam_flux_ID\": \"" << md.fMCBeamFluxID << "\",\n";
341  if (md.fMCName!="") jsonfile << "\"lbne_MC.name\": \"" << md.fMCName << "\",\n";
342  if (md.fMCDetectorType!="") jsonfile << "\"lbne_MC.detector_type\": \"" << md.fMCDetectorType << "\",\n";
343  if (md.fMCNeutrinoFlavors!="") jsonfile << "\"lbne_MC.neutrino_flavors\": \"" << md.fMCNeutrinoFlavors << "\",\n";
344  if (md.fMCMassHierarchy!="") jsonfile << "\"lbne_MC.mass_hierarchy\": \"" << md.fMCMassHierarchy << "\",\n";
345  if (md.fMCMiscellaneous!="") jsonfile << "\"lbne_MC.miscellaneous\": \"" << md.fMCMiscellaneous << "\",\n";
346  if (md.fMCGeometryVersion!="") jsonfile << "\"lbne_MC.geometry_version\": \"" << md.fMCGeometryVersion << "\",\n";
347  if (md.fMCOverlay!="") jsonfile << "\"lbne_MC.overlay\": \"" << md.fMCOverlay << "\",\n";
348  if (md.fDataRunMode!="") jsonfile << "\"lbne_data.run_mode\": \"" << md.fDataRunMode << "\",\n";
349  if (md.fDataDetectorType!="") jsonfile << "\"lbne_data.detector_type\": \"" << md.fDataDetectorType << "\",\n";
350  if (md.fDataName!="") jsonfile << "\"lbne_data.name\": \"" << md.fDataName << "\",\n";
351  // fStageName appears not to be in our metadata spec
352 
353  // put these at the end because we know they'll be there and the last one needs to not have a comma
354 
355  jsonfile<<"\"start_time\": \""<<startbuf<<"\",\n";
356  jsonfile<<"\"end_time\": \""<<endbuf<<"\"\n";
357 
358  //jsonfile<<" \"ub_project.name\": \""<<md.fproject_name<<"\",\n ";
359  //jsonfile<<"\"ub_project.stage\": \""<<md.fproject_stage;
360  //jsonfile<<"\",\n \"ub_project.version\": \""<<md.fproject_version<<"\"\n";
361 
362  jsonfile<<"}\n";
363  jsonfile.close();
364 }
365 
366 
367 
368 namespace util{
369 
371 }//namespace util
SubRunID const & subRunID() const
Definition: EventID.h:104
intermediate_table::iterator iterator
void reconfigure(fhicl::ParameterSet const &p)
Namespace for general, non-LArSoft-specific utilities.
std::vector< std::pair< std::string, std::string >> collection_type
std::map< std::string, std::string > mdmap
EventNumber_t event() const
Definition: DataViewImpl.cc:85
GlobalSignal< detail::SignalResponseType::FIFO, void()> sPostBeginJob
std::string string
Definition: nybbler.cc:12
GlobalSignal< detail::SignalResponseType::LIFO, void()> sPostEndJob
const std::string & MCOverlay() const
const std::string & MCGenerators() const
STL namespace.
GlobalSignal< detail::SignalResponseType::LIFO, void(SubRun const &)> sPostBeginSubRun
std::set< art::SubRunID > fSubRunNumbers
const std::string & DataName() const
const std::string & MCGeometryVersion() const
tm
Definition: demo.py:21
TFileMetadataDUNE(fhicl::ParameterSet const &pset, art::ActivityRegistry &reg)
std::tuple< std::string, std::string, std::string > fapplication
std::vector< std::tuple< art::RunNumber_t, art::SubRunNumber_t, std::string > > fruns
const std::string & MCNeutrinoFlavors() const
QAsciiDict< Entry > fn
bool getByLabel(std::string const &label, std::string const &instance, Handle< PROD > &result) const
Definition: DataViewImpl.h:633
std::string const & fileName() const
const std::string & DataRunMode() const
void postCloseOutput(art::OutputFileInfo const &)
void postBeginSubRun(art::SubRun const &subrun)
std::void_t< T > n
T get(std::string const &key) const
Definition: ParameterSet.h:271
const std::string & MCMiscellaneous() const
const std::string & DataDetectorType() const
IDNumber_t< Level::SubRun > SubRunNumber_t
Definition: IDNumber.h:119
const std::string & MCTriggerListVersion() const
SubRunNumber_t subRun() const
Definition: DataViewImpl.cc:78
RunNumber_t run() const
Definition: DataViewImpl.cc:71
const std::string & MCBeamFluxID() const
const std::string & MCName() const
GlobalSignal< detail::SignalResponseType::LIFO, void(Event const &, ScheduleContext)> sPostProcessEvent
#define DEFINE_ART_SERVICE(svc)
const std::string & StageName() const
GlobalSignal< detail::SignalResponseType::LIFO, void(std::string const &)> sPostOpenFile
SubRunID id() const
Definition: SubRun.cc:21
std::set< std::string > fParents
void postEvent(art::Event const &ev, art::ScheduleContext)
IDNumber_t< Level::Event > EventNumber_t
Definition: IDNumber.h:118
GlobalSignal< detail::SignalResponseType::LIFO, void(OutputFileInfo const &)> sPostCloseOutputFile
const std::string & MCOscillationP() const
TCEvent evt
Definition: DataStructs.cxx:7
const std::string & MCDetectorType() const
static constexpr double sr
Definition: Units.h:166
void getMetadata(collection_type &coll) const
if(!yymsg) yymsg
const std::string & MCBeamEnergy() const
def parent(G, child, parent_type)
Definition: graph.py:67
EventID id() const
Definition: Event.cc:34
QTextStream & endl(QTextStream &s)
void postOpenFile(std::string const &fn)
const std::string & MCMassHierarchy() const
IDNumber_t< Level::Run > RunNumber_t
Definition: IDNumber.h:120