sam_metadata_dumper.cc
Go to the documentation of this file.
1 // sam_metadata_dumper.cc
2 
6 #include "boost/program_options.hpp"
13 #include "fhiclcpp/ParameterSet.h"
15 
16 #include "TError.h"
17 #include "TFile.h"
18 
19 extern "C" {
20 #include "sqlite3.h"
21 }
22 
23 #include <algorithm>
24 #include <cstddef>
25 #include <cstdlib>
26 #include <cstring>
27 #include <iostream>
28 #include <ostream>
29 #include <sstream>
30 #include <string>
31 #include <vector>
32 
33 using namespace std::string_literals;
34 namespace bpo = boost::program_options;
35 
39 using std::back_inserter;
40 using std::cerr;
41 using std::cout;
42 using std::endl;
43 using std::ostream;
44 using std::string;
45 using std::vector;
46 
47 using stringvec = vector<string>;
// One row read from the FileCatalog_metadata table of an art/ROOT file.
struct FileCatalogMetadataEntry {
  int SMDid{};       // SQLite rowid of the row this entry was read from.
  std::string name;  // Contents of the "Name" column.
  std::string value; // Contents of the "Value" column.
};
53 
56 {
57  std::string result;
58  if (value[0] == '[' || value[0] == '{' ||
60  // Assume entry is already a legal JSON representation.
61  result = value;
62  } else {
63  // Attempt to convert to number. If this works, we don't
64  // canonicalize the string. Note that we use the glibc version
65  // because we don't want to have to catch the exception. We could
66  // use streams, but we don't care about the result and dealing with
67  // streams is awkward.
68  char const* entval = value.c_str();
69  char* endptr = const_cast<char*>(entval);
70  strtold(entval, &endptr);
71  if (endptr == entval + value.size()) {
72  // Full conversion: no string canonicalization necessary.
73  result = value;
74  } else {
75  cet::canonical_string(value, result);
76  }
77  }
78  return result;
79 }
80 
81 // Print the human-readable form of a single metadata entry.
82 void
84  size_t const idLen,
85  size_t const longestName,
86  ostream& output)
87 {
88  std::string const& name = ent.name;
89  constexpr size_t maxIDdigits{5};
90  constexpr size_t maxNameSpacing{20};
91 
92  // right-justify SMDid (unless it is more than 5 digits)
93  auto const maxIDspace = std::min(idLen, maxIDdigits);
94  int nspaces = maxIDspace - 1;
95  {
96  auto id = static_cast<int>(ent.SMDid);
97  for (int i{}; (nspaces > 0) && (id > 0); ++i) {
98  id /= 10;
99  if (id > 0)
100  --nspaces;
101  }
102  }
103 
104  std::string const indent(nspaces, ' ');
105  output << indent << ent.SMDid << ": " << name;
106 
107  // right-justify value (unless name is more than 20 characters)
108  auto const nameSpacing = std::min(maxNameSpacing, longestName);
109  nspaces = static_cast<int>(nameSpacing - name.size());
110  while (nspaces > 0) {
111  output << " ";
112  --nspaces;
113  }
114 
115  output << " " << entryValue(ent.value) << "\n";
116 }
117 
118 // Print all the entries in the file catalog metadata from a file
119 void
121  vector<FileCatalogMetadataEntry> const& entries,
122  ostream& output,
123  ostream& /*errors*/)
124 {
125  // For nice formatting, determine maximum id length and name size,
126  // so that values can be lined up.
127  int maxID{1};
128  size_t longestName{1};
129  for (auto const& entry : entries) {
130  maxID = std::max(entry.SMDid, maxID);
131  longestName = std::max(entry.name.size(), longestName);
132  }
133  size_t idLen{1};
134  for (int i{}; (i < 5) && (maxID > 0); ++i) {
135  maxID /= 10;
136  if (maxID > 0)
137  ++idLen;
138  }
139  for (auto const& entry : entries) {
140  print_one_fc_metadata_entry_hr(entry, idLen, longestName, output);
141  }
142 }
143 
144 // Print the JSON form of the metadata for the current entry.
145 void
147  ostream& output)
148 {
149  output << cet::canonical_string(ent.name) << ": " << entryValue(ent.value);
150 }
151 
152 void
154  vector<FileCatalogMetadataEntry> const& entries,
155  ostream& output,
156  ostream& /*errors*/)
157 {
158  std::ostringstream buf; // Need seekp to work.
159  buf << "{\n";
160  for (auto const& entry : entries) {
161  buf << " "; // Indent.
163  buf << ",\n";
164  }
165  buf.seekp(-2, std::ios_base::cur);
166  buf << "\n }";
167  output << buf.str();
168 }
169 
170 // Read all the file catalog metadata entries stored in the table in 'file'.
171 // Write any error messages to errors.
172 // Return false on failure, and true on success.
173 bool
175  TFile& file,
176  vector<FileCatalogMetadataEntry>& all_metadata_entries,
177  ostream& errors)
178 {
180  // Open the DB
181  art::SQLite3Wrapper sqliteDB{&file, "RootFileDB"};
182  // Read the entries into memory.
183  sqlite3_stmt* stmt = nullptr;
184  sqlite3_prepare_v2(sqliteDB,
185  "SELECT rowid, Name, Value from FileCatalog_metadata;",
186  -1,
187  &stmt,
188  nullptr);
189  bool row_found = false;
190  int sqlite_status = SQLITE_OK;
191  while ((sqlite_status = sqlite3_step(stmt)) == SQLITE_ROW) {
192  row_found = true;
193  ent.SMDid = sqlite3_column_int(stmt, 0);
194  ent.name =
195  std::string{reinterpret_cast<char const*>(sqlite3_column_text(stmt, 1))};
196  ent.value =
197  std::string{reinterpret_cast<char const*>(sqlite3_column_text(stmt, 2))};
198  all_metadata_entries.push_back(ent);
199  }
200  if (sqlite_status != SQLITE_DONE) {
201  errors << "Unexpected status from table read: " << sqlite3_errmsg(sqliteDB)
202  << " (0x" << sqlite_status << ").\n";
203  }
204  int const finalize_status = sqlite3_finalize(stmt);
205  if (finalize_status != SQLITE_OK) {
206  errors << "Unexpected status (" << finalize_status
207  << ") from DB status cleanup:\n"
208  << sqlite3_errmsg(sqliteDB) << '\n';
209  }
210  if (!row_found) {
211  errors
212  << "No file catalog Metadata rows found - table is missing or empty\n";
213  return false;
214  }
215  return true;
216 }
217 
218 // Extract the file catalog metadata from the given TFile. The
219 // metadata entries are written to the stream output, and error
220 // messages are written to the stream errors.
221 //
222 // Returns 0 to indicate success, and 1 on failure.
223 // Precondition: file.IsZombie() == false
224 
225 // Caution: We pass 'file' by non-const reference because the TFile
226 // interface does not declare the functions we use to be const, even
227 // though they do not modify the underlying file.
228 int
230  ostream& output,
231  ostream& errors,
232  bool want_json)
233 {
234  vector<FileCatalogMetadataEntry> all_metadata_entries;
235  if (!read_all_fc_metadata_entries(file, all_metadata_entries, errors)) {
236  errors << "Unable to to read metadata entries.\n";
237  return 1;
238  }
239  // Iterate through all the entries, printing each one.
240  if (want_json) {
241  std::string const& path = file.GetName();
242  std::string const& baseName = path.substr(path.find_last_of("/") + 1u);
243  output << cet::canonical_string(baseName) << ": ";
244  print_all_fc_metadata_entries_JSON(all_metadata_entries, output, errors);
245  } else { // Human-readable.
246  output << "\nFile catalog metadata from file " << file.GetName() << ":\n\n";
247  print_all_fc_metadata_entries_hr(all_metadata_entries, output, errors);
248  output << "-------------------------------\n";
249  }
250  return 0;
251 }
252 
253 // Extract all the requested metadata tables (for from the named
254 // files. The contents of the tables are written to the stream
255 // output, and error messages are written to the stream errors.
256 //
257 // The return value is the number of files in which errors were
258 // encountered, and is thus 0 to indicate success.
259 int
261  ostream& output,
262  ostream& errors,
263  bool const want_json)
264 {
265  int rc{0};
266  bool first{true};
267  bool printed_opening{false};
268  for (auto const& fn : file_names) {
269  std::unique_ptr<TFile> current_file(TFile::Open(fn.c_str(), "READ"));
270  if (!current_file || current_file->IsZombie()) {
271  ++rc;
272  errors << "Unable to open file '" << fn << "' for reading."
273  << "\nSkipping file.\n";
274  continue;
275  }
276 
277  auto* key_ptr = current_file->GetKey("RootFileDB");
278  if (key_ptr == nullptr) {
279  ++rc;
280  errors << "\nRequested DB, \"RootFileDB\" of type, \"tkeyvfs\", not "
281  "present in file: \""
282  << fn << "\"\n"
283  << "Either this is not an art/ROOT file, it is a corrupt art/ROOT "
284  "file,\n"
285  << "or it is an art/ROOT file produced with a version older than "
286  "v1_00_12.\n";
287  continue;
288  }
289 
290  if (first) {
291  first = false;
292  if (want_json) {
293  output << "{\n ";
294  printed_opening = true;
295  }
296  } else if (want_json) {
297  output << ",\n ";
298  }
299  rc += print_fc_metadata_from_file(*current_file, output, errors, want_json);
300  }
301  if (printed_opening) {
302  output << "\n}\n";
303  }
304  return rc;
305 }
306 
307 void
308 RootErrorHandler(int level, bool die, char const* location, char const* message)
309 {
310  // Ignore dictionary errors.
311  if (level == kWarning && (!die) && strcmp(location, "TClass::TClass") == 0 &&
312  std::string{message}.find("no dictionary") != std::string::npos) {
313  return;
314  } else {
315  // Default behavior
316  DefaultErrorHandler(level, die, location, message);
317  }
318 }
319 
320 int
321 main(int argc, char* argv[])
322 {
323  // ------------------
324  // use the boost command line option processing library to help out
325  // with command line options
326  std::ostringstream descstr;
327  descstr << argv[0] << " <options> [<source-file>]+";
328  bpo::options_description desc(descstr.str());
329  desc.add_options()("help,h", "produce help message")(
330  "hr,H", "produce human-readable output (default is JSON)")(
331  "human-readable", "produce human-readable output (default is JSON)")(
332  "source,s", bpo::value<stringvec>(), "source data file (multiple OK)");
333  bpo::options_description all_opts("All Options");
334  all_opts.add(desc);
335  // Each non-option argument is interpreted as the name of a files to
336  // be processed. Any number of filenames is allowed.
337  bpo::positional_options_description pd;
338  pd.add("source", -1);
339  // The variables_map contains the actual program options.
340  bpo::variables_map vm;
341  try {
342  bpo::store(bpo::command_line_parser(argc, argv)
343  .options(all_opts)
344  .positional(pd)
345  .run(),
346  vm);
347  bpo::notify(vm);
348  }
349  catch (bpo::error const& e) {
350  std::cerr << "Exception from command line processing in " << argv[0] << ": "
351  << e.what() << "\n";
352  return 2;
353  }
354  if (vm.count("help")) {
355  std::cout << desc << std::endl;
356  return 1;
357  }
358  bool const want_json =
359  (!vm.count("hr")) && (!vm.count("human-readable")); // Default is JSON.
360 
361  // Get the names of the files we will process.
362  stringvec file_names;
363  size_t const file_count = vm.count("source");
364  if (file_count < 1) {
365  cerr << "One or more input files must be specified;"
366  << " supply filenames as program arguments\n"
367  << "For usage and options list, please do 'sam_metadata_dumper "
368  "--help'.\n";
369  return 3;
370  }
371  file_names.reserve(file_count);
372  cet::copy_all(vm["source"].as<stringvec>(), std::back_inserter(file_names));
373 
374  // Set the ROOT error handler.
375  SetErrorHandler(RootErrorHandler);
376 
377  // Register the tkey VFS with sqlite:
378  tkeyvfs_init();
379 
380  // Do the work.
381  return print_fc_metadata_from_files(file_names, cout, cerr, want_json);
382 }
bool read_all_fc_metadata_entries(TFile &file, vector< FileCatalogMetadataEntry > &all_metadata_entries, ostream &errors)
std::string string
Definition: nybbler.cc:12
cur
Definition: dbjson.py:21
void print_one_fc_metadata_entry_hr(FileCatalogMetadataEntry const &ent, size_t const idLen, size_t const longestName, ostream &output)
std::string entryValue(std::string const &value)
void RootErrorHandler(int level, bool die, char const *location, char const *message)
std::vector< std::string > stringvec
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:265
bool is_double_quoted_string(std::string const &str)
error
Definition: includer.cc:31
const double e
int tkeyvfs_init(void)
Definition: tkeyvfs.cc:1768
void print_one_fc_metadata_entry_JSON(FileCatalogMetadataEntry const &ent, ostream &output)
std::string indent(std::size_t const i)
int print_fc_metadata_from_file(TFile &file, ostream &output, ostream &errors, bool want_json)
void print_all_fc_metadata_entries_JSON(vector< FileCatalogMetadataEntry > const &entries, ostream &output, ostream &)
auto copy_all(FwdCont &, FwdIter)
bool canonical_string(std::string const &str, std::string &result)
T min(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:57
std::map< fhicl::ParameterSetID, ParameterSetBlob > ParameterSetMap
int print_fc_metadata_from_files(stringvec const &file_names, ostream &output, ostream &errors, bool const want_json)
void print_all_fc_metadata_entries_hr(vector< FileCatalogMetadataEntry > const &entries, ostream &output, ostream &)
int main(int argc, char *argv[])
unsigned int run