RootSizeOnDisk.cc
Go to the documentation of this file.
1 //
2 // Collect information about the disk space used by the top tier and second
3 // tier objects inside an art format root event-data file.
4 //
5 
6 #include <iomanip>
7 #include <set>
8 #include <string>
9 #include <vector>
10 
13 #include "boost/filesystem.hpp"
14 #include "boost/format.hpp"
16 
17 #include "TBranch.h"
18 #include "TFile.h"
19 #include "TKey.h"
20 #include "TList.h"
21 #include "TObjArray.h"
22 #include "TTree.h"
23 
24 using namespace std;
25 
26 namespace {
27 
28  // Helper struct used to pull information out of a TList.
29  struct MyObjects {
30 
31  MyObjects(set<art::RootSizeOnDisk::Record>& akeys) : keys(akeys) {}
32 
33  bool
34  operator()(TObject* aObj)
35  {
36  TKey* key = (TKey*)aObj;
37  keys.insert(
38  art::RootSizeOnDisk::Record(key->GetName(), key->GetClassName()));
39  return true;
40  }
41 
42  set<art::RootSizeOnDisk::Record>& keys;
43  };
44 
45 } // end anonymous namespace
46 
48  std::string const& aclassName,
49  Long64_t const asize,
50  double const afraction)
51  : name_(aname), className_(aclassName), size_(asize), fraction_(afraction)
52 {}
53 
54 bool
56  art::RootSizeOnDisk::Record const& rhs)
57 {
58  return lhs.size() > rhs.size();
59 }
60 
61 void
62 art::RootSizeOnDisk::print(std::ostream& os, double const minimumFraction) const
63 {
64 
65  os << "\nSize on disk for the file: " << filename() << "\n"
66  << "Total size on disk: " << size() << "\n"
67  << endl;
68  os << setw(18) << "Size in bytes" << setw(10) << " Fraction"
69  << " TTree/TKey Name" << endl;
70  for (RootSizeOnDisk::Record const& key : contents()) {
71  if (key.isTree() || key.isTKey()) {
72  os << setw(18) << key.size() << " "
73  << boost::format("%10.3f") % key.fraction() << " " << key.name()
74  << endl;
75  } else {
76  os << setw(18) << key.size() << " "
77  << boost::format("%10.3f") % key.fraction() << " " << key.name()
78  << " (skipped because not a TTree or a TKey; it is a"
79  << key.className() << ")" << endl;
80  }
81  }
82  os << "------------------------------\n"
83  << setw(18) << sum() << " " << boost::format("%10.3f") % fraction() << " "
84  << "Total\n"
85  << endl;
86 
87  os << "Details for each TTree that occupies more than the fraction "
88  << minimumFraction << " of the size on disk.\n"
89  << endl;
90 
91  for (RootSizeOnDisk::Record const& key : contents()) {
92  if (key.isTree() && (key.fraction() > minimumFraction)) {
93  os << "\nDetails for branch: " << key.name() << "\n" << endl;
94  os << setw(18) << "Size in bytes" << setw(10) << " Fraction"
95  << " Data Product Name" << endl;
96 
97  Long64_t sum(0);
98  for (auto const& branch : key.contents()) {
99  sum += branch.size();
100  os << setw(18) << branch.size() << " "
101  << boost::format("%10.3f") % branch.fraction() << " "
102  << branch.name() << endl;
103  }
104  double ratio = double(sum) / double(key.size());
105  os << "------------------------------\n"
106  << setw(18) << sum << " " << boost::format("%10.3f") % ratio << " "
107  << "Total\n"
108  << endl;
109  }
110  }
111 }
112 
113 art::RootSizeOnDisk::RootSizeOnDisk(std::string const& aFileName, TFile* file)
114  : fileName_(aFileName), size_(0), sum_(0), fraction_(0)
115 {
116 
117  // File size on disk, in bytes.
118  size_ = boost::filesystem::file_size(fileName_.c_str());
119 
120  // Extract info about top level objects.
121  // There are usually Multiple cycles of these objects; we only want each name
122  // once.
123  set<RootSizeOnDisk::Record> topKeys;
124  TList* keys = file->GetListOfKeys();
125  TIter iter(keys);
126  for_each(iter.Begin(), TIter::End(), MyObjects(topKeys));
127 
128  // Copy to a vector since we will want to sort them by size.
129  contents_.assign(topKeys.begin(), topKeys.end());
130 
131  // Compute sizes of each top level TTree and TKey.
132  for (auto& key : contents_) {
133  if (key.isTree()) {
134  TTree* tree;
135  file->GetObject(key.name().c_str(), tree);
136  Long64_t const size = detail::sizeOnDisk(tree);
137  sum_ += size;
138  double const f = double(size) / double(size_);
139  key.size(size);
140  key.fraction(f);
141  fillLevel2(key, tree);
142  } else if (key.isTKey()) {
143  TKey* tkey = file->FindKey(key.name().c_str());
144  Long64_t const size = tkey->GetNbytes();
145  sum_ += size;
146  double const f = double(size) / double(size_);
147  key.size(size);
148  key.fraction(f);
149  }
150  }
151 
152  // Sort by decreasing size.
153  cet::sort_all(contents_, greaterBySize);
154 
155  fraction_ = double(sum_) / double(size_);
156 }
157 
158 // For each TTree Record, fill the information about the branches.
159 void
161 {
162  TObjArray* branches = tree->GetListOfBranches();
163  size_t n = branches->GetEntries();
164 
165  Records_t branchInfo;
166 
167  for (size_t i = 0; i < n; ++i) {
168  auto subbr = static_cast<TBranch*>(branches->At(i));
169  Long64_t const size = detail::sizeOnDisk(subbr, true);
170  double const f = double(size) / double(key.size());
171  branchInfo.emplace_back(subbr->GetName(), "TBranch", size, f);
172  }
173 
174  cet::sort_all(branchInfo, greaterBySize);
175  key.contents(branchInfo);
176 }
void fillLevel2(Record &, TTree *)
std::vector< Record > const & contents() const
std::string string
Definition: nybbler.cc:12
Record(std::string const &aname, std::string const &aclassName, Long64_t asize=0, double afraction=0.)
void print(std::ostream &os, double minimumFraction) const
STL namespace.
std::string const & filename() const
Long64_t sum() const
void sort_all(RandCont &)
Long64_t size() const
std::vector< Record > Records_t
Long64_t sizeOnDisk(TTree *t)
void End(void)
Definition: gXSecComp.cxx:210
bool greaterBySize(RootSizeOnDisk::Record const &lhs, RootSizeOnDisk::Record const &rhs)
static std::string format(PyObject *obj, unsigned int pos, unsigned int indent, unsigned int maxlen)
Definition: fhiclmodule.cc:311
RootSizeOnDisk(std::string const &aFileName, TFile *aFile)