StatCollector.h
Go to the documentation of this file.
1 /**
2  * @file lardataalg/Utilities/StatCollector.h
3  * @brief Classes gathering simple statistics
4  * @author Gianluca Petrillo (petrillo@fnal.gov)
5  * @date December 23rd, 2014
6  *
7  * Currently includes:
8  * - MinMaxCollector to extract data range
9  * - StatCollector to extract simple statistics (average, RMS etc.)
10  *
11  */
12 
13 #ifndef LARDATA_UTILITIES_STATCOLLECTOR_H
14 #define LARDATA_UTILITIES_STATCOLLECTOR_H
15 
16 // C/C++ standard libraries
17 #include <cmath> // std::sqrt()
18 #include <array>
19 #include <tuple>
20 #include <limits> // std::numeric_limits<>
21 #include <initializer_list>
22 #include <iterator> // std::begin(), std::end()
23 #include <utility> // std::forward()
24 #include <algorithm> // std::for_each()
25 #include <stdexcept> // std::range_error
26 
27 
28 namespace lar {
29  namespace util {
30 
31 
32  /// A unary functor returning its own argument (any type)
33  struct identity {
34  template<typename T>
35  constexpr auto operator()(T&& v) const noexcept
36  -> decltype(std::forward<T>(v))
37  {
38  return std::forward<T>(v);
39  } // operator()
40  }; // class identity
41 
42 
43  namespace details {
44 
45  /// Class tracking the number of entries and their total weight
46  /// @tparam W type of the weight
47  template <typename W>
48  class WeightTracker {
49  public:
50  using Weight_t = W; ///< type of the weight
51 
52  /// Adds the specified weight to the statistics
53  void add(Weight_t weight) { ++n; w += weight; }
54 
55  /// Resets the count
56  void clear() { n= 0; w = Weight_t(0); }
57 
58  /// Returns the number of entries added
59  int N() const { return n; }
60 
61  /// Returns the sum of the weights
62  Weight_t Weights() const { return w; }
63 
64  /**
65  * @brief Returns the arithmetic average of the weights
66  * @return the weight average
67  * @throws std::range_error if no entry was added
68  */
69  Weight_t AverageWeight() const;
70 
71 
72  /// Returns the square of the specified value
73  template <typename V>
74  static constexpr V sqr(V const& v) { return v*v; }
75 
76  protected:
77  int n = 0; ///< number of added entries
78  Weight_t w = Weight_t(0); ///< total weight
79 
80  }; // class WeightTracker<>
81 
82 
83  /**
84  * @brief Class tracking sums of variables up to a specified power
85  * @tparam Power power up to which to collect statistics
86  * @tparam T type of the quantity
87  * @tparam W type of the weight (as T by default)
88  *
89  * Note that statistics of order 0 (that is, pertaining only weights)
90  * are not collected.
91  */
92  template <unsigned int PWR, typename T, typename W = T>
93  class DataTracker {
94  public:
95  static constexpr unsigned int Power = PWR;
96  static_assert(Power >= 1, "DataTracker must have at least power 1");
97 
98  using Data_t = T; ///< type of data
99  using Weight_t = T; ///< type of weight
100 
101  /// Default constructor
103 
104  /// Adds the specified weight to the statistics
105  void add(Data_t v, Weight_t w)
106  {
107  Weight_t x = w;
108  for (size_t i = 0; i < Power; ++i) sums[i] += (x *= v);
109  }
110 
111  /// Resets the count
112  void clear() { sums.fill(Data_t(0)); }
113 
114  /// Returns the sum of the values to the power N (1 <= N <= 2)
115  template <unsigned int N>
116  Weight_t SumN() const
117  {
118  static_assert
119  ((N > 0) && (N <= Power), "Invalid sum of power requested");
120  return sums[N - 1];
121  } // SumN()
122 
123  /// Returns the sum of the values to the power n (1 <= n <= 2, no check)
124  Weight_t Sum(unsigned int n) const { return sums[n - 1]; }
125 
126  /// Returns the weighted sum of the entries
127  Weight_t Sum() const { return SumN<1>(); }
128 
129  protected:
130  std::array<Weight_t, Power> sums;
131 
132  }; // class DataTracker<>
133 
134 
135  /**
136  * @brief Class tracking sums of variables up to power 2
137  * @tparam T type of the quantity
138  * @tparam W type of the weight (as T by default)
139  */
140  template <typename T, typename W = T, unsigned int PWR = 2>
141  class DataTracker2: public DataTracker<PWR, T, W> {
142  using Base_t = DataTracker<PWR, T, W>; ///< base class type
143  public:
144  using Base_t::Power;
145  static_assert(Power >= 2, "DataTracker2 must have Power >= 2");
146  using Weight_t = typename Base_t::Weight_t;
147 
148  /// Returns the weighted sum of the square of the entries
149  Weight_t SumSq() const
150  { return Base_t::sums[1]; }
151  //{ return Base_t::SumN<2>(); } // I can't make gcc 4.9.1 digest this...
152 
153  }; // class DataTracker2
154 
155  /**
156  * @brief Class tracking sums of variables up to power 2
157  * @tparam T type of the quantity
158  * @tparam W type of the weight (as T by default)
159  */
160  template <typename T, typename W = T, unsigned int PWR = 3>
161  class DataTracker3: public DataTracker2<T, W, PWR> {
162  using Base_t = DataTracker2<T, W, PWR>; ///< base class type
163  public:
164  using Base_t::Power;
165  static_assert(Power >= 3, "DataTracker3 must have Power >= 3");
166  using Weight_t = typename Base_t::Weight_t;
167 
168  /// Returns the weighted sum of the square of the entries
170  { return Base_t::sums[2]; }
171  //{ return Base_t::SumN<3>(); } // I can't make gcc 4.9.1 digest this...
172 
173  }; // class DataTracker3
174 
175  } // namespace details
176 
177 
178 
179  /** ************************************************************************
180  * @brief Collects statistics on a single quantity (weighted)
181  * @tparam T type of the quantity
182  * @tparam W type of the weight (as T by default)
183  *
184  * This is a convenience class, as easy to use as:
185  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
186  * lar::util::StatCollector<double> stat;
187  * stat.add(3.0, 2.0);
188  * stat.add(4.0, 2.0);
189  * stat.add(5.0, 1.0);
190  * std::cout << "Statistics from " << stat.N() << " entries: "
191  * << stat.Average() << std::endl;
192  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193  * or also
194  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
195  * std::vector<std::pair<double, double>> values({
196  * { 3.0, 2.0 },
197  * { 4.0, 2.0 },
198  * { 5.0, 1.0 },
199  * });
200  * lar::util::StatCollector<double> stat;
201  * stat.add_weighted(values.begin(), values.end());
202  * std::cout << "Statistics from " << stat.N() << " entries: "
203  * << stat.Average() << std::endl;
204  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
205  * that should both print: "Statistics from 3 entries: 3.8".
206  *
207  * Other functions are available allowing addition of weighted and
208  * unweighted data from collections.
209  * For additional examples, see the unit test StatCollector_test.cc .
210  *
211  * StatCollector::Variance() is known to be very sensitive to rounding
212  * errors, since it uses the formula E[x^2] - E^2[x]. If the variance
213  * is effectively small the formula can become negative. In case of
214  * negative value produced by the variance formula, the Variance()
215  * method will round up to zero.
216  * If you know roughly the average of the items you are
217  * add()ing, you can reduce the rounding error by subtracting it from
218  * the input value; you'll have to shift the average back, while the
219  * variance will not be affected;
220  * also the sums will be shifted. Example:
221  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
222  * // fill the values, shifted
223  * for (auto element: elements)
224  * sc.add(element - elements[0], (*weight)++);
225  *
226  * auto sum_weights = sc.Weights();
227  * auto sum_values = sc.Sum() - elements[0] * sc.Weights();
228  * auto sum_values2 = sc.SumSq() + sqr(elements[0]) * sc.Weights()
229  * + 2. * elements[0] * sc.Sum();
230  * auto average = sc.Average() + elements[0];
231  * auto variance = sc.Variance();
232  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
233  * A small variance implies values of similar magnitude, and therefore
234  * subtracting any single one of them (in the example, the first one) is
235  * more effective than it seems.
236  * As a rule of thumb, if you are collecting statistics for elements with
237  * N significant bits, a 2N significant bits is recommended for statistics
238  * collection. That means `double` for `float`, and `long double` for
239  * `double` (although usually `long double` is only marginally more precise
240  * than `double`, typically 25 or 50% more).
241  */
242  template <typename T, typename W = T>
243  class StatCollector: protected details::WeightTracker<W> {
245  using Base_t::sqr;
246  public:
247  using This_t = StatCollector<T, W>; ///< this type
248  using Data_t = T; ///< type of the data
249  using Weight_t = typename Base_t::Weight_t; ///< type of the weight
250 
251  // default constructor, destructor and all the rest
252 
253  /// @{
254  /// @name Add elements
255 
256  /// Adds one entry with specified value and weight
257  void add(Data_t value, Weight_t weight = Weight_t(1.0));
258 
259  /**
260  * @brief Adds entries from a sequence with weight 1
261  * @tparam Iter forward iterator to the elements to be added
262  * @param begin iterator pointing to the first element to be added
263  * @param end iterator pointing after the last element to be added
264  *
265  * The value pointed by the iterator must be convertible to the Data_t
266  * type.
267  */
268  template <typename Iter>
269  void add_unweighted(Iter begin, Iter end)
270  { add_unweighted(begin, end, identity()); }
271 
272  /**
273  * @brief Adds entries from a sequence with weight 1
274  * @tparam Iter forward iterator to the elements to be added
275  * @tparam Pred a predicate to extract the element from iterator value
276  * @param begin iterator pointing to the first element to be added
277  * @param end iterator pointing after the last element to be added
278  * @param extractor the predicate extracting the value to be inserted
279  *
280  * The predicate is required to react to a call like with:
281  *
282  * Data_t Pred::operator() (typename Iter::value_type);
283  *
284  */
285  template <typename Iter, typename Pred>
286  void add_unweighted(Iter begin, Iter end, Pred extractor);
287 
288  /**
289  * @brief Adds all entries from a container, with weight 1
290  * @tparam Cont type of container of the elements to be added
291  * @tparam Pred a predicate to extract the element from iterator value
292  * @param cont container of the elements to be added
293  * @param extractor the predicate extracting the value to be inserted
294  *
295  * The predicate is required to react to a call like with:
296  *
297  * Data_t Pred::operator() (typename Cont::value_type);
298  *
299  * The container must support the range-based for loop syntax, that is
300  * is must have std::begin<Cont>() and std::end<Cont>() defined.
301  */
302  template <typename Cont, typename Pred>
303  void add_unweighted(Cont cont, Pred extractor)
304  { add_unweighted(std::begin(cont), std::end(cont), extractor); }
305 
306  /**
307  * @brief Adds all entries from a container, with weight 1
308  * @tparam Cont type of container of the elements to be added
309  * @param cont container of the elements to be added
310  *
311  * The container must support the range-based for loop syntax, that is
312  * is must have std::begin<Cont>() and std::end<Cont>() defined.
313  * The value in the container must be convertible to the Data_t type.
314  */
315  template <typename Cont>
316  void add_unweighted(Cont cont)
317  { add_unweighted(std::begin(cont), std::end(cont)); }
318 
319 
320  /**
321  * @brief Adds entries from a sequence with individually specified weights
322  * @tparam VIter forward iterator to the elements to be added
323  * @tparam WIter forward iterator to the weights
324  * @tparam VPred a predicate to extract the element from iterator value
325  * @tparam WPred a predicate to extract the weight from iterator value
326  * @param begin_value iterator pointing to the first element to be added
327  * @param end_value iterator pointing after the last element to be added
328  * @param begin_weight iterator pointing to the weight of first element
329  * @param value_extractor predicate extracting the value to be inserted
330  * @param weight_extractor predicate extracting the value to be inserted
331  *
332  * Each value is added with the weight pointed by the matching element in
333  * the list pointed by begin_weight: the element `*(begin_value)` will
334  * have weight `*(begin_weight)`, the next element `*(begin_value + 1)`
335  * will have weight `*(begin_weight + 1)`, etc.
336  *
337  * The predicates are required to react to a call like with:
338  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
339  * Data_t VPred::operator() (typename VIter::value_type);
340  * Weight_t WPred::operator() (typename WIter::value_type);
341  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
342  */
343  template <
344  typename VIter, typename WIter,
345  typename VPred, typename WPred = identity
346  >
347  void add_weighted(
348  VIter begin_value, VIter end_value,
349  WIter begin_weight,
350  VPred value_extractor,
351  WPred weight_extractor = WPred()
352  );
353 
354 
355  /**
356  * @brief Adds entries from a sequence with individually specified weights
357  * @tparam Iter forward iterator to (value, weight) pairs to be added
358  * @param begin iterator pointing to the first element to be added
359  * @param end iterator pointing after the last element to be added
360  *
361  * The value pointed by the iterator must be a pair with first element
362  * convertible to the Data_t and the second element convertible to
363  * Weight_t.
364  * For more complicate structures, use the version with two predicates
365  * (using the weight iterator the same as the value iterator).
366  */
367  template <typename Iter>
368  void add_weighted(Iter begin, Iter end);
369 
370  /**
371  * @brief Adds entries from a sequence with individually specified weights
372  * @tparam Cont type of container of (value, weight) pairs to be added
373  * @param cont container of (value, weight) pairs to be added
374  *
375  * The values in the container must be pairs with first element
376  * convertible to the Data_t and the second element convertible to
377  * Weight_t.
378  */
379  template <typename Cont>
380  void add_weighted(Cont cont)
381  { add_weighted(std::begin(cont), std::end(cont)); }
382 
383  ///@}
384 
385  /// Clears all the statistics
386  void clear();
387 
388  /// @{
389  /// @name Statistic retrieval
390 
391  /// Returns the number of entries added
392  int N() const { return Base_t::N(); }
393 
394  /// Returns the sum of the weights
395  Weight_t Weights() const { return Base_t::Weights(); }
396 
397  /// Returns the weighted sum of the values
398  Weight_t Sum() const { return x.Sum(); }
399 
400  /// Returns the weighted sum of the square of the values
401  Weight_t SumSq() const { return x.SumSq(); }
402 
403  /**
404  * @brief Returns the value average
405  * @return the value average
406  * @throws std::range_error if the total weight is 0 (usually: no data)
407  */
408  Weight_t Average() const;
409 
410  /**
411  * @brief Returns the square of the RMS of the values
412  * @return the square of the RMS of the values
413  * @throws std::range_error if the total weight is 0 (usually: no data)
414  */
415  Weight_t Variance() const;
416 
417  /**
418  * @brief Returns the root mean square
419  * @return the RMS of the values
420  * @throws std::range_error if the total weight is 0 (see Variance())
421  * @throws std::range_error if Variance() is negative (due to rounding errors)
422  */
423  Weight_t RMS() const;
424 
425 
426  /**
427  * @brief Returns the arithmetic average of the weights
428  * @return the weight average
429  * @throws std::range_error if no entry was added
430  */
431  Weight_t AverageWeight() const { return Base_t::AverageWeight(); }
432 
433  /// @}
434 
435 
436  protected:
438 
439  Variable_t x; ///< accumulator for variable x
440 
441  }; // class StatCollector<>
442 
443 
444  /** ************************************************************************
445  * @brief Collects statistics on two homogeneous quantities (weighted)
446  * @tparam T type of the quantities
447  * @tparam W type of the weight (as T by default)
448  *
449  * This is a convenience class, as easy to use as:
450  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
451  * lar::util::StatCollector2D<double> stat;
452  * stat.add(3.0, 4.0, 2.0);
453  * stat.add(4.0, 3.0, 2.0);
454  * stat.add(5.0, 5.0, 1.0);
455  * std::cout << "Statistics from " << stat.N() << " entries: "
456  * << stat.AverageX() << ", " << stat.AverageY() << std::endl;
457  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
458  * or also
459  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
460  * std::vector<std::pair<double, double>> values({
461  * { 3.0, 4.0, 2.0 },
462  * { 4.0, 3.0, 2.0 },
463  * { 5.0, 5.0, 1.0 },
464  * });
465  * lar::util::StatCollector<double> stat;
466  * stat.add_weighted(values.begin(), values.end());
467  * std::cout << "Statistics from " << stat.N() << " entries: "
468  * << stat.AverageX() << ", " << stat.AverageY() << std::endl;
469  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
470  * that should both print: "Statistics from 3 entries: 3.8, 3.8".
471  *
472  * Other functions are available allowing addition of weighted and
473  * unweighted data from collections.
474  * For additional examples, see the unit test StatCollector_test.cc .
475  */
476  template <typename T, typename W = T>
478  public:
480  using Base_t::sqr;
481  public:
482  using This_t = StatCollector2D<T, W>; ///< this type
483  using Data_t = T; ///< type of the data
484  using Weight_t = typename Base_t::Weight_t; ///< type of the weight
485 
486  using Pair_t = std::tuple<Data_t, Data_t>;
487  using WeightedPair_t = std::tuple<Data_t, Data_t, Weight_t>;
488 
489  // default constructor, destructor and all the rest
490 
491  /// @{
492  /// @name Add elements
493 
494  //@{
495  /// Adds one entry with specified values and weight
496  void add(Data_t x, Data_t y, Weight_t weight = Weight_t(1.0));
497 
499  { add(std::get<0>(value), std::get<1>(value), weight); }
500 
501  void add(WeightedPair_t value)
502  { add(std::get<0>(value), std::get<1>(value), std::get<2>(value)); }
503  //@}
504 
505  /**
506  * @brief Adds entries from a sequence with weight 1
507  * @tparam Iter forward iterator to the pairs to be added
508  * @param begin iterator pointing to the first element to be added
509  * @param end iterator pointing after the last element to be added
510  *
511  * The value pointed by the iterator must be a tuple with types compatible
512  * with Pair_t.
513  */
514  template <typename Iter>
515  void add_unweighted(Iter begin, Iter end)
516  { add_unweighted(begin, end, identity()); }
517 
518  /**
519  * @brief Adds entries from a sequence with weight 1
520  * @tparam Iter forward iterator to the elements to be added
521  * @tparam Pred a predicate to extract the element from iterator value
522  * @param begin iterator pointing to the first element to be added
523  * @param end iterator pointing after the last element to be added
524  * @param extractor the predicate extracting the value to be inserted
525  *
526  * The predicate is required to react to a call like with:
527  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
528  * Pair_t Pred::operator() (typename Iter::value_type);
529  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
530  */
531  template <typename Iter, typename Pred>
532  void add_unweighted(Iter begin, Iter end, Pred extractor);
533 
534  /**
535  * @brief Adds all entries from a container, with weight 1
536  * @tparam Cont type of container of the elements to be added
537  * @tparam Pred a predicate to extract the element from iterator value
538  * @param cont container of the elements to be added
539  * @param extractor the predicate extracting the value to be inserted
540  *
541  * The predicate is required to react to a call like with:
542  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
543  * Pair_t Pred::operator() (typename Cont::value_type);
544  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
545  * The container must support the range-based for loop syntax, that is
546  * is must have std::begin<Cont>() and std::end<Cont>() defined.
547  */
548  template <typename Cont, typename Pred>
549  void add_unweighted(Cont cont, Pred extractor)
550  { add_unweighted(std::begin(cont), std::end(cont), extractor); }
551 
552  /**
553  * @brief Adds all entries from a container, with weight 1
554  * @tparam Cont type of container of the elements to be added
555  * @param cont container of the elements to be added
556  *
557  * The container must support the range-based for loop syntax, that is
558  * is must have std::begin<Cont>() and std::end<Cont>() defined.
559  * The value in the container must be convertible to the Data_t type.
560  */
561  template <typename Cont>
562  void add_unweighted(Cont cont)
563  { add_unweighted(std::begin(cont), std::end(cont)); }
564 
565 
566  /**
567  * @brief Adds entries from a sequence with individually specified weights
568  * @tparam VIter forward iterator to the elements to be added
569  * @tparam WIter forward iterator to the weights
570  * @tparam VPred a predicate to extract the element from iterator value
571  * @tparam WPred a predicate to extract the weight from iterator value
572  * @param begin_value iterator pointing to the first element to be added
573  * @param end_value iterator pointing after the last element to be added
574  * @param begin_weight iterator pointing to the weight of first element
575  * @param value_extractor predicate extracting the value to be inserted
576  * @param weight_extractor predicate extracting the weight to be inserted
577  *
578  * Each element is added with the weight pointed by the matching element
579  * in the list pointed by begin_weight: the element `*(begin_value)` will
580  * have weight `*(begin_weight)`, the next element `*(begin_value + 1)`
581  * will have weight `*(begin_weight + 1)`, etc.
582  *
583  * The predicates are required to react to a call like with:
584  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
585  * Pair_t VPred::operator() (typename VIter::value_type);
586  * Weight_t WPred::operator() (typename WIter::value_type);
587  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
588  */
589  template <
590  typename VIter, typename WIter,
591  typename VPred, typename WPred = identity
592  >
593  void add_weighted(
594  VIter begin_value, VIter end_value,
595  WIter begin_weight,
596  VPred value_extractor,
597  WPred weight_extractor = WPred()
598  );
599 
600 
601  /**
602  * @brief Adds entries from a sequence with individually specified weights
603  * @tparam Iter forward iterator to WeightedPair_t elements to be added
604  * @param begin iterator pointing to the first element to be added
605  * @param end iterator pointing after the last element to be added
606  *
607  * The value pointed by the iterator must be a WeightedPair_t or something
608  * with elements convertible to its own (Data_t, Data_t and Weight_t).
609  * For more complicate structures, use the version with two predicates
610  * (using the weight iterator the same as the value iterator).
611  */
612  template <typename Iter>
613  void add_weighted(Iter begin, Iter end);
614 
615  /**
616  * @brief Adds entries from a sequence with individually specified weights
617  * @tparam Cont type of container of WeightedPair_t elements to be added
618  * @param cont container of (x, y, weight) pairs to be added
619  *
620  * The values in the container must be tuples with first and second
621  * element convertible to the Data_t and the third element convertible to
622  * Weight_t.
623  */
624  template <typename Cont>
625  void add_weighted(Cont cont)
626  { add_weighted(std::begin(cont), std::end(cont)); }
627 
628  ///@}
629 
630  /// Clears all the statistics
631  void clear();
632 
633  /// @{
634  /// @name Statistic retrieval
635 
636  /// Returns the number of entries added
637  int N() const { return Base_t::N(); }
638 
639  /// Returns the sum of the weights
640  Weight_t Weights() const { return Base_t::Weights(); }
641 
642  /// Returns the weighted sum of the x values
643  Weight_t SumX() const { return x.Sum(); }
644 
645  /// Returns the weighted sum of the y values
646  Weight_t SumY() const { return y.Sum(); }
647 
648  /// Returns the weighted sum of the square of the x values
649  Weight_t SumSqX() const { return x.SumSq(); }
650 
651  /// Returns the weighted sum of the square of the y values
652  Weight_t SumSqY() const { return y.SumSq(); }
653 
654  /// Returns the weighted sum of the product of x and y values
655  Weight_t SumXY() const { return sum_xy; }
656 
657  /**
658  * @brief Returns the x value average
659  * @return the x value average
660  * @throws std::range_error if the total weight is 0 (usually: no data)
661  */
662  Weight_t AverageX() const;
663 
664  /**
665  * @brief Returns the y value average
666  * @return the y value average
667  * @throws std::range_error if the total weight is 0 (usually: no data)
668  */
669  Weight_t AverageY() const;
670 
671  /**
672  * @brief Returns the variance of the x values
673  * @return the square of the RMS of the x values
674  * @throws std::range_error if the total weight is 0 (usually: no data)
675  */
676  Weight_t VarianceX() const;
677 
678  /**
679  * @brief Returns the variance of the y values
680  * @return the square of the RMS of the y values
681  * @throws std::range_error if the total weight is 0 (usually: no data)
682  */
683  Weight_t VarianceY() const;
684 
685  /**
686  * @brief Returns the covariance of the (x, y) pair
687  * @return the square of the RMS of the y values
688  * @throws std::range_error if the total weight is 0 (usually: no data)
689  */
690  Weight_t Covariance() const;
691 
692  /**
693  * @brief Returns the standard deviation of the x sample
694  * @return the RMS of the x values
695  * @throws std::range_error if the total weight is 0 (see VarianceX())
696  * @throws std::range_error if VarianceX() is negative (due to rounding errors)
697  */
698  Weight_t RMSx() const;
699 
700  /**
701  * @brief Returns the standard deviation of the y sample
702  * @return the RMS of the y values
703  * @throws std::range_error if the total weight is 0 (see VarianceY())
704  * @throws std::range_error if VarianceY() is negative (due to rounding errors)
705  */
706  Weight_t RMSy() const;
707 
708 
709  /**
710  * @brief Returns the linear correlation
711  * @return the linear correlation RMS of the y values
712  * @throws std::range_error if the total weight is 0 (see Covariance())
713  * @throws std::range_error if any variance is non-positive
714  */
715  Weight_t LinearCorrelation() const;
716 
717 
718  /**
719  * @brief Returns the arithmetic average of the weights
720  * @return the weight average
721  * @throws std::range_error if no entry was added
722  */
723  Weight_t AverageWeight() const { return Base_t::AverageWeight(); }
724 
725  /// @}
726 
727  protected:
729 
730  Variable_t x; ///< accumulator for variable x
731  Variable_t y; ///< accumulator for variable y
732  Weight_t sum_xy = Weight_t(0); ///< weighted sum of xy
733 
734  }; // class StatCollector2D<>
735 
736 
737 
738  /** ************************************************************************
739  * @brief Keeps track of the minimum and maximum value we observed
740  * @tparam T type of datum
741  *
742  * Implementation note: a similar class with an arbitrary comparison rule
743  * would require a careful choice of initial values for minimum and maximum,
744  * or a entry count that should be checked at each insertion.
745  * We save that slight overhead here.
746  */
747  template <typename T>
749  public:
750  using Data_t = T; ///< type of data we collect
751  using This_t = MinMaxCollector<T>; ///< this type
752 
753  //@{
754  /// Default constructor: no data collected so far.
755  MinMaxCollector() = default;
756 
757  /// Constructor: starts with parsing the specified data
758  MinMaxCollector(std::initializer_list<Data_t> init)
759  { add(init); }
760 
761  /**
762  * @brief Include a sequence of values in the statistics
763  * @tparam Iter type of an iterator on values
764  * @param begin iterator pointing to the first value to be included
765  * @param end iterator pointing to the last value to be included
766  */
767  template <typename Iter>
769  { add(begin, end); }
770  //@}
771 
772 
773  // default copy and move constructor and assignment, and destructor
774 
775  /// @{
776  /// @name Inserters
777  /**
778  * @brief Include a single value in the statistics
779  * @param value the value to be added
780  * @return this object
781  */
783 
784  /**
785  * @brief Include a sequence of values in the statistics
786  * @param values the values to be added
787  * @return this object
788  */
789  This_t& add(std::initializer_list<Data_t> values);
790 
791  /**
792  * @brief Include a sequence of values in the statistics
793  * @tparam Iter type of an iterator on values
794  * @param begin iterator pointing to the first value to be included
795  * @param end iterator pointing to the last value to be included
796  * @return this object
797  */
798  template <typename Iter>
799  This_t& add(Iter begin, Iter end);
800  /// @}
801 
802 
803  /// Returns whether at least one datum has been added
804  bool has_data() const { return minimum <= maximum; }
805 
806  /// Returns the accumulated minimum, or a very large number if no values
807  Data_t min() const { return minimum; }
808 
809  /// Returns the accumulated maximum, or a very small number if no values
810  Data_t max() const { return maximum; }
811 
812 
813  /// Removes all statistics and reinitializes the object
814  void clear();
815 
816  protected:
817  /// the accumulated minimum
819 
820  /// the accumulated maximum
822 
823  }; // class MinMaxCollector<>
824 
825 
826  } // namespace util
827 } // namespace lar
828 
829 
830 //==============================================================================
831 //=== template implementation
832 //==============================================================================
833 
834 
835 //******************************************************************************
836 //*** details::WeightTracker<>
837 //***
838 
839 template <typename W>
842 {
843  if (N() == 0)
844  throw std::range_error("WeightTracker<>::AverageWeight(): divide by 0");
845  return Weights() / N();
846 } // details::WeightTracker<W>::AverageWeight()
847 
848 
849 //******************************************************************************
850 //*** StatCollector<>
851 //***
852 
853 template <typename T, typename W>
855  (Data_t value, Weight_t weight /* = Weight_t(1.0) */)
856 {
857  Base_t::add(weight);
858  x.add(value, weight);
859 } // StatCollector<T, W>::add()
860 
861 
862 template <typename T, typename W>
863 template <typename Iter, typename Pred>
865  (Iter begin, Iter end, Pred extractor)
866 {
867  std::for_each
868  (begin, end, [this, extractor](auto item) { this->add(extractor(item)); });
869 } // StatCollector<T, W>::add_unweighted(Iter, Iter, Pred)
870 
871 
872 template <typename T, typename W>
873 template <typename VIter, typename WIter, typename VPred, typename WPred>
875  VIter begin_value, VIter end_value,
876  WIter begin_weight,
877  VPred value_extractor,
878  WPred weight_extractor /* = WPred() */
879 ) {
880  while (begin_value != end_value) {
881  add(value_extractor(*begin_value), weight_extractor(*begin_weight));
882  ++begin_value;
883  ++begin_weight;
884  } // while
885 } // StatCollector<T, W>::add_weighted(VIter, VIter, WIter, VPred, WPred)
886 
887 
888 template <typename T, typename W>
889 template <typename Iter>
891 
892  std::for_each(begin, end, [this](auto p) { this->add(p.first, p.second); });
893 
894 } // StatCollector<T, W>::add_weighted(Iter, Iter)
895 
896 
897 
898 template <typename T, typename W>
900  Base_t::clear();
901  x.clear();
902 } // StatCollector<T, W>::clear()
903 
904 
905 template <typename T, typename W>
908 {
909  if (Weights() == Weight_t(0))
910  throw std::range_error("StatCollector<>::Average(): divide by 0");
911  return Sum() / Weights();
912 } // StatCollector<T, W>::Average()
913 
914 
915 template <typename T, typename W>
918 {
919  if (Weights() == Weight_t(0))
920  throw std::range_error("StatCollector<>::Variance(): divide by 0");
921  return std::max(Weight_t(0), (SumSq() - sqr(Sum()) / Weights()) / Weights());
922 } // StatCollector<T, W>::Variance()
923 
924 
925 template <typename T, typename W>
928 {
929  const Weight_t rms2 = Variance();
930  if (rms2 < Weight_t(0)) return 0.;
931 // throw std::range_error("StatCollector<>::RMS(): negative RMS^2");
932  return std::sqrt(rms2);
933 } // StatCollector<T, W>::RMS()
934 
935 
936 
937 //******************************************************************************
938 //*** StatCollector2D<>
939 //***
940 
941 template <typename T, typename W>
943  (Data_t x_value, Data_t y_value, Weight_t weight /* = Weight_t(1.0) */)
944 {
945  Base_t::add(weight);
946  x.add(x_value, weight);
947  y.add(y_value, weight);
948  sum_xy += weight * x_value * y_value;
949 } // StatCollector2D<T, W>::add()
950 
951 
952 template <typename T, typename W>
953 template <typename Iter, typename Pred>
955  (Iter begin, Iter end, Pred extractor)
956 {
957  std::for_each
958  (begin, end, [this, extractor](auto item) { this->add(extractor(item)); });
959 } // StatCollector2D<T, W>::add_unweighted(Iter, Iter, Pred)
960 
961 
962 template <typename T, typename W>
963 template <typename VIter, typename WIter, typename VPred, typename WPred>
965  VIter begin_value, VIter end_value,
966  WIter begin_weight,
967  VPred value_extractor,
968  WPred weight_extractor /* = WPred() */
969 ) {
970  while (begin_value != end_value) {
971  add(value_extractor(*begin_value), weight_extractor(*begin_weight));
972  ++begin_value;
973  ++begin_weight;
974  } // while
975 } // StatCollector2D<T, W>::add_weighted(VIter, VIter, WIter, VPred, WPred)
976 
977 
978 template <typename T, typename W>
979 template <typename Iter>
981 
982  std::for_each(begin, end, [this](auto p) { this->add(p); });
983 
984 } // StatCollector2D<T, W>::add_weighted(Iter, Iter)
985 
986 
987 
988 template <typename T, typename W>
990  Base_t::clear();
991  x.clear();
992  y.clear();
993  sum_xy = Weight_t(0);
994 } // StatCollector<T, W>::clear()
995 
996 
997 template <typename T, typename W>
1000 {
1001  if (Weights() == Weight_t(0))
1002  throw std::range_error("StatCollector2D<>::AverageX(): divide by 0");
1003  return SumX() / Weights();
1004 } // StatCollector2D<T, W>::AverageX()
1005 
1006 
1007 template <typename T, typename W>
1010 {
1011  if (Weights() == Weight_t(0))
1012  throw std::range_error("StatCollector2D<>::VarianceX(): divide by 0");
1013  return (SumSqX() - sqr(SumX()) / Weights()) / Weights();
1014 } // StatCollector2D<T, W>::VarianceX()
1015 
1016 
1017 template <typename T, typename W>
1020 {
1021  const Weight_t rms2 = VarianceX();
1022  if (rms2 < Weight_t(0)) return 0.;
1023 // throw std::range_error("StatCollector2D<>::RMSx(): negative RMS^2");
1024  return std::sqrt(rms2);
1025 } // StatCollector2D<T, W>::RMSx()
1026 
1027 
1028 template <typename T, typename W>
1031 {
1032  if (Weights() == Weight_t(0))
1033  throw std::range_error("StatCollector2D<>::AverageY(): divide by 0");
1034  return SumY() / Weights();
1035 } // StatCollector2D<T, W>::AverageY()
1036 
1037 
1038 template <typename T, typename W>
1041 {
1042  if (Weights() == Weight_t(0))
1043  throw std::range_error("StatCollector2D<>::VarianceY(): divide by 0");
1044  return (SumSqY() - sqr(SumY()) / Weights()) / Weights();
1045 } // StatCollector2D<T, W>::VarianceY()
1046 
1047 
1048 template <typename T, typename W>
1051 {
1052  if (Weights() == Weight_t(0))
1053  throw std::range_error("StatCollector2D<>::Covariance(): divide by 0");
1054  return (SumXY() - SumX() * SumY() / Weights()) / Weights();
1055 } // StatCollector2D<T, W>::VarianceY()
1056 
1057 
1058 template <typename T, typename W>
1061 {
1062  const Weight_t rms2 = VarianceY();
1063  if (rms2 < Weight_t(0)) return 0.;
1064 // throw std::range_error("StatCollector2D<>::RMSy(): negative RMS^2");
1065  return std::sqrt(rms2);
1066 } // StatCollector2D<T, W>::RMSy()
1067 
1068 
1069 template <typename T, typename W>
1072 {
1073  if (Weights() == Data_t(0))
1074  throw std::range_error("StatCollector2D<>::LinearCorrelation(): divide by 0");
1075 
1076  const Weight_t var_prod = VarianceX() * VarianceY();
1077  if (var_prod <= Weight_t(0))
1078  throw std::range_error("StatCollector2D<>::LinearCorrelation(): variance is 0");
1079 
1080  return Covariance() / std::sqrt(var_prod);
1081 } // StatCollector2D<T, W>::LinearCorrelation()
1082 
1083 
1084 
1085 //******************************************************************************
1086 //*** MinMaxCollector
1087 //***
1088 
1089 template <typename T>
1091 {
1092  if (value < minimum) minimum = value;
1093  if (value > maximum) maximum = value;
1094  return *this;
1095 } // lar::util::MinMaxCollector<T>::add
1096 
1097 
1098 template <typename T>
1100  (std::initializer_list<Data_t> values)
1101  { return add(values.begin(), values.end()); }
1102 
1103 
1104 template <typename T> template <typename Iter>
1106  (Iter begin, Iter end)
1107 {
1108  std::for_each(begin, end, [this](Data_t value) { this->add(value); });
1109  return *this;
1110 } // lar::util::MinMaxCollector<T>::add(Iter)
1111 
1112 
1113 template <typename T>
1117 } // lar::util::MinMaxCollector<T>::clear()
1118 
1119 //******************************************************************************
1120 
1121 
1122 #endif // LARDATA_UTILITIES_STATCOLLECTOR_H
Weight_t Weights() const
Returns the sum of the weights.
Definition: StatCollector.h:62
end
while True: pbar.update(maxval-len(onlies[E][S])) #print iS, "/", len(onlies[E][S]) found = False for...
void add(Data_t v, Weight_t w)
Adds the specified weight to the statistics.
Data_t max() const
Returns the accumulated maximum, or a very small number if no values.
void add_unweighted(Cont cont)
Adds all entries from a container, with weight 1.
bool has_data() const
Returns whether at least one datum has been added.
int N() const
Returns the number of entries added.
Definition: StatCollector.h:59
Namespace for general, non-LArSoft-specific utilities.
This_t & add(Data_t value)
Include a single value in the statistics.
void add_unweighted(Cont cont, Pred extractor)
Adds all entries from a container, with weight 1.
void add_weighted(VIter begin_value, VIter end_value, WIter begin_weight, VPred value_extractor, WPred weight_extractor=WPred())
Adds entries from a sequence with individually specified weights.
MinMaxCollector(std::initializer_list< Data_t > init)
Constructor: starts with parsing the specified data.
Weight_t SumSqY() const
Returns the weighted sum of the square of the y values.
void add(Data_t x, Data_t y, Weight_t weight=Weight_t(1.0))
Adds one entry with specified values and weight.
Weight_t AverageWeight() const
Returns the arithmetic average of the weights.
static constexpr V sqr(V const &v)
Returns the square of the specified value.
Definition: StatCollector.h:74
Weight_t SumSq() const
Returns the weighted sum of the square of the entries.
Weight_t AverageWeight() const
Returns the arithmetic average of the weights.
Coord add(Coord c1, Coord c2)
Definition: restypedef.cpp:23
void add(Pair_t value, Weight_t weight=Weight_t(1.0))
Weight_t SumXY() const
Returns the weighted sum of the product of x and y values.
Weight_t SumCube() const
Returns the weighted sum of the square of the entries.
STL namespace.
MinMaxCollector(Iter begin, Iter end)
Include a sequence of values in the statistics.
Weight_t Covariance() const
Returns the covariance of the (x, y) pair.
init
Definition: train.py:42
typename Base_t::Weight_t Weight_t
type of the weight
Variable_t x
accumulator for variable x
void add(Weight_t weight)
Adds the specified weight to the statistics.
Definition: StatCollector.h:53
weight
Definition: test.py:257
Weight_t RMS() const
Returns the root mean square.
Weight_t VarianceY() const
Returns the variance of the y values.
Variable_t y
accumulator for variable y
DataTracker()
Default constructor.
Weight_t Average() const
Returns the value average.
Weight_t Sum(unsigned int n) const
Returns the sum of the values to the power n (1 <= n <= 2, no check)
T sqr(T v)
void add_unweighted(Iter begin, Iter end)
Adds entries from a sequence with weight 1.
void clear()
Resets the count.
Definition: StatCollector.h:56
Keeps track of the minimum and maximum value we observed.
void add_unweighted(Cont cont, Pred extractor)
Adds all entries from a container, with weight 1.
Weight_t Variance() const
Returns the square of the RMS of the values.
Data_t min() const
Returns the accumulated minimum, or a very large number if no values.
Weight_t Sum() const
Returns the weighted sum of the values.
int N() const
Returns the number of entries added.
Weight_t AverageWeight() const
Returns the arithmetic average of the weights.
Weight_t AverageY() const
Returns the y value average.
std::void_t< T > n
void add_weighted(Cont cont)
Adds entries from a sequence with individually specified weights.
Weight_t SumSqX() const
Returns the weighted sum of the square of the x values.
void add_unweighted(Iter begin, Iter end)
Adds entries from a sequence with weight 1.
A unary functor returning its own argument (any type)
Definition: StatCollector.h:33
Weight_t SumN() const
Returns the sum of the values to the power N (1 <= N <= 2)
Weight_t AverageX() const
Returns the x value average.
float Data_t
type of data we collect
p
Definition: test.py:223
Weight_t SumSq() const
Returns the weighted sum of the square of the values.
Class tracking sums of variables up to a specified power.
Definition: StatCollector.h:93
Q_UINT16 values[128]
int N() const
Returns the number of entries added.
static int max(int a, int b)
Weight_t SumX() const
Returns the weighted sum of the x values.
void clear()
Clears all the statistics.
constexpr auto operator()(T &&v) const noexcept-> decltype(std::forward< T >(v))
Definition: StatCollector.h:35
void add_weighted(Cont cont)
Adds entries from a sequence with individually specified weights.
std::tuple< Data_t, Data_t > Pair_t
void clear()
Resets the count.
void clear()
Removes all statistics and reinitializes the object.
void add(WeightedPair_t value)
void add_weighted(VIter begin_value, VIter end_value, WIter begin_weight, VPred value_extractor, WPred weight_extractor=WPred())
Adds entries from a sequence with individually specified weights.
Variable_t x
accumulator for variable x
T min(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:55
LArSoft-specific namespace.
Weight_t SumY() const
Returns the weighted sum of the y values.
Class tracking sums of variables up to power 2.
Collects statistics on two homogeneous quantities (weighted)
Weight_t LinearCorrelation() const
Returns the linear correlation.
T Data_t
type of the data
Weight_t RMSy() const
Returns the standard deviation of the y sample.
list x
Definition: train.py:276
vector< vector< double > > clear
typename Base_t::Weight_t Weight_t
type of the weight
decltype(auto) constexpr begin(T &&obj)
ADL-aware version of std::begin.
Definition: StdUtils.h:72
Weight_t RMSx() const
Returns the standard deviation of the x sample.
Weight_t Weights() const
Returns the sum of the weights.
Weight_t VarianceX() const
Returns the variance of the x values.
Weight_t Sum() const
Returns the weighted sum of the entries.
void add_unweighted(Cont cont)
Adds all entries from a container, with weight 1.
void clear()
Clears all the statistics.
Collects statistics on a single quantity (weighted)
Class tracking sums of variables up to power 2.
Weight_t Weights() const
Returns the sum of the weights.
void add(Data_t value, Weight_t weight=Weight_t(1.0))
Adds one entry with specified value and weight.
std::tuple< Data_t, Data_t, Weight_t > WeightedPair_t
std::array< Weight_t, Power > sums
T Data_t
type of the data