LArSupportVectorMachine.h
Go to the documentation of this file.
1 /**
2  * @file larpandoracontent/LArObjects/LArSupportVectorMachine.h
3  *
4  * @brief Header file for the lar support vector machine class.
5  *
6  * $Log: $
7  */
8 #ifndef LAR_SUPPORT_VECTOR_MACHINE_H
9 #define LAR_SUPPORT_VECTOR_MACHINE_H 1
10 
12 
14 
15 #include "Helpers/XmlHelper.h"
16 #include "Pandora/StatusCodes.h"
17 
18 #include <functional>
19 #include <map>
20 #include <vector>
21 
22 //------------------------------------------------------------------------------------------------------------------------------------------
23 
24 namespace lar_content
25 {
26 
27 /**
28  * @brief SupportVectorMachine class
29  */
31 {
32 public:
33  typedef std::function<double(const LArMvaHelper::MvaFeatureVector &, const LArMvaHelper::MvaFeatureVector &, const double)> KernelFunction;
34 
35  /**
36  * @brief KernelType enum
37  */
39  {
41  LINEAR = 1,
42  QUADRATIC = 2,
43  CUBIC = 3,
45  };
46 
47  /**
48  * @brief Default constructor.
49  */
51 
52  /**
53  * @brief Initialize the svm using a serialized model.
54  *
55  * @param parameterLocation the location of the model
56  * @param svmName the name of the model
57  *
58  * @return success
59  */
60  pandora::StatusCode Initialize(const std::string &parameterLocation, const std::string &svmName);
61 
62  /**
63  * @brief Make a classification for a set of input features, based on the trained model
64  *
65  * @param features the input features
66  *
67  * @return the predicted boolean class
68  */
70 
71  /**
72  * @brief Calculate the classification score for a set of input features, based on the trained model
73  *
74  * @param features the input features
75  *
76  * @return the classification score
77  */
79 
80  /**
81  * @brief Calculate the classification probability for a set of input features, based on the trained model
82  *
83  * @param features the input features
84  *
85  * @return the classification probability
86  */
88 
89  /**
90  * @brief Query whether this svm is initialized
91  *
92  * @return whether the model is initialized
93  */
94  bool IsInitialized() const;
95 
96  /**
97  * @brief Get the number of features
98  *
99  * @return the number of features
100  */
101  unsigned int GetNFeatures() const;
102 
103  /**
104  * @brief Set the kernel function to use
105  *
106  * @param kernelFunction the kernel function
107  */
108  void SetKernelFunction(KernelFunction kernelFunction);
109 
110 private:
111  /**
112  * @brief SupportVectorInfo class
113  */
115  {
116  public:
117  /**
118  * @brief Constructor
119  *
120  * @param yAlpha the alpha value multiplied by the y-value for the support vector
121  * @param supportVector the support vector, passed by value then uses move semantics for efficiency
122  */
123  SupportVectorInfo(const double yAlpha, LArMvaHelper::MvaFeatureVector supportVector);
124 
125  double m_yAlpha; ///< The alpha-value multiplied by the y-value for the support vector
127  };
128 
129  /**
130  * @brief FeatureInfo class.
131  */
133  {
134  public:
135  /**
136  * @brief FeatureInfo class.
137  *
138  * @param muValue the average value of this feature
139  * @param sigmaValue the stddev of this feature
140  */
141  FeatureInfo(const double muValue, const double sigmaValue);
142 
143  /**
144  * @brief Default constructor to allow default-construction of (uninitialized) svms.
145  */
146  FeatureInfo();
147 
148  /**
149  * @brief Standardize a parameter corresponding to this feature
150  *
151  * @param parameter the parameter
152  *
153  * @return the standardized parameter
154  */
155  double StandardizeParameter(const double parameter) const;
156 
157  double m_muValue; ///< The average value of this feature
158  double m_sigmaValue; ///< The stddev of this feature
159  };
160 
161  typedef std::vector<SupportVectorInfo> SVInfoList;
162  typedef std::vector<FeatureInfo> FeatureInfoVector;
163 
164  typedef std::map<KernelType, KernelFunction> KernelMap;
165 
166  bool m_isInitialized; ///< Whether this svm has been initialized
167 
168  bool m_enableProbability; ///< Whether to enable probability calculations
169  double m_probAParameter; ///< The first-order score coefficient for mapping to a probability using the logistic function
170  double m_probBParameter; ///< The score offset parameter for mapping to a probability using the logistic function
171 
172  bool m_standardizeFeatures; ///< Whether to standardize the features
173  unsigned int m_nFeatures; ///< The number of features
174  double m_bias; ///< The bias term
175  double m_scaleFactor; ///< The kernel scale factor
176 
177  SVInfoList m_svInfoList; ///< The list of SupportVectorInfo objects
178  FeatureInfoVector m_featureInfoList; ///< The list of FeatureInfo objects
179 
180  KernelType m_kernelType; ///< The kernel type
181  KernelFunction m_kernelFunction; ///< The kernel function
182  KernelMap m_kernelMap; ///< Map from the kernel types to the kernel functions
183 
184  /**
185  * @brief Read the svm parameters from an xml file
186  *
187  * @param svmFileName the sml file name
188  * @param svmName the name of the svm
189  */
190  void ReadXmlFile(const std::string &svmFileName, const std::string &svmName);
191 
192  /**
193  * @brief Read the component at the current xml element
194  *
195  * @param pCurrentXmlElement address of the current xml element
196  *
197  * @return success
198  */
199  pandora::StatusCode ReadComponent(pandora::TiXmlElement *pCurrentXmlElement);
200 
201  /**
202  * @brief Read the machine component at the current xml handle
203  *
204  * @param currentHandle the current xml handle
205  *
206  * @return success
207  */
208  pandora::StatusCode ReadMachine(const pandora::TiXmlHandle &currentHandle);
209 
210  /**
211  * @brief Read the feature component at the current xml handle
212  *
213  * @param currentHandle the current xml handle
214  *
215  * @return success
216  */
217  pandora::StatusCode ReadFeatures(const pandora::TiXmlHandle &currentHandle);
218 
219  /**
220  * @brief Read the support vector component at the current xml handle
221  *
222  * @param currentHandle the current xml handle
223  *
224  * @return success
225  */
226  pandora::StatusCode ReadSupportVector(const pandora::TiXmlHandle &currentHandle);
227 
228  /**
229  * @brief Implementation method for calculating the classification score using the trained model.
230  *
231  * @param features the vector of features
232  *
233  * @return the classification score
234  */
236 
237  /**
238  * @brief An inhomogeneous quadratic kernel
239  *
240  * @param supportVector the support vector
241  * @param features the features
242  * @param scale factor the scale factor
243  *
244  * @return result of the kernel operation
245  */
246  static double QuadraticKernel(
247  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor = 1.);
248 
249  /**
250  * @brief An inhomogeneous cubic kernel
251  *
252  * @param supportVector the support vector
253  * @param features the features
254  * @param scale factor the scale factor
255  *
256  * @return result of the kernel operation
257  */
258  static double CubicKernel(
259  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor = 1.);
260 
261  /**
262  * @brief A linear kernel
263  *
264  * @param supportVector the support vector
265  * @param features the features
266  * @param scale factor the scale factor
267  *
268  * @return result of the kernel operation
269  */
270  static double LinearKernel(
271  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor = 1.);
272 
273  /**
274  * @brief A gaussian RBF kernel
275  *
276  * @param supportVector the support vector
277  * @param features the features
278  * @param scale factor the scale factor
279  *
280  * @return result of the kernel operation
281  */
282  static double GaussianRbfKernel(
283  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor = 1.);
284 };
285 
286 //------------------------------------------------------------------------------------------------------------------------------------------
287 
289 {
290  return (this->CalculateClassificationScoreImpl(features) > 0.);
291 }
292 
293 //------------------------------------------------------------------------------------------------------------------------------------------
294 
296 {
297  return this->CalculateClassificationScoreImpl(features);
298 }
299 
300 //------------------------------------------------------------------------------------------------------------------------------------------
301 
303 {
304  if (!m_enableProbability)
305  {
306  std::cout << "LArSupportVectorMachine: cannot calculate probabilities for this SVM" << std::endl;
307  throw pandora::STATUS_CODE_NOT_INITIALIZED;
308  }
309 
310  // Use the logistic function to map the linearly-transformed score on the interval (-inf,inf) to a probability on [0,1] - the two free
311  // parameters in the linear transformation are trained such that the logistic map produces an accurate probability
312  const double scaledScore = m_probAParameter * this->CalculateClassificationScoreImpl(features) + m_probBParameter;
313 
314  return 1. / (1. + std::exp(scaledScore));
315 }
316 
317 //------------------------------------------------------------------------------------------------------------------------------------------
318 
320 {
321  return m_isInitialized;
322 }
323 
324 //------------------------------------------------------------------------------------------------------------------------------------------
325 
326 inline unsigned int SupportVectorMachine::GetNFeatures() const
327 {
328  return m_nFeatures;
329 }
330 
331 //------------------------------------------------------------------------------------------------------------------------------------------
332 
334 {
335  m_kernelFunction = std::move(kernelFunction);
336 }
337 
338 //------------------------------------------------------------------------------------------------------------------------------------------
339 
341  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor)
342 {
343  const double denominator(scaleFactor * scaleFactor);
344  if (denominator < std::numeric_limits<double>::epsilon())
345  throw pandora::StatusCodeException(pandora::STATUS_CODE_INVALID_PARAMETER);
346 
347  double total(0.);
348  for (unsigned int i = 0; i < features.size(); ++i)
349  total += supportVector.at(i).Get() * features.at(i).Get();
350 
351  return total / denominator;
352 }
353 
354 //------------------------------------------------------------------------------------------------------------------------------------------
355 
357  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor)
358 {
359  const double denominator(scaleFactor * scaleFactor);
360  if (denominator < std::numeric_limits<double>::epsilon())
361  throw pandora::StatusCodeException(pandora::STATUS_CODE_INVALID_PARAMETER);
362 
363  double total(0.);
364  for (unsigned int i = 0; i < features.size(); ++i)
365  total += supportVector.at(i).Get() * features.at(i).Get();
366 
367  total = total / denominator + 1.;
368  return total * total;
369 }
370 
371 //------------------------------------------------------------------------------------------------------------------------------------------
372 
374  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor)
375 {
376  const double denominator(scaleFactor * scaleFactor);
377  if (denominator < std::numeric_limits<double>::epsilon())
378  throw pandora::StatusCodeException(pandora::STATUS_CODE_INVALID_PARAMETER);
379 
380  double total(0.);
381  for (unsigned int i = 0; i < features.size(); ++i)
382  total += supportVector.at(i).Get() * features.at(i).Get();
383 
384  total = total / denominator + 1.;
385  return total * total * total;
386 }
387 
388 //------------------------------------------------------------------------------------------------------------------------------------------
389 
391  const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor)
392 {
393  double total(0.);
394  for (unsigned int i = 0; i < features.size(); ++i)
395  total += (supportVector.at(i).Get() - features.at(i).Get()) * (supportVector.at(i).Get() - features.at(i).Get());
396 
397  return std::exp(-scaleFactor * total);
398 }
399 
400 //------------------------------------------------------------------------------------------------------------------------------------------
401 
403  m_yAlpha(yAlpha),
404  m_supportVector(std::move(supportVector))
405 {
406 }
407 
408 //------------------------------------------------------------------------------------------------------------------------------------------
409 
410 inline SupportVectorMachine::FeatureInfo::FeatureInfo(const double muValue, const double sigmaValue) :
411  m_muValue(muValue),
412  m_sigmaValue(sigmaValue)
413 {
414 }
415 
416 //------------------------------------------------------------------------------------------------------------------------------------------
417 
419 {
420 }
421 
422 //------------------------------------------------------------------------------------------------------------------------------------------
423 
425 {
426  if (m_sigmaValue < std::numeric_limits<double>::epsilon())
427  throw pandora::StatusCodeException(pandora::STATUS_CODE_INVALID_PARAMETER);
428 
429  return (parameter - m_muValue) / m_sigmaValue;
430 }
431 
432 } // namespace lar_content
433 
434 #endif // #ifndef LAR_SUPPORT_VECTOR_MACHINE_H
static double CubicKernel(const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor=1.)
An inhomogeneous cubic kernel.
bool Classify(const LArMvaHelper::MvaFeatureVector &features) const
Make a classification for a set of input features, based on the trained model.
pandora::StatusCode ReadComponent(pandora::TiXmlElement *pCurrentXmlElement)
Read the component at the current xml element.
pandora::StatusCode ReadSupportVector(const pandora::TiXmlHandle &currentHandle)
Read the support vector component at the current xml handle.
MvaTypes::MvaFeatureVector MvaFeatureVector
Definition: LArMvaHelper.h:58
std::vector< SupportVectorInfo > SVInfoList
bool m_enableProbability
Whether to enable probability calculations.
std::string string
Definition: nybbler.cc:12
double m_scaleFactor
The kernel scale factor.
FeatureInfoVector m_featureInfoList
The list of FeatureInfo objects.
double CalculateClassificationScore(const LArMvaHelper::MvaFeatureVector &features) const
Calculate the classification score for a set of input features, based on the trained model...
MvaInterface class.
STL namespace.
std::vector< FeatureInfo > FeatureInfoVector
SVInfoList m_svInfoList
The list of SupportVectorInfo objects.
bool IsInitialized() const
Query whether this svm is initialized.
static Argument * parameter
const char features[]
Definition: feature_tests.c:2
static double QuadraticKernel(const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor=1.)
An inhomogeneous quadratic kernel.
void SetKernelFunction(KernelFunction kernelFunction)
Set the kernel function to use.
static double LinearKernel(const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor=1.)
A linear kernel.
pandora::StatusCode ReadMachine(const pandora::TiXmlHandle &currentHandle)
Read the machine component at the current xml handle.
double m_muValue
The average value of this feature.
bool m_standardizeFeatures
Whether to standardize the features.
LArMvaHelper::MvaFeatureVector m_supportVector
The support vector.
def move(depos, offset)
Definition: depos.py:107
void ReadXmlFile(const std::string &svmFileName, const std::string &svmName)
Read the svm parameters from an xml file.
KernelMap m_kernelMap
Map from the kernel types to the kernel functions.
KernelFunction m_kernelFunction
The kernel function.
double StandardizeParameter(const double parameter) const
Standardize a parameter corresponding to this feature.
FeatureInfo()
Default constructor to allow default-construction of (uninitialized) svms.
KernelType m_kernelType
The kernel type.
pandora::StatusCode Initialize(const std::string &parameterLocation, const std::string &svmName)
Initialize the svm using a serialized model.
double m_probAParameter
The first-order score coefficient for mapping to a probability using the logistic function...
unsigned int m_nFeatures
The number of features.
double m_yAlpha
The alpha-value multiplied by the y-value for the support vector.
std::function< double(const LArMvaHelper::MvaFeatureVector &, const LArMvaHelper::MvaFeatureVector &, const double)> KernelFunction
std::map< KernelType, KernelFunction > KernelMap
double CalculateProbability(const LArMvaHelper::MvaFeatureVector &features) const
Calculate the classification probability for a set of input features, based on the trained model...
static double GaussianRbfKernel(const LArMvaHelper::MvaFeatureVector &supportVector, const LArMvaHelper::MvaFeatureVector &features, const double scaleFactor=1.)
A gaussian RBF kernel.
double CalculateClassificationScoreImpl(const LArMvaHelper::MvaFeatureVector &features) const
Implementation method for calculating the classification score using the trained model.
bool m_isInitialized
Whether this svm has been initialized.
double m_probBParameter
The score offset parameter for mapping to a probability using the logistic function.
pandora::StatusCode ReadFeatures(const pandora::TiXmlHandle &currentHandle)
Read the feature component at the current xml handle.
SupportVectorInfo(const double yAlpha, LArMvaHelper::MvaFeatureVector supportVector)
Constructor.
unsigned int GetNFeatures() const
Get the number of features.
Header file for the lar multivariate analysis interface class.
QTextStream & endl(QTextStream &s)