HitLineFitAlg.cxx
Go to the documentation of this file.
1 /********************************
2 
3 Implementation of MLESAC algorithm for robust estimation
4 of model parameters in the presence of outliers.
5 
6 October 2016
7 m.thiesse@sheffield.ac.uk
8 
9 ********************************/
10 
11 #include "HitLineFitAlg.h"
12 
14 {
15  this->reconfigure(pset);
16 }
17 
18 void dune::HitLineFitAlg::SetParameter(int i, double startValue, double minValue, double maxValue)
19 {
20  fParIVal[i].start = startValue; fParIVal[i].min = minValue; fParIVal[i].max = maxValue;
21 }
22 
24 {
25  fFitPolN = fParIVal.size()-1;
26  int test = 0;
27  for (auto & ipar : fParIVal)
28  {
29  if (ipar.first != test) return false;
30  ++test;
31  }
32  if (fParIVal.size() < 2) return false;
33  return true;
34 }
35 
36 float dune::HitLineFitAlg::PointToLineDist(TVector3 ptloc, TVector3 linept1, TVector3 linept2)
37 {
38  return (((ptloc-linept1).Cross(ptloc-linept2)).Mag()/(linept2-linept1).Mag());
39 }
40 
41 void dune::HitLineFitAlg::DeterministicShuffle(std::vector<unsigned int> & vec)
42 {
43  TRandom3 rand(fSeedValue);
44  for (size_t i = vec.size()-1; i > 0; --i)
45  {
46  unsigned int randint = (unsigned int)(rand.Uniform(i+1)); // gives random integer from [0,i) but we need [0,i], so do [0,i+1)...
47  std::swap(vec[randint],vec[i]);
48  }
49 }
50 
51 void dune::HitLineFitAlg::SetHorizVertRanges(float hmin, float hmax, float vmin, float vmax)
52 {
53  fHorizRangeMin = hmin; fHorizRangeMax = hmax;
54  fVertRangeMin = vmin; fVertRangeMax = vmax;
55 }
56 
57 int dune::HitLineFitAlg::FitLine(std::vector<HitLineFitData> & data, HitLineFitResults & bestfit)
58 {
59  if (!CheckModelParameters())
60  {
61  throw cet::exception("HitLineFitAlg") << "Invalid fit parameters. Fix it!";
62  return -9;
63  }
64 
65  // define variables once
66  size_t i;
67  float horiz,vert,dist,ssr,diff,fiterr;
68  TVector3 hitloc,lineloc1,lineloc2;
69  int iterations;
70  std::vector<float> distances;
71 
72  // vectors of vector indices of points to request from the input data vector
73  std::vector<unsigned int> points;
74  std::vector<unsigned int> points_best;
75  std::vector<unsigned int> bestkeys;
76  std::vector<unsigned int> datakeys;
77 
78  // initialize values
79  bestfit.fitsuccess = false;
80  iterations = 0;
82 
83  // construct TGraphAsymmErrors for doing the fits
84  TGraphAsymmErrors * maybe = new TGraphAsymmErrors();
85  TGraphAsymmErrors * maybebetter = new TGraphAsymmErrors();
86 
87  // define the fit model
88  // currently uses a polynomial of N dimensions
89  TF1 * model = new TF1("model",TString::Format("pol%u",fFitPolN),fHorizRangeMin,fHorizRangeMax);
90  model->SetNpx(10000);
91  model->SetParNames("constant","linear","quadratic","cubic","quartic","quintic");
92  for (auto & ipar : fParIVal)
93  {
94  model->SetParLimits(ipar.first,ipar.second.min,ipar.second.max);
95  model->SetParameter(ipar.first,ipar.second.start);
96  }
97 
98  // steering parameters for the RANSAC algorithm
99  // n (fMinStartPoints) = minimum number of data points requred to fit model
100  // k (fIterationsMultiplier) = maximum number of iterations allowed, defined as a multiple of the number of points in data set. 200 is usually more than enough
101  // t (fInclusionThreshold) = threshold value for model inclusion, in cm
102  // d (fMinAlsoPoints) = number of close data points requred to assert that a model fits well to data
103  unsigned int n = std::max((unsigned int)fMinStartPoints,(unsigned int)(data.size()*0.01));
104  if (n >= data.size()) return -1;
105  int k = data.size()*fIterationsMultiplier;
106  float t = fInclusionThreshold;
107  unsigned int d = std::max(int(data.size()*0.05-n),int(fMinAlsoPoints));
108  if (d < 2*n || n < 2) return -2;
109 
110  // make collection of indices to refer back to data vector
111  for (i = 0; i < data.size(); ++i) datakeys.push_back(i);
112 
113 
114  if (fLogLevel > 1) std::cout << "Minimum number of data points required to fit the model, n=" << n << "\n"
115  << "Maximum number of iterations allowed, k=" << k << "\n"
116  << "Threshold value for model inclusion (cm), t=" << t << "\n"
117  << "Number of close data points required to assert a good fit, d=" << d << std::endl;
118 
119 
120  // DO MAIN LOOP
121  while (iterations < k)
122  {
123  ++iterations;
124 
125  // reset collections
126  maybe->Clear();
127  maybebetter->Clear();
128  points.clear();
129  points_best.clear();
130  distances.clear();
131  if (fLogLevel > 1)
132  {
133  if (iterations % 1000 == 0) std::cout << "Iteration # " << iterations << std::endl;
134  }
135 
136  // Shuffle list of keys
137  DeterministicShuffle(datakeys);
138 
139  // Randomly sample n points from the original data set
140  for (i = 0; i < n; ++i)
141  {
142  points.push_back(datakeys[i]);
143  HitLineFitData thisdata = data.at(datakeys[i]);
144  maybe->SetPoint(i,thisdata.hitHoriz,thisdata.hitVert);
145  maybe->SetPointError(i,thisdata.hitHorizErrLo,
146  thisdata.hitHorizErrHi,
147  thisdata.hitVertErrLo,
148  thisdata.hitVertErrHi);
149  }
150 
151  // Do initial fit through these n points to the model
152  TFitResultPtr r = maybe->Fit("model","SQCBR0");
153 
154  // Now, search through all data points and select those which are near the best fit model
155  for (i = n; i < data.size(); ++i)
156  {
157  vert = data.at(datakeys[i]).hitVert;
158  horiz = data.at(datakeys[i]).hitHoriz;
159  hitloc.SetXYZ(horiz,vert,0.);
160  double xloc1[1] = { horiz-1 };
161  double xloc2[1] = { horiz+1 };
162  lineloc1.SetXYZ(horiz-1,model->EvalPar(xloc1,r->GetParams()),0.);
163  lineloc2.SetXYZ(horiz+1,model->EvalPar(xloc2,r->GetParams()),0.);
164  dist = PointToLineDist(hitloc,lineloc1,lineloc2);
165  if (dist <= t) points.push_back(datakeys[i]);
166  }
167 
168  // If more inliers were found, then we've probably found a good track
169  if (points.size() - n > d)
170  {
171  // Make collection of point locations from improved set
172  for (i = 0; i < points.size(); ++i)
173  {
174  HitLineFitData thisdata = data.at(points[i]);
175  maybebetter->SetPoint(i,thisdata.hitHoriz,thisdata.hitVert);
176  maybebetter->SetPointError(i,thisdata.hitHorizErrLo,thisdata.hitHorizErrHi,thisdata.hitVertErrLo,thisdata.hitVertErrHi);
177  }
178 
179  // Fit to improved data sample
180  TFitResultPtr rb = maybebetter->Fit("model","SQCBR0");
181 
182  // loop over one more time to find all nearby points, and calculate errors in the process
183  ssr = 0;
184  for (i = 0; i < points.size(); ++i)
185  {
186  vert = data.at(points[i]).hitVert;
187  horiz = data.at(points[i]).hitHoriz;
188  hitloc.SetXYZ(horiz,vert,0.);
189  double xloc1[1] = { horiz-1 };
190  double xloc2[1] = { horiz+1 };
191  lineloc1.SetXYZ(horiz-1,model->EvalPar(xloc1,rb->GetParams()),0.);
192  lineloc2.SetXYZ(horiz+1,model->EvalPar(xloc2,rb->GetParams()),0.);
193  dist = PointToLineDist(hitloc,lineloc1,lineloc2);
194  distances.push_back(fabs(dist));
195  if (dist <= t)
196  {
197  ssr += dist*dist;
198  points_best.push_back(points[i]);
199  }
200  }
201  if (points_best.size() < 2) continue;
202 
203  // Computing the log likelihood for this model fit
204  float sigma = TMath::Median(distances.size(),distances.data());
205  float gamma = 0.5;
206  float p_outlier_prob = 0;
207  float v = 0.5;
208  std::vector<float> p_inlier_prob(distances.size());
209  for (int j = 0; j < 3; ++j)
210  {
211  for (i = 0; i < distances.size(); ++i)
212  {
213  p_inlier_prob[i] = gamma*TMath::Exp(-(distances[i]*distances[i])/(2*sigma*sigma))/(TMath::Sqrt(2*TMath::Pi())*sigma);
214  }
215  p_outlier_prob = (1-gamma)/v;
216  gamma = 0;
217  for (i = 0; i < distances.size(); ++i)
218  {
219  gamma += p_inlier_prob[i]/(p_inlier_prob[i]+p_outlier_prob);
220  }
221  if (distances.size() > 0) gamma /= distances.size();
222  }
223  float d_cur_penalty = 0;
224  for (i = 0; i < distances.size(); ++i)
225  {
226  d_cur_penalty += TMath::Log(p_inlier_prob[i]+p_outlier_prob);
227  }
228  d_cur_penalty = (-d_cur_penalty);
229 
230  // If a minimum -Log(L), then take this data set as "true"
231  // Also require that the slope is not zero
232  if (d_cur_penalty < fiterr && fabs(model->GetParameter("linear")) > 0.0015)
233  {
234  diff = d_cur_penalty-fiterr;
235  fiterr = d_cur_penalty;
236  for (auto & ipar : fParIVal)
237  {
238  bestfit.bestVal[ipar.first] = model->GetParameter(ipar.first);
239  bestfit.bestValError[ipar.first] = model->GetParError(ipar.first);
240  }
241  bestfit.chi2 = model->GetChisquare();
242  bestfit.ndf = model->GetNDF();
243  bestfit.sum2resid = ssr;
244  bestfit.mle = fiterr;
245  bestfit.fitsuccess = true;
246 
247  // Designate the "real" hits from the "fake" hits
248  for (i = 0; i < data.size(); i++)
249  {
250  if (std::find(points_best.begin(),points_best.end(),i) != points_best.end())
251  {
252  data.at(i).hitREAL = true;
253  }
254  else
255  {
256  data.at(i).hitREAL = false;
257  }
258  }
259  if (fLogLevel > 1)
260  {
261  std::cout << "-------------Found new minimum!-------------" << std::endl
262  << "FitError=" << fiterr << " delta(fiterr)=" << diff << std::endl
263  << "Number of points included = " << points_best.size() << " out of " << data.size() << std::endl
264  << "--------------------------------------------" << std::endl;
265  }
266  }
267  }
268  }
269  if (bestfit.fitsuccess) return 1;
270  return 0;
271 }
272 
274 {
275  fMinStartPoints = p.get<int>("MinStartPoints");
276  fMinAlsoPoints = p.get<int>("MinAlsoPoints");
277  fIterationsMultiplier = p.get<float>("IterationsMultiplier");
278  fInclusionThreshold = p.get<float>("InclusionThreshold");
279  fLogLevel = p.get<int>("LogLevel",1);
280 }
float PointToLineDist(TVector3 ptloc, TVector3 linept1, TVector3 linept2)
Format
Definition: utils.h:7
HitLineFitAlg(fhicl::ParameterSet const &pset)
std::map< int, ParVals > fParIVal
Definition: HitLineFitAlg.h:72
int FitLine(std::vector< HitLineFitData > &data, HitLineFitResults &bestfit)
Definition: model.py:1
std::map< int, float > bestValError
Definition: HitLineFitAlg.h:37
void swap(Handle< T > &a, Handle< T > &b)
std::void_t< T > n
T get(std::string const &key) const
Definition: ParameterSet.h:271
void SetParameter(int i, double startValue, double minValue, double maxValue)
p
Definition: test.py:223
static int max(int a, int b)
double gamma(double KE, const simb::MCParticle *part)
void reconfigure(fhicl::ParameterSet const &p)
void DeterministicShuffle(std::vector< unsigned int > &vec)
constexpr double dist(const TReal *x, const TReal *y, const unsigned int dimension)
void SetHorizVertRanges(float hmin, float hmax, float vmin, float vmax)
cet::coded_exception< error, detail::translate > exception
Definition: exception.h:33
QTextStream & endl(QTextStream &s)