Logo ROOT   6.10/00
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
LossFunction.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : LossFunction *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * LossFunction and associated classes *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * U. of Victoria, Canada *
23  * MPI-K Heidelberg, Germany *
24  * U. of Bonn, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/license.txt) *
29  **********************************************************************************/
30 
31 #ifndef ROOT_TMVA_LossFunction
32 #define ROOT_TMVA_LossFunction
33 
34 //#include <iosfwd>
35 #include <vector>
36 #include <map>
37 #include "TMVA/Event.h"
38 #include "TMVA/Types.h"
39 
40 namespace TMVA {
41 
42  ///////////////////////////////////////////////////////////////////////////////////////////////
43  // Data Structure used by LossFunction and LossFunctionBDT to calculate errors, targets, etc
44  ///////////////////////////////////////////////////////////////////////////////////////////////
45 
47 
48  public:
50  trueValue = 0.;
51  predictedValue = 0.;
52  weight = 0.;
53  };
54  LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_){
55  trueValue = trueValue_;
56  predictedValue = predictedValue_;
57  weight = weight_;
58  }
60 
64  };
65 
66 
67  ///////////////////////////////////////////////////////////////////////////////////////////////
68  // Loss Function interface defining base class for general error calculations in
69  // regression/classification
70  ///////////////////////////////////////////////////////////////////////////////////////////////
71 
72  class LossFunction {
73  // Abstract base interface: every member function visible below is pure virtual and is implemented by the concrete loss functions declared later in this header.
74  public:
75 
76  // constructors (original line 77 is not shown in this listing; only the virtual destructor is visible)
78  virtual ~LossFunction(){};
79 
80  // abstract methods that need to be implemented
82  virtual Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs) = 0; // net loss for the collection of events evs
83  virtual Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs) = 0; // mean loss for the collection of events evs
84  // identification of the concrete loss function
85  virtual TString Name() = 0; // name string, e.g. "Huber" for HuberLossFunction
86  virtual Int_t Id() = 0; // integer id, e.g. 0 for HuberLossFunction, 1 for LeastSquaresLossFunction
87  };
88 
89  ///////////////////////////////////////////////////////////////////////////////////////////////
90  // Loss Function interface for boosted decision trees. Inherits from LossFunction
91  ///////////////////////////////////////////////////////////////////////////////////////////////
92 
93  /* Must inherit LossFunction with the virtual keyword so that we only have to implement
94  * the LossFunction interface once.
95  *
96  * LossFunction
97  * / \
98  *SomeLossFunction LossFunctionBDT
99  * \ /
100  * \ /
101  * SomeLossFunctionBDT
102  *
103  * Without the virtual keyword the two would point to their own LossFunction objects
104  * and SomeLossFunctionBDT would have to implement the virtual functions of LossFunction twice, once
105  * for each object. See diagram below.
106  *
107  * LossFunction LossFunction
108  * | |
109  *SomeLossFunction LossFunctionBDT
110  * \ /
111  * \ /
112  * SomeLossFunctionBDT
113  *
114  * Multiple inheritance is often frowned upon. To avoid this, we could make LossFunctionBDT separate
115  * from LossFunction but it really is a type of loss function.
116  * We could also put LossFunction into LossFunctionBDT. In either of these scenarios, if you are doing
117  * different regression methods and want to compare the Loss this makes it more convoluted.
118  * I think that multiple inheritance seems justified in this case, but we could change it if it's a problem.
119  * Usually it isn't a big deal with interfaces and this results in the simplest code in this case.
120  */
121 
122  class LossFunctionBDT : public virtual LossFunction{
123  // Abstract interface adding the boosted-decision-tree specific operations; inherits LossFunction virtually so a concrete BDT loss function shares a single LossFunction base.
124  public:
125 
126  // constructors (original line 127 is not shown in this listing; only the virtual destructor is visible)
128  virtual ~LossFunctionBDT(){};
129 
130  // abstract methods that need to be implemented
131  virtual void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights) = 0; // initialize the targets and prepare for the regression
132  virtual void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap) = 0; // set the targets for a collection of events
133  virtual Double_t Target(LossFunctionEventInfo& e) = 0; // target for a single event
134  virtual Double_t Fit(std::vector<LossFunctionEventInfo>& evs) = 0; // fit value for a terminal node, based upon the events in that node
135  };
136 
137  ///////////////////////////////////////////////////////////////////////////////////////////////
138  // Huber loss function for regression error calculations
139  ///////////////////////////////////////////////////////////////////////////////////////////////
140 
141  class HuberLossFunction : public virtual LossFunction{
142  // Huber loss function: implements the LossFunction interface and adds quantile / sum-of-weights helpers. Several original lines (144, 146, 149, 165-167) are not shown in this listing.
143  public:
145  HuberLossFunction(Double_t quantile); // construct with the given quantile
147 
148  // The LossFunction methods
150  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs); // net loss for a collection of events
151  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs); // mean loss for a collection of events
152 
153  // We go ahead and implement the simple ones
154  TString Name(){ return TString("Huber"); };
155  Int_t Id(){ return 0; } ;
156 
157  // Functions needed beyond the interface
158  void Init(std::vector<LossFunctionEventInfo>& evs); // figures out the residual separating the "core" from the "tails" (per the generated description; implementation not visible here)
159  Double_t CalculateQuantile(std::vector<LossFunctionEventInfo>& evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs); // determine the quantile for a given input
160  Double_t CalculateSumOfWeights(std::vector<LossFunctionEventInfo>& evs); // presumably returns the sum of the event weights in evs -- confirm against the implementation
161  void SetTransitionPoint(std::vector<LossFunctionEventInfo>& evs); // determine the transition point using fQuantile and fSumOfWeights
162  void SetSumOfWeights(std::vector<LossFunctionEventInfo>& evs); // set the sum of weights given a collection of events
163 
164  protected: // the protected members (original lines 165-167) are not shown in this listing
168  };
169 
170  ///////////////////////////////////////////////////////////////////////////////////////////////
171  // Huber loss function with boosted decision tree functionality
172  ///////////////////////////////////////////////////////////////////////////////////////////////
173 
174  // The bdt loss function implements the LossFunctionBDT interface and inherits the HuberLossFunction
175  // functionality.
177 
178  public:
182 
183  // The LossFunctionBDT methods
184  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
185  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
187  Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
188 
189  private:
190  // some data fields
191  };
192 
193  ///////////////////////////////////////////////////////////////////////////////////////////////
194  // LeastSquares loss function for regression error calculations
195  ///////////////////////////////////////////////////////////////////////////////////////////////
196 
197  class LeastSquaresLossFunction : public virtual LossFunction{
198  // Least squares loss function: implements the LossFunction interface. Original lines 200-201 and 204 are not shown in this listing.
199  public:
202 
203  // The LossFunction methods
205  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs); // net loss for a collection of events
206  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs); // mean loss for a collection of events
207 
208  // We go ahead and implement the simple ones
209  TString Name(){ return TString("LeastSquares"); };
210  Int_t Id(){ return 1; } ;
211  };
212 
213  ///////////////////////////////////////////////////////////////////////////////////////////////
214  // Least Squares loss function with boosted decision tree functionality
215  ///////////////////////////////////////////////////////////////////////////////////////////////
216 
217  // The bdt loss function implements the LossFunctionBDT interface and inherits the LeastSquaresLossFunction
218  // functionality.
220 
221  public:
224 
225  // The LossFunctionBDT methods
226  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
227  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
229  Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
230  };
231 
232  ///////////////////////////////////////////////////////////////////////////////////////////////
233  // Absolute Deviation loss function for regression error calculations
234  ///////////////////////////////////////////////////////////////////////////////////////////////
235 
237 
238  public:
241 
242  // The LossFunction methods
244  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
245  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);
246 
247  // We go ahead and implement the simple ones
248  TString Name(){ return TString("AbsoluteDeviation"); };
249  Int_t Id(){ return 2; } ;
250  };
251 
252  ///////////////////////////////////////////////////////////////////////////////////////////////
253  // Absolute Deviation loss function with boosted decision tree functionality
254  ///////////////////////////////////////////////////////////////////////////////////////////////
255 
256  // The bdt loss function implements the LossFunctionBDT interface and inherits the AbsoluteDeviationLossFunction
257  // functionality.
259 
260  public:
263 
264  // The LossFunctionBDT methods
265  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
266  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
268  Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
269  };
270 }
271 
272 #endif
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
absolute deviation, determine the mean loss for a collection of events
virtual Double_t Fit(std::vector< LossFunctionEventInfo > &evs)=0
Huber Loss Function.
Definition: LossFunction.h:141
virtual void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)=0
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
absolute deviation BDT, set the targets for a collection of events
Absolute Deviation BDT Loss Function.
Definition: LossFunction.h:258
Double_t CalculateLoss(LossFunctionEventInfo &e)
absolute deviation, determine the loss for a single event
Basic string class.
Definition: TString.h:129
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
absolute deviation BDT, initialize the targets and prepare for the regression
int Int_t
Definition: RtypesCore.h:41
virtual Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)=0
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
huber BDT, set the targets for a collection of events
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
absolute deviation BDT, determine the fit value for the terminal node based upon the events in the te...
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
least squares BDT, set the targets for a collection of events
virtual Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)=0
Huber BDT Loss Function.
Definition: LossFunction.h:176
virtual Double_t CalculateLoss(LossFunctionEventInfo &e)=0
Least Squares Loss Function.
Definition: LossFunction.h:197
Double_t CalculateLoss(LossFunctionEventInfo &e)
least squares, determine the loss for a single event
Absolute Deviation Loss Function.
Definition: LossFunction.h:236
Double_t Target(LossFunctionEventInfo &e)
absolute deviation BDT, set the target for a single event
Double_t Target(LossFunctionEventInfo &e)
huber BDT, set the target for a single event
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
huber BDT, initialize the targets and prepare for the regression
Least Squares BDT Loss Function.
Definition: LossFunction.h:219
virtual Int_t Id()=0
virtual TString Name()=0
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
huber BDT, determine the fit value for the terminal node based upon the events in the terminal node ...
Double_t CalculateSumOfWeights(std::vector< LossFunctionEventInfo > &evs)
huber, calculate the sum of the weights for a collection of events
HuberLossFunction()
huber constructor
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
least squares, determine the net loss for a collection of events
Double_t CalculateQuantile(std::vector< LossFunctionEventInfo > &evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs)
huber, determine the quantile for a given input
HuberLossFunctionBDT(Double_t quantile)
Definition: LossFunction.h:180
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
least squares BDT, initialize the targets and prepare for the regression
virtual ~LossFunction()
Definition: LossFunction.h:78
double Double_t
Definition: RtypesCore.h:55
void Init(std::vector< LossFunctionEventInfo > &evs)
figure out the residual that determines the separation between the "core" and the "tails" of the resi...
void SetTransitionPoint(std::vector< LossFunctionEventInfo > &evs)
huber, determine the transition point using the values for fQuantile and fSumOfWeights which presumab...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
huber, determine the net loss for a collection of events
LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_)
Definition: LossFunction.h:54
~HuberLossFunction()
huber destructor
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
huber, determine the mean loss for a collection of events
void SetSumOfWeights(std::vector< LossFunctionEventInfo > &evs)
huber, set the sum of weights given a collection of events
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
least squares, determine the mean loss for a collection of events
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
huber BDT, determine the fit value for the terminal node based upon the events in the terminal node ...
virtual Double_t Target(LossFunctionEventInfo &e)=0
virtual void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)=0
Double_t CalculateLoss(LossFunctionEventInfo &e)
huber, determine the loss for a single event
virtual ~LossFunctionBDT()
Definition: LossFunction.h:128
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
absolute deviation, determine the net loss for a collection of events
Double_t Target(LossFunctionEventInfo &e)
least squares BDT, set the target for a single event