Logo ROOT   6.10/00
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
MethodBoost.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBoost *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Boosting of a single classifier *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 /*! \class TMVA::MethodBoost
33 \ingroup TMVA
34 
35 Class for boosting a TMVA method
36 
37 This class is meant to boost a single classifier. Boosting means
38 training the classifier a few times. Every time the weights of the
39 events are modified according to how well the classifier performed
40 on the test sample.
41 
42 */
43 
44 #include "TMVA/MethodBoost.h"
45 
46 #include "TMVA/ClassifierFactory.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/DataSet.h"
50 #include "TMVA/DataSetInfo.h"
51 #include "TMVA/IMethod.h"
52 #include "TMVA/MethodBase.h"
53 #include "TMVA/MethodCategory.h"
55 #include "TMVA/MethodDT.h"
56 #include "TMVA/MethodFisher.h"
57 #include "TMVA/PDF.h"
58 #include "TMVA/Results.h"
59 #include "TMVA/Timer.h"
60 #include "TMVA/Tools.h"
61 #include "TMVA/Types.h"
62 
63 #include "TMVA/SeparationBase.h"
65 #include "TMVA/GiniIndex.h"
66 #include "TMVA/CrossEntropy.h"
69 
70 #include "Riostream.h"
71 #include "TRandom3.h"
72 #include "TFile.h"
73 #include "TMath.h"
74 #include "TObjString.h"
75 #include "TH1F.h"
76 #include "TH2F.h"
77 #include "TGraph.h"
78 #include "TSpline.h"
79 #include "TDirectory.h"
80 #include "TTree.h"
81 
82 #include <algorithm>
83 #include <iomanip>
84 #include <vector>
85 #include <cmath>
86 
87 
88 REGISTER_METHOD(Boost)
89 
90 ClassImp(TMVA::MethodBoost)
91 
92 ////////////////////////////////////////////////////////////////////////////////
93 
94  TMVA::MethodBoost::MethodBoost( const TString& jobName,
95  const TString& methodTitle,
96  DataSetInfo& theData,
97  const TString& theOption ) :
98  TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
99  , fBoostNum(0)
100  , fDetailedMonitoring(kFALSE)
101  , fAdaBoostBeta(0)
102  , fRandomSeed(0)
103  , fBaggedSampleFraction(0)
104  , fBoostedMethodTitle(methodTitle)
105  , fBoostedMethodOptions(theOption)
106  , fMonitorBoostedMethod(kFALSE)
107  , fMonitorTree(0)
108  , fBoostWeight(0)
109  , fMethodError(0)
110  , fROC_training(0.0)
111  , fOverlap_integral(0.0)
112  , fMVAvalues(0)
113 {
114  fMVAvalues = new std::vector<Float_t>;
115  fDataSetManager = NULL;
116  fHistoricBoolOption = kFALSE;
117 }
118 
119 ////////////////////////////////////////////////////////////////////////////////
120 
122  const TString& theWeightFile)
123  : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
124  , fBoostNum(0)
125  , fDetailedMonitoring(kFALSE)
126  , fAdaBoostBeta(0)
127  , fRandomSeed(0)
128  , fBaggedSampleFraction(0)
129  , fBoostedMethodTitle("")
130  , fBoostedMethodOptions("")
131  , fMonitorBoostedMethod(kFALSE)
132  , fMonitorTree(0)
133  , fBoostWeight(0)
134  , fMethodError(0)
135  , fROC_training(0.0)
136  , fOverlap_integral(0.0)
137  , fMVAvalues(0)
138 {
139  fMVAvalues = new std::vector<Float_t>;
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// destructor
146 
148 {
149  fMethodWeight.clear();
150 
151  // the histogram themselves are deleted when the file is closed
152 
153  fTrainSigMVAHist.clear();
154  fTrainBgdMVAHist.clear();
155  fBTrainSigMVAHist.clear();
156  fBTrainBgdMVAHist.clear();
157  fTestSigMVAHist.clear();
158  fTestBgdMVAHist.clear();
159 
160  if (fMVAvalues) {
161  delete fMVAvalues;
162  fMVAvalues = 0;
163  }
164 }
165 
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 /// Boost can handle classification with 2 classes and regression with one regression-target
169 
171 {
172  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
173  // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
174  return kFALSE;
175 }
176 
177 
178 ////////////////////////////////////////////////////////////////////////////////
179 
181 {
182  DeclareOptionRef( fBoostNum = 1, "Boost_Num",
183  "Number of times the classifier is boosted" );
184 
185  DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
186  "Write monitoring histograms for each boosted classifier" );
187 
188  DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
189  "Produce histograms for detailed boost monitoring" );
190 
191  DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
192  AddPreDefVal(TString("RealAdaBoost"));
193  AddPreDefVal(TString("AdaBoost"));
194  AddPreDefVal(TString("Bagging"));
195 
196  DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
197 
198  DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
199  "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
200 
201  DeclareOptionRef( fTransformString = "step", "Boost_Transform",
202  "Type of transform applied to every boosted method linear, log, step" );
203  AddPreDefVal(TString("step"));
204  AddPreDefVal(TString("linear"));
205  AddPreDefVal(TString("log"));
206  AddPreDefVal(TString("gauss"));
207 
208  DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
209  "Seed for random number generator used for bagging" );
210 
211  TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);
212 }
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// options that are used ONLY for the READER to ensure backward compatibility
216 /// they are hence without any effect (the reader is only reading the training
217 /// options that HAD been used at the training of the .xml weight file at hand
218 
220 {
221 
223 
224  DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
225  "How to set the final weight of the boosted classifiers" );
226  AddPreDefVal(TString("ByError"));
227  AddPreDefVal(TString("Average"));
228  AddPreDefVal(TString("ByROC"));
229  AddPreDefVal(TString("ByOverlap"));
230  AddPreDefVal(TString("LastMethod"));
231 
232  DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
233  "Type of transform applied to every boosted method linear, log, step" );
234  AddPreDefVal(TString("step"));
235  AddPreDefVal(TString("linear"));
236  AddPreDefVal(TString("log"));
237  AddPreDefVal(TString("gauss"));
238 
239  // this option here
240  //DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
241  // still exists, but these two possible values
242  AddPreDefVal(TString("HighEdgeGauss"));
243  AddPreDefVal(TString("HighEdgeCoPara"));
244  // have been deleted .. hope that works :)
245 
246  DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
247  "Recalculate the classifier MVA Signallike cut at every boost iteration" );
248 
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
252 /// just registering the string from which the boosted classifier will be created
253 
254 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
255 {
256  fBoostedMethodName = Types::Instance().GetMethodName( theMethod );
257  fBoostedMethodTitle = methodTitle;
258  fBoostedMethodOptions = theOption;
259  TString opts=theOption;
260  opts.ToLower();
261  // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;
262 
263  return kTRUE;
264 }
265 
266 ////////////////////////////////////////////////////////////////////////////////
267 
269 {
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// initialisation routine
274 
276 {
277 
278  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
279 
280  results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
281  results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
282  results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
283  if (fDetailedMonitoring){
284  results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
285  results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
286  results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
287  results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
288  results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
289  }
290 
291 
292  results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
293  results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
294  results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
295  results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
296  results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
297  results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
298  if (fDetailedMonitoring){
299  results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
300  results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
301  results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
302  results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
303  results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
304  results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
305  results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
306  results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
307  results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
308  results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
309  }
310 
311  results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
312  results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
313  results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");
314 
315  results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
316  results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
317  results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");
318 
319 
320 
321  fMonitorTree= new TTree("MonitorBoost","Boost variables");
322  fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
323  fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
324  fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
325  fMonitorBoostedMethod = kTRUE;
326 
327 }
328 
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 
333 {
334  Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
335  Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
336  Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
337  Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
338  Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
339  Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
340  Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
341  Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
342  Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
343  Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
344  Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
345  Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
346  Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
347  if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
348  Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
349  if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
350  Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
351 
352 }
353 ////////////////////////////////////////////////////////////////////////////////
354 
356 {
357  TDirectory* methodDir( 0 );
358  TString dirName,dirTitle;
359  Int_t StopCounter=0;
360  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
361 
362 
363  InitHistos();
364 
365  if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
366  Data()->SetCurrentType(Types::kTraining);
367 
368  if (fMethods.size() > 0) fMethods.clear();
369  fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
370 
371  Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
372  Timer timer( fBoostNum, GetName() );
373 
374  ResetBoostWeights();
375 
376  // clean boosted method options
377  CleanBoostOptions();
378 
379 
380  // remove transformations for individual boosting steps
381  // the transformation of the main method will be rerouted to each of the boost steps
382  Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
383  if (varTrafoStart >0) {
384  Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
385  if (varTrafoEnd<varTrafoStart)
386  varTrafoEnd=fBoostedMethodOptions.Length();
387  fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
388  }
389 
390  //
391  // training and boosting the classifiers
392  for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
393  // the first classifier shows the option string output, the rest not
394  if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();
395 
396  IMethod* method = ClassifierFactory::Instance().Create(std::string(fBoostedMethodName),
397  GetJobName(),
398  Form("%s_B%04i", fBoostedMethodTitle.Data(),fCurrentMethodIdx),
399  DataInfo(),
400  fBoostedMethodOptions);
402 
403  // suppressing the rest of the classifier output the right way
404  fCurrentMethod = (dynamic_cast<MethodBase*>(method));
405 
406  if (fCurrentMethod==0) {
407  Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
408  return; // hope that makes coverity happy (as if fears I might use the pointer later on, not knowing that FATAL exits
409  }
410 
411  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
412  if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
413  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
414  if (!methCat) // DSMTEST
415  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
416  methCat->fDataSetManager = fDataSetManager; // DSMTEST
417  } // DSMTEST
418 
419  fCurrentMethod->SetMsgType(kWARNING);
420  fCurrentMethod->SetupMethod();
421  fCurrentMethod->ParseOptions();
422  // put SetAnalysisType here for the needs of MLP
423  fCurrentMethod->SetAnalysisType( GetAnalysisType() );
424  fCurrentMethod->ProcessSetup();
425  fCurrentMethod->CheckSetup();
426 
427 
428  // reroute transformationhandler
429  fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
430 
431 
432  // creating the directory of the classifier
433  if(!IsSilentFile())
434  {
435  if (fMonitorBoostedMethod) {
436  methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
437  if (methodDir==0) {
438  methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
439  }
440  fCurrentMethod->SetMethodDir(methodDir);
441  fCurrentMethod->BaseDir()->cd();
442  }
443  }
444 
445  // training
446  TMVA::MethodCompositeBase::fMethods.push_back(method);
447  timer.DrawProgressBar( fCurrentMethodIdx );
448  if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin,fCurrentMethodIdx);
449  MonitorBoost(Types::kBeforeTraining,fCurrentMethodIdx);
450  TMVA::MsgLogger::InhibitOutput(); //suppressing Logger outside the method
451  if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
452  SingleTrain();
454  if(!IsSilentFile())fCurrentMethod->WriteMonitoringHistosToFile();
455 
456  // calculate MVA values of current method for all events in training sample
457  // (used later on to get 'misclassified events' etc for the boosting
458  CalcMVAValues();
459 
460  if(!IsSilentFile()) if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
461 
462  // get ROC integral and overlap integral for single method on
463  // training sample if fMethodWeightType == "ByROC" or the user
464  // wants detailed monitoring
465 
466  // boosting (reweight training sample)
467  MonitorBoost(Types::kBeforeBoosting,fCurrentMethodIdx);
468  SingleBoost(fCurrentMethod);
469 
470  MonitorBoost(Types::kAfterBoosting,fCurrentMethodIdx);
471  results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
472  results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);
473 
474  if (fDetailedMonitoring) {
475  fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
476  results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
477  results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
478  results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
479  results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
480  results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
481  }
482 
483 
484 
485  fMonitorTree->Fill();
486 
487  // stop boosting if needed when error has reached 0.5
488  // thought of counting a few steps, but it doesn't seem to be necessary
489  Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
490  if (fMethodError > 0.49999) StopCounter++;
491  if (StopCounter > 0 && fBoostType != "Bagging") {
492  timer.DrawProgressBar( fBoostNum );
493  fBoostNum = fCurrentMethodIdx+1;
494  Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
495  if (fBoostNum < 5)
496  Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
497  break;
498  }
499  }
500 
501  //as MethodBoost acts not on a private event sample (like MethodBDT does), we need to remember not
502  // to leave "boosted" events to the next classifier in the factory
503 
504  ResetBoostWeights();
505 
506  Timer* timer1= new Timer( fBoostNum, GetName() );
507  // normalizing the weights of the classifiers
508  for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
509  // performing post-boosting actions
510 
511  timer1->DrawProgressBar( fCurrentMethodIdx );
512 
513  if (fCurrentMethodIdx==fBoostNum) {
514  Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
515  << " " << Endl;
516  }
517 
518  TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
519  if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
520 
521  }
522 
523  // Ensure that in case of only 1 boost the method weight equals
524  // 1.0. This avoids unexpected behaviour in case of very bad
525  // classifiers which have fBoostWeight=1 or fMethodError=0.5,
526  // because their weight would be set to zero. This behaviour is
527  // not ok if one boosts just one time.
528  if (fMethods.size()==1) fMethodWeight[0] = 1.0;
529 
530  MonitorBoost(Types::kBoostProcEnd);
531 
532  delete timer1;
533 }
534 
535 ////////////////////////////////////////////////////////////////////////////////
536 
538 {
539  fBoostedMethodOptions=GetOptions();
540 }
541 
542 ////////////////////////////////////////////////////////////////////////////////
543 
545 {
546  if (fBoostNum <=0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
547  // calculating histograms boundaries and creating histograms..
548  // nrms = number of rms around the average to use for outline (of the 0 classifier)
549  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
550  Int_t signalClass = 0;
551  if (DataInfo().GetClassInfo("Signal") != 0) {
552  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
553  }
554  gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
555  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
556 
558  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
559  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
560 
561  // creating all the histograms
562  for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
563  fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
564  fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
565  fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
566  fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
567  fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
568  fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
569  }
570 }
571 
572 ////////////////////////////////////////////////////////////////////////////////
573 /// resetting back the boosted weights of the events to 1
574 
576 {
577  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
578  const Event *ev = Data()->GetEvent(ievt);
579  ev->SetBoostWeight( 1.0 );
580  }
581 }
582 
583 ////////////////////////////////////////////////////////////////////////////////
584 
586 {
587  TDirectory* dir=0;
588  if (fMonitorBoostedMethod) {
589  for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {
590 
591  //writing the histograms in the specific classifier's directory
592  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
593  if (!m) continue;
594  dir = m->BaseDir();
595  dir->cd();
596  fTrainSigMVAHist[imtd]->SetDirectory(dir);
597  fTrainSigMVAHist[imtd]->Write();
598  fTrainBgdMVAHist[imtd]->SetDirectory(dir);
599  fTrainBgdMVAHist[imtd]->Write();
600  fBTrainSigMVAHist[imtd]->SetDirectory(dir);
601  fBTrainSigMVAHist[imtd]->Write();
602  fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
603  fBTrainBgdMVAHist[imtd]->Write();
604  }
605  }
606 
607  // going back to the original folder
608  BaseDir()->cd();
609 
610  fMonitorTree->Write();
611 }
612 
613 ////////////////////////////////////////////////////////////////////////////////
614 
616 {
618  if (fMonitorBoostedMethod) {
619  UInt_t nloop = fTestSigMVAHist.size();
620  if (fMethods.size()<nloop) nloop = fMethods.size();
621  //running over all the events and populating the test MVA histograms
622  Data()->SetCurrentType(Types::kTesting);
623  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
624  const Event* ev = GetEvent(ievt);
625  Float_t w = ev->GetWeight();
626  if (DataInfo().IsSignal(ev)) {
627  for (UInt_t imtd=0; imtd<nloop; imtd++) {
628  fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
629  }
630  }
631  else {
632  for (UInt_t imtd=0; imtd<nloop; imtd++) {
633  fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
634  }
635  }
636  }
637  Data()->SetCurrentType(Types::kTraining);
638  }
639 }
640 
641 ////////////////////////////////////////////////////////////////////////////////
642 
644 {
646  if (treetype==Types::kTraining) return;
647  UInt_t nloop = fTestSigMVAHist.size();
648  if (fMethods.size()<nloop) nloop = fMethods.size();
649  if (fMonitorBoostedMethod) {
650  TDirectory* dir=0;
651  for (UInt_t imtd=0;imtd<nloop;imtd++) {
652  //writing the histograms in the specific classifier's directory
653  MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
654  if (!mva) continue;
655  dir = mva->BaseDir();
656  if (dir==0) continue;
657  dir->cd();
658  fTestSigMVAHist[imtd]->SetDirectory(dir);
659  fTestSigMVAHist[imtd]->Write();
660  fTestBgdMVAHist[imtd]->SetDirectory(dir);
661  fTestBgdMVAHist[imtd]->Write();
662  }
663  }
664 }
665 
666 ////////////////////////////////////////////////////////////////////////////////
667 /// process user options
668 
670 {
671 }
672 
673 ////////////////////////////////////////////////////////////////////////////////
674 /// initialization
675 
677 {
678  Data()->SetCurrentType(Types::kTraining);
679  MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
680  if (meth){
681  meth->SetSilentFile(IsSilentFile());
682  if(IsModelPersistence()){
683  TString _fFileDir= DataInfo().GetName();
684  _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
685  meth->SetWeightFileDir(_fFileDir);
686  }
687  meth->SetModelPersistence(IsModelPersistence());
688  meth->TrainMethod();
689  }
690 }
691 
692 ////////////////////////////////////////////////////////////////////////////////
693 /// find the CUT on the individual MVA that defines an event as
694 /// correct or misclassified (to be used in the boosting process)
695 
697 {
698  if (!method || method->GetMethodType() == Types::kDT ){ return;}
699 
700  // creating a fine histograms containing the error rate
701  const Int_t nBins=10001;
702  Double_t minMVA=150000;
703  Double_t maxMVA=-150000;
704  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
705  GetEvent(ievt);
706  Double_t val=method->GetMvaValue();
707  //Helge .. I think one could very well use fMVAValues for that ... -->to do
708  if (val>maxMVA) maxMVA=val;
709  if (val<minMVA) minMVA=val;
710  }
711  maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
712 
713  Double_t sum = 0.;
714 
715  TH1D *mvaS = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
716  TH1D *mvaB = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
717  TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
718  TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
719 
720 
721  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
722  if (fDetailedMonitoring){
723  results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
724  results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
725  results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
726  results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
727  }
728 
729  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
730 
731  Double_t weight = GetEvent(ievt)->GetWeight();
732  Double_t mvaVal=method->GetMvaValue();
733  sum +=weight;
734  if (DataInfo().IsSignal(GetEvent(ievt))){
735  mvaS->Fill(mvaVal,weight);
736  }else {
737  mvaB->Fill(mvaVal,weight);
738  }
739  }
740  SeparationBase *sepGain;
741 
742 
743  // Boosting should use Misclassification not Gini Index (changed, Helge 31.5.2013)
744  // WARNING! It works with Misclassification only if you fix the signal to
745  // background at every step. Strangely enough, there are better results
746  // ( as seen with BDT ) if you use Gini Index, and accept that sometimes no
747  // sensible cut is found - i.e. the cut is then outside the MVA value range,
748  // all events are classified as background and then according to the Boost
749  // algorithm something is renormed 'automatically' ... so that in the next
750  // step again the result is something sensible.
751  // Strange ... that THIS is supposed to be right?
752 
753  // SeparationBase *sepGain2 = new MisClassificationError();
754  //sepGain = new MisClassificationError();
755  sepGain = new GiniIndex();
756  //sepGain = new CrossEntropy();
757 
758  Double_t sTot = mvaS->GetSum();
759  Double_t bTot = mvaB->GetSum();
760 
761  mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
762  mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
763  Double_t sSel=0;
764  Double_t bSel=0;
765  Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
766  Double_t mvaCut=mvaSC->GetBinLowEdge(1);
767  Double_t sSelCut=sSel;
768  Double_t bSelCut=bSel;
769  // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;
770 
771  // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
772  Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
773  for (Int_t ibin=1;ibin<=nBins;ibin++){
774  mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
775  mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));
776 
777  sSel=mvaSC->GetBinContent(ibin);
778  bSel=mvaBC->GetBinContent(ibin);
779 
780  // if (ibin==nBins){
781  // std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
782  // }
783 
784  if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
785  // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
786  ){
787  separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
788  // mvaCut=mvaSC->GetBinCenter(ibin);
789  mvaCut=mvaSC->GetBinLowEdge(ibin+1);
790  // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
791  if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
792  else mvaCutOrientation=1;
793  sSelCut=sSel;
794  bSelCut=bSel;
795  // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
796  }
797  /*
798  Double_t ori;
799  if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
800  else ori=1;
801  std::cout << ibin << " mvacut="<<mvaCut
802  << " sTot=" << sTot
803  << " bTot=" << bTot
804  << " sSel=" << sSel
805  << " bSel=" << bSel
806  << " s/b(1)=" << sSel/bSel
807  << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
808  << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
809  << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
810  << " " <<ori
811  << std::endl;
812  */
813 
814  }
815 
816  if (0){
817  double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
818  double leftIndex =sepGain->GetSeparationIndex(sSelCut,bSelCut);
819  double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
820  std::cout
821  << " sTot=" << sTot
822  << " bTot=" << bTot
823  << " s="<<sSelCut
824  << " b="<<bSelCut
825  << " s2="<<(sTot-sSelCut)
826  << " b2="<<(bTot-bSelCut)
827  << " s/b(1)=" << sSelCut/bSelCut
828  << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
829  << " index before cut=" << parentIndex
830  << " after: left=" << leftIndex
831  << " after: right=" << rightIndex
832  << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
833  << " sepGain="<<separationGain
834  << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
835  << " cut=" << mvaCut
836  << " idx="<<fCurrentMethodIdx
837  << " cutOrientation="<<mvaCutOrientation
838  << std::endl;
839  }
840  method->SetSignalReferenceCut(mvaCut);
841  method->SetSignalReferenceCutOrientation(mvaCutOrientation);
842 
843  results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);
844 
845 
846  Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;
847 
848  if(IsSilentFile())
849  {
850  mvaS ->Delete();
851  mvaB ->Delete();
852  mvaSC->Delete();
853  mvaBC->Delete();
854  }
855 }
856 
857 ////////////////////////////////////////////////////////////////////////////////
858 
860 {
861  Double_t returnVal=-1;
862 
863 
864  if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (method,1);
865  else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
866  else if (fBoostType=="Bagging") returnVal = this->Bagging ();
867  else{
868  Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
869  }
870  fMethodWeight.push_back(returnVal);
871  return returnVal;
872 }
873 ////////////////////////////////////////////////////////////////////////////////
874 /// the standard (discrete or real) AdaBoost algorithm
875 
877 {
878  if (!method) {
879  Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
880  return 0;
881  }
882 
883  Float_t w,v; Bool_t sig=kTRUE;
884  Double_t sumAll=0, sumWrong=0;
885  Bool_t* WrongDetection=new Bool_t[GetNEvents()];
886  QuickMVAProbEstimator *MVAProb=NULL;
887 
888  if (discreteAdaBoost) {
889  FindMVACut(method);
890  Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
891  } else {
892  MVAProb=new TMVA::QuickMVAProbEstimator();
893  // the RealAdaBoost does use a simple "yes (signal)" or "no (background)"
894  // answer from your single MVA, but a "signal probability" instead (in the BDT case,
895  // that would be the 'purity' in the leaf node. For some MLP parameter, the MVA output
896  // can also interpreted as a probability, but here I try a general approach to get this
897  // probability from the MVA distributions...
898 
899  for (Long64_t evt=0; evt<GetNEvents(); evt++) {
900  const Event* ev = Data()->GetEvent(evt);
901  MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
902  }
903  }
904 
905 
906  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
907 
908  // finding the wrong events and calculating their total weights
909  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
910  const Event* ev = GetEvent(ievt);
911  sig=DataInfo().IsSignal(ev);
912  v = fMVAvalues->at(ievt);
913  w = ev->GetWeight();
914  sumAll += w;
915  if(!IsSilentFile())
916  {
917  if (fMonitorBoostedMethod) {
918  if (sig) {
919  fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
920  fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
921  }
922  else {
923  fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
924  fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
925  }
926  }
927  }
928 
929  if (discreteAdaBoost){
930  if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
931  WrongDetection[ievt]=kFALSE;
932  }else{
933  WrongDetection[ievt]=kTRUE;
934  sumWrong+=w;
935  }
936  }else{
937  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
938  mvaProb = 2*(mvaProb-0.5);
939  Int_t trueType;
940  if (DataInfo().IsSignal(ev)) trueType = 1;
941  else trueType = -1;
942  sumWrong+= w*trueType*mvaProb;
943  }
944  }
945 
946  fMethodError=sumWrong/sumAll;
947 
948  // calculating the fMethodError and the boostWeight out of it uses the formula
949  // w = ((1-err)/err)^beta
950 
951  Double_t boostWeight=0;
952 
953  if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
954  Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
955  }else{
956 
957  if (discreteAdaBoost)
958  boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
959  else
960  boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
961 
962 
963  // std::cout << "boostweight = " << boostWeight << std::endl;
964 
965  // ADA boosting, rescaling the weight of the wrong events according to the error level
966  // over the entire test sample rescaling all the weights to have the same sum, but without
967  // touching the original weights (changing only the boosted weight of all the events)
968  // first reweight
969 
970  Double_t newSum=0., oldSum=0.;
971 
972 
973  Double_t boostfactor = TMath::Exp(boostWeight);
974 
975 
976  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
977  const Event* ev = Data()->GetEvent(ievt);
978  oldSum += ev->GetWeight();
979  if (discreteAdaBoost){
980  // events are classified as Signal OR background .. right or wrong
981  if (WrongDetection[ievt] && boostWeight != 0) {
982  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
983  else ev->ScaleBoostWeight(1./boostfactor);
984  }
985  // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;
986 
987  }else{
988  // events are classified by their probability of being signal or background
989  // (eventually you should write this one - i.e. re-use the MVA value that were already
990  // calculated and stored.. however ,for the moment ..
991  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
992  mvaProb = 2*(mvaProb-0.5);
993  // mvaProb = (1-mvaProb);
994 
995  Int_t trueType=1;
996  if (DataInfo().IsSignal(ev)) trueType = 1;
997  else trueType = -1;
998 
999  boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
1000  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
1001  else ev->ScaleBoostWeight(1./boostfactor);
1002 
1003  }
1004  newSum += ev->GetWeight();
1005  }
1006 
1007  Double_t normWeight = oldSum/newSum;
1008  // next normalize the weights
1009  Double_t normSig=0, normBkg=0;
1010  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1011  const Event* ev = Data()->GetEvent(ievt);
1012  ev->ScaleBoostWeight(normWeight);
1013  if (ev->GetClass()) normSig+=ev->GetWeight();
1014  else normBkg+=ev->GetWeight();
1015  }
1016 
1017  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1018  results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);
1019 
1020  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1021  const Event* ev = Data()->GetEvent(ievt);
1022 
1023  if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
1024  else ev->ScaleBoostWeight(oldSum/normBkg/2);
1025  }
1026  }
1027 
1028  delete[] WrongDetection;
1029  if (MVAProb) delete MVAProb;
1030 
1031  fBoostWeight = boostWeight; // used ONLY for the monitoring tree
1032 
1033  return boostWeight;
1034 }
1035 
1036 
1037 ////////////////////////////////////////////////////////////////////////////////
1038 /// Bagging or Bootstrap boosting, gives new random poisson weight for every event
1039 
1041 {
1042  TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
1043  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1044  const Event* ev = Data()->GetEvent(ievt);
1045  ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
1046  }
1047  fBoostWeight = 1; // used ONLY for the monitoring tree
1048  return 1.;
1049 }
1050 
1051 
1052 ////////////////////////////////////////////////////////////////////////////////
1053 /// Get help message text
1054 ///
1055 /// typical length of text line:
1056 /// "|--------------------------------------------------------------|"
1057 
1059 {
1060  Log() << Endl;
1061  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1062  Log() << Endl;
1063  Log() << "This method combines several classifier of one species in a "<<Endl;
1064  Log() << "single multivariate quantity via the boost algorithm." << Endl;
1065  Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
1066  Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
1067  Log() << "events that were misclassified in the previous tree a larger " << Endl;
1068  Log() << "weight in the training of the following classifier."<<Endl;
1069  Log() << "Optionally, Bagged boosting can also be applied." << Endl;
1070  Log() << Endl;
1071  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1072  Log() << Endl;
1073  Log() << "The most important parameter in the configuration is the "<<Endl;
1074  Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
1075  Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
1076  Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
1077  Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
1078  Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
1079  Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
1080  Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
1081  Log() <<Endl;
1082  Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
1083  Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
1084  Log() << "due to the boosting." <<Endl;
1085  Log() << "The booking of the boost method is special since it requires"<<Endl;
1086  Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
1087  Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
1088  Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
1089  Log() << "options string. The factory separates the options and initiates"<<Endl;
1090  Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
1091  Log() << "macro \"Boost.C\"" <<Endl;
1092 }
1093 
1094 ////////////////////////////////////////////////////////////////////////////////
1095 
1097 {
1098  return 0;
1099 }
1100 
1101 ////////////////////////////////////////////////////////////////////////////////
1102 /// return boosted MVA response
1103 
1105 {
1106  Double_t mvaValue = 0;
1107  Double_t norm = 0;
1108  Double_t epsilon = TMath::Exp(-1.);
1109  //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.);
1110  for (UInt_t i=0;i< fMethods.size(); i++){
1111  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
1112  if (m==0) continue;
1113  Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
1114  Double_t sigcut = m->GetSignalReferenceCut();
1115 
1116  // default is no transform
1117  if (fTransformString == "linear"){
1118 
1119  }
1120  else if (fTransformString == "log"){
1121  if (val < sigcut) val = sigcut;
1122 
1123  val = TMath::Log((val-sigcut)+epsilon);
1124  }
1125  else if (fTransformString == "step" ){
1126  if (m->IsSignalLike(val)) val = 1.;
1127  else val = -1.;
1128  }
1129  else if (fTransformString == "gauss"){
1130  val = TMath::Gaus((val-sigcut),1);
1131  }
1132  else {
1133  Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
1134  }
1135  mvaValue+=val*fMethodWeight[i];
1136  norm +=fMethodWeight[i];
1137  // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
1138  }
1139  mvaValue/=norm;
1140  // cannot determine error
1141  NoErrorCalc(err, errUpper);
1142 
1143  return mvaValue;
1144 }
1145 
1146 ////////////////////////////////////////////////////////////////////////////////
1147 /// Calculate the ROC integral of a single classifier or even the
1148 /// whole boosted classifier. The tree type (training or testing
1149 /// sample) is specified by 'eTT'.
1150 ///
1151 /// If tree type kTraining is set, the original training sample is
1152 /// used to compute the ROC integral (original weights).
1153 ///
1154 /// - singleMethod - if kTRUE, return ROC integral of single (last
1155 /// trained) classifier; if kFALSE, return ROC
1156 /// integral of full classifier
1157 ///
1158 /// - eTT - tree type (Types::kTraining / Types::kTesting)
1159 ///
1160 /// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
1161 /// signal/background MVA distributions
1162 /// is calculated and stored in
1163 /// 'fOverlap_integral'
1164 
1166 {
1167  // set data sample training / testing
1168  Data()->SetCurrentType(eTT);
1169 
1170  MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
1171  // to make CoVerity happy (although, OF COURSE, the last method in the committee
1172  // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
1173  // will never by "zero" ...
1174  if (singleMethod && !method) {
1175  Log() << kFATAL << " What do you do? Your method:"
1176  << fMethods.back()->GetName()
1177  << " seems not to be a propper TMVA method"
1178  << Endl;
1179  std::exit(1);
1180  }
1181  Double_t err = 0.0;
1182 
1183  // temporary renormalize the method weights in case of evaluation
1184  // of full classifier.
1185  // save the old normalization of the methods
1186  std::vector<Double_t> OldMethodWeight(fMethodWeight);
1187  if (!singleMethod) {
1188  // calculate sum of weights of all methods
1189  Double_t AllMethodsWeight = 0;
1190  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1191  AllMethodsWeight += fMethodWeight.at(i);
1192  // normalize the weights of the classifiers
1193  if (AllMethodsWeight != 0.0) {
1194  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1195  fMethodWeight[i] /= AllMethodsWeight;
1196  }
1197  }
1198 
1199  // calculate MVA values
1200  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
1201  std::vector <Float_t>* mvaRes;
1202  if (singleMethod && eTT==Types::kTraining)
1203  mvaRes = fMVAvalues; // values already calculated
1204  else {
1205  mvaRes = new std::vector <Float_t>(GetNEvents());
1206  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1207  GetEvent(ievt);
1208  (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
1209  }
1210  }
1211 
1212  // restore the method weights
1213  if (!singleMethod)
1214  fMethodWeight = OldMethodWeight;
1215 
1216  // now create histograms for calculation of the ROC integral
1217  Int_t signalClass = 0;
1218  if (DataInfo().GetClassInfo("Signal") != 0) {
1219  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1220  }
1221  gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
1222  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1223 
1225  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
1226  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;
1227 
1228  // calculate ROC integral
1229  TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
1230  TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
1231  TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1232  if (CalcOverlapIntergral) {
1233  mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
1234  mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
1235  }
1236  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1237  const Event* ev = GetEvent(ievt);
1238  Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
1239  if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
1240  else mva_b->Fill( (*mvaRes)[ievt], w );
1241 
1242  if (CalcOverlapIntergral) {
1243  Float_t w_ov = ev->GetWeight();
1244  if (DataInfo().IsSignal(ev))
1245  mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
1246  else
1247  mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1248  }
1249  }
1250  gTools().NormHist( mva_s );
1251  gTools().NormHist( mva_b );
1252  PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
1253  PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );
1254 
1255  // calculate ROC integral from fS, fB
1256  Double_t ROC = MethodBase::GetROCIntegral(fS, fB);
1257 
1258  // calculate overlap integral
1259  if (CalcOverlapIntergral) {
1260  gTools().NormHist( mva_s_overlap );
1261  gTools().NormHist( mva_b_overlap );
1262 
1263  fOverlap_integral = 0.0;
1264  for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
1265  Double_t bc_s = mva_s_overlap->GetBinContent(bin);
1266  Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1267  if (bc_s > 0.0 && bc_b > 0.0)
1268  fOverlap_integral += TMath::Min(bc_s, bc_b);
1269  }
1270 
1271  delete mva_s_overlap;
1272  delete mva_b_overlap;
1273  }
1274 
1275  delete mva_s;
1276  delete mva_b;
1277  delete fS;
1278  delete fB;
1279  if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;
1280 
1281  Data()->SetCurrentType(Types::kTraining);
1282 
1283  return ROC;
1284 }
1285 
1287 {
1288  // Calculate MVA values of current method fMethods.back() on
1289  // training sample
1290 
1291  Data()->SetCurrentType(Types::kTraining);
1292  MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
1293  if (!method) {
1294  Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
1295  return;
1296  }
1297  // calculate MVA values
1298  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1299  GetEvent(ievt);
1300  fMVAvalues->at(ievt) = method->GetMvaValue();
1301  }
1302 
1303  // fill cumulative mva distribution
1304 
1305 
1306 }
1307 
1308 ////////////////////////////////////////////////////////////////////////////////
1309 /// fill various monitoring histograms from information of the individual classifiers that
1310 /// have been boosted.
1311 /// of course.... this depends very much on the individual classifiers, and so far, only for
1312 /// Decision Trees, this monitoring is actually implemented
1313 
1315 {
1316  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1317 
1318  if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
1319  TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
1320  if (currentDT){
1321  if (stage == Types::kBoostProcBegin){
1322  results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
1323  results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
1324  }
1325 
1326  if (stage == Types::kBeforeTraining){
1327  }
1328  else if (stage == Types::kBeforeBoosting){
1329  results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
1330  results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
1331  }
1332  else if (stage == Types::kAfterBoosting){
1333 
1334  }
1335  else if (stage != Types::kBoostProcEnd){
1336  Log() << kINFO << "<Train> average number of nodes before/after pruning : "
1337  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
1338  << results->GetHist("NodesAfterPruning")->GetMean()
1339  << Endl;
1340  }
1341  }
1342 
1343  }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
1344  if (stage == Types::kAfterBoosting){
1346  }
1347  }else{
1348  if (methodIndex < 3){
1349  Log() << kDEBUG << "No detailed boost monitoring for "
1350  << GetCurrentMethod(methodIndex)->GetMethodName()
1351  << " yet available " << Endl;
1352  }
1353  }
1354 
1355  //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'
1356 
1357  if (stage == Types::kBeforeBoosting){
1358  // if you want to display the weighted events for 2D case at each boost step:
1359  if (fDetailedMonitoring){
1360  // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
1361  if (DataInfo().GetNVariables() == 2) {
1362  results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1363  results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
1364  results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1365  results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);
1366 
1367  Data()->SetCurrentType(Types::kTraining);
1368  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1369  const Event* ev = GetEvent(ievt);
1370  Float_t w = ev->GetWeight();
1371  Float_t v0= ev->GetValue(0);
1372  Float_t v1= ev->GetValue(1);
1373  // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
1374  TH2* h;
1375  if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
1376  else h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
1377  if (h) h->Fill(v0,v1,w);
1378  }
1379  }
1380  }
1381  }
1382 
1383  return;
1384 }
1385 
1386 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:366
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3126
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
static long int sum(long int i)
Definition: Factory.cxx:2162
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
float xmin
Definition: THbookFile.cxx:93
Random number generator class based on M.
Definition: TRandom3.h:27
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:186
THist< 1, int, THistStatContent > TH1I
Definition: THist.hxx:313
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:414
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:130
Singleton class for Global types used by TMVA.
Definition: Types.h:73
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4639
long long Long64_t
Definition: RtypesCore.h:69
void SingleTrain()
initialization
Stat_t GetSum() const
Definition: TArrayD.h:46
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t Log(Double_t x)
Definition: TMath.h:649
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
float Float_t
Definition: RtypesCore.h:53
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
static Types & Instance()
get the single instance of &quot;Types&quot; if it exists already, or create it (Singleton)
Definition: Types.cxx:70
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:311
Config & gConfig()
TH1 * h
Definition: legend2.C:5
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:125
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:94
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:349
void SetBoostWeight(Double_t w) const
Definition: Event.h:106
Basic string class.
Definition: TString.h:129
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:551
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1099
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory &quot;a&quot; or a hierarchy of sub-directories &quot;a/b/c/...&quot;.
Definition: TDirectory.cxx:958
bool Bool_t
Definition: RtypesCore.h:59
const Ranking * CreateRanking()
virtual Int_t GetNbinsX() const
Definition: TH1.h:277
#define NULL
Definition: RtypesCore.h:88
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:362
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:847
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:139
static void InhibitOutput()
Definition: MsgLogger.cxx:74
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
TStopwatch timer
Definition: pirndm.C:37
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8264
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
Virtual base class for combining several TMVA method.
virtual ~MethodBoost(void)
destructor
TString fWeightFileDir
Definition: Config.h:96
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:107
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
Int_t GetNNodes()
Definition: MethodDT.h:97
std::vector< std::vector< double > > Data
IONames & GetIONames()
Definition: Config.h:74
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:394
void GetHelpMessage() const
Get help message text.
Types::EMVA GetMethodType() const
Definition: MethodBase.h:317
Class that contains all the data information.
Definition: DataSetInfo.h:60
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetOriginalWeight() const
Definition: Event.h:79
Class for boosting a TMVA method.
Definition: MethodBoost.h:56
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:176
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
RooCmdArg Timer(Bool_t flag=kTRUE)
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6763
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:733
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
SVector< double, 2 > v
Definition: Dict.h:5
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8325
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
TAxis * GetYaxis()
Definition: TH1.h:301
float xmax
Definition: THbookFile.cxx:93
Tools & gTools()
An interface to calculate the &quot;SeparationGain&quot; for different separation criteria used in various trai...
tomato 1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:594
REAL epsilon
Definition: triangle.c:617
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
const Bool_t kFALSE
Definition: RtypesCore.h:92
Class for categorizing the phase space.
int Ssiz_t
Definition: RtypesCore.h:63
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Double_t Exp(Double_t x)
Definition: TMath.h:622
#define ClassImp(name)
Definition: Rtypes.h:336
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
int type
Definition: TGX11.cxx:120
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
The TH1 histogram class.
Definition: TH1.h:56
UInt_t GetClass() const
Definition: Event.h:81
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
virtual void TestClassification()
initialization
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------— ...
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Double_t GetMVAProbAt(Double_t value)
DataSetManager * fDataSetManager
Definition: MethodBoost.h:188
virtual Bool_t cd(const char *path=0)
Change current directory to &quot;this&quot; directory.
Definition: TDirectory.cxx:435
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
Class that is the base-class for a vector of result.
Definition: Results.h:57
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
void SetWeightFileDir(TString fileDir)
set directory of weight file
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:193
void InitHistos()
initialisation routine
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:338
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:310
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&amp;W caution:
Definition: Timer.cxx:190
A TTree object has a header with a name and a title.
Definition: TTree.h:78
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:344
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
static void EnableOutput()
Definition: MsgLogger.cxx:75
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:317
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
const Bool_t kTRUE
Definition: RtypesCore.h:91
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
virtual void TestClassification()
initialization
TAxis * GetXaxis()
Definition: TH1.h:300
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:348