100 , fDetailedMonitoring(
kFALSE)
103 , fBaggedSampleFraction(0)
104 , fBoostedMethodTitle(methodTitle)
105 , fBoostedMethodOptions(theOption)
106 , fMonitorBoostedMethod(kFALSE)
111 , fOverlap_integral(0.0)
114 fMVAvalues =
new std::vector<Float_t>;
115 fDataSetManager =
NULL;
116 fHistoricBoolOption =
kFALSE;
125 , fDetailedMonitoring(
kFALSE)
128 , fBaggedSampleFraction(0)
129 , fBoostedMethodTitle(
"")
130 , fBoostedMethodOptions(
"")
131 , fMonitorBoostedMethod(
kFALSE)
136 , fOverlap_integral(0.0)
149 fMethodWeight.clear();
153 fTrainSigMVAHist.clear();
154 fTrainBgdMVAHist.clear();
155 fBTrainSigMVAHist.clear();
156 fBTrainBgdMVAHist.clear();
157 fTestSigMVAHist.clear();
158 fTestBgdMVAHist.clear();
182 DeclareOptionRef( fBoostNum = 1,
"Boost_Num",
183 "Number of times the classifier is boosted" );
185 DeclareOptionRef( fMonitorBoostedMethod =
kTRUE,
"Boost_MonitorMethod",
186 "Write monitoring histograms for each boosted classifier" );
188 DeclareOptionRef( fDetailedMonitoring =
kFALSE,
"Boost_DetailedMonitoring",
189 "Produce histograms for detailed boost monitoring" );
191 DeclareOptionRef( fBoostType =
"AdaBoost",
"Boost_Type",
"Boosting type for the classifiers" );
192 AddPreDefVal(
TString(
"RealAdaBoost"));
193 AddPreDefVal(
TString(
"AdaBoost"));
194 AddPreDefVal(
TString(
"Bagging"));
196 DeclareOptionRef(fBaggedSampleFraction=.6,
"Boost_BaggedSampleFraction",
"Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
198 DeclareOptionRef( fAdaBoostBeta = 1.0,
"Boost_AdaBoostBeta",
199 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
201 DeclareOptionRef( fTransformString =
"step",
"Boost_Transform",
202 "Type of transform applied to every boosted method linear, log, step" );
204 AddPreDefVal(
TString(
"linear"));
206 AddPreDefVal(
TString(
"gauss"));
208 DeclareOptionRef( fRandomSeed = 0,
"Boost_RandomSeed",
209 "Seed for random number generator used for bagging" );
224 DeclareOptionRef( fHistoricOption =
"ByError",
"Boost_MethodWeightType",
225 "How to set the final weight of the boosted classifiers" );
226 AddPreDefVal(
TString(
"ByError"));
227 AddPreDefVal(
TString(
"Average"));
228 AddPreDefVal(
TString(
"ByROC"));
229 AddPreDefVal(
TString(
"ByOverlap"));
230 AddPreDefVal(
TString(
"LastMethod"));
232 DeclareOptionRef( fHistoricOption =
"step",
"Boost_Transform",
233 "Type of transform applied to every boosted method linear, log, step" );
235 AddPreDefVal(
TString(
"linear"));
237 AddPreDefVal(
TString(
"gauss"));
242 AddPreDefVal(
TString(
"HighEdgeGauss"));
243 AddPreDefVal(
TString(
"HighEdgeCoPara"));
246 DeclareOptionRef( fHistoricBoolOption,
"Boost_RecalculateMVACut",
247 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
257 fBoostedMethodTitle = methodTitle;
258 fBoostedMethodOptions = theOption;
280 results->
Store(
new TH1F(
"MethodWeight",
"Normalized Classifier Weight",fBoostNum,0,fBoostNum),
"ClassifierWeight");
281 results->
Store(
new TH1F(
"BoostWeight",
"Boost Weight",fBoostNum,0,fBoostNum),
"BoostWeight");
282 results->
Store(
new TH1F(
"ErrFraction",
"Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),
"ErrorFraction");
283 if (fDetailedMonitoring){
284 results->
Store(
new TH1F(
"ROCIntegral_test",
"ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_test");
285 results->
Store(
new TH1F(
"ROCIntegralBoosted_test",
"ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_test");
286 results->
Store(
new TH1F(
"ROCIntegral_train",
"ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegral_train");
287 results->
Store(
new TH1F(
"ROCIntegralBoosted_train",
"ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),
"ROCIntegralBoosted_train");
288 results->
Store(
new TH1F(
"OverlapIntegal_train",
"Overlap integral (training sample)",fBoostNum,0,fBoostNum),
"Overlap");
298 if (fDetailedMonitoring){
311 results->
Store(
new TH1F(
"SoverBtotal",
"S/B in reweighted training sample",fBoostNum,0,fBoostNum),
"SoverBtotal");
315 results->
Store(
new TH1F(
"SeparationGain",
"SeparationGain",fBoostNum,0,fBoostNum),
"SeparationGain");
321 fMonitorTree=
new TTree(
"MonitorBoost",
"Boost variables");
322 fMonitorTree->Branch(
"iMethod",&fCurrentMethodIdx,
"iMethod/I");
323 fMonitorTree->Branch(
"boostWeight",&fBoostWeight,
"boostWeight/D");
324 fMonitorTree->Branch(
"errorFraction",&fMethodError,
"errorFraction/D");
325 fMonitorBoostedMethod =
kTRUE;
334 Log() << kDEBUG <<
"CheckSetup: fBoostType="<<fBoostType <<
Endl;
335 Log() << kDEBUG <<
"CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<
Endl;
336 Log() << kDEBUG <<
"CheckSetup: fBoostWeight="<<fBoostWeight<<
Endl;
337 Log() << kDEBUG <<
"CheckSetup: fMethodError="<<fMethodError<<
Endl;
338 Log() << kDEBUG <<
"CheckSetup: fBoostNum="<<fBoostNum <<
Endl;
339 Log() << kDEBUG <<
"CheckSetup: fRandomSeed=" << fRandomSeed<<
Endl;
340 Log() << kDEBUG <<
"CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<
Endl;
341 Log() << kDEBUG <<
"CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<
Endl;
342 Log() << kDEBUG <<
"CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod?
"true" :
"false") << Endl;
343 Log() << kDEBUG <<
"CheckSetup: MName=" << fBoostedMethodName <<
" Title="<< fBoostedMethodTitle<<
Endl;
344 Log() << kDEBUG <<
"CheckSetup: MOptions="<< fBoostedMethodOptions <<
Endl;
345 Log() << kDEBUG <<
"CheckSetup: fMonitorTree=" << fMonitorTree <<
Endl;
346 Log() << kDEBUG <<
"CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx <<
Endl;
347 if (fMethods.size()>0)
Log() << kDEBUG <<
"CheckSetup: fMethods[0]" <<fMethods[0]<<
Endl;
348 Log() << kDEBUG <<
"CheckSetup: fMethodWeight.size()" << fMethodWeight.size() <<
Endl;
349 if (fMethodWeight.size()>0)
Log() << kDEBUG <<
"CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<
Endl;
350 Log() << kDEBUG <<
"CheckSetup: trying to repair things" <<
Endl;
365 if (
Data()->GetNTrainingEvents()==0)
Log() << kFATAL <<
"<Train> Data() has zero events" <<
Endl;
368 if (fMethods.size() > 0) fMethods.clear();
369 fMVAvalues->resize(
Data()->GetNTrainingEvents(), 0.0);
371 Log() << kINFO <<
"Training "<< fBoostNum <<
" " << fBoostedMethodName <<
" with title " << fBoostedMethodTitle <<
" Classifiers ... patience please" <<
Endl;
382 Ssiz_t varTrafoStart=fBoostedMethodOptions.Index(
"~VarTransform=");
383 if (varTrafoStart >0) {
384 Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(
":",varTrafoStart);
385 if (varTrafoEnd<varTrafoStart)
386 varTrafoEnd=fBoostedMethodOptions.Length();
387 fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
392 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
398 Form(
"%s_B%04i", fBoostedMethodTitle.Data(),fCurrentMethodIdx),
400 fBoostedMethodOptions);
404 fCurrentMethod = (
dynamic_cast<MethodBase*
>(method));
406 if (fCurrentMethod==0) {
407 Log() << kFATAL <<
"uups.. guess the booking of the " << fCurrentMethodIdx <<
"-th classifier somehow failed" <<
Endl;
415 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" <<
Endl;
419 fCurrentMethod->SetMsgType(kWARNING);
420 fCurrentMethod->SetupMethod();
421 fCurrentMethod->ParseOptions();
423 fCurrentMethod->SetAnalysisType( GetAnalysisType() );
424 fCurrentMethod->ProcessSetup();
425 fCurrentMethod->CheckSetup();
429 fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
435 if (fMonitorBoostedMethod) {
436 methodDir=GetFile()->
GetDirectory(dirName=
Form(
"%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
438 methodDir=BaseDir()->
mkdir(dirName,dirTitle=
Form(
"Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
440 fCurrentMethod->SetMethodDir(methodDir);
441 fCurrentMethod->BaseDir()->
cd();
451 if (fBoostType==
"Bagging") Bagging();
454 if(!IsSilentFile())fCurrentMethod->WriteMonitoringHistosToFile();
460 if(!IsSilentFile())
if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
468 SingleBoost(fCurrentMethod);
474 if (fDetailedMonitoring) {
485 fMonitorTree->Fill();
489 Log() << kDEBUG <<
"AdaBoost (methodErr) err = " << fMethodError <<
Endl;
490 if (fMethodError > 0.49999) StopCounter++;
491 if (StopCounter > 0 && fBoostType !=
"Bagging") {
493 fBoostNum = fCurrentMethodIdx+1;
494 Log() << kINFO <<
"Error rate has reached 0.5 ("<< fMethodError<<
"), boosting process stopped at #" << fBoostNum <<
" classifier" <<
Endl;
496 Log() << kINFO <<
"The classifier might be too strong to boost with Beta = " << fAdaBoostBeta <<
", try reducing it." <<
Endl;
508 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
513 if (fCurrentMethodIdx==fBoostNum) {
518 TH1F* tmp =
dynamic_cast<TH1F*
>( results->
GetHist(
"ClassifierWeight") );
519 if (tmp) tmp->
SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
528 if (fMethods.size()==1) fMethodWeight[0] = 1.0;
539 fBoostedMethodOptions=GetOptions();
546 if (fBoostNum <=0)
Log() << kFATAL <<
"CreateHistograms called before fBoostNum is initialized" <<
Endl;
550 Int_t signalClass = 0;
551 if (DataInfo().GetClassInfo(
"Signal") != 0) {
552 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
555 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
562 for (
UInt_t imtd=0; imtd<fBoostNum; imtd++) {
563 fTrainSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_S_%04i",imtd),
"MVA_Train_S", fNbins, xmin, xmax ) );
564 fTrainBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Train_B%04i", imtd),
"MVA_Train_B", fNbins, xmin, xmax ) );
565 fBTrainSigMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_S%04i",imtd),
"MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
566 fBTrainBgdMVAHist.push_back(
new TH1F(
Form(
"MVA_BTrain_B%04i",imtd),
"MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
567 fTestSigMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_S%04i", imtd),
"MVA_Test_S", fNbins, xmin, xmax ) );
568 fTestBgdMVAHist .push_back(
new TH1F(
Form(
"MVA_Test_B%04i", imtd),
"MVA_Test_B", fNbins, xmin, xmax ) );
577 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
578 const Event *ev =
Data()->GetEvent(ievt);
588 if (fMonitorBoostedMethod) {
589 for (
UInt_t imtd=0;imtd<fBoostNum;imtd++) {
596 fTrainSigMVAHist[imtd]->SetDirectory(dir);
597 fTrainSigMVAHist[imtd]->Write();
598 fTrainBgdMVAHist[imtd]->SetDirectory(dir);
599 fTrainBgdMVAHist[imtd]->Write();
600 fBTrainSigMVAHist[imtd]->SetDirectory(dir);
601 fBTrainSigMVAHist[imtd]->Write();
602 fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
603 fBTrainBgdMVAHist[imtd]->Write();
610 fMonitorTree->Write();
618 if (fMonitorBoostedMethod) {
619 UInt_t nloop = fTestSigMVAHist.size();
620 if (fMethods.size()<nloop) nloop = fMethods.size();
623 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
624 const Event* ev = GetEvent(ievt);
626 if (DataInfo().IsSignal(ev)) {
627 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
628 fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
632 for (
UInt_t imtd=0; imtd<nloop; imtd++) {
633 fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
647 UInt_t nloop = fTestSigMVAHist.size();
648 if (fMethods.size()<nloop) nloop = fMethods.size();
649 if (fMonitorBoostedMethod) {
651 for (
UInt_t imtd=0;imtd<nloop;imtd++) {
656 if (dir==0)
continue;
658 fTestSigMVAHist[imtd]->SetDirectory(dir);
659 fTestSigMVAHist[imtd]->Write();
660 fTestBgdMVAHist[imtd]->SetDirectory(dir);
661 fTestBgdMVAHist[imtd]->Write();
682 if(IsModelPersistence()){
683 TString _fFileDir= DataInfo().GetName();
701 const Int_t nBins=10001;
704 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
708 if (val>maxMVA) maxMVA=val;
709 if (val<minMVA) minMVA=val;
711 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
715 TH1D *mvaS =
new TH1D(
Form(
"MVAS_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
716 TH1D *mvaB =
new TH1D(
Form(
"MVAB_%d",fCurrentMethodIdx) ,
"",nBins,minMVA,maxMVA);
717 TH1D *mvaSC =
new TH1D(
Form(
"MVASC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
718 TH1D *mvaBC =
new TH1D(
Form(
"MVABC_%d",fCurrentMethodIdx),
"",nBins,minMVA,maxMVA);
722 if (fDetailedMonitoring){
723 results->
Store(mvaS,
Form(
"MVAS_%d",fCurrentMethodIdx));
724 results->
Store(mvaB,
Form(
"MVAB_%d",fCurrentMethodIdx));
725 results->
Store(mvaSC,
Form(
"MVASC_%d",fCurrentMethodIdx));
726 results->
Store(mvaBC,
Form(
"MVABC_%d",fCurrentMethodIdx));
729 for (
Long64_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
731 Double_t weight = GetEvent(ievt)->GetWeight();
734 if (DataInfo().IsSignal(GetEvent(ievt))){
735 mvaS->
Fill(mvaVal,weight);
737 mvaB->
Fill(mvaVal,weight);
773 for (
Int_t ibin=1;ibin<=nBins;ibin++){
784 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
791 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
792 else mvaCutOrientation=1;
825 <<
" s2="<<(sTot-sSelCut)
826 <<
" b2="<<(bTot-bSelCut)
827 <<
" s/b(1)=" << sSelCut/bSelCut
828 <<
" s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
829 <<
" index before cut=" << parentIndex
830 <<
" after: left=" << leftIndex
831 <<
" after: right=" << rightIndex
832 <<
" sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
833 <<
" sepGain="<<separationGain
836 <<
" idx="<<fCurrentMethodIdx
837 <<
" cutOrientation="<<mvaCutOrientation
864 if (fBoostType==
"AdaBoost") returnVal = this->AdaBoost (method,1);
865 else if (fBoostType==
"RealAdaBoost") returnVal = this->AdaBoost (method,0);
866 else if (fBoostType==
"Bagging") returnVal = this->Bagging ();
868 Log() << kFATAL <<
"<Boost> unknown boost option " << fBoostType<<
" called" <<
Endl;
870 fMethodWeight.push_back(returnVal);
879 Log() << kWARNING <<
" AdaBoost called without classifier reference - needed for calculating AdaBoost " <<
Endl;
888 if (discreteAdaBoost) {
899 for (
Long64_t evt=0; evt<GetNEvents(); evt++) {
906 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=
kTRUE;
909 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
910 const Event* ev = GetEvent(ievt);
911 sig=DataInfo().IsSignal(ev);
912 v = fMVAvalues->at(ievt);
917 if (fMonitorBoostedMethod) {
919 fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
923 fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
929 if (discreteAdaBoost){
931 WrongDetection[ievt]=
kFALSE;
933 WrongDetection[ievt]=
kTRUE;
938 mvaProb = 2*(mvaProb-0.5);
940 if (DataInfo().IsSignal(ev)) trueType = 1;
942 sumWrong+= w*trueType*mvaProb;
946 fMethodError=sumWrong/sumAll;
953 if (fMethodError == 0) {
954 Log() << kWARNING <<
"Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " <<
Endl;
957 if (discreteAdaBoost)
958 boostWeight =
TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
960 boostWeight =
TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
976 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
977 const Event* ev =
Data()->GetEvent(ievt);
979 if (discreteAdaBoost){
981 if (WrongDetection[ievt] && boostWeight != 0) {
992 mvaProb = 2*(mvaProb-0.5);
996 if (DataInfo().IsSignal(ev)) trueType = 1;
999 boostfactor =
TMath::Exp(-1*boostWeight*trueType*mvaProb);
1007 Double_t normWeight = oldSum/newSum;
1010 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1011 const Event* ev =
Data()->GetEvent(ievt);
1020 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1021 const Event* ev =
Data()->GetEvent(ievt);
1028 delete[] WrongDetection;
1029 if (MVAProb)
delete MVAProb;
1031 fBoostWeight = boostWeight;
1043 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1044 const Event* ev =
Data()->GetEvent(ievt);
1063 Log() <<
"This method combines several classifier of one species in a "<<
Endl;
1064 Log() <<
"single multivariate quantity via the boost algorithm." <<
Endl;
1065 Log() <<
"the output is a weighted sum over all individual classifiers" <<
Endl;
1066 Log() <<
"By default, the AdaBoost method is employed, which gives " <<
Endl;
1067 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
1068 Log() <<
"weight in the training of the following classifier."<<
Endl;
1069 Log() <<
"Optionally, Bagged boosting can also be applied." <<
Endl;
1073 Log() <<
"The most important parameter in the configuration is the "<<
Endl;
1074 Log() <<
"number of boosts applied (Boost_Num) and the choice of boosting"<<
Endl;
1075 Log() <<
"(Boost_Type), which can be set to either AdaBoost or Bagging." <<
Endl;
1076 Log() <<
"AdaBoosting: The most important parameters in this configuration" <<
Endl;
1077 Log() <<
"is the beta parameter (Boost_AdaBoostBeta) " <<
Endl;
1078 Log() <<
"When boosting a linear classifier, it is sometimes advantageous"<<
Endl;
1079 Log() <<
"to transform the MVA output non-linearly. The following options" <<
Endl;
1080 Log() <<
"are available: step, log, and minmax, the default is no transform."<<
Endl;
1082 Log() <<
"Some classifiers are hard to boost and do not improve much in"<<
Endl;
1083 Log() <<
"their performance by boosting them, some even slightly deteriorate"<<
Endl;
1084 Log() <<
"due to the boosting." <<
Endl;
1085 Log() <<
"The booking of the boost method is special since it requires"<<
Endl;
1086 Log() <<
"the booing of the method to be boosted and the boost itself."<<
Endl;
1087 Log() <<
"This is solved by booking the method to be boosted and to add"<<
Endl;
1088 Log() <<
"all Boost parameters, which all begin with \"Boost_\" to the"<<
Endl;
1089 Log() <<
"options string. The factory separates the options and initiates"<<
Endl;
1090 Log() <<
"the boost process. The TMVA macro directory contains the example"<<
Endl;
1091 Log() <<
"macro \"Boost.C\"" <<
Endl;
1110 for (
UInt_t i=0;i< fMethods.size(); i++){
1117 if (fTransformString ==
"linear"){
1120 else if (fTransformString ==
"log"){
1121 if (val < sigcut) val = sigcut;
1125 else if (fTransformString ==
"step" ){
1129 else if (fTransformString ==
"gauss"){
1133 Log() << kFATAL <<
"error unknown transformation " << fTransformString<<
Endl;
1135 mvaValue+=val*fMethodWeight[i];
1136 norm +=fMethodWeight[i];
1141 NoErrorCalc(err, errUpper);
1168 Data()->SetCurrentType(eTT);
1174 if (singleMethod && !method) {
1175 Log() << kFATAL <<
" What do you do? Your method:"
1176 << fMethods.back()->GetName()
1177 <<
" seems not to be a propper TMVA method"
1186 std::vector<Double_t> OldMethodWeight(fMethodWeight);
1187 if (!singleMethod) {
1190 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1191 AllMethodsWeight += fMethodWeight.at(i);
1193 if (AllMethodsWeight != 0.0) {
1194 for (
UInt_t i=0; i<=fCurrentMethodIdx; i++)
1195 fMethodWeight[i] /= AllMethodsWeight;
1201 std::vector <Float_t>* mvaRes;
1203 mvaRes = fMVAvalues;
1205 mvaRes =
new std::vector <Float_t>(GetNEvents());
1206 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1208 (*mvaRes)[ievt] = singleMethod ? method->
GetMvaValue(&err) : GetMvaValue(&err);
1214 fMethodWeight = OldMethodWeight;
1217 Int_t signalClass = 0;
1218 if (DataInfo().GetClassInfo(
"Signal") != 0) {
1219 signalClass = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1222 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1229 TH1* mva_s =
new TH1F(
"MVA_S",
"MVA_S", fNbins, xmin, xmax );
1230 TH1* mva_b =
new TH1F(
"MVA_B",
"MVA_B", fNbins, xmin, xmax );
1231 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1232 if (CalcOverlapIntergral) {
1233 mva_s_overlap =
new TH1F(
"MVA_S_OVERLAP",
"MVA_S_OVERLAP", fNbins, xmin, xmax );
1234 mva_b_overlap =
new TH1F(
"MVA_B_OVERLAP",
"MVA_B_OVERLAP", fNbins, xmin, xmax );
1236 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1237 const Event* ev = GetEvent(ievt);
1239 if (DataInfo().IsSignal(ev)) mva_s->
Fill( (*mvaRes)[ievt], w );
1240 else mva_b->
Fill( (*mvaRes)[ievt], w );
1242 if (CalcOverlapIntergral) {
1244 if (DataInfo().IsSignal(ev))
1245 mva_s_overlap->
Fill( (*mvaRes)[ievt], w_ov );
1247 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1259 if (CalcOverlapIntergral) {
1263 fOverlap_integral = 0.0;
1266 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1267 if (bc_s > 0.0 && bc_b > 0.0)
1271 delete mva_s_overlap;
1272 delete mva_b_overlap;
1294 Log() << kFATAL <<
"dynamic cast to MethodBase* failed" <<
Endl;
1298 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1322 results->
Store(
new TH1I(
"NodesBeforePruning",
"nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesBeforePruning");
1323 results->
Store(
new TH1I(
"NodesAfterPruning",
"nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),
"NodesAfterPruning");
1336 Log() << kINFO <<
"<Train> average number of nodes before/after pruning : "
1348 if (methodIndex < 3){
1349 Log() << kDEBUG <<
"No detailed boost monitoring for "
1350 << GetCurrentMethod(methodIndex)->GetMethodName()
1351 <<
" yet available " <<
Endl;
1359 if (fDetailedMonitoring){
1361 if (DataInfo().GetNVariables() == 2) {
1362 results->
Store(
new TH2F(
Form(
"EventDistSig_%d",methodIndex),
Form(
"EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1364 results->
Store(
new TH2F(
Form(
"EventDistBkg_%d",methodIndex),
Form(
"EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1368 for (
Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1369 const Event* ev = GetEvent(ievt);
1375 if (DataInfo().IsSignal(ev)) h=results->
GetHist2D(
Form(
"EventDistSig_%d",methodIndex));
1376 else h=results->
GetHist2D(
Form(
"EventDistBkg_%d",methodIndex));
1377 if (h) h->
Fill(v0,v1,w);
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetModelPersistence(Bool_t status)
std::string GetName(const std::string &scope_name)
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
static long int sum(long int i)
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Random number generator class based on M.
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
THist< 1, int, THistStatContent > TH1I
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
MsgLogger & Endl(MsgLogger &ml)
TH1 * GetHist(const TString &alias) const
Singleton class for Global types used by TMVA.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
void SingleTrain()
initialization
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
static Types & Instance()
returns the single instance of "Types" if it exists already, or creates it (Singleton)
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Virtual base Class for all MVA method.
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
void SetBoostWeight(Double_t w) const
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Ranking for variables in method (implementation)
Short_t Min(Short_t a, Short_t b)
void ToLower()
Change string to lower-case.
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
const Ranking * CreateRanking()
virtual Int_t GetNbinsX() const
void SetSilentFile(Bool_t status)
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
TH2 * GetHist2D(const TString &alias) const
static void InhibitOutput()
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
Virtual base class for combining several TMVA method.
virtual ~MethodBoost(void)
destructor
void ScaleBoostWeight(Double_t s) const
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
std::vector< std::vector< double > > Data
void GetHelpMessage() const
Get help message text.
Types::EMVA GetMethodType() const
Class that contains all the data information.
PDF wrapper for histograms; uses user-defined spline interpolation.
Double_t GetOriginalWeight() const
Class for boosting a TMVA method.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
virtual void Delete(Option_t *option="")
Delete this object.
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
RooCmdArg Timer(Bool_t flag=kTRUE)
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Implementation of the GiniIndex as separation criterion.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
tomato 1-D histogram with a double per channel (see TH1 documentation)}
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Class for categorizing the phase space.
TString GetMethodName(Types::EMVA method) const
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Describe directory structure in memory.
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Int_t GetNNodesBeforePruning()
virtual void TestClassification()
initialization
Interface for all concrete MVA method implementations.
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file; dummy implementation here
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Double_t GetMVAProbAt(Double_t value)
DataSetManager * fDataSetManager
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Class that is the base-class for a vector of result.
Short_t Max(Short_t a, Short_t b)
void SetWeightFileDir(TString fileDir)
set directory of weight file
Bool_t fHistoricBoolOption
void InitHistos()
initialisation routine
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
A TTree object has a header with a name and a title.
Double_t GetSignalReferenceCut() const
void Store(TObject *obj, const char *alias=0)
static void EnableOutput()
Int_t Fill(Double_t)
Invalid Fill method.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
double norm(double *x, double *p)
Timing information for training and evaluation of MVA methods.
virtual void TestClassification()
initialization
Analysis of Boosted Decision Trees.
void SetSignalReferenceCut(Double_t cut)