100 || std::abs(x-y) < std::numeric_limits<float>::min();
108 || std::abs(x-y) < std::numeric_limits<double>::min();
121 fMinLinCorrForFisher (1),
122 fUseExclusiveVars (
kTRUE),
130 fPruneMethod (kNoPruning),
131 fNNodesBeforePruning(0),
132 fNodePurityLimit(0.5),
140 fAnalysisType (
Types::kClassification),
158 fMinLinCorrForFisher (1),
159 fUseExclusiveVars (
kTRUE),
163 fMinNodeSize (minSize),
167 fPruneMethod (kNoPruning),
168 fNNodesBeforePruning(0),
169 fNodePurityLimit(purityLimit),
170 fRandomisedTree (randomisedTree),
171 fUseNvars (useNvars),
172 fUsePoissonNvars(usePoissonNvars),
174 fMaxDepth (nMaxDepth),
177 fAnalysisType (
Types::kClassification),
178 fDataSetInfo (dataInfo)
180 if (sepType ==
NULL) {
187 Log() << kWARNING <<
" You had chosen the training mode using optimal cuts, not\n"
188 <<
" based on a grid of " <<
fNCuts <<
" by setting the option NCuts < 0\n"
189 <<
" as this doesn't exist yet, I set it to " <<
fNCuts <<
" and use the grid"
205 fUseFisherCuts (d.fUseFisherCuts),
206 fMinLinCorrForFisher (d.fMinLinCorrForFisher),
207 fUseExclusiveVars (d.fUseExclusiveVars),
208 fSepType (d.fSepType),
209 fRegType (d.fRegType),
210 fMinSize (d.fMinSize),
211 fMinNodeSize(d.fMinNodeSize),
212 fMinSepGain (d.fMinSepGain),
213 fUseSearchTree (d.fUseSearchTree),
214 fPruneStrength (d.fPruneStrength),
215 fPruneMethod (d.fPruneMethod),
216 fNodePurityLimit(d.fNodePurityLimit),
217 fRandomisedTree (d.fRandomisedTree),
218 fUseNvars (d.fUseNvars),
219 fUsePoissonNvars(d.fUsePoissonNvars),
220 fMyTrandom (new
TRandom3(fgRandomSeed)),
221 fMaxDepth (d.fMaxDepth),
222 fSigClass (d.fSigClass),
224 fAnalysisType(d.fAnalysisType),
225 fDataSetInfo (d.fDataSetInfo)
240 if (fMyTrandom)
delete fMyTrandom;
241 if (fRegType)
delete fRegType;
253 Log() << kFATAL <<
"SetParentTreeNodes: started with undefined ROOT node" <<
Endl;
258 if ((this->GetLeftDaughter(n) ==
NULL) && (this->GetRightDaughter(n) !=
NULL) ) {
259 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
261 }
else if ((this->GetLeftDaughter(n) !=
NULL) && (this->GetRightDaughter(n) ==
NULL) ) {
262 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
266 if (this->GetLeftDaughter(n) !=
NULL) {
267 this->SetParentTreeInNodes( this->GetLeftDaughter(n) );
269 if (this->GetRightDaughter(n) !=
NULL) {
270 this->SetParentTreeInNodes( this->GetRightDaughter(n) );
274 if (n->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(n->
GetDepth());
282 std::string
type(
"");
286 dt->
ReadXML( node, tmva_Version_Code );
303 this->GetRoot()->SetPos(
's');
304 this->GetRoot()->SetDepth(0);
305 this->GetRoot()->SetParentTree(
this);
306 fMinSize = fMinNodeSize/100. * eventSample.size();
308 Log() << kDEBUG <<
"\tThe minimal node size MinNodeSize=" << fMinNodeSize <<
" fMinNodeSize="<<fMinNodeSize<<
"% is translated to an actual number of events = "<< fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
309 Log() << kDEBUG <<
"\tNote: This number will be taken as absolute minimum in the node, " <<
Endl;
310 Log() << kDEBUG <<
" \tin terms of 'weighted events' and unweighted ones !! " <<
Endl;
314 UInt_t nevents = eventSample.size();
317 if (fNvars==0) fNvars = eventSample[0]->GetNVariables();
318 fVariableImportance.resize(fNvars);
320 else Log() << kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
328 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
329 xmin[ivar]=xmax[ivar]=0;
331 for (
UInt_t iev=0; iev<eventSample.size(); iev++) {
345 if ( DoRegression() ) {
348 target2+=weight*tgt*tgt;
351 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
353 if (iev==0) xmin[ivar]=xmax[ivar]=val;
354 if (val < xmin[ivar]) xmin[ivar]=val;
355 if (val > xmax[ivar]) xmax[ivar]=val;
361 Log() << kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. "
362 <<
"(Nsig="<<s<<
" Nbkg="<<
b<<
" Probably you use a Monte Carlo with negative weights. That should in principle "
363 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the "
364 <<
"minimal number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<fMinNodeSize
365 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough "
366 <<
"to allow for reasonable averaging!!!" << Endl
367 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events "
368 <<
"with negative weight in the training." <<
Endl;
370 for (
UInt_t i=0; i<eventSample.size(); i++) {
371 if (eventSample[i]->
GetClass() != fSigClass) {
372 nBkg += eventSample[i]->GetWeight();
373 Log() << kDEBUG <<
"Event "<< i<<
" has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
374 <<
" boostWeight: " << eventSample[i]->GetBoostWeight() <<
Endl;
377 Log() << kDEBUG <<
" that gives in total: " << nBkg<<
Endl;
387 if (node == this->GetRoot()) {
392 for (
UInt_t ivar=0; ivar<fNvars; ivar++) {
410 if ((eventSample.size() >= 2*fMinSize && s+
b >= 2*fMinSize) && node->
GetDepth() < fMaxDepth
411 && ( ( s!=0 && b !=0 && !DoRegression()) || ( (s+b)!=0 && DoRegression()) ) ) {
414 separationGain = this->TrainNodeFast(eventSample, node);
416 separationGain = this->TrainNodeFull(eventSample, node);
422 if (DoRegression()) {
437 if (node->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->
GetDepth());
441 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
442 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
445 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
447 for (
UInt_t ie=0; ie< nevents ; ie++) {
449 rightSample.push_back(eventSample[ie]);
450 nRight += eventSample[ie]->GetWeight();
451 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
454 leftSample.push_back(eventSample[ie]);
455 nLeft += eventSample[ie]->GetWeight();
456 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
461 if (leftSample.empty() || rightSample.empty()) {
463 Log() << kERROR <<
"<TrainNode> all events went to the same branch" << Endl
464 <<
"--- Hence new node == old node ... check" << Endl
465 <<
"--- left:" << leftSample.size()
466 <<
" right:" << rightSample.size() << Endl
467 <<
" while the separation is thought to be " << separationGain
468 <<
"\n when cutting on variable " << node->
GetSelector()
470 << kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
498 if (DoRegression()) {
519 if (node->
GetDepth() > this->GetTotalTreeDepth()) this->SetTotalTreeDepth(node->
GetDepth());
532 for (
UInt_t i=0; i<eventSample.size(); i++) {
533 this->FillEvent(*(eventSample[i]),
NULL);
545 node = this->GetRoot();
551 if (event.
GetClass() == fSigClass) {
564 this->FillEvent(event,static_cast<TMVA::DecisionTreeNode*>(node->
GetRight())) ;
566 this->FillEvent(event,static_cast<TMVA::DecisionTreeNode*>(node->
GetLeft())) ;
575 if (this->GetRoot()!=
NULL) this->GetRoot()->ClearNodeAndAllDaughters();
590 node = this->GetRoot();
601 this->PruneNode(node);
605 return this->CountNodes();
619 if( fPruneMethod == kNoPruning )
return 0.0;
621 if (fPruneMethod == kExpectedErrorPruning)
624 else if (fPruneMethod == kCostComplexityPruning)
629 Log() << kFATAL <<
"Selected pruning method not yet implemented "
633 if(!tool)
return 0.0;
637 if(validationSample ==
NULL){
638 Log() << kFATAL <<
"Cannot automate the pruning algorithm without an "
639 <<
"independent validation sample!" <<
Endl;
640 }
else if(validationSample->size() == 0) {
641 Log() << kFATAL <<
"Cannot automate the pruning algorithm with "
642 <<
"independent validation sample of ZERO events!" <<
Endl;
649 Log() << kFATAL <<
"Error pruning tree! Check prune.log for more information."
669 return pruneStrength;
681 GetRoot()->ResetValidationData();
682 for (
UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
683 CheckEventWithPrunedTree((*validationSample)[ievt]);
698 Log() << kFATAL <<
"TestPrunedTreeQuality: started with undefined ROOT node" <<
Endl;
704 return (TestPrunedTreeQuality( n->
GetLeft(), mode ) +
705 TestPrunedTreeQuality( n->
GetRight(), mode ));
708 if (DoRegression()) {
714 if (n->
GetPurity() > this->GetNodePurityLimit())
719 else if ( mode == 1 ) {
724 throw std::string(
"Unknown ValidationQualityMode");
738 if (current ==
NULL) {
739 Log() << kFATAL <<
"CheckEventWithPrunedTree: started with undefined ROOT node" <<
Endl;
742 while(current !=
NULL) {
771 for( EventConstList::const_iterator it = validationSample->begin();
772 it != validationSample->end(); ++it ) {
773 sumWeights += (*it)->GetWeight();
786 Log() << kFATAL <<
"CountLeafNodes: started with undefined ROOT node" <<
Endl;
793 if ((this->GetLeftDaughter(n) ==
NULL) && (this->GetRightDaughter(n) ==
NULL) ) {
797 if (this->GetLeftDaughter(n) !=
NULL) {
798 countLeafs += this->CountLeafNodes( this->GetLeftDaughter(n) );
800 if (this->GetRightDaughter(n) !=
NULL) {
801 countLeafs += this->CountLeafNodes( this->GetRightDaughter(n) );
815 Log() << kFATAL <<
"DescendTree: started with undefined ROOT node" <<
Endl;
820 if ((this->GetLeftDaughter(n) ==
NULL) && (this->GetRightDaughter(n) ==
NULL) ) {
823 else if ((this->GetLeftDaughter(n) ==
NULL) && (this->GetRightDaughter(n) !=
NULL) ) {
824 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
827 else if ((this->GetLeftDaughter(n) !=
NULL) && (this->GetRightDaughter(n) ==
NULL) ) {
828 Log() << kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
832 if (this->GetLeftDaughter(n) !=
NULL) {
833 this->DescendTree( this->GetLeftDaughter(n) );
835 if (this->GetRightDaughter(n) !=
NULL) {
836 this->DescendTree( this->GetRightDaughter(n) );
868 if(node ==
NULL)
return;
871 node->
SetAlpha( std::numeric_limits<double>::infinity( ) );
883 Node* current = this->GetRoot();
885 for (
UInt_t i =0; i < depth; i++) {
887 if ( tmp & sequence) current = this->GetRightDaughter(current);
888 else current = this->GetLeftDaughter(current);
898 for (
UInt_t ivar=0; ivar<fNvars; ivar++) useVariable[ivar]=
kFALSE;
904 else useNvars = fUseNvars;
907 while (nSelectedVars < useNvars) {
908 Double_t bla = fMyTrandom->Rndm()*fNvars;
911 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
912 if (useVariable[ivar] ==
kTRUE) {
913 mapVariable[nSelectedVars] = ivar;
918 if (nSelectedVars != useNvars) { std::cout <<
"Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl; std::exit(1);}
933 Double_t separationGainTotal = -1, sepTmp;
937 for (
UInt_t ivar=0; ivar <= fNvars; ivar++) {
938 separationGain[ivar]=-1;
944 Int_t nTotS_unWeighted, nTotB_unWeighted;
945 UInt_t nevents = eventSample.size();
953 std::vector<Double_t> fisherCoeff;
955 if (fRandomisedTree) {
957 GetRandomisedVariables(useVariable,mapVariable,tmp);
960 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
961 useVariable[ivar] =
kTRUE;
962 mapVariable[ivar] = ivar;
965 useVariable[fNvars] =
kFALSE;
968 if (fUseFisherCuts) {
969 useVariable[fNvars] =
kTRUE;
975 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
976 useVarInFisher[ivar] =
kFALSE;
977 mapVarInFisher[ivar] = ivar;
980 std::vector<TMatrixDSym*>* covMatrices;
983 Log() << kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
991 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
992 for (
UInt_t jvar=ivar+1; jvar < fNvars; jvar++) {
993 if ( (
TMath::Abs( (*s)(ivar, jvar)) > fMinLinCorrForFisher) ||
994 (
TMath::Abs( (*b)(ivar, jvar)) > fMinLinCorrForFisher) ){
995 useVarInFisher[ivar] =
kTRUE;
996 useVarInFisher[jvar] =
kTRUE;
1004 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1007 if (useVarInFisher[ivar] && useVariable[ivar]) {
1008 mapVarInFisher[nFisherVars++]=ivar;
1011 if (fUseExclusiveVars) useVariable[ivar] =
kFALSE;
1016 fisherCoeff = this->GetFisherCoefficients(eventSample, nFisherVars, mapVarInFisher);
1019 delete [] useVarInFisher;
1020 delete [] mapVarInFisher;
1026 if (fUseFisherCuts && fisherOK) cNvars++;
1040 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
1041 nBins[ivar] = fNCuts+1;
1042 if (ivar < fNvars) {
1043 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') {
1048 nSelS[ivar] =
new Double_t [nBins[ivar]];
1049 nSelB[ivar] =
new Double_t [nBins[ivar]];
1050 nSelS_unWeighted[ivar] =
new Double_t [nBins[ivar]];
1051 nSelB_unWeighted[ivar] =
new Double_t [nBins[ivar]];
1052 target[ivar] =
new Double_t [nBins[ivar]];
1053 target2[ivar] =
new Double_t [nBins[ivar]];
1054 cutValues[ivar] =
new Double_t [nBins[ivar]];
1061 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1068 useVariable[ivar]=
kFALSE;
1076 for (
UInt_t iev=0; iev<nevents; iev++) {
1079 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
1080 result += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1081 if (result > xmax[ivar]) xmax[ivar]=
result;
1082 if (result < xmin[ivar]) xmin[ivar]=
result;
1085 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
1086 nSelS[ivar][ibin]=0;
1087 nSelB[ivar][ibin]=0;
1088 nSelS_unWeighted[ivar][ibin]=0;
1089 nSelB_unWeighted[ivar][ibin]=0;
1090 target[ivar][ibin]=0;
1091 target2[ivar][ibin]=0;
1092 cutValues[ivar][ibin]=0;
1097 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1099 if ( useVariable[ivar] ) {
1112 binWidth[ivar] = ( xmax[ivar] - xmin[ivar] ) /
Double_t(nBins[ivar]);
1113 invBinWidth[ivar] = 1./binWidth[ivar];
1114 if (ivar < fNvars) {
1115 if (fDataSetInfo->GetVariableInfo(ivar).GetVarType() ==
'I') { invBinWidth[ivar] = 1; binWidth[ivar] = 1; }
1123 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
1124 cutValues[ivar][icut]=xmin[ivar]+(
Double_t(icut+1))*binWidth[ivar];
1131 nTotS_unWeighted=0; nTotB_unWeighted=0;
1132 for (
UInt_t iev=0; iev<nevents; iev++) {
1134 Double_t eventWeight = eventSample[iev]->GetWeight();
1135 if (eventSample[iev]->
GetClass() == fSigClass) {
1137 nTotS_unWeighted++; }
1144 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1147 if ( useVariable[ivar] ) {
1149 if (ivar < fNvars) eventData = eventSample[iev]->GetValueFast(ivar);
1151 eventData = fisherCoeff[fNvars];
1152 for (
UInt_t jvar=0; jvar<fNvars; jvar++)
1153 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValueFast(jvar);
1158 if (eventSample[iev]->
GetClass() == fSigClass) {
1159 nSelS[ivar][iBin]+=eventWeight;
1160 nSelS_unWeighted[ivar][iBin]++;
1163 nSelB[ivar][iBin]+=eventWeight;
1164 nSelB_unWeighted[ivar][iBin]++;
1166 if (DoRegression()) {
1167 target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1168 target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1174 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1175 if (useVariable[ivar]) {
1176 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
1177 nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
1178 nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
1179 nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
1180 nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
1181 if (DoRegression()) {
1182 target[ivar][ibin] +=target[ivar][ibin-1] ;
1183 target2[ivar][ibin]+=target2[ivar][ibin-1];
1186 if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
1187 Log() << kFATAL <<
"Helge, you have a bug ....nSelS_unw..+nSelB_unw..= "
1188 << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
1189 <<
" while eventsample size = " << eventSample.size()
1192 double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
1193 double totalSum=nTotS+nTotB;
1194 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
1195 Log() << kFATAL <<
"Helge, you have another bug ....nSelS+nSelB= "
1197 <<
" while total number of events = " << totalSum
1204 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1205 if (useVariable[ivar]) {
1206 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
1218 Double_t sl = nSelS_unWeighted[ivar][iBin];
1219 Double_t bl = nSelB_unWeighted[ivar][iBin];
1231 if ( ((sl+bl)>=fMinSize && (sr+br)>=fMinSize)
1232 && ((slW+blW)>=fMinSize && (srW+brW)>=fMinSize)
1235 if (DoRegression()) {
1236 sepTmp = fRegType->GetSeparationGain(nSelS[ivar][iBin]+nSelB[ivar][iBin],
1237 target[ivar][iBin],target2[ivar][iBin],
1239 target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);
1241 sepTmp = fSepType->GetSeparationGain(nSelS[ivar][iBin], nSelB[ivar][iBin], nTotS, nTotB);
1243 if (separationGain[ivar] < sepTmp) {
1244 separationGain[ivar] = sepTmp;
1245 cutIndex[ivar] = iBin;
1254 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1255 if (useVariable[ivar] ) {
1256 if (separationGainTotal < separationGain[ivar]) {
1257 separationGainTotal = separationGain[ivar];
1264 if (DoRegression()) {
1265 node->
SetSeparationIndex(fRegType->GetSeparationIndex(nTotS+nTotB,target[0][nBins[mxVar]-1],target2[0][nBins[mxVar]-1]));
1266 node->
SetResponse(target[0][nBins[mxVar]-1]/(nTotS+nTotB));
1267 if (
almost_equal_double(target2[0][nBins[mxVar]-1]/(nTotS+nTotB), target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB))) {
1270 node->
SetRMS(
TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB) - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
1276 if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=
kTRUE;
1281 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
1284 if (mxVar < (
Int_t) fNvars){
1286 fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
1293 for (
UInt_t ivar=0; ivar<=fNvars; ivar++) {
1297 fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
1303 separationGainTotal = 0;
1320 for (
UInt_t i=0; i<cNvars; i++) {
1323 delete [] nSelS_unWeighted[i];
1324 delete [] nSelB_unWeighted[i];
1325 delete [] target[i];
1326 delete [] target2[i];
1327 delete [] cutValues[i];
1331 delete [] nSelS_unWeighted;
1332 delete [] nSelB_unWeighted;
1335 delete [] cutValues;
1340 delete [] useVariable;
1341 delete [] mapVariable;
1343 delete [] separationGain;
1348 delete [] invBinWidth;
1350 return separationGainTotal;
1358 std::vector<Double_t> fisherCoeff(fNvars+1);
1381 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
1383 UInt_t nevents = eventSample.size();
1385 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
1388 const Event * ev = eventSample[ievt];
1392 if (ev->
GetClass() == fSigClass) sumOfWeightsS += weight;
1393 else sumOfWeightsB += weight;
1396 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1397 sum[ivar] += ev->
GetValueFast( mapVarInFisher[ivar] )*weight;
1400 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1401 (*meanMatx)( ivar, 2 ) = sumS[ivar];
1402 (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;
1404 (*meanMatx)( ivar, 2 ) += sumB[ivar];
1405 (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;
1408 (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);
1420 assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
1424 const Int_t nFisherVars2 = nFisherVars*nFisherVars;
1428 memset(sum2Sig,0,nFisherVars2*
sizeof(
Double_t));
1429 memset(sum2Bgd,0,nFisherVars2*
sizeof(
Double_t));
1432 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
1436 const Event* ev = eventSample.at(ievt);
1446 if ( ev->
GetClass() == fSigClass ) sum2Sig[k] += ( (xval[
x] - (*meanMatx)(
x, 0))*(xval[
y] - (*meanMatx)(
y, 0)) )*weight;
1447 else sum2Bgd[k] += ( (xval[
x] - (*meanMatx)(
x, 1))*(xval[
y] - (*meanMatx)(
y, 1)) )*weight;
1455 (*with)(
x,
y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
1475 prodSig = ( ((*meanMatx)(
x, 0) - (*meanMatx)(
x, 2))*
1476 ((*meanMatx)(
y, 0) - (*meanMatx)(
y, 2)) );
1477 prodBgd = ( ((*meanMatx)(
x, 1) - (*meanMatx)(
x, 2))*
1478 ((*meanMatx)(
y, 1) - (*meanMatx)(
y, 2)) );
1480 (*betw)(
x,
y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);
1489 (*cov)(
x,
y) = (*with)(
x,
y) + (*betw)(
x,
y);
1504 Log() << kWARNING <<
"FisherCoeff matrix is almost singular with determinant="
1506 <<
" did you use the variables that are linear combinations or highly correlated?"
1510 Log() << kFATAL <<
"FisherCoeff matrix is singular with determinant="
1512 <<
" did you use the variables that are linear combinations?"
1519 Double_t xfact =
TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
1522 std::vector<Double_t> diffMeans( nFisherVars );
1524 for (
UInt_t ivar=0; ivar<=fNvars; ivar++) fisherCoeff[ivar] = 0;
1525 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1526 for (
UInt_t jvar=0; jvar<nFisherVars; jvar++) {
1527 Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
1528 fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*d;
1532 fisherCoeff[mapVarInFisher[ivar]] *= xfact;
1537 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++){
1538 f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
1542 fisherCoeff[fNvars] = f0;
1555 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
1557 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
1560 std::vector<Double_t> lCutValue( fNvars, 0.0 );
1561 std::vector<Double_t> lSepGain( fNvars, -1.0e6 );
1562 std::vector<Char_t> lCutType( fNvars );
1567 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
1568 if((*it)->GetClass() == fSigClass) {
1569 nTotS += (*it)->GetWeight();
1573 nTotB += (*it)->GetWeight();
1579 std::vector<Char_t> useVariable(fNvars);
1583 if (fRandomisedTree) {
1584 if (fUseNvars ==0 ) {
1588 Int_t nSelectedVars = 0;
1589 while (nSelectedVars < fUseNvars) {
1590 Double_t bla = fMyTrandom->Rndm()*fNvars;
1593 for (
UInt_t ivar=0; ivar < fNvars; ivar++) {
1594 if(useVariable[ivar] ==
Char_t(
kTRUE)) nSelectedVars++;
1602 for(
UInt_t ivar = 0; ivar < fNvars; ivar++ ) {
1603 if(!useVariable[ivar])
continue;
1605 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
1607 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
1608 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
1609 for( ; it != it_end; ++it ) {
1610 if((**it)->GetClass() == fSigClass )
1611 sigWeightCtr += (**it)->GetWeight();
1613 bkgWeightCtr += (**it)->GetWeight();
1615 it->SetCumulativeWeight(
false,bkgWeightCtr);
1616 it->SetCumulativeWeight(
true,sigWeightCtr);
1622 Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0,
norm = 0.0;
1624 for( it = bdtEventSample.begin(); it != it_end; ++it ) {
1625 if( index == 0 ) { ++index;
continue; }
1626 if( *(*it) ==
NULL ) {
1627 Log() << kFATAL <<
"In TrainNodeFull(): have a null event! Where index="
1628 << index <<
", and parent node=" << node->
GetParent() <<
Endl;
1631 dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();
1632 norm =
TMath::Abs(bdtEventSample[index].GetVal() + bdtEventSample[index-1].GetVal());
1635 if( index >= fMinSize && (nTotS_unWeighted + nTotB_unWeighted) - index >= fMinSize &&
TMath::Abs(dVal/(0.5*
norm + 1)) > fPMin ) {
1636 sepTmp = fSepType->GetSeparationGain( it->GetCumulativeWeight(
true), it->GetCumulativeWeight(
false), sigWeightCtr, bkgWeightCtr );
1637 if( sepTmp > separationGain ) {
1638 separationGain = sepTmp;
1639 cutValue = it->GetVal() - 0.5*dVal;
1640 Double_t nSelS = it->GetCumulativeWeight(
true);
1641 Double_t nSelB = it->GetCumulativeWeight(
false);
1644 if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType =
kTRUE;
1650 lCutType[ivar] =
Char_t(cutType);
1651 lCutValue[ivar] = cutValue;
1652 lSepGain[ivar] = separationGain;
1656 Int_t iVarIndex = -1;
1657 for(
UInt_t ivar = 0; ivar < fNvars; ivar++ ) {
1658 if( lSepGain[ivar] > separationGain ) {
1660 separationGain = lSepGain[ivar];
1664 if(iVarIndex >= 0) {
1669 fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);
1672 separationGain = 0.0;
1675 return separationGain;
1703 Log() << kFATAL <<
"CheckEvent: started with undefined ROOT node" <<
Endl;
1712 Log() << kFATAL <<
"DT::CheckEvent: inconsistent tree structure" <<
Endl;
1717 if ( DoRegression() ){
1731 Double_t sumsig=0, sumbkg=0, sumtot=0;
1732 for (
UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
1733 if (eventSample[ievt]->
GetClass() != fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
1734 else sumsig+=eventSample[ievt]->GetWeight();
1735 sumtot+=eventSample[ievt]->GetWeight();
1738 if (sumtot!= (sumsig+sumbkg)){
1739 Log() << kFATAL <<
"<SamplePurity> sumtot != sumsig+sumbkg"
1740 << sumtot <<
" " << sumsig <<
" " << sumbkg <<
Endl;
1742 if (sumtot>0)
return sumsig/(sumsig + sumbkg);
1754 std::vector<Double_t> relativeImportance(fNvars);
1756 for (
UInt_t i=0; i< fNvars; i++) {
1757 sum += fVariableImportance[i];
1758 relativeImportance[i] = fVariableImportance[i];
1761 for (
UInt_t i=0; i< fNvars; i++) {
1763 relativeImportance[i] /=
sum;
1765 relativeImportance[i] = 0;
1767 return relativeImportance;
1775 std::vector<Double_t> relativeImportance = this->GetVariableImportance();
1776 if (ivar < fNvars)
return relativeImportance[ivar];
1778 Log() << kFATAL <<
"<GetVariableImportance>" <<
Endl
1779 <<
"--- ivar = " << ivar <<
" is out of range " <<
Endl;
void SetNTerminal(Int_t n)
Double_t PruneStrength
quality measure for a pruned subtree T of T_max
static long int sum(long int i)
Random number generator class based on M.
void SetSelector(Short_t i)
MsgLogger & Endl(MsgLogger &ml)
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
Singleton class for Global types used by TMVA.
Float_t GetSumTarget() const
UInt_t GetNTargets() const
accessor to the number of targets
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
virtual DecisionTreeNode * GetRight() const
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in...
void IncrementNEvents_unweighted()
void IncrementNEvents(Float_t nev)
Short_t Min(Short_t a, Short_t b)
Int_t GetNodeType(void) const
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
std::vector< DecisionTreeNode * > PruneSequence
the regularization parameter for pruning
virtual void SetParentTree(TMVA::BinaryTree *t)
virtual void SetRight(Node *r)
virtual ~DecisionTree(void)
destructor
virtual DecisionTreeNode * GetLeft() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
virtual DecisionTreeNode * GetParent() const
void SetNSigEvents_unweighted(Float_t s)
void SetResponse(Float_t r)
void SetNBValidation(Double_t b)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
virtual DecisionTreeNode * GetRoot() const
void SetNFisherCoeff(Int_t nvars)
std::vector< const TMVA::Event * > EventConstList
Base class for BinarySearch and Decision Trees.
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
static const Int_t fgRandomSeed
Double_t GetNSValidation() const
void FillTree(const EventList &eventSample)
fill the existing the decision tree structure by filling event in from the top node and see where the...
void IncrementNBkgEvents(Float_t b)
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Float_t GetPurity(void) const
void SetSeparationGain(Float_t sep)
void SetNBkgEvents(Float_t b)
void SetNSValidation(Double_t s)
Class that contains all the data information.
UInt_t CountLeafNodes(TMVA::Node *n=NULL)
return the number of terminal nodes in the sub-tree below Node n
void AddToSumTarget(Float_t t)
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
ROOT::Math::KDTree< _DataPoint > * BuildTree(const std::vector< const _DataPoint * > &vDataPoints, const unsigned int iBucketSize)
Double_t GetOriginalWeight() const
void DescendTree(Node *n=NULL)
descend a tree to find all its leaf nodes
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing the decision tree structure by filling event in from the top node and see where the...
void SetNEvents(Float_t nev)
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
TMatrixT< Double_t > TMatrixD
Float_t GetNBkgEvents(void) const
void SetSubTreeR(Double_t r)
Double_t GetNBValidation() const
virtual void SetLeft(Node *l)
void SetAlpha(Double_t alpha)
UInt_t CleanTree(DecisionTreeNode *node=NULL)
remove those last splits that result in two leaf nodes that are both of the type (i.e.
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node ...
void SetCutValue(Float_t c)
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
Implementation of a Decision Tree.
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
train a node by finding the single optimal cut for a single variable that best separates signal and b...
void SetParentTreeInNodes(Node *n=NULL)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time...
void SetPurity(void)
return the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree...
Float_t GetValueFast(UInt_t ivar) const
bool almost_equal_float(float x, float y, int ulp=4)
void SetCutType(Bool_t t)
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node ...
void IncrementNSigEvents_unweighted()
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual void ReadXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
read attributes from XML
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
bool almost_equal_double(double x, double y, int ulp=4)
Bool_t IsTerminal() const
static void SetVarIndex(Int_t iVar)
void AddToSumTarget2(Float_t t2)
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node ...
Double_t GetNodeR() const
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
void IncrementNSigEvents(Float_t s)
Float_t GetSumTarget2() const
void SetNodeType(Int_t t)
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree ...
void SetAlphaMinSubtree(Double_t g)
Types::EAnalysisType fAnalysisType
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
void SetNEvents_unboosted(Float_t nev)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
void SetNSigEvents_unboosted(Float_t s)
void SetTerminal(Bool_t s=kTRUE)
RegressionVariance * fRegType
void SetNSigEvents(Float_t s)
Float_t GetTarget(UInt_t itgt) const
void SetNBkgEvents_unboosted(Float_t b)
Short_t GetSelector() const
void SetNBkgEvents_unweighted(Float_t b)
void IncrementNBkgEvents_unweighted()
Double_t PruneTree(const EventConstList *validationSample=NULL)
prune (get rid of internal nodes) the Decision tree to avoid overtraining several different pruning m...
Node for the BinarySearch or Decision Trees.
Short_t Max(Short_t a, Short_t b)
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=NULL)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=NULL, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
virtual Double_t Determinant() const
Return the matrix determinant.
Float_t GetNSigEvents(void) const
void SetSeparationIndex(Float_t sep)
Double_t Sqrt(Double_t x)
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
double norm(double *x, double *p)
virtual Bool_t GoesRight(const Event &) const
test event if it descends the tree at this node to the right
Float_t GetResponse(void) const
void SetNEvents_unweighted(Float_t nev)
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node ...
Float_t GetCutValue(void) const