104 #define READXML kTRUE
121 fTransformations (
"I" ),
126 fJobName ( jobName ),
127 fAnalysisType (
Types::kClassification ),
128 fModelPersistence (
kTRUE)
154 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
155 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
158 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
160 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
163 "Option to save the trained model in xml file or using serialization");
167 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
197 fTransformations (
"I" ),
201 fSilentFile (
kTRUE ),
202 fJobName ( jobName ),
203 fAnalysisType (
Types::kClassification ),
204 fModelPersistence (
kTRUE)
231 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
232 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
235 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
237 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
240 "Option to save the trained model in xml file or using serialization");
244 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
291 return fModelPersistence;
299 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
300 for (;trfIt != fDefaultTrfs.end(); trfIt++)
delete (*trfIt);
302 this->DeleteAllMethods();
317 std::map<TString,MVector*>::iterator itrMap;
319 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
321 MVector *methods=itrMap->second;
323 MVector::iterator itrMethod = methods->begin();
324 for (; itrMethod != methods->end(); itrMethod++) {
325 Log() << kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
365 if(fMethodsMap.find(datasetname)!=fMethodsMap.end())
367 if (
GetMethod( datasetname,methodTitle ) != 0) {
368 Log() << kFATAL <<
"Booking failed since method with title <"
369 << methodTitle <<
"> already exists "<<
"in with DataSet Name <"<< loader->
GetName()<<
"> "
375 Log() << kHEADER <<
"Booking method: " <<
gTools().
Color(
"bold") << methodTitle
383 "Number of times the classifier will be boosted" );
387 if(fModelPersistence)
403 Log() << kDEBUG <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
411 Log() << kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
417 methBoost->
SetFile(fgTargetFile);
422 if (method==0)
return 0;
428 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
433 methCat->
SetFile(fgTargetFile);
441 Log() << kWARNING <<
"Method " << method->
GetMethodTypeName() <<
" is not capable of handling " ;
466 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
469 fMethodsMap[datasetname]=mvector;
471 fMethodsMap[datasetname]->push_back( method );
491 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
493 MVector *methods=fMethodsMap.find(datasetname)->second;
495 MVector::const_iterator itrMethod;
497 for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
509 if(fMethodsMap.find(datasetname)==fMethodsMap.end())
return 0;
511 std::string methodName = methodTitle.
Data();
513 return ( 0 == methodName.compare(
m->
GetName() ) );
517 Bool_t isMethodNameExisting = std::any_of( methods->begin(), methods->end(), isEqualToMethodName);
519 return isMethodNameExisting;
528 if(!RootBaseDir()->GetDirectory(fDataSetInfo.
GetName())) RootBaseDir()->mkdir(fDataSetInfo.
GetName());
531 RootBaseDir()->cd(fDataSetInfo.
GetName());
578 processTrfs = fTransformations;
581 std::vector<TMVA::TransformationHandler*> trfs;
585 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
586 for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
591 Log() << kDEBUG <<
"current transformation string: '" << trfS.
Data() <<
"'" <<
Endl;
597 if (trfS.
BeginsWith(
'I')) identityTrHandler = trfs.back();
603 std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
605 for (;trfIt != trfs.end(); trfIt++) {
607 (*trfIt)->SetRootDir(RootBaseDir()->GetDirectory(fDataSetInfo.
GetName()));
608 (*trfIt)->CalcTransformations(inputEvents);
613 for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++)
delete *trfIt;
625 std::map<TString,MVector*>::iterator itrMap;
626 std::map<TString,Double_t> TunedParameters;
627 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
629 MVector *methods=itrMap->second;
631 MVector::iterator itrMethod;
634 for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
638 Log() << kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
639 return TunedParameters;
644 <<
" not trained (training tree has less entries ["
655 Log() << kINFO <<
"Optimization of tuning parameters finished for Method:"<<mva->
GetName() <<
Endl;
659 return TunedParameters;
684 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
685 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
689 if (!this->HasMethod(datasetname, theMethodName)) {
690 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data())
696 if (allowedAnalysisTypes.count(this->fAnalysisType) == 0) {
697 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.")
708 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
718 std::vector<Bool_t> *mvaResTypes =
dynamic_cast<ResultsClassification *
>(results)->GetValueVectorTypes();
719 std::vector<Float_t> mvaResWeights;
722 mvaResWeights.reserve(eventCollection.size());
723 for (
auto ev : eventCollection) {
724 mvaResWeights.push_back(ev->GetWeight());
727 rocCurve =
new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
730 std::vector<Float_t> mvaRes;
731 std::vector<Bool_t> mvaResTypes;
732 std::vector<Float_t> mvaResWeights;
734 std::vector<std::vector<Float_t>> *rawMvaRes =
dynamic_cast<ResultsMulticlass *
>(results)->GetValueVector();
739 mvaRes.reserve(rawMvaRes->size());
740 for (
auto item : *rawMvaRes) {
741 mvaRes.push_back(item[iClass]);
745 mvaResTypes.reserve(eventCollection.size());
746 mvaResWeights.reserve(eventCollection.size());
747 for (
auto ev : eventCollection) {
748 mvaResTypes.push_back(ev->GetClass() == iClass);
749 mvaResWeights.push_back(ev->GetWeight());
752 rocCurve =
new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
768 return GetROCIntegral((
TString)loader->
GetName(), theMethodName, iClass);
781 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
782 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
786 if ( ! this->HasMethod(datasetname, theMethodName) ) {
787 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
792 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
793 Log() << kERROR <<
Form(
"Can only generate ROC integral for analysis type kClassification. and kMulticlass.")
798 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
800 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ",
801 theMethodName.
Data(), datasetname.
Data())
829 return GetROCCurve( (
TString)loader->
GetName(), theMethodName, setTitles, iClass );
848 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
849 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
853 if ( ! this->HasMethod(datasetname, theMethodName) ) {
854 Log() << kERROR <<
Form(
"Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
859 if ( allowedAnalysisTypes.count(this->fAnalysisType) == 0 ) {
860 Log() << kERROR <<
Form(
"Can only generate ROC curves for analysis type kClassification and kMulticlass.") <<
Endl;
864 TMVA::ROCCurve *rocCurve = GetROC(datasetname, theMethodName, iClass);
868 Log() << kFATAL <<
Form(
"ROCCurve object was not created in Method = %s not found with Dataset = %s ", theMethodName.
Data(), datasetname.
Data()) <<
Endl;
878 graph->
SetTitle(
Form(
"Signal efficiency vs. Background rejection (%s)", theMethodName.
Data()));
898 return GetROCCurveAsMultiGraph((
TString)loader->
GetName(), iClass);
919 MVector *methods = fMethodsMap[datasetname.
Data()];
920 for (
auto * method_raw : *methods) {
922 if (method ==
nullptr) {
continue; }
928 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.", iClass, nClasses) <<
Endl;
934 TGraph *graph = this->GetROCCurve(datasetname, methodName,
false, iClass);
941 multigraph->
Add(graph);
945 Log() << kERROR <<
Form(
"No metohds have class %i defined.", iClass) <<
Endl;
982 if (fMethodsMap.find(datasetname) == fMethodsMap.end()) {
983 Log() << kERROR <<
Form(
"DataSet = %s not found in methods map.", datasetname.
Data()) <<
Endl;
988 TCanvas *canvas =
new TCanvas(name,
"ROC Curve", 200, 10, 700, 500);
991 TMultiGraph *multigraph = this->GetROCCurveAsMultiGraph(datasetname, iClass);
994 multigraph->
Draw(
"AL");
999 TString titleString =
Form(
"Signal efficiency vs. Background rejection");
1001 titleString =
Form(
"%s (Class=%i)", titleString.
Data(), iClass);
1006 multigraph->
SetTitle( titleString );
1008 canvas->BuildLegend(0.15, 0.15, 0.35, 0.3,
"MVA Method");
1024 if (fMethodsMap.empty()) {
1025 Log() << kINFO <<
"...nothing found to train" <<
Endl;
1031 Log() << kDEBUG <<
"Train all methods for "
1035 std::map<TString,MVector*>::iterator itrMap;
1037 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1039 MVector *methods=itrMap->second;
1040 MVector::iterator itrMethod;
1043 for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
1047 if(mva==0)
continue;
1050 Log() << kFATAL <<
"No input data for the training provided!" <<
Endl;
1054 Log() << kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
1057 Log() << kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
1060 if(!IsSilentFile()) WriteDataInformation(mva->
fDataSetInfo);
1065 <<
" not trained (training tree has less entries ["
1075 Log() << kHEADER <<
"Training finished" << Endl <<
Endl;
1082 Log() << kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1083 for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1088 const Ranking* ranking = (*itrMethod)->CreateRanking();
1089 if (ranking != 0) ranking->
Print();
1090 else Log() << kINFO <<
"No variable ranking supplied by classifier: "
1100 if (fModelPersistence) {
1102 Log() << kHEADER <<
"=== Destroy and recreate all methods via weight files for testing ===" <<
Endl <<
Endl;
1104 if(!IsSilentFile())RootBaseDir()->cd();
1107 for (
UInt_t i=0; i<methods->size(); i++) {
1125 dataSetInfo, weightfile ) );
1128 if( !methCat )
Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1158 if (fMethodsMap.empty()) {
1159 Log() << kINFO <<
"...nothing found to test" <<
Endl;
1162 std::map<TString,MVector*>::iterator itrMap;
1164 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1166 MVector *methods=itrMap->second;
1167 MVector::iterator itrMethod;
1170 for( itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++ ) {
1173 if(mva==0)
continue;
1187 if (methodTitle !=
"") {
1191 Log() << kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle
1192 <<
"\" in list" <<
Endl;
1198 MVector *methods=fMethodsMap.find(datasetname)->second;
1199 MVector::const_iterator itrMethod;
1200 for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1202 if(method==0)
continue;
1215 if (methodTitle !=
"") {
1219 Log() << kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle
1220 <<
"\" in list" <<
Endl;
1226 MVector *methods=fMethodsMap.find(datasetname)->second;
1227 MVector::const_iterator itrMethod ;
1228 for (itrMethod = methods->begin(); itrMethod != methods->end(); itrMethod++) {
1230 if(method==0)
continue;
1242 Log() << kINFO <<
"Evaluating all variables..." <<
Endl;
1247 if (options.
Contains(
"V")) s +=
":V";
1248 this->BookMethod(loader,
"Variable", s );
1260 if (fMethodsMap.empty()) {
1261 Log() << kINFO <<
"...nothing found to evaluate" <<
Endl;
1264 std::map<TString,MVector*>::iterator itrMap;
1266 for(itrMap = fMethodsMap.begin();itrMap != fMethodsMap.end();itrMap++)
1268 MVector *methods=itrMap->second;
1278 Int_t nmeth_used[2] = {0,0};
1280 std::vector<std::vector<TString> > mname(2);
1281 std::vector<std::vector<Double_t> > sig(2),
sep(2), roc(2);
1282 std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1283 std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1284 std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1286 std::vector<std::vector<Float_t> > multiclass_testEff;
1287 std::vector<std::vector<Float_t> > multiclass_trainEff;
1288 std::vector<std::vector<Float_t> > multiclass_testPur;
1289 std::vector<std::vector<Float_t> > multiclass_trainPur;
1292 std::vector<TMatrixD> multiclass_testConfusionEffB01;
1293 std::vector<TMatrixD> multiclass_testConfusionEffB10;
1294 std::vector<TMatrixD> multiclass_testConfusionEffB30;
1296 std::vector<std::vector<Double_t> > biastrain(1);
1297 std::vector<std::vector<Double_t> > biastest(1);
1298 std::vector<std::vector<Double_t> > devtrain(1);
1299 std::vector<std::vector<Double_t> > devtest(1);
1300 std::vector<std::vector<Double_t> > rmstrain(1);
1301 std::vector<std::vector<Double_t> > rmstest(1);
1302 std::vector<std::vector<Double_t> > minftrain(1);
1303 std::vector<std::vector<Double_t> > minftest(1);
1304 std::vector<std::vector<Double_t> > rhotrain(1);
1305 std::vector<std::vector<Double_t> > rhotest(1);
1308 std::vector<std::vector<Double_t> > biastrainT(1);
1309 std::vector<std::vector<Double_t> > biastestT(1);
1310 std::vector<std::vector<Double_t> > devtrainT(1);
1311 std::vector<std::vector<Double_t> > devtestT(1);
1312 std::vector<std::vector<Double_t> > rmstrainT(1);
1313 std::vector<std::vector<Double_t> > rmstestT(1);
1314 std::vector<std::vector<Double_t> > minftrainT(1);
1315 std::vector<std::vector<Double_t> > minftestT(1);
1324 for (MVector::iterator itrMethod =methods->begin(); itrMethod != methods->end(); itrMethod++) {
1327 if(theMethod==0)
continue;
1328 theMethod->
SetFile(fgTargetFile);
1333 doRegression =
kTRUE;
1341 biastest[0] .push_back( bias );
1342 devtest[0] .push_back( dev );
1343 rmstest[0] .push_back( rms );
1344 minftest[0] .push_back( mInf );
1345 rhotest[0] .push_back( rho );
1346 biastestT[0] .push_back( biasT );
1347 devtestT[0] .push_back( devT );
1348 rmstestT[0] .push_back( rmsT );
1349 minftestT[0] .push_back( mInfT );
1352 biastrain[0] .push_back( bias );
1353 devtrain[0] .push_back( dev );
1354 rmstrain[0] .push_back( rms );
1355 minftrain[0] .push_back( mInf );
1356 rhotrain[0] .push_back( rho );
1357 biastrainT[0].push_back( biasT );
1358 devtrainT[0] .push_back( devT );
1359 rmstrainT[0] .push_back( rmsT );
1360 minftrainT[0].push_back( mInfT );
1366 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1374 doMulticlass =
kTRUE;
1375 Log() << kINFO <<
"Evaluate multiclass classification method: " << theMethod->
GetMethodName() <<
Endl;
1388 if (not IsSilentFile()) {
1389 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1412 eff01err[isel].push_back(err);
1414 eff10err[isel].push_back(err);
1416 eff30err[isel].push_back(err);
1425 if (!IsSilentFile()) {
1426 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
1434 std::vector<TString> vtemps = mname[0];
1435 std::vector< std::vector<Double_t> > vtmp;
1436 vtmp.push_back( devtest[0] );
1437 vtmp.push_back( devtrain[0] );
1438 vtmp.push_back( biastest[0] );
1439 vtmp.push_back( biastrain[0] );
1440 vtmp.push_back( rmstest[0] );
1441 vtmp.push_back( rmstrain[0] );
1442 vtmp.push_back( minftest[0] );
1443 vtmp.push_back( minftrain[0] );
1444 vtmp.push_back( rhotest[0] );
1445 vtmp.push_back( rhotrain[0] );
1446 vtmp.push_back( devtestT[0] );
1447 vtmp.push_back( devtrainT[0] );
1448 vtmp.push_back( biastestT[0] );
1449 vtmp.push_back( biastrainT[0]);
1450 vtmp.push_back( rmstestT[0] );
1451 vtmp.push_back( rmstrainT[0] );
1452 vtmp.push_back( minftestT[0] );
1453 vtmp.push_back( minftrainT[0]);
1456 devtest[0] = vtmp[0];
1457 devtrain[0] = vtmp[1];
1458 biastest[0] = vtmp[2];
1459 biastrain[0] = vtmp[3];
1460 rmstest[0] = vtmp[4];
1461 rmstrain[0] = vtmp[5];
1462 minftest[0] = vtmp[6];
1463 minftrain[0] = vtmp[7];
1464 rhotest[0] = vtmp[8];
1465 rhotrain[0] = vtmp[9];
1466 devtestT[0] = vtmp[10];
1467 devtrainT[0] = vtmp[11];
1468 biastestT[0] = vtmp[12];
1469 biastrainT[0] = vtmp[13];
1470 rmstestT[0] = vtmp[14];
1471 rmstrainT[0] = vtmp[15];
1472 minftestT[0] = vtmp[16];
1473 minftrainT[0] = vtmp[17];
1474 }
else if (doMulticlass) {
1482 for (
Int_t k=0; k<2; k++) {
1483 std::vector< std::vector<Double_t> > vtemp;
1484 vtemp.push_back( effArea[k] );
1485 vtemp.push_back( eff10[k] );
1486 vtemp.push_back( eff01[k] );
1487 vtemp.push_back( eff30[k] );
1488 vtemp.push_back( eff10err[k] );
1489 vtemp.push_back( eff01err[k] );
1490 vtemp.push_back( eff30err[k] );
1491 vtemp.push_back( trainEff10[k] );
1492 vtemp.push_back( trainEff01[k] );
1493 vtemp.push_back( trainEff30[k] );
1494 vtemp.push_back( sig[k] );
1495 vtemp.push_back(
sep[k] );
1496 vtemp.push_back( roc[k] );
1497 std::vector<TString> vtemps = mname[k];
1499 effArea[k] = vtemp[0];
1500 eff10[k] = vtemp[1];
1501 eff01[k] = vtemp[2];
1502 eff30[k] = vtemp[3];
1503 eff10err[k] = vtemp[4];
1504 eff01err[k] = vtemp[5];
1505 eff30err[k] = vtemp[6];
1506 trainEff10[k] = vtemp[7];
1507 trainEff01[k] = vtemp[8];
1508 trainEff30[k] = vtemp[9];
1524 const Int_t nmeth = methodsNoCuts.size();
1527 if (!doRegression && !doMulticlass ) {
1533 std::vector<Double_t> rvec;
1541 std::vector<TString>* theVars =
new std::vector<TString>;
1542 std::vector<ResultsClassification*> mvaRes;
1543 for (MVector::iterator itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
1548 theVars->back().ReplaceAll(
"MVA_",
"" );
1568 for (
Int_t im=0; im<nmeth; im++) {
1572 Log() << kWARNING <<
"Found NaN return value in event: " << ievt
1573 <<
" for method \"" << methodsNoCuts[im]->GetName() <<
"\"" <<
Endl;
1576 else dvec[im] = retval;
1580 else { tpBkg->
AddRow( dvec ); theMat = overlapB; }
1583 for (
Int_t im=0; im<nmeth; im++) {
1584 for (
Int_t jm=im; jm<nmeth; jm++) {
1585 if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1587 if (im != jm) (*theMat)(jm,im)++;
1607 if (corrMatS != 0 && corrMatB != 0) {
1612 for (
Int_t im=0; im<nmeth; im++) {
1613 for (
Int_t jm=0; jm<nmeth; jm++) {
1614 mvaMatS(im,jm) = (*corrMatS)(im,jm);
1615 mvaMatB(im,jm) = (*corrMatB)(im,jm);
1620 std::vector<TString> theInputVars;
1623 for (
Int_t iv=0; iv<nvar; iv++) {
1625 for (
Int_t jm=0; jm<nmeth; jm++) {
1626 varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1627 varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1637 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Inter-MVA correlation matrix (background):" << Endl;
1642 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1646 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Correlations between input variables and MVA response (background):" << Endl;
1653 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1654 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1655 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1657 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1660 if (nmeth != (
Int_t)methods->size())
1661 Log() << kINFO <<
Form(
"Dataset[%s] : ",method->
fDataSetInfo.
GetName())<<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1693 TString hLine =
"--------------------------------------------------------------------------------------------------";
1694 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1695 Log() << kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1696 Log() << kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1697 Log() << kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1698 Log() << kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1699 Log() << kINFO << hLine <<
Endl;
1701 Log() << kINFO << hLine <<
Endl;
1703 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1705 if(theMethod==0)
continue;
1707 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1709 (
const char*)mname[0][i],
1710 biastest[0][i], biastestT[0][i],
1711 rmstest[0][i], rmstestT[0][i],
1712 minftest[0][i], minftestT[0][i] )
1715 Log() << kINFO << hLine <<
Endl;
1717 Log() << kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1718 Log() << kINFO <<
"(overtraining check)" <<
Endl;
1719 Log() << kINFO << hLine <<
Endl;
1720 Log() << kINFO <<
"DataSet Name: MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1721 Log() << kINFO << hLine <<
Endl;
1723 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1725 if(theMethod==0)
continue;
1726 Log() << kINFO <<
Form(
"%-20s %-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1728 (
const char*)mname[0][i],
1729 biastrain[0][i], biastrainT[0][i],
1730 rmstrain[0][i], rmstrainT[0][i],
1731 minftrain[0][i], minftrainT[0][i] )
1734 Log() << kINFO << hLine <<
Endl;
1736 }
else if (doMulticlass) {
1742 "-------------------------------------------------------------------------------------------------------";
1782 Form(
"%-15s%-15s%-10s%-10s%-10s%-10s",
"Dataset",
"MVA Method",
"",
"Sig eff",
"Sig eff",
"Sig eff");
1784 Form(
"%-15s%-15s%-10s%-10s%-10s%-10s",
"Name:",
"/ Class:",
"ROC AUC",
"@B=0.01",
"@B=0.10",
"@B=0.30");
1785 Log() << kINFO <<
"1-vs-rest performance metrics per class" <<
Endl;
1786 Log() << kINFO << hLine <<
Endl;
1788 Log() << kINFO <<
"Considers the listed class as signal and the other classes" <<
Endl;
1789 Log() << kINFO <<
"as background, reporting the resulting binary performance." <<
Endl;
1792 Log() << kINFO << header1 <<
Endl;
1793 Log() << kINFO << header2 <<
Endl;
1794 for (
Int_t k = 0; k < 2; k++) {
1795 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
1797 mname[k][i].ReplaceAll(
"Variable_",
"");
1800 const TString datasetName = itrMap->first;
1801 const TString mvaName = mname[k][i];
1804 if (theMethod == 0) {
1810 Log() << kINFO << row <<
Endl;
1816 for (
UInt_t iClass = 0; iClass < numClasses; ++iClass) {
1817 std::vector<Float_t> mvaRes;
1818 std::vector<Bool_t> mvaResType;
1819 std::vector<Float_t> mvaResWeight;
1821 std::vector<std::vector<Float_t>> *rawMvaRes =
1827 mvaRes.reserve(rawMvaRes->size());
1828 for (
auto item : *rawMvaRes) {
1829 mvaRes.push_back(item[iClass]);
1833 mvaResType.reserve(eventCollection.size());
1834 mvaResWeight.reserve(eventCollection.size());
1835 for (
auto ev : eventCollection) {
1836 mvaResType.push_back(ev->GetClass() == iClass);
1837 mvaResWeight.push_back(ev->GetWeight());
1847 row =
Form(
"%-15s%-15s%-10.3f%-10.3f%-10.3f%-10.3f",
"", className.
Data(), rocauc, effB01, effB10,
1849 Log() << kINFO << row <<
Endl;
1853 Log() << kINFO << hLine <<
Endl;
1863 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1864 header +=
Form(
" %-12s", classnames[iCol].
Data());
1866 stream << kINFO << header <<
Endl;
1868 for (
UInt_t iRow = 0; iRow < numClasses; ++iRow) {
1869 stream << kINFO <<
Form(
"%-12s", classnames[iRow].
Data());
1871 for (
UInt_t iCol = 0; iCol < numClasses; ++iCol) {
1873 stream << kINFO <<
Form(
" %-12s",
"-");
1878 stream << kINFO <<
Form(
" %-12.3f", value);
1880 stream << kINFO <<
Endl;
1885 Log() << kINFO <<
"Confusion matrices for all methods" <<
Endl;
1886 Log() << kINFO << hLine <<
Endl;
1888 Log() << kINFO <<
"Does a binary comparison between the two classes given by a " <<
Endl;
1889 Log() << kINFO <<
"particular row-column combination. In each case, the class " <<
Endl;
1890 Log() << kINFO <<
"given by the row is considered signal while the class given " <<
Endl;
1891 Log() << kINFO <<
"by the column index is considered background." <<
Endl;
1893 for (
UInt_t iMethod = 0; iMethod < methods->size(); ++iMethod) {
1895 if (theMethod ==
nullptr) {
1900 std::vector<TString> classnames;
1901 for (
UInt_t iCls = 0; iCls < numClasses; ++iCls) {
1904 Log() << kINFO <<
"Showing confusion matrix for method : " <<
Form(
"%-15s", (
const char *)mname[0][iMethod])
1906 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.01%)" <<
Endl;
1907 printMatrix(multiclass_testConfusionEffB01[iMethod], classnames, numClasses,
Log());
1910 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.10%)" <<
Endl;
1911 printMatrix(multiclass_testConfusionEffB10[iMethod], classnames, numClasses,
Log());
1914 Log() << kINFO <<
"(Signal Efficiency for Background Efficiency 0.30%)" <<
Endl;
1915 printMatrix(multiclass_testConfusionEffB30[iMethod], classnames, numClasses,
Log());
1918 Log() << kINFO << hLine <<
Endl;
1924 Log().EnableOutput();
1927 TString hLine =
"------------------------------------------------------------------------------------------"
1928 "-------------------------";
1929 Log() << kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
1930 Log() << kINFO << hLine <<
Endl;
1931 Log() << kINFO <<
"DataSet MVA " <<
Endl;
1932 Log() << kINFO <<
"Name: Method: ROC-integ" <<
Endl;
1937 Log() << kDEBUG << hLine <<
Endl;
1938 for (
Int_t k = 0; k < 2; k++) {
1939 if (k == 1 && nmeth_used[k] > 0) {
1940 Log() << kINFO << hLine <<
Endl;
1941 Log() << kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
1943 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
1944 TString datasetName = itrMap->first;
1945 TString methodName = mname[k][i];
1952 if (theMethod == 0) {
1958 std::vector<Bool_t> *mvaResType =
1962 if (mvaResType->size() != 0) {
1963 rocIntegral = GetROCIntegral(datasetName, methodName);
1966 if (
sep[k][i] < 0 || sig[k][i] < 0) {
1968 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), effArea[k][i])
1980 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), rocIntegral)
1994 Log() << kINFO << hLine <<
Endl;
1996 Log() << kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
1997 Log() << kINFO << hLine <<
Endl;
1999 <<
"DataSet MVA Signal efficiency: from test sample (from training sample) "
2001 Log() << kINFO <<
"Name: Method: @B=0.01 @B=0.10 @B=0.30 "
2003 Log() << kINFO << hLine <<
Endl;
2004 for (
Int_t k = 0; k < 2; k++) {
2005 if (k == 1 && nmeth_used[k] > 0) {
2006 Log() << kINFO << hLine <<
Endl;
2007 Log() << kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
2009 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
2010 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"");
2012 if (theMethod == 0)
continue;
2014 Log() << kINFO <<
Form(
"%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
2016 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
2020 Log() << kINFO << hLine <<
Endl;
2023 if (
gTools().CheckForSilentOption(GetOptions()))
Log().InhibitOutput();
2028 std::list<TString> datasets;
2029 for (
Int_t k=0; k<2; k++) {
2030 for (
Int_t i=0; i<nmeth_used[k]; i++) {
2032 if(theMethod==0)
continue;
2035 if(std::find(datasets.begin(), datasets.end(), theMethod->
fDataSetInfo.
GetName()) == datasets.end())
2054 fModelPersistence=
kFALSE;
2059 if(vitype==VIType::kShort)
2060 return EvaluateImportanceShort(loader,theMethod,methodTitle,theOption);
2061 else if(vitype==VIType::kAll)
2062 return EvaluateImportanceAll(loader,theMethod,methodTitle,theOption);
2063 else if(vitype==VIType::kRandom&&nbits>10)
2065 return EvaluateImportanceRandom(loader,
pow(2,nbits),theMethod,methodTitle,theOption);
2068 std::cerr<<
"Error in Variable Importance: Random mode require more that 10 variables in the dataset."<<std::endl;
2085 uint64_t range =
pow(2, nbits);
2088 std::vector<Double_t> importances(nbits);
2090 std::vector<Double_t> ROC(range);
2092 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2095 for ( x = 1; x <range ; x++) {
2097 std::bitset<VIBITS> xbitset(x);
2098 if (x == 0)
continue;
2104 for (
int index = 0; index < nbits; index++) {
2105 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2112 BookMethod(seedloader, theMethod, methodTitle, theOption);
2117 EvaluateAllMethods();
2120 ROC[
x] = GetROCIntegral(xbitset.to_string(), methodTitle);
2127 this->DeleteAllMethods();
2129 fMethodsMap.clear();
2134 for ( x = 0; x <range ; x++)
2137 for (uint32_t i = 0; i <
VIBITS; ++i) {
2140 std::bitset<VIBITS> ybitset(y);
2146 importances[
ny] = SROC - 0.5;
2152 importances[
ny] += SROC - SSROC;
2158 std::cout<<
"--- Variable Importance Results (All)"<<std::endl;
2159 return GetImportance(nbits,importances,varNames);
2162 static long int sum(
long int i)
2165 for(
long int n=0;
n<i;
n++) _sum+=
pow(2,
n);
2180 long int range =
sum(nbits);
2183 std::vector<Double_t> importances(nbits);
2184 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2190 std::bitset<VIBITS> xbitset(x);
2191 if (x == 0)
Log()<<kFATAL<<
"Error: need at least one variable.";
2198 for (
int index = 0; index < nbits; index++) {
2199 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2206 BookMethod(seedloader, theMethod, methodTitle, theOption);
2211 EvaluateAllMethods();
2214 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2221 this->DeleteAllMethods();
2222 fMethodsMap.clear();
2226 for (uint32_t i = 0; i <
VIBITS; ++i) {
2229 std::bitset<VIBITS> ybitset(y);
2235 importances[
ny] = SROC - 0.5;
2242 for (
int index = 0; index < nbits; index++) {
2243 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2250 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2255 EvaluateAllMethods();
2258 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2259 importances[
ny] += SROC - SSROC;
2265 delete subseedloader;
2266 this->DeleteAllMethods();
2267 fMethodsMap.clear();
2270 std::cout<<
"--- Variable Importance Results (Short)"<<std::endl;
2271 return GetImportance(nbits,importances,varNames);
2287 long int range =
pow(2, nbits);
2290 std::vector<Double_t> importances(nbits);
2292 for (
int i = 0; i < nbits; i++)importances[i] = 0;
2296 x = rangen -> Integer(range);
2298 std::bitset<32> xbitset(x);
2299 if (x == 0)
continue;
2306 for (
int index = 0; index < nbits; index++) {
2307 if (xbitset[index]) seedloader->
AddVariable(varNames[index],
'F');
2314 BookMethod(seedloader, theMethod, methodTitle, theOption);
2319 EvaluateAllMethods();
2322 SROC = GetROCIntegral(xbitset.to_string(), methodTitle);
2330 this->DeleteAllMethods();
2331 fMethodsMap.clear();
2335 for (uint32_t i = 0; i < 32; ++i) {
2338 std::bitset<32> ybitset(y);
2344 importances[
ny] = SROC - 0.5;
2345 importances_norm += importances[
ny];
2353 for (
int index = 0; index < nbits; index++) {
2354 if (ybitset[index]) subseedloader->
AddVariable(varNames[index],
'F');
2361 BookMethod(subseedloader, theMethod, methodTitle, theOption);
2366 EvaluateAllMethods();
2369 SSROC = GetROCIntegral(ybitset.to_string(), methodTitle);
2370 importances[
ny] += SROC - SSROC;
2376 delete subseedloader;
2377 this->DeleteAllMethods();
2378 fMethodsMap.clear();
2382 std::cout<<
"--- Variable Importance Results (Random)"<<std::endl;
2383 return GetImportance(nbits,importances,varNames);
2390 TH1F *vih1 =
new TH1F(
"vih1",
"", nbits, 0, nbits);
2395 for (
int i = 0; i < nbits; i++) {
2396 normalization = normalization + importances[i];
2406 for (
Int_t i = 1; i < nbits + 1; i++) {
2407 x_ie[i - 1] = (i - 1) * 1.;
2408 roc = 100.0 * importances[i - 1] / normalization;
2410 std::cout<<
"--- "<<varNames[i-1]<<
" = "<<roc<<
" %"<<std::endl;
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stored
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title Offset is a correction factor with respect to the "s...
void SetModelPersistence(Bool_t status)
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
TH1F * GetImportance(const int nbits, std::vector< Double_t > importances, std::vector< TString > varNames)
DataSetManager * fDataSetManager
static long int sum(long int i)
Principal Components Analysis (PCA)
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Double_t GetEffSForEffB(Double_t effB, const UInt_t num_points=41)
Calculate the signal efficiency (sensitivity) for a given background efficiency (1 - specificity).
Random number generator class based on the Mersenne Twister generator of M. Matsumoto and T. Nishimura.
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA.
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
void EvaluateAllVariables(DataLoader *loader, TString options="")
Iterates over all MVA input variables and evaluates them.
Bool_t fROC
flag to enable calculation of correlations
TList * GetListOfGraphs() const
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current directory in memory.
TString & ReplaceAll(const TString &s1, const TString &s2)
const char * GetName() const
R__EXTERN TStyle * gStyle
static Types & Instance()
returns the single instance of "Types" if existing already, or creates it (Singleton)
void CheckForUnusedOptions() const
checks for unused options in option string
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
UInt_t GetNClasses() const
TH1F * EvaluateImportanceShort(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
A TMultiGraph is a collection of TGraph (or derived) objects.
virtual int MakeDirectory(const char *name)
Make a directory.
DataSetInfo & DefaultDataSetInfo()
default creation
virtual void MakeClass(const TString &classFileName=TString("")) const =0
UInt_t GetNTargets() const
Virtual base Class for all MVA method.
TString fTransformations
option string given by construction (presently only "V")
1-D histogram with a float per channel (see TH1 documentation)
Ranking for variables in method (implementation)
void ToLower()
Change string to lower-case.
void TrainAllMethods()
Iterates through all booked methods and calls training.
virtual void SetTitle(const char *title="")
Set graph title.
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
TMultiGraph * GetROCCurveAsMultiGraph(DataLoader *loader, UInt_t iClass)
Generate a collection of graphs, for all methods for a given class.
void WriteDataInformation(DataSetInfo &fDataSetInfo)
void SetSilentFile(Bool_t status)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
UInt_t GetNVariables() const
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
IMethod * GetMethod(const TString &datasetname, const TString &title) const
Returns pointer to MVA that corresponds to given method title.
DataSet * GetDataSet() const
returns data set
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
TH1F * EvaluateImportanceRandom(DataLoader *loader, UInt_t nseeds, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
overwrite existing object with same name
const TString & GetMethodName() const
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
TString GetWeightFileName() const
retrieve weight file name
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual void SetBarWidth(Float_t width=0.5)
TGraph * GetROCCurve(DataLoader *loader, TString theMethodName, Bool_t setTitles=kTRUE, UInt_t iClass=0)
Argument iClass specifies the class to generate the ROC curve in a multiclass setting.
virtual const char * GetName() const
Returns name of object.
const char * Data() const
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
Types::EAnalysisType GetAnalysisType() const
static void InhibitOutput()
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
DataSetInfo & fDataSetInfo
TH1F * EvaluateImportance(DataLoader *loader, VIType vitype, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Evaluate Variable Importance.
Bool_t IsSignal(const Event *ev) const
Bool_t fModelPersistence
the training type
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
void ReadStateFromFile()
Function to read options and weights from file.
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
double pow(double, double)
Bool_t DoMulticlass() const
std::vector< std::vector< double > > Data
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
void SetMinType(EMsgType minType)
void SetDrawProgressBar(Bool_t d)
TH1F * EvaluateImportanceAll(DataLoader *loader, Types::EMVA theMethod, TString methodTitle, const char *theOption="")
Types::EMVA GetMethodType() const
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Bool_t IsModelPersistence()
Class that contains all the data information.
virtual void Draw(Option_t *chopt="")
Draw this multigraph with its current attributes.
TAxis * GetXaxis() const
Get x axis of the graph.
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
void PrintHelpMessage(const TString &datasetname, const TString &methodTitle="") const
Print predefined help message of classifier.
TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
Class for boosting a TMVA method.
const TMatrixD * GetCovarianceMatrix() const
TMatrixT< Double_t > TMatrixD
const Int_t MinNoTrainingEvents
Class that contains all the data information.
virtual ~Factory()
Destructor.
virtual void SetLineColor(Color_t lcolor)
Set the line color.
std::map< TString, Double_t > OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
Iterates through all booked methods and sees if they use parameter tuning and if so.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
std::string GetMethodName(TCppMethod_t)
TAxis * GetXaxis() const
Get x axis of the graph.
Double_t GetROCIntegral(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Calculate the integral of the ROC curve, also known as the area under curve (AUC), for a given method.
DataSetManager * GetDataSetManager()
Service class for 2-Dim histogram classes.
R__EXTERN TSystem * gSystem
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Bool_t fCorrelations
verbose mode
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods. ...
class TMVA::Config::VariablePlotting fVariablePlotting
void printMatrix(const M &m)
ClassInfo * GetClassInfo(Int_t clNum) const
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
const TMatrixD * CorrelationMatrix(const TString &className) const
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb"...
virtual const char * GetName() const
Returns name of object.
virtual Double_t GetSignificance() const
compute significance of mean difference
void Greetings()
Print welcome message.
This is the main MVA steering class.
const TCut & GetCut(Int_t i) const
virtual void MakePrincipals()
Perform the principal components analysis.
void SetBoostedMethodName(TString methodName)
void SetVerbose(Bool_t v=kTRUE)
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title The size is expressed in per cent of the pad width.
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
Class for categorizing the phase space.
const TString & GetSplitOptions() const
const Event * GetEvent() const
void SetCurrentType(Types::ETreeType type) const
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
virtual const char * GetName() const
Returns name of object.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual const char * GetName() const
Returns name of object.
virtual void PrintHelpMessage() const =0
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
Class which takes the results of a multiclass classification.
void SetFile(TFile *file)
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
static void DestroyInstance()
static function: destroy TMVA instance
Bool_t HasMethod(const TString &datasetname, const TString &title) const
Checks whether a given method name is defined for a given dataset.
DataSetInfo & DataInfo() const
VariableInfo & GetVariableInfo(Int_t i)
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void AddPreDefVal(const T &)
TH1F * GetHistogram() const
Returns a pointer to the histogram used to draw the axis.
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
ostringstream derivative to redirect and format output
TAxis * GetYaxis() const
Get y axis of the graph.
void SetUseColor(Bool_t uc)
void SetConfigName(const char *n)
Interface for all concrete MVA method implementations.
void SetSource(const std::string &source)
Bool_t DoRegression() const
void SetTitleXOffset(Float_t offset=1)
virtual void MakeClass(const TString &datasetname, const TString &methodTitle="") const
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
DataSetManager * fDataSetManager
Bool_t Verbose(void) const
ROCCurve * GetROC(DataLoader *loader, TString theMethodName, UInt_t iClass=0)
Private method to generate an instance of a ROCCurve regardless of analysis type. ...
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
Standard constructor.
DataInputHandler & DataInput()
void PrintHelpMessage() const
prints out method-specific help method
Class that is the base-class for a vector of result.
const TString & GetOptions() const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetWeightFileDir(TString fileDir)
set directory of weight file
A Graph is a graphics object made of two arrays X and Y with npoints each.
void DeleteAllMethods(void)
Delete methods.
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
virtual Double_t GetTrainingEfficiency(const TString &)
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
std::vector< TString > GetListOfVariables() const
returns list of variables
Long64_t GetNTrainingEvents() const
Types::EAnalysisType fAnalysisType
jobname, used as extension in weight file names
virtual void SetTitle(const char *title)
Change (i.e.
std::vector< IMethod * > MVector
Double_t GetSignalReferenceCut() const
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
virtual void Print() const
get maximum length of variable names
TString GetMethodTypeName() const
const TString & GetLabel() const
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
void SetTestvarName(const TString &v="")
TAxis * GetYaxis() const
Get y axis of the graph.
virtual void TestClassification()
initialization
virtual void SetAnalysisType(Types::EAnalysisType type)
Class that is the base-class for a vector of result.
void SetConfigDescription(const char *d)
Bool_t fVerbose
List of transformations to test.
const TString & GetTestvarName() const