81 , fGDPathStep ( 0.01 )
82 , fGDNPathSteps ( 1000 )
107 if (fNTCoeff) {
delete fNTCoeff; fNTCoeff = 0; }
108 if (fNTLinCoeff) {
delete fNTLinCoeff;fNTLinCoeff = 0; }
117 if (fRuleFit==0)
return;
118 if (fRuleFit->GetMethodRuleFit()==0) {
119 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
121 UInt_t neve = fRuleFit->GetTrainingEvents().size();
123 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
124 fNRules = fRuleEnsemble->GetNRules();
125 fNLinear = fRuleEnsemble->GetNLinear();
134 fPerfIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
139 ofs = neve - fPerfIdx2 - 1;
149 fPathIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
158 for (
UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
159 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
163 for (
UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
164 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
167 Log() << kVERBOSE <<
"Path constr. - event index range = [ " << fPathIdx1 <<
", " << fPathIdx2 <<
" ]"
168 <<
", effective N(events) = " << fNEveEffPath <<
Endl;
169 Log() << kVERBOSE <<
"Error estim. - event index range = [ " << fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]"
170 <<
", effective N(events) = " << fNEveEffPerf <<
Endl;
172 if (fRuleEnsemble->DoRules())
173 Log() << kDEBUG <<
"Number of rules in ensemble: " << fNRules <<
Endl;
175 Log() << kDEBUG <<
"Rules are disabled " <<
Endl;
177 if (fRuleEnsemble->DoLinear())
178 Log() << kDEBUG <<
"Number of linear terms: " << fNLinear <<
Endl;
180 Log() << kDEBUG <<
"Linear terms are disabled " <<
Endl;
188 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
189 fGDNtuple->Branch(
"risk", &fNTRisk,
"risk/D");
190 fGDNtuple->Branch(
"error", &fNTErrorRate,
"error/D");
191 fGDNtuple->Branch(
"nuval", &fNTNuval,
"nuval/D");
192 fGDNtuple->Branch(
"coefrad", &fNTCoefRad,
"coefrad/D");
193 fGDNtuple->Branch(
"offset", &fNTOffset,
"offset/D");
195 fNTCoeff = (fNRules >0 ?
new Double_t[fNRules] : 0);
196 fNTLinCoeff = (fNLinear>0 ?
new Double_t[fNLinear] : 0);
198 for (
UInt_t i=0; i<fNRules; i++) {
199 fGDNtuple->Branch(
Form(
"a%d",i+1),&fNTCoeff[i],
Form(
"a%d/D",i+1));
201 for (
UInt_t i=0; i<fNLinear; i++) {
202 fGDNtuple->Branch(
Form(
"b%d",i+1),&fNTLinCoeff[i],
Form(
"b%d/D",i+1));
210 std::vector<Double_t> &avsel,
211 std::vector<Double_t> &avrul )
213 UInt_t neve = ind2-ind1+1;
215 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
221 if (fNLinear>0) avsel.resize(fNLinear,0);
222 if (fNRules>0) avrul.resize(fNRules,0);
223 const std::vector<UInt_t> *eventRuleMap=0;
229 if (fRuleEnsemble->IsRuleMapOK()) {
230 for (
UInt_t i=ind1; i<ind2+1; i++) {
231 ew = fRuleFit->GetTrainingEventWeight(i);
233 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
234 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
238 if (fRuleEnsemble->DoRules()) {
239 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
240 nrules = (*eventRuleMap).size();
243 avrul[(*eventRuleMap)[
r]] += ew;
248 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
249 for (
UInt_t i=ind1; i<ind2+1; i++) {
250 ew = fRuleFit->GetTrainingEventWeight(i);
253 fRuleEnsemble->EvalLinEvent(*((*events)[i]));
254 fRuleEnsemble->EvalEvent(*((*events)[i]));
256 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
257 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
261 avrul[
r] += ew*fRuleEnsemble->GetEventRuleVal(
r);
266 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
267 avsel[sel] = avsel[sel] / sumew;
271 avrul[
r] = avrul[
r] / sumew;
282 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
294 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
296 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
305 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
307 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
309 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
317 UInt_t neve = ind2-ind1+1;
319 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
324 for (
UInt_t i=ind1; i<ind2+1; i++) {
337 UInt_t neve = ind2-ind1+1;
339 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
344 for (
UInt_t i=ind1; i<ind2+1; i++) {
359 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
361 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
362 for (
UInt_t i=0; i<fNRules; i++) {
363 rval +=
TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
365 for (
UInt_t i=0; i<fNLinear; i++) {
392 fGDTauVec.resize( fGDNTau );
394 fGDTauVec[0] = fGDTau;
398 Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
399 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
400 fGDTauVec[itau] =
static_cast<Double_t>(itau)*dtau + fGDTauMin;
401 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
409 fGradVecLinTst.clear();
414 fGDCoefLinTst.clear();
418 fGDCoefTst.resize(fGDNTau);
419 fGradVec.resize(fNRules,0);
420 fGradVecTst.resize(fGDNTau);
421 for (
UInt_t i=0; i<fGDNTau; i++) {
422 fGradVecTst[i].resize(fNRules,0);
423 fGDCoefTst[i].resize(fNRules,0);
428 fGDCoefLinTst.resize(fGDNTau);
429 fGradVecLin.resize(fNLinear,0);
430 fGradVecLinTst.resize(fGDNTau);
431 for (
UInt_t i=0; i<fGDNTau; i++) {
432 fGradVecLinTst[i].resize(fNLinear,0);
433 fGDCoefLinTst[i].resize(fNLinear,0);
438 fGDErrTst.resize(fGDNTau,0);
439 fGDErrTstOK.resize(fGDNTau,
kTRUE);
440 fGDOfsTst.resize(fGDNTau,0);
441 fGDNTauTstOK = fGDNTau;
452 if (fGDNTau<2)
return 0;
453 if (fGDTauScan==0)
return 0;
455 if (fGDOfsTst.size()<1)
456 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
458 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
461 UInt_t nscan = fGDTauScan;
481 MakeTstGradientVector();
483 UpdateTstCoefficients();
487 if ( (ip==0) || ((ip+1)%netst==0) ) {
489 itauMin = RiskPerfTst();
490 Log() << kVERBOSE <<
Form(
"%4d",ip+1) <<
". tau = " <<
Form(
"%4.4f",fGDTauVec[itauMin])
491 <<
" => error rate = " << fGDErrTst[itauMin] <<
Endl;
494 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
496 if (
Log().GetMinType()>kVERBOSE)
504 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
506 fGDTau = fGDTauVec[itauMin];
507 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
508 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
509 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
510 Log() << kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",fGDTau)
541 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found"
543 Log() << kVERBOSE <<
"Number of events used per path step = " << fPathIdx2-fPathIdx1+1 <<
Endl;
544 Log() << kVERBOSE <<
"Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 <<
Endl;
547 const Bool_t isVerbose = (
Log().GetMinType()<=kVERBOSE);
548 const Bool_t isDebug = (
Log().GetMinType()<=kDEBUG);
554 EvaluateAveragePath();
555 EvaluateAveragePerf();
558 Log() << kVERBOSE <<
"Creating GD path" <<
Endl;
559 Log() << kVERBOSE <<
" N(steps) = " << fGDNPathSteps <<
Endl;
560 Log() << kVERBOSE <<
" step = " << fGDPathStep <<
Endl;
561 Log() << kVERBOSE <<
" N(tau) = " << fGDNTau <<
Endl;
562 Log() << kVERBOSE <<
" N(tau steps) = " << fGDTauScan <<
Endl;
563 Log() << kVERBOSE <<
" tau range = [ " << fGDTauVec[0] <<
" , " << fGDTauVec[fGDNTau-1] <<
" ]" <<
Endl;
566 if (isDebug) InitNtuple();
578 std::vector<Double_t> coefsMin;
579 std::vector<Double_t> lincoefsMin;
594 std::vector<Double_t> valx;
595 std::vector<Double_t> valy;
596 std::vector<Double_t> valxy;
606 int imod = fGDNPathSteps/100;
607 if (imod<100) imod = std::min(100,fGDNPathSteps);
608 if (imod>100) imod=100;
611 fAverageTruth = -CalcAverageTruth();
612 offsetMin = fAverageTruth;
613 fRuleEnsemble->SetOffset(offsetMin);
614 fRuleEnsemble->ClearCoefficients(0);
615 fRuleEnsemble->ClearLinCoefficients(0);
616 for (
UInt_t i=0; i<fGDOfsTst.size(); i++) {
617 fGDOfsTst[i] = offsetMin;
619 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
622 Int_t nprescan = FindGDTau();
637 Int_t stopCondition=0;
639 Log() << kINFO <<
"Fitting model..." <<
Endl;
645 if (isVerbose) t0 = clock();
646 MakeGradientVector();
648 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
649 stgradvec += tgradvec;
653 if (isVerbose) t0 = clock();
654 UpdateCoefficients();
656 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
657 stupgrade += tupgrade;
661 docheck = ((iloop==0) ||((iloop+1)%imod==0));
667 fNTNuval =
Double_t(iloop)*fGDPathStep;
672 if (isDebug) FillCoefficients();
673 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
677 fNTRisk = RiskPath();
678 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
685 if (fNTRisk>=rprev) {
688 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
689 riskFlat=(nbadrisk>3);
691 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" <<
Endl;
692 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
693 Log() <<
"This may be OK if minimum is already found" <<
Endl;
703 if (isVerbose) t0 = clock();
713 fNTErrorRate = errroc;
716 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
723 if (fNTErrorRate<=errmin) {
724 errmin = fNTErrorRate;
727 fRuleEnsemble->GetCoefficients(coefsMin);
728 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
729 offsetMin = fRuleEnsemble->GetOffset();
731 if ( fNTErrorRate > fGDErrScale*errmin) found =
kTRUE;
735 if (valx.size()==npreg) {
736 valx.erase(valx.begin());
737 valy.erase(valy.begin());
738 valxy.erase(valxy.begin());
740 valx.push_back(fNTNuval);
741 valy.push_back(fNTErrorRate);
742 valxy.push_back(fNTErrorRate*fNTNuval);
747 if (isDebug) fGDNtuple->Fill();
749 Log() << kVERBOSE <<
"ParamsIRE : "
751 <<
Form(
"%8d",iloop+1) <<
" "
752 <<
Form(
"%4.4f",fNTRisk) <<
" "
753 <<
Form(
"%4.4f",riskPerf) <<
" "
754 <<
Form(
"%4.4f",fNTRisk+riskPerf) <<
" "
770 Bool_t endOfLoop = (iloop==fGDNPathSteps);
771 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
775 else if (endOfLoop) {
779 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
780 errmin = fNTErrorRate;
783 fRuleEnsemble->GetCoefficients(coefsMin);
784 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
785 offsetMin = fRuleEnsemble->GetOffset();
793 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
794 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
795 Log() << kINFO <<
"Reason for ending loop: ";
796 switch (stopCondition) {
798 Log() << kINFO <<
"clear minima found";
801 Log() << kINFO <<
"chaotic behaviour of risk";
804 Log() << kINFO <<
"end of loop reached";
807 Log() << kINFO <<
"unknown!";
811 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
815 Log() << kWARNING <<
"Reached minimum early in the search" <<
Endl;
816 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
826 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
827 Log() << kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
833 fRuleEnsemble->SetCoefficients( coefsMin );
834 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
835 fRuleEnsemble->SetOffset( offsetMin );
838 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
845 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
846 Log() << kVERBOSE <<
"Timing per loop (ms):" <<
Endl;
847 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop <<
Endl;
848 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop <<
Endl;
849 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop <<
Endl;
850 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop <<
Endl;
851 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop <<
Endl;
854 Log() << kVERBOSE <<
" GDPtr = " << 1000*
gGDPtr/iloop <<
Endl;
863 if (isDebug) fGDNtuple->
Write();
871 fNTOffset = fRuleEnsemble->GetOffset();
873 for (
UInt_t i=0; i<fNRules; i++) {
874 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
876 for (
UInt_t i=0; i<fNLinear; i++) {
877 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
888 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
889 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
891 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
895 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
898 std::vector<Double_t> fstarSorted;
901 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
902 const Event&
e = *(*events)[i];
903 fstarVal = fRuleEnsemble->FStar(e);
904 fFstar.push_back(fstarVal);
905 fstarSorted.push_back(fstarVal);
909 std::sort( fstarSorted.begin(), fstarSorted.end() );
912 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
915 fFstarMedian = fstarSorted[ind];
928 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
929 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
931 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
934 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
945 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
946 const Event&
e = *(*events)[i];
947 yhat = fRuleEnsemble->EvalEvent(i);
948 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0);
949 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
952 sumyhaty += w*yhat*
y;
957 Double_t cov = sumyhaty - sumyhat*sumy;
969 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
970 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
972 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
974 if (fFstar.size()!=neve) {
975 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
976 <<
" Fstar.size() = " << fFstar.size() <<
" , N(events) = " << neve <<
Endl;
981 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
990 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
991 const Event&
e = *(*events)[i];
992 sF = fRuleEnsemble->EvalEvent( e );
994 sumdf +=
TMath::Abs(fFstar[i-fPerfIdx1] - sF);
995 sumdfmed +=
TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
1000 return sumdf/sumdfmed;
1013 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1014 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1016 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1019 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1026 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1027 const Event&
e = *(*events)[i];
1028 sF = fRuleEnsemble->EvalEvent( e );
1030 signF = (sF>0 ? +1:-1);
1032 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
1046 std::vector<Double_t> & sFbkg )
1049 std::sort(sFsig.begin(), sFsig.end());
1050 std::sort(sFbkg.begin(), sFbkg.end());
1051 const Double_t minsig = sFsig.front();
1052 const Double_t minbkg = sFbkg.front();
1053 const Double_t maxsig = sFsig.back();
1054 const Double_t maxbkg = sFbkg.back();
1055 const Double_t minf = std::min(minsig,minbkg);
1056 const Double_t maxf = std::max(maxsig,maxbkg);
1059 const Int_t np = std::min((nsig+nbkg)/4,50);
1060 const Double_t df = (maxf-minf)/(np-1);
1065 std::vector<Double_t>::const_iterator indit;
1080 for (
Int_t i=0; i<np; i++) {
1082 indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1083 nesig = sFsig.end()-indit;
1086 indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1087 nrbkg = indit-sFbkg.begin();
1099 area += 0.5*(1+rejb)*effs;
1112 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1113 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1115 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1118 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1122 std::vector<Double_t> sFsig;
1123 std::vector<Double_t> sFbkg;
1129 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1130 const Event&
e = *(*events)[i];
1131 sF = fRuleEnsemble->EvalEvent(i);
1132 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
1133 sFsig.push_back(sF);
1138 sFbkg.push_back(sF);
1143 fsigave = sumfsig/sFsig.size();
1144 fbkgave = sumfbkg/sFbkg.size();
1148 return ErrorRateRocRaw( sFsig, sFbkg );
1160 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1161 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1163 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1167 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1171 std::vector< std::vector<Double_t> > sFsig;
1172 std::vector< std::vector<Double_t> > sFbkg;
1174 sFsig.resize( fGDNTau );
1175 sFbkg.resize( fGDNTau );
1178 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1179 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1182 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1183 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1184 sFsig[itau].push_back(sF);
1187 sFbkg[itau].push_back(sF);
1193 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1194 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1195 fGDErrTst[itau] = err;
1206 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1208 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1218 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1219 if (fGDErrTstOK[itau]) {
1221 fGDErrTst[itau] = RiskPerf(itau);
1222 sumx += fGDErrTst[itau];
1223 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1224 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1225 if (fGDErrTst[itau]<minx) {
1226 minx=fGDErrTst[itau];
1236 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1237 if (fGDErrTstOK[itau]) {
1238 if (fGDErrTst[itau] > maxacc) {
1239 fGDErrTstOK[itau] =
kFALSE;
1248 Log() << kVERBOSE <<
"TAU: "
1264 UInt_t neve = fPathIdx1-fPathIdx2+1;
1266 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1272 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1275 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1276 if (fGDErrTstOK[itau]) {
1277 for (
UInt_t ir=0; ir<fNRules; ir++) {
1278 fGradVecTst[itau][ir]=0;
1280 for (
UInt_t il=0; il<fNLinear; il++) {
1281 fGradVecLinTst[itau][il]=0;
1290 const std::vector<UInt_t> *eventRuleMap=0;
1296 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1297 const Event *
e = (*events)[i];
1299 if (fRuleEnsemble->DoRules()) {
1300 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1301 nrules = (*eventRuleMap).size();
1303 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1306 if (fGDErrTstOK[itau]) {
1307 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1311 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1312 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1314 for (
UInt_t ir=0; ir<nrules; ir++) {
1315 rind = (*eventRuleMap)[ir];
1316 fGradVecTst[itau][rind] +=
r;
1319 for (
UInt_t il=0; il<fNLinear; il++) {
1320 fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i,
kTRUE );
1336 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1337 if (fGDErrTstOK[itau]) {
1339 maxr = ( (fNRules>0 ?
1340 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(),
AbsValue()))):0) );
1341 maxl = ( (fNLinear>0 ?
1342 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(),
AbsValue()))):0) );
1345 Double_t maxv = (maxr>maxl ? maxr:maxl);
1346 cthresh = maxv * fGDTauVec[itau];
1356 for (
UInt_t i=0; i<fNRules; i++) {
1357 val = fGradVecTst[itau][i];
1359 if (TMath::Abs(val)>=cthresh) {
1360 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1364 for (
UInt_t i=0; i<fNLinear; i++) {
1365 val = fGradVecLinTst[itau][i];
1366 if (TMath::Abs(val)>=cthresh) {
1367 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1374 CalcTstAverageResponse();
1386 UInt_t neve = fPathIdx2-fPathIdx1+1;
1388 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1394 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1397 for (
UInt_t ir=0; ir<fNRules; ir++) {
1400 for (
UInt_t il=0; il<fNLinear; il++) {
1408 const std::vector<UInt_t> *eventRuleMap=0;
1413 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1414 const Event *
e = (*events)[i];
1417 sF = fRuleEnsemble->EvalEvent( i );
1421 if (fRuleEnsemble->DoRules()) {
1422 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1423 nrules = (*eventRuleMap).size();
1425 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1426 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1428 for (
UInt_t ir=0; ir<nrules; ir++) {
1429 rind = (*eventRuleMap)[ir];
1430 fGradVec[rind] +=
r;
1435 for (
UInt_t il=0; il<fNLinear; il++) {
1436 fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i,
kTRUE );
1448 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1450 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1451 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(),
AbsValue()))):0) );
1453 Double_t maxv = (maxr>maxl ? maxr:maxl);
1461 useRThresh = cthresh;
1462 useLThresh = cthresh;
1470 for (
UInt_t i=0; i<fGradVec.size(); i++) {
1473 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1474 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1479 for (
UInt_t i=0; i<fGradVecLin.size(); i++) {
1480 lval = fGradVecLin[i];
1482 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1483 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1487 Double_t offset = CalcAverageResponse();
1488 fRuleEnsemble->SetOffset( offset );
1498 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1499 if (fGDErrTstOK[itau]) {
1500 fGDOfsTst[itau] = 0;
1501 for (
UInt_t s=0; s<fNLinear; s++) {
1502 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
1505 fGDOfsTst[itau] -= fGDCoefTst[itau][
r] * fAverageRulePath[
r];
1520 for (
UInt_t s=0; s<fNLinear; s++) {
1521 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
1524 ofs -= fRuleEnsemble->GetRules(
r)->GetCoefficient() * fAverageRulePath[
r];
1534 if (fPathIdx2<=fPathIdx1) {
1535 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1541 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1542 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1543 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1544 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1546 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1548 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
1550 return sum/fNEveEffPath;
1556 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
1562 fLogger->SetMinType(t);
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
static long int sum(long int i)
MsgLogger & Endl(MsgLogger &ml)
void MakeGradientVector()
make gradient vector
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
Short_t Min(Short_t a, Short_t b)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
#define rprev(otri1, otri2)
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
Double_t CalcAverageTruth()
calculate the average truth
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
void SetMsgType(EMsgType t)
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Int_t Type(const Event *e) const
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment...
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t Optimism()
implementation of eq.
char * Form(const char *fmt,...)
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
void InitNtuple()
initializes the ntuple
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
virtual ~RuleFitParams()
destructor
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
ostringstream derivative to redirect and format output
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
RuleFitParams()
constructor
Short_t Max(Short_t a, Short_t b)
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() ! ...
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
void InitGD()
Initialize GD path search.
A TTree object has a header with a name and a title.
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Sqrt(Double_t x)
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
void MakeGDPath()
The following finds the gradient directed path in parameter space.
double norm(double *x, double *p)
Timing information for training and evaluation of MVA methods.
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...