75 : MethodBase(jobName, Types::kDNN, methodTitle, theData, theOption), fWeightInitialization(), fOutputFunction(),
76 fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
77 fArchitectureString(), fTrainingSettings(), fResume(false), fSettings()
86 : MethodBase( Types::kDNN, theData, theWeightFile),
87 fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
88 fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
89 fTrainingSettings(), fResume(false), fSettings()
125 auto &dsi = this->DataInfo();
126 auto numClasses = dsi.GetNClasses();
127 for (
UInt_t i = 0; i < numClasses; ++i) {
128 if (dsi.GetWeightExpression(i) !=
TString(
"")) {
129 Log() << kERROR <<
"Currently event weights are not considered properly by this method." <<
Endl;
146 DeclareOptionRef(fLayoutString=
"SOFTSIGN|(N+100)*2,LINEAR",
148 "Layout of the network.");
150 DeclareOptionRef(fErrorStrategy=
"CROSSENTROPY",
152 "Loss function: Mean squared error (regression)"
153 " or cross entropy (binary classification).");
154 AddPreDefVal(
TString(
"CROSSENTROPY"));
155 AddPreDefVal(
TString(
"SUMOFSQUARES"));
156 AddPreDefVal(
TString(
"MUTUALEXCLUSIVE"));
158 DeclareOptionRef(fWeightInitializationString=
"XAVIER",
159 "WeightInitialization",
160 "Weight initialization strategy");
161 AddPreDefVal(
TString(
"XAVIER"));
162 AddPreDefVal(
TString(
"XAVIERUNIFORM"));
164 DeclareOptionRef(fArchitectureString =
"CPU",
"Architecture",
"Which architecture to perform the training on.");
165 AddPreDefVal(
TString(
"STANDARD"));
168 AddPreDefVal(
TString(
"OPENCL"));
171 fTrainingStrategyString =
"LearningRate=1e-1,"
174 "ConvergenceSteps=50,"
180 "DropRepetitions=5|LearningRate=1e-4,"
183 "ConvergenceSteps=50,"
188 "DropConfig=0.0+0.5+0.5,"
190 "Multithreading=True",
192 "Defines the training strategies.");
202 LayoutVector_t layout;
203 const TString layerDelimiter(
",");
204 const TString subDelimiter(
"|");
206 const size_t inputSize = GetNvar();
208 TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
209 TIter nextLayer (layerStrings);
212 for (; layerString !=
nullptr; layerString = (
TObjString*) nextLayer()) {
217 TIter nextToken (subStrings);
220 for (; token !=
nullptr; token = (
TObjString *) nextToken()) {
226 if (strActFnc ==
"RELU") {
228 }
else if (strActFnc ==
"TANH") {
230 }
else if (strActFnc ==
"SYMMRELU") {
232 }
else if (strActFnc ==
"SOFTSIGN") {
234 }
else if (strActFnc ==
"SIGMOID") {
236 }
else if (strActFnc ==
"LINEAR") {
238 }
else if (strActFnc ==
"GAUSS") {
247 strNumNodes.ReplaceAll (
"N", strN);
248 strNumNodes.ReplaceAll (
"n", strN);
250 numNodes = fml.Eval (inputSize);
256 layout.push_back(std::make_pair(numNodes, activationFunction));
269 KeyValueVector_t blockKeyValues;
270 const TString keyValueDelim (
"=");
272 TObjArray* blockStrings = parseString.Tokenize (blockDelim);
273 TIter nextBlock (blockStrings);
276 for (; blockString !=
nullptr; blockString = (
TObjString *) nextBlock())
278 blockKeyValues.push_back (std::map<TString,TString>());
279 std::map<TString,TString>& currentBlock = blockKeyValues.back ();
282 TIter nextToken (subStrings);
285 for (; token !=
nullptr; token = (
TObjString *)nextToken())
288 int delimPos = strKeyValue.
First (keyValueDelim.Data ());
294 TString strValue =
TString (strKeyValue (delimPos+1, strKeyValue.Length ()));
299 currentBlock.insert (std::make_pair (strKey, strValue));
302 return blockKeyValues;
310 std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
311 if (it == keyValueMap.end()) {
319 template <
typename T>
320 T fetchValue(
const std::map<TString,TString>& keyValueMap,
335 return value.
Atoi ();
341 double fetchValue (
const std::map<TString,TString>& keyValueMap,
342 TString key,
double defaultValue)
348 return value.
Atof ();
367 bool fetchValue (
const std::map<TString,TString>& keyValueMap,
368 TString key,
bool defaultValue)
375 if (value ==
"TRUE" || value ==
"T" || value ==
"1") {
384 std::vector<double>
fetchValue(
const std::map<TString, TString> & keyValueMap,
386 std::vector<double> defaultValue)
389 if (parseString ==
"") {
393 std::vector<double> values;
395 const TString tokenDelim (
"+");
397 TIter nextToken (tokenStrings);
399 for (; tokenString !=
NULL; tokenString = (
TObjString*)nextToken ()) {
400 std::stringstream sstr;
403 sstr >> currentValue;
404 values.push_back (currentValue);
413 if (IgnoreEventsWithNegWeightsInTraining()) {
415 <<
"Will ignore negative events in training!"
419 if (fArchitectureString ==
"STANDARD") {
420 Log() << kERROR <<
"The STANDARD architecture has been deprecated. "
421 "Please use Architecture=CPU or Architecture=CPU."
422 "See the TMVA Users' Guide for instructions if you "
423 "encounter problems."
425 Log() << kFATAL <<
"The STANDARD architecture has been deprecated. "
426 "Please use Architecture=CPU or Architecture=CPU."
427 "See the TMVA Users' Guide for instructions if you "
428 "encounter problems."
432 if (fArchitectureString ==
"OPENCL") {
433 Log() << kERROR <<
"The OPENCL architecture has not been implemented yet. "
434 "Please use Architecture=CPU or Architecture=CPU for the "
435 "time being. See the TMVA Users' Guide for instructions "
436 "if you encounter problems."
438 Log() << kFATAL <<
"The OPENCL architecture has not been implemented yet. "
439 "Please use Architecture=CPU or Architecture=CPU for the "
440 "time being. See the TMVA Users' Guide for instructions "
441 "if you encounter problems."
445 if (fArchitectureString ==
"GPU") {
446 #ifndef DNNCUDA // Included only if DNNCUDA flag is _not_ set.
447 Log() << kERROR <<
"CUDA backend not enabled. Please make sure "
448 "you have CUDA installed and it was successfully "
451 Log() << kFATAL <<
"CUDA backend not enabled. Please make sure "
452 "you have CUDA installed and it was successfully "
458 if (fArchitectureString ==
"CPU") {
459 #ifndef DNNCPU // Included only if DNNCPU flag is _not_ set.
460 Log() << kERROR <<
"Multi-core CPU backend not enabled. Please make sure "
461 "you have a BLAS implementation and it was successfully "
462 "detected by CMake as well that the imt CMake flag is set."
464 Log() << kFATAL <<
"Multi-core CPU backend not enabled. Please make sure "
465 "you have a BLAS implementation and it was successfully "
466 "detected by CMake as well that the imt CMake flag is set."
476 size_t inputSize = GetNVariables ();
477 size_t outputSize = 1;
479 outputSize = GetNTargets();
481 outputSize = DataInfo().GetNClasses();
484 fNet.SetBatchSize(1);
485 fNet.SetInputWidth(inputSize);
487 auto itLayout = std::begin (fLayout);
488 auto itLayoutEnd = std::end (fLayout)-1;
489 for ( ; itLayout != itLayoutEnd; ++itLayout) {
490 fNet.AddLayer((*itLayout).first, (*itLayout).second);
492 fNet.AddLayer(outputSize, EActivationFunction::kIdentity);
498 fOutputFunction = EOutputFunction::kSigmoid;
501 if (fErrorStrategy ==
"SUMOFSQUARES") {
502 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
504 if (fErrorStrategy ==
"CROSSENTROPY") {
505 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
507 fOutputFunction = EOutputFunction::kSigmoid;
509 if (fErrorStrategy !=
"SUMOFSQUARES") {
510 Log () << kWARNING <<
"For regression only SUMOFSQUARES is a valid "
511 <<
" neural net error function. Setting error function to "
512 <<
" SUMOFSQUARES now." <<
Endl;
514 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
515 fOutputFunction = EOutputFunction::kIdentity;
517 if (fErrorStrategy ==
"SUMOFSQUARES") {
518 fNet.SetLossFunction(ELossFunction::kMeanSquaredError);
520 if (fErrorStrategy ==
"CROSSENTROPY") {
521 fNet.SetLossFunction(ELossFunction::kCrossEntropy);
523 if (fErrorStrategy ==
"MUTUALEXCLUSIVE") {
524 fNet.SetLossFunction(ELossFunction::kSoftmaxCrossEntropy);
526 fOutputFunction = EOutputFunction::kSoftmax;
533 if (fWeightInitializationString ==
"XAVIER") {
536 else if (fWeightInitializationString ==
"XAVIERUNIFORM") {
547 KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString,
550 for (
auto& block : strategyKeyValues) {
560 std::vector<Double_t>());
564 if (regularization ==
"L1") {
566 }
else if (regularization ==
"L2") {
578 fTrainingSettings.push_back(settings);
586 if (fInteractive && fInteractive->NotInitialized()){
587 std::vector<TString> titles = {
"Error on training set",
"Error on test set"};
588 fInteractive->Init(titles);
593 if (fArchitectureString ==
"GPU") {
595 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
598 }
else if (fArchitectureString ==
"OpenCL") {
599 Log() << kFATAL <<
"OpenCL backend not yet supported." <<
Endl;
601 }
else if (fArchitectureString ==
"CPU") {
603 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
608 Log() << kINFO <<
"Using Standard Implementation.";
610 std::vector<Pattern> trainPattern;
611 std::vector<Pattern> testPattern;
613 const std::vector<TMVA::Event*>& eventCollectionTraining = GetEventCollection (
Types::kTraining);
614 const std::vector<TMVA::Event*>& eventCollectionTesting = GetEventCollection (
Types::kTesting);
616 for (
auto &event : eventCollectionTraining) {
617 const std::vector<Float_t>& values =
event->GetValues();
619 double outputValue =
event->GetClass () == 0 ? 0.9 : 0.1;
620 trainPattern.push_back(
Pattern (values.begin(),
623 event->GetWeight()));
624 trainPattern.back().addInput(1.0);
626 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
627 oneHot[
event->GetClass()] = 1.0;
628 trainPattern.push_back(
Pattern (values.begin(), values.end(),
629 oneHot.cbegin(), oneHot.cend(),
630 event->GetWeight()));
631 trainPattern.back().addInput(1.0);
633 const std::vector<Float_t>& targets =
event->GetTargets ();
634 trainPattern.push_back(
Pattern(values.begin(),
638 event->GetWeight ()));
639 trainPattern.back ().addInput (1.0);
643 for (
auto &event : eventCollectionTesting) {
644 const std::vector<Float_t>& values =
event->GetValues();
646 double outputValue =
event->GetClass () == 0 ? 0.9 : 0.1;
647 testPattern.push_back(
Pattern (values.begin(),
650 event->GetWeight()));
651 testPattern.back().addInput(1.0);
653 std::vector<Float_t> oneHot(DataInfo().GetNClasses(), 0.0);
654 oneHot[
event->GetClass()] = 1.0;
655 testPattern.push_back(
Pattern (values.begin(), values.end(),
656 oneHot.cbegin(), oneHot.cend(),
657 event->GetWeight()));
658 testPattern.back().addInput(1.0);
660 const std::vector<Float_t>& targets =
event->GetTargets ();
661 testPattern.push_back(
Pattern(values.begin(),
665 event->GetWeight ()));
666 testPattern.back ().addInput (1.0);
671 std::vector<double> weights;
678 for (
size_t i = 0; i < fNet.GetDepth(); i++) {
682 case EActivationFunction::kIdentity: g = EnumFunction::LINEAR;
break;
683 case EActivationFunction::kRelu: g = EnumFunction::RELU;
break;
684 case EActivationFunction::kSigmoid: g = EnumFunction::SIGMOID;
break;
685 case EActivationFunction::kTanh: g = EnumFunction::TANH;
break;
686 case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU;
break;
687 case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN;
break;
688 case EActivationFunction::kGauss: g = EnumFunction::GAUSS;
break;
690 if (i < fNet.GetDepth() - 1) {
694 switch(fOutputFunction) {
695 case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT;
break;
696 case EOutputFunction::kSigmoid: h = ModeOutputValues::SIGMOID;
break;
697 case EOutputFunction::kSoftmax: h = ModeOutputValues::SOFTMAX;
break;
703 switch(fNet.GetLossFunction()) {
704 case ELossFunction::kMeanSquaredError:
707 case ELossFunction::kCrossEntropy:
710 case ELossFunction::kSoftmaxCrossEntropy:
715 switch(fWeightInitialization) {
716 case EInitialization::kGauss:
718 std::back_inserter(weights));
720 case EInitialization::kUniform:
722 std::back_inserter(weights));
726 std::back_inserter(weights));
731 for (
auto s : fTrainingSettings) {
734 switch(s.regularization) {
736 case ERegularization::kL1: r = EnumRegularization::L1;
break;
737 case ERegularization::kL2: r = EnumRegularization::L2;
break;
741 s.testInterval, s.weightDecay, r,
743 s.momentum, 1, s.multithreading);
744 std::shared_ptr<Settings> ptrSettings(settings);
745 ptrSettings->setMonitoring (0);
747 <<
"Training with learning rate = " << ptrSettings->learningRate ()
748 <<
", momentum = " << ptrSettings->momentum ()
749 <<
", repetitions = " << ptrSettings->repetitions ()
752 ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
753 (idxSetting+1)*100.0/(fSettings.size ()));
755 const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
756 if (!dropConfig.empty ()) {
757 Log () << kINFO <<
"Drop configuration" <<
Endl
758 <<
" drop repetitions = " << ptrSettings->dropRepetitions()
763 for (
auto f : dropConfig) {
764 Log () << kINFO <<
" Layer " << idx <<
" = " <<
f <<
Endl;
770 ptrSettings->momentum(),
771 ptrSettings->repetitions());
772 net.
train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());
777 size_t weightIndex = 0;
778 for (
size_t l = 0;
l < fNet.GetDepth();
l++) {
779 auto & layerWeights = fNet.GetLayer(
l).GetWeights();
780 for (
Int_t j = 0; j < layerWeights.GetNcols(); j++) {
781 for (
Int_t i = 0; i < layerWeights.GetNrows(); i++) {
782 layerWeights(i,j) = weights[weightIndex];
786 auto & layerBiases = fNet.GetLayer(
l).GetBiases();
788 for (
Int_t i = 0; i < layerBiases.GetNrows(); i++) {
789 layerBiases(i,0) = weights[weightIndex];
793 for (
Int_t i = 0; i < layerBiases.GetNrows(); i++) {
794 layerBiases(i,0) = 0.0;
798 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
807 #ifdef DNNCUDA // Included only if DNNCUDA flag is set.
812 Log() << kINFO <<
"Start of neural network training on GPU." <<
Endl;
814 size_t trainingPhase = 1;
815 fNet.Initialize(fWeightInitialization);
819 fInteractive->ClearGraphs();
828 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
829 for (
auto & p : dropoutVector) {
835 auto testNet = net.
CreateClone(settings.batchSize);
837 Log() << kINFO <<
"Training phase " << trainingPhase <<
" of "
838 << fTrainingSettings.size() <<
":" <<
Endl;
851 testNet.GetBatchSize(),
855 settings.convergenceSteps,
856 settings.testInterval);
858 std::vector<TNet<TCuda<>>> nets{};
859 std::vector<TBatch<TCuda<>>> batches{};
860 nets.reserve(nThreads);
861 for (
size_t i = 0; i < nThreads; i++) {
863 for (
size_t j = 0; j < net.
GetDepth(); j++)
865 auto &masterLayer = net.
GetLayer(j);
866 auto &layer = nets.back().GetLayer(j);
868 masterLayer.GetWeights());
870 masterLayer.GetBiases());
874 bool converged =
false;
875 size_t stepCount = 0;
876 size_t batchesInEpoch = nTrainingSamples / net.
GetBatchSize();
878 std::chrono::time_point<std::chrono::system_clock> start, end;
879 start = std::chrono::system_clock::now();
882 Log() << std::setw(10) <<
"Epoch" <<
" | "
883 << std::setw(12) <<
"Train Err."
884 << std::setw(12) <<
"Test Err."
885 << std::setw(12) <<
"GFLOP/s"
886 << std::setw(12) <<
"Conv. Steps" <<
Endl;
887 std::string separator(62,
'-');
896 trainingData.Shuffle();
897 for (
size_t i = 0; i < batchesInEpoch; i += nThreads) {
899 for (
size_t j = 0; j < nThreads; j++) {
900 batches.reserve(nThreads);
901 batches.push_back(trainingData.GetBatch());
903 if (settings.momentum > 0.0) {
904 minimizer.StepMomentum(net, nets, batches, settings.momentum);
906 minimizer.Step(net, nets, batches);
910 if ((stepCount % minimizer.GetTestInterval()) == 0) {
914 for (
auto batch : testData) {
915 auto inputMatrix = batch.GetInput();
916 auto outputMatrix = batch.GetOutput();
917 testError += testNet.Loss(inputMatrix, outputMatrix);
919 testError /= (
Double_t) (nTestSamples / settings.batchSize);
921 end = std::chrono::system_clock::now();
925 for (
auto batch : trainingData) {
926 auto inputMatrix = batch.GetInput();
927 auto outputMatrix = batch.GetOutput();
928 trainingError += net.
Loss(inputMatrix, outputMatrix);
930 trainingError /= (
Double_t) (nTrainingSamples / settings.batchSize);
933 std::chrono::duration<double> elapsed_seconds = end - start;
934 double seconds = elapsed_seconds.count();
935 double nFlops = (double) (settings.testInterval * batchesInEpoch);
938 converged = minimizer.HasConverged(testError);
939 start = std::chrono::system_clock::now();
942 fInteractive->AddPoint(stepCount, trainingError, testError);
943 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
944 / minimizer.GetConvergenceSteps ();
945 if (fExitFromTraining)
break;
947 Log() << std::setw(10) << stepCount <<
" | "
948 << std::setw(12) << trainingError
949 << std::setw(12) << testError
950 << std::setw(12) << nFlops / seconds
951 << std::setw(12) << minimizer.GetConvergenceCount() <<
Endl;
964 #else // DNNCUDA flag not set.
966 Log() << kFATAL <<
"CUDA backend not enabled. Please make sure "
967 "you have CUDA installed and it was successfully "
968 "detected by CMAKE." <<
Endl;
977 #ifdef DNNCPU // Included only if DNNCPU flag is set.
982 Log() << kINFO <<
"Start of neural network training on CPU." <<
Endl <<
Endl;
984 fNet.Initialize(fWeightInitialization);
986 size_t trainingPhase = 1;
990 fInteractive->ClearGraphs();
993 Log() <<
"Training phase " << trainingPhase <<
" of "
994 << fTrainingSettings.size() <<
":" <<
Endl;
1002 std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
1003 for (
auto & p : dropoutVector) {
1009 auto testNet = net.
CreateClone(settings.batchSize);
1013 size_t nThreads = 1;
1021 testNet.GetBatchSize(),
1025 settings.convergenceSteps,
1026 settings.testInterval);
1028 std::vector<TNet<TCpu<>>> nets{};
1029 std::vector<TBatch<TCpu<>>> batches{};
1030 nets.reserve(nThreads);
1031 for (
size_t i = 0; i < nThreads; i++) {
1032 nets.push_back(net);
1033 for (
size_t j = 0; j < net.
GetDepth(); j++)
1035 auto &masterLayer = net.
GetLayer(j);
1036 auto &layer = nets.back().GetLayer(j);
1038 masterLayer.GetWeights());
1040 masterLayer.GetBiases());
1044 bool converged =
false;
1045 size_t stepCount = 0;
1046 size_t batchesInEpoch = nTrainingSamples / net.
GetBatchSize();
1048 std::chrono::time_point<std::chrono::system_clock> start, end;
1049 start = std::chrono::system_clock::now();
1051 if (!fInteractive) {
1052 Log() << std::setw(10) <<
"Epoch" <<
" | "
1053 << std::setw(12) <<
"Train Err."
1054 << std::setw(12) <<
"Test Err."
1055 << std::setw(12) <<
"GFLOP/s"
1056 << std::setw(12) <<
"Conv. Steps" <<
Endl;
1057 std::string separator(62,
'-');
1065 trainingData.Shuffle();
1066 for (
size_t i = 0; i < batchesInEpoch; i += nThreads) {
1068 for (
size_t j = 0; j < nThreads; j++) {
1069 batches.reserve(nThreads);
1070 batches.push_back(trainingData.GetBatch());
1072 if (settings.momentum > 0.0) {
1073 minimizer.StepMomentum(net, nets, batches, settings.momentum);
1075 minimizer.Step(net, nets, batches);
1079 if ((stepCount % minimizer.GetTestInterval()) == 0) {
1083 for (
auto batch : testData) {
1084 auto inputMatrix = batch.GetInput();
1085 auto outputMatrix = batch.GetOutput();
1086 testError += testNet.Loss(inputMatrix, outputMatrix);
1088 testError /= (
Double_t) (nTestSamples / settings.batchSize);
1090 end = std::chrono::system_clock::now();
1094 for (
auto batch : trainingData) {
1095 auto inputMatrix = batch.GetInput();
1096 auto outputMatrix = batch.GetOutput();
1097 trainingError += net.
Loss(inputMatrix, outputMatrix);
1099 trainingError /= (
Double_t) (nTrainingSamples / settings.batchSize);
1102 fInteractive->AddPoint(stepCount, trainingError, testError);
1103 fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;
1104 if (fExitFromTraining)
break;
1108 std::chrono::duration<double> elapsed_seconds = end - start;
1109 double seconds = elapsed_seconds.count();
1110 double nFlops = (double) (settings.testInterval * batchesInEpoch);
1113 converged = minimizer.HasConverged(testError);
1114 start = std::chrono::system_clock::now();
1117 fInteractive->AddPoint(stepCount, trainingError, testError);
1118 fIPyCurrentIter = 100.0 * minimizer.GetConvergenceCount()
1119 / minimizer.GetConvergenceSteps ();
1120 if (fExitFromTraining)
break;
1122 Log() << std::setw(10) << stepCount <<
" | "
1123 << std::setw(12) << trainingError
1124 << std::setw(12) << testError
1125 << std::setw(12) << nFlops / seconds
1126 << std::setw(12) << minimizer.GetConvergenceCount() <<
Endl;
1136 auto & layer = fNet.GetLayer(
l);
1142 #else // DNNCPU flag not set.
1143 Log() << kFATAL <<
"Multi-core CPU backend not enabled. Please make sure "
1144 "you have a BLAS implementation and it was successfully "
1145 "detected by CMake as well that the imt CMake flag is set." <<
Endl;
1153 size_t nVariables = GetEvent()->GetNVariables();
1157 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1158 for (
size_t i = 0; i < nVariables; i++) {
1159 X(0,i) = inputValues[i];
1162 fNet.Prediction(YHat, X, fOutputFunction);
1170 size_t nVariables = GetEvent()->GetNVariables();
1173 const Event *ev = GetEvent();
1174 const std::vector<Float_t>& inputValues = ev->
GetValues();
1175 for (
size_t i = 0; i < nVariables; i++) {
1176 X(0,i) = inputValues[i];
1179 size_t nTargets = std::max(1u, ev->
GetNTargets());
1181 std::vector<Float_t>
output(nTargets);
1182 auto net = fNet.CreateClone(1);
1183 net.Prediction(YHat, X, fOutputFunction);
1185 for (
size_t i = 0; i < nTargets; i++)
1186 output[i] = YHat(0, i);
1188 if (fRegressionReturnVal ==
NULL) {
1189 fRegressionReturnVal =
new std::vector<Float_t>();
1191 fRegressionReturnVal->clear();
1194 for (
size_t i = 0; i < nTargets; ++i) {
1198 const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1199 for (
size_t i = 0; i < nTargets; ++i) {
1200 fRegressionReturnVal->push_back(evT2->
GetTarget(i));
1203 return *fRegressionReturnVal;
1208 size_t nVariables = GetEvent()->GetNVariables();
1210 Matrix_t YHat(1, DataInfo().GetNClasses());
1211 if (fMulticlassReturnVal ==
NULL) {
1212 fMulticlassReturnVal =
new std::vector<Float_t>(DataInfo().GetNClasses());
1215 const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1216 for (
size_t i = 0; i < nVariables; i++) {
1217 X(0,i) = inputValues[i];
1220 fNet.Prediction(YHat, X, fOutputFunction);
1221 for (
size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
1222 (*fMulticlassReturnVal)[i] = YHat(0, i);
1224 return *fMulticlassReturnVal;
1232 Int_t inputWidth = fNet.GetInputWidth();
1233 Int_t depth = fNet.GetDepth();
1234 char lossFunction =
static_cast<char>(fNet.GetLossFunction());
1236 gTools().StringFromInt(inputWidth));
1240 TString(static_cast<char>(fOutputFunction)));
1242 for (
Int_t i = 0; i < depth; i++) {
1243 const auto& layer = fNet.GetLayer(i);
1245 int activationFunction =
static_cast<int>(layer.GetActivationFunction());
1248 WriteMatrixXML(layerxml,
"Weights", layer.GetWeights());
1249 WriteMatrixXML(layerxml,
"Biases", layer.GetBiases());
1263 fNet.SetBatchSize(1);
1265 size_t inputWidth, depth;
1268 char lossFunctionChar;
1270 char outputFunctionChar;
1273 fNet.SetInputWidth(inputWidth);
1274 fNet.SetLossFunction(static_cast<ELossFunction>(lossFunctionChar));
1277 size_t previousWidth = inputWidth;
1279 for (
size_t i = 0; i < depth; i++) {
1292 fNet.AddLayer(width, f);
1295 ReadMatrixXML(layerXML,
"Weights", weights);
1296 ReadMatrixXML(layerXML,
"Biases", biases);
1297 fNet.GetLayer(i).GetWeights() = weights;
1298 fNet.GetLayer(i).GetBiases() = biases;
1301 previousWidth = width;
1316 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1317 fRanking->AddRank(
Rank( GetInputLabel(ivar), 1.0));
1341 Log() << col <<
"--- Short description:" << colres <<
Endl;
1343 Log() <<
"The DNN neural network is a feedforward" <<
Endl;
1344 Log() <<
"multilayer perceptron implementation. The DNN has a user-" <<
Endl;
1345 Log() <<
"defined hidden layer architecture, where the number of input (output)" <<
Endl;
1346 Log() <<
"nodes is determined by the input variables (output classes, i.e., " <<
Endl;
1347 Log() <<
"signal and one background, regression or multiclass). " <<
Endl;
1349 Log() << col <<
"--- Performance optimisation:" << colres <<
Endl;
1352 const char* txt =
"The DNN supports various options to improve performance in terms of training speed and \n \
1353 reduction of overfitting: \n \
1355 - different training settings can be stacked. Such that the initial training \n\
1356 is done with a large learning rate and a large drop out fraction whilst \n \
1357 in a later stage learning rate and drop out can be reduced. \n \
1360 initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
1361 later training stage: 0.1 or 0.0 for all layers \n \
1362 final training stage: 0.0] \n \
1363 Drop out is a technique where a at each training cycle a fraction of arbitrary \n \
1364 nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
1365 - L1 and L2 regularization are available \n \
1367 [recommended 10 - 150] \n \
1368 Arbitrary mini-batch sizes can be chosen. \n \
1369 - Multithreading \n \
1370 [recommended: True] \n \
1371 Multithreading can be turned on. The minibatches are distributed to the available \n \
1372 cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
1376 - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
1378 . two hidden layers (separated by \",\") \n \
1379 . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
1380 . the activation function for the output layer is LINEAR \n \
1381 . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
1382 . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
1383 . the number of nodes in the output layer is determined by the number of output nodes \n \
1384 and can therefore not be chosen freely. \n \
1386 \"ErrorStrategy\": \n \
1388 The error of the neural net is determined by a sum-of-squares error function \n \
1389 For regression, this is the only possible choice. \n \
1391 The error of the neural net is determined by a cross entropy function. The \n \
1392 output values are automatically (internally) transformed into probabilities \n \
1393 using a sigmoid function. \n \
1394 For signal/background classification this is the default choice. \n \
1395 For multiclass using cross entropy more than one or no output classes \n \
1396 can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
1397 A and C is true, Event 2: C is true, ...) \n \
1398 - MUTUALEXCLUSIVE \n \
1399 In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
1401 \"WeightInitialization\" \n \
1404 \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
1405 such that the variance of the values of the nodes is preserved for each layer. \n \
1406 - XAVIERUNIFORM \n \
1407 The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
1409 Random values scaled by the layer size \n \
1411 \"TrainingStrategy\" \n \
1412 - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFraction=0.0,DropRepetitions=5\" \n \
1413 - explanation: two stacked training settings separated by \"|\" \n \
1414 . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
1415 . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFractions=0.0,DropRepetitions=5\" \n \
1416 . LearningRate : \n \
1417 - recommended for classification: 0.1 initially, 1e-4 later \n \
1418 - recommended for regression: 1e-4 and less \n \
1420 preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
1421 . Repetitions : \n \
1422 train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
1423 . ConvergenceSteps : \n \
1424 Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
1425 of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
1426 cycle the test samples are evaluated and thus the convergence is checked) \n \
1428 Size of the mini-batches. \n \
1429 . TestRepetitions \n \
1430 Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
1432 If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
1434 NONE, L1 (|w|) or L2 (w^2) \n \
1436 Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
1437 in the DropConfig. \n \
1438 [example: DropConfig=0.0+0.5+0.3 \n \
1439 meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
1441 recommended: leave all the nodes turned on for the input layer (layer 0) \n \
1442 turn off half of the nodes in later layers for the initial training; leave all nodes \n \
1443 turned on (0.0) in later training stages] \n \
1444 . DropRepetitions \n \
1445 Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
1446 [recommended : 1] \n \
1447 . Multithreading \n \
1448 turn on multithreading [recommended: True] \n \
std::string GetName(const std::string &scope_name)
size_t GetOutputWidth() const
LayoutVector_t ParseLayoutString(TString layerSpec)
static TString Itoa(Int_t value, Int_t base)
Converts an Int_t to a TString with respect to the base specified (2-36).
MsgLogger & Endl(MsgLogger &ml)
Collectable string class.
UInt_t GetNTargets() const
accessor to the number of targets
Steepest Gradient Descent algorithm (SGD)
void SetDropoutProbabilities(const std::vector< Double_t > &probabilities)
Double_t Atof() const
Return floating-point value contained in string.
void GetHelpMessage() const
void ToUpper()
Change string to upper case.
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
size_t GetBatchSize() const
typename Architecture_t::Matrix_t Matrix_t
Ranking for variables in method (implementation)
void MakeClassSpecific(std::ostream &, const TString &) const
Scalar_t Loss(const Matrix_t &Y, bool includeRegularization=true) const
Evaluate the loss function of the net using the activations that are currently stored in the output layer.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
void SetWeightDecay(Scalar_t weightDecay)
const char * Data() const
std::vector< std::map< TString, TString >> KeyValueVector_t
void ReadWeightsFromStream(std::istream &i)
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
std::vector< Double_t > dropoutProbabilities
void ReadWeightsFromXML(void *wghtnode)
void setInputSize(size_t sizeInput)
set the input size of the DNN
Generic neural network class.
Int_t Atoi() const
Return integer value of string.
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
const Ranking * CreateRanking()
void SetRegularization(ERegularization R)
size_t GetInputWidth() const
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
TSubString Strip(EStripType s=kTrailing, char c= ' ') const
Return a substring of self stripped at beginning and/or end.
Settings for the training of the neural net.
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
TNet< Architecture_t, TSharedLayer< Architecture_t > > CreateClone(size_t batchSize)
Create a clone that uses the same weight and biases matrices but potentially a different batch size.
Layer defines the layout of a layer.
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
void Copy(void *source, void *dest)
TString fetchValue(const std::map< TString, TString > &keyValueMap, TString key)
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
EOutputFunction
Enum that represents output functions.
const TString & GetString() const
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
DNN::ERegularization regularization
Float_t GetTarget(UInt_t itgt) const
#define REGISTER_METHOD(CLASS)
for example
void addLayer(Layer &layer)
add a layer (layout)
std::vector< Float_t > & GetValues()
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
void InitializeGradients()
Initialize the gradients in the net to zero.
XMLNodePointer_t GetChild(XMLNodePointer_t xmlnode, Bool_t realnode=kTRUE)
returns first child of xml node
MethodDNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
EActivationFunction
Enum that represents layer activation functions.
Bool_t WriteOptionsReference() const
virtual const std::vector< Float_t > & GetMulticlassValues()
virtual const std::vector< Float_t > & GetRegressionValues()
void AddWeightsXMLTo(void *parent) const
Ssiz_t First(char c) const
Find first occurrence of a character c.
Layer_t & GetLayer(size_t i)