Version: 8.3.0
PMMLlib.cxx
1 
2 // Copyright (C) 2013-2016 CEA/DEN
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published
6 // by the Free Software Foundation, either version 3 of the License, or any
7 // later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
26 // Salomé includes
27 #include "PMMLlib.hxx"
28 
29 // includes C
30 #include <stdlib.h>
31 
32 // includes C++
33 #include <cstdlib>
34 #include <iostream>
35 #include <fstream>
36 #include <sstream>
37 
38 using namespace std;
39 
40 namespace PMMLlib
41 {
42 
43 //**************************************************************
44 // *
45 // *
46 // *
47 // methods common to all model types *
48 // *
49 // *
50 // *
51 //**************************************************************
52 
58 PMMLlib::PMMLlib(std::string file,bool log) :
59  _log(log),
60  _pmmlFile(file),
61  _doc(NULL),
62  _rootNode(NULL),
63  _currentNode(NULL),
64  _nbModels(0),
65  _currentModelName(""),
66  _currentModelType(kUNDEFINED)
67 {
68  try
69  {
70  xmlKeepBlanksDefault(0);
71  xmlInitParser();
72  _doc = xmlParseFile(_pmmlFile.c_str());
73  if ( _doc != NULL )
74  {
75  _rootNode = xmlDocGetRootElement(_doc);
76  CountModels();
77  }
78  else
79  throw string("Unable to read PMML file.");
80  }
81  catch ( std::string msg )
82  {
83  std::cerr << msg;
84  xmlFreeDoc(_doc);
85  xmlCleanupParser();
86  throw;
87  }
88 }
89 
95 PMMLlib::PMMLlib(bool log) :
96  _log(log),
97  _pmmlFile(""),
98  _doc(NULL),
99  _rootNode(NULL),
100  _currentNode(NULL),
101  _nbModels(0),
102  _currentModelName(""),
103  _currentModelType(kUNDEFINED)
104 {
105  SetRootNode();
106 }
107 
111 PMMLlib::~PMMLlib()
112 {
113  if (_doc)
114  xmlFreeDoc(_doc);
115  xmlCleanupParser();
116  if ( _log )
117  cout << "~PMMLlib" << endl;
118 }
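// --------------------------------------------------------------------------
// Usage sketch (illustration only, not part of the original PMMLlib.cxx):
// opening an existing PMML file. The file name "models.pmml" is hypothetical;
// the constructor throws a std::string if the file cannot be parsed.
// --------------------------------------------------------------------------
static void examplePMMLLoad()
{
    PMMLlib lib("models.pmml", /*log=*/false); // parses the file and counts the models
    std::cout << "models found: " << lib.GetModelsNb() << std::endl;
    // the destructor releases the libxml2 document
}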
119 
125 void PMMLlib::SetCurrentModel(std::string modelName,
126  PMMLType type)
127 {
128  _currentModelName = modelName;
129  _currentModelType = type;
130  switch(type)
131  {
132  case kANN:
133  _currentModelNode = GetNeuralNetPtr(modelName);
134  break;
135  case kLR:
136  _currentModelNode = GetRegressionPtr(modelName);
137  break;
138  default:
139  throw string("Unknown PMML type.");
140  break;
141  }
142  if ( _currentModelNode == NULL )
143  throw string("Model not found.");
144 }
145 
151 void PMMLlib::SetCurrentModel(std::string modelName)
152 {
153  if (_rootNode == NULL)
154  throw string("No PMML file set.");
155  xmlNodePtr node = NULL;
156  int nC = 0;
157  node = _rootNode->children;
158  while (node)
159  {
160  string nodeModelName = _getProp(node, string("modelName"));
161  if ( nodeModelName == modelName )
162  {
163  nC++;
164  _currentModelNode = node;
165  _currentModelName = modelName;
166  _currentModelType = GetCurrentModelType();
167  }
168  node = node->next;
169  }
170  if ( nC != 1 )
171  {
172  std::ostringstream oss;
173  oss << nC;
174  string msg = "SetCurrentModel(modelName) : found " + oss.str() + " model(s) in PMML file.\n";
175  msg += "Use SetCurrentModel(modelName,type).";
176  throw msg;
177  }
178 }
179 
184 void PMMLlib::SetCurrentModel()
185 {
186  int nC = _nbModels;
187  if ( nC != 1 )
188  {
189  std::ostringstream oss;
190  oss << nC;
191  string msg = "SetCurrentModel() : found " + oss.str() + " model(s) in PMML file.\n";
192  msg += "Use SetCurrentModel(modelName) or SetCurrentModel(modelName,type).";
193  throw msg;
194  }
195  _currentModelNode = GetChildByName(_rootNode,"NeuralNetwork");
196  _currentModelType = kANN;
197  if (_currentModelNode == NULL)
198  {
199  _currentModelNode = GetChildByName(_rootNode,"RegressionModel");
200  _currentModelType = kLR;
201  }
202  if (_currentModelNode == NULL)
203  {
204  string msg("Couldn't get node in SetCurrentModel().");
205  throw msg;
206  }
207  _currentModelName = _getProp(_currentModelNode, string("modelName"));
208 }
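// --------------------------------------------------------------------------
// Usage sketch (illustration only): selecting the working model with the
// three SetCurrentModel() overloads. The file and model names are
// hypothetical.
// --------------------------------------------------------------------------
static void examplePMMLSelectModel()
{
    PMMLlib lib("models.pmml", false);
    if ( lib.GetModelsNb() == 1 )
        lib.SetCurrentModel();              // single model: no argument needed
    else
        lib.SetCurrentModel("myANN", kANN); // ambiguous name: give the type too
    // lib.SetCurrentModel("myANN");        // works when the name is unique in the file
}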
209 
214 std::string PMMLlib::makeLog() const
215 {
216  ostringstream out;
217  out << "**\n**** Display of PMMLlib ****" << endl;
218  out << " ** _pmmlFile[" << _pmmlFile << "]" << endl;
219  out << " ** _log[" << (_log?1:0) << "]" << endl;
220  out << "**\n**** End of display of PMMLlib ****" << endl;
221  return out.str();
222 }
223 
227 void PMMLlib::printLog() const
228 {
229  string log = makeLog();
230  cout << log << endl;
231 }
232 
239 void PMMLlib::SetRootNode()
240 {
241  xmlChar * xs = _stringToXmlChar("1.0");
242  _doc = xmlNewDoc(xs);
243  xmlFree(xs);
244 
245  xmlChar *xp = _stringToXmlChar("PMML");
246  _rootNode = xmlNewNode(0, xp);
247  xmlFree(xp);
248 
249  xmlNewProp(_rootNode, (const xmlChar*)"xmlns", (const xmlChar*)"http://www.dmg.org/PMML-4_1");
250  xmlNewProp(_rootNode, (const xmlChar*)"version", (const xmlChar*)"4.1");
251 
252  xmlDocSetRootElement(_doc, _rootNode);
253 }
254 
255 
264 void PMMLlib::SetHeader(std::string copyright,
265  std::string description,
266  std::string appName,
267  std::string appVersion,
268  std::string annotation)
269 {
270  xmlNodePtr headerNode = xmlNewChild(_rootNode, 0, (const xmlChar*)"Header", 0);
271  xmlNewProp(headerNode, (const xmlChar*)"copyright", (const xmlChar*)(copyright.c_str()));
272  xmlNewProp(headerNode, (const xmlChar*)"description", (const xmlChar*)(description.c_str()));
273 
274  xmlNodePtr appNode = xmlNewChild(headerNode, 0, (const xmlChar*)"Application", 0);
275  xmlNewProp(appNode, (const xmlChar*)"name", (const xmlChar*)(appName.c_str()));
276  xmlNewProp(appNode, (const xmlChar*)"version", (const xmlChar*)(appVersion.c_str()));
277 
278  xmlNewChild(headerNode, 0, (const xmlChar*)"Annotation", (const xmlChar*)(annotation.c_str()));
279 }
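// --------------------------------------------------------------------------
// Usage sketch (illustration only): building a new document in memory with
// the file-less constructor, then writing it to disk. File, application and
// copyright strings are hypothetical.
// --------------------------------------------------------------------------
static void examplePMMLCreate()
{
    PMMLlib lib(false);   // creates an empty <PMML version="4.1"> root node
    lib.SetHeader("copyright 2016", "example document", "myApp", "1.0", "generated for tests");
    lib.Write("new_doc.pmml");   // serialized with xmlSaveFormatFile
}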
280 
287 void PMMLlib::AddMiningSchema(std::string name,
288  std::string usageType)
289 {
290  xmlNodePtr netNode = _currentModelNode;
291 
292  // if 'MiningSchema' node does not exist, create it
293  xmlNodePtr miningSchemaNode = GetChildByName(netNode, "MiningSchema");
294  if(!miningSchemaNode)
295  {
296  miningSchemaNode = xmlNewChild(netNode, 0, (const xmlChar*)"MiningSchema", 0);
297  }
298 
299  // then append the node
300  xmlNodePtr miningFieldNode = xmlNewChild(miningSchemaNode, 0, (const xmlChar*)"MiningField", 0);
301  xmlNewProp(miningFieldNode, (const xmlChar*)"name", (const xmlChar*)(name.c_str()) );
302  xmlNewProp(miningFieldNode, (const xmlChar*)"usageType", (const xmlChar*)(usageType.c_str()) );
303 }
304 
311 xmlNodePtr PMMLlib::GetChildByName(xmlNodePtr node,
312  std::string nodeName)
313 {
314  if ( node == NULL )
315  return node;
316 
317  xmlNodePtr childNode = node->children;
318  if ( childNode == NULL )
319  return childNode;
320 
321  const xmlChar* name = childNode->name;
322  string strName("");
323  if ( name != NULL )
324  strName = _xmlCharToString(name);
325 
326  while( (childNode != NULL) && (strName != nodeName) )
327  {
328  childNode = childNode->next;
329  if ( childNode == NULL )
330  return childNode;
331  name = childNode->name;
332  if ( name != NULL )
333  strName = _xmlCharToString(name);
334  }
335  return childNode;
336 }
337 
342 void PMMLlib::CountModels()
343 {
344  int nCount = 0;
345  nCount = CountNeuralNetModels() + CountRegressionModels();
346  if ( _log)
347  cout << " ** End Of Count Models nCount[" << nCount << "]" << endl;
348  _nbModels = nCount ;
349 }
350 
355 int PMMLlib::CountNeuralNetModels()
356 {
357  int nCount = 0;
358  xmlNodePtr ptr = GetChildByName(_rootNode,"NeuralNetwork");
359  // Count the models
360  while (ptr != NULL && _xmlCharToString(ptr->name) == "NeuralNetwork")
361  {
362  nCount++;
363  if (_log)
364  cout << " ** nCount[" << nCount << "]" << endl;
365  ptr = ptr->next;
366  }
367  if ( _log)
368  cout << " ** End Of CountNetworks nCount[" << nCount << "]" << endl;
369  return nCount;
370 }
371 
376 int PMMLlib::CountRegressionModels()
377 {
378  int nCount = 0;
379  xmlNodePtr ptr = GetChildByName(_rootNode,"RegressionModel");
380  // Count the models
381  while (ptr != NULL && _xmlCharToString(ptr->name) == "RegressionModel")
382  {
383  nCount++;
384  if (_log)
385  cout << " ** nCount[" << nCount << "]" << endl;
386  ptr = ptr->next;
387  }
388  if ( _log)
389  cout << " ** End Of CountRegressions nCount[" << nCount << "]" << endl;
390  return nCount;
391 }
392 
397 int PMMLlib::GetModelsNb()
398 {
399  return _nbModels;
400 }
401 
407 std::string PMMLlib::GetModelName(xmlNodePtr node)
408 {
409  string name("");
410  name = _getProp(node, string("modelName") );
411  return name;
412 }
413 
420 xmlNodePtr PMMLlib::GetPtr(int index,
421  std::string name)
422 {
423  xmlNodePtr node = NULL;
424 
425  if (_doc != NULL)
426  {
427  _rootNode = xmlDocGetRootElement(_doc);
428  node = GetChildByName(_rootNode, name);
429 
430  int i=0;
431  while ((i != index) && (node != NULL))
432  {
433  node = node->next;
434  i++;
435  }
436  }
437  return node;
438 }
439 
446 xmlNodePtr PMMLlib::GetPtr(std::string myModelName,
447  std::string nodeName)
448 {
449  xmlNodePtr node = NULL;
450  if (_doc != NULL)
451  {
452  node = GetChildByName(_rootNode, nodeName);
453  if( node )
454  {
455  string modelName = _getProp(node, string("modelName"));
456 
457  while ( (node != NULL) && modelName != myModelName )
458  {
459  node = node->next;
460  if( node )
461  {
462  modelName = _getProp(node, string("modelName"));
463  }
464  }
465  }
466  }
467  return node;
468 }
469 
474 std::string PMMLlib::GetTypeString()
475 {
476  string name = "";
477  switch(_currentModelType)
478  {
479  case kANN:
480  name = "NeuralNetwork";
481  break;
482  case kLR:
483  name = "RegressionModel";
484  break;
485  default:
486  throw string("Unknown PMML type.");
487  break;
488  }
489  return name;
490 }
491 
497 PMMLType PMMLlib::GetCurrentModelType()
498 {
499  PMMLType type = kUNDEFINED ;
500  if ( ! _currentModelNode )
501  return type;
502  string name = _xmlCharToString(_currentModelNode->name);
503  if ( name == "NeuralNetwork" )
504  type = kANN;
505  else if ( name == "RegressionModel" )
506  type = kLR;
507  return type;
508 }
509 
515 std::string PMMLlib::GetCurrentModelName()
516 {
517  if ( ! _currentModelNode )
518  return string("");
519  string name = _getProp(_currentModelNode, string("modelName"));
520  return name;
521 }
522 
526 void PMMLlib::UnlinkNode()
527 {
528  xmlNodePtr ptr = _currentModelNode ;
529  xmlUnlinkNode( ptr );
530  xmlFreeNode( ptr );
531 }
532 
536 void PMMLlib::BackupNode()
537 {
538  // Node name depending of PMML type
539  string name = GetTypeString();
540  // Find the last save index number
541  int nCrtIndex = 0;
542  stringstream ss;
543  ss << _currentModelName << "_" << nCrtIndex;
544  xmlNodePtr ptr = GetPtr(ss.str(), name);
545  while( ptr )
546  {
547  nCrtIndex++;
548  if (_log)
549  cout << " ** nCrtIndex[" << nCrtIndex << "]" << endl;
550 
551  ss.str("");
552  ss << _currentModelName << "_" << nCrtIndex;
553  ptr = GetPtr(ss.str(), name);
554  }
555  if(_log)
556  cout << " *** Node \"" << _currentModelName << "\" found, backing it up with index [" << nCrtIndex << "]" << endl;
557  // Rename model
558  xmlUnsetProp(_currentModelNode, (const xmlChar*)"modelName");
559  xmlNewProp(_currentModelNode, (const xmlChar*)"modelName", (const xmlChar*)(ss.str().c_str()));
560 }
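// Example of the renaming scheme above (illustration only): if the current
// model is named "myANN" and the file already contains "myANN_0" and
// "myANN_1", BackupNode() renames the current node to "myANN_2".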
561 
565 void PMMLlib::Write()
566 {
567  // Save the DOM tree into the PMML file
568  Write(_pmmlFile);
569  // Update the number of models
570  CountModels();
571 }
572 
577 void PMMLlib::Write(std::string file)
578 {
579  // Save the DOM tree as a PMML file
580  int ret = xmlSaveFormatFile( file.c_str(), _doc, 1);
581  if ( ret == -1 )
582  {
583  std::string msg(" *** Error :: unable to write the PMML file \"" + file + "\"") ;
584  cout << msg << endl;
585  throw msg;
586  }
587  if ( _log )
588  cout << " *** Write the PMML file \"" << file <<"\"" << endl;
589 }
590 
597 void PMMLlib::ExportCpp(std::string file,
598  std::string functionName,
599  std::string header)
600 {
601  if ( _currentModelType == kANN )
602  ExportNeuralNetworkCpp(file,functionName, header);
603  else if ( _currentModelType == kLR )
604  {
605  ExportLinearRegressionCpp(file, functionName, header);
606  }
607  else
608  throw string("ExportCpp : PMML type not handled.");
609 }
610 
617 void PMMLlib::ExportFortran(std::string file,
618  std::string functionName,
619  std::string header)
620 {
621  if ( _currentModelType == kANN )
622  ExportNeuralNetworkFortran(file,functionName, header);
623  else if ( _currentModelType == kLR )
624  ExportLinearRegressionFortran(file,functionName, header);
625  else
626  throw string("ExportFortran : PMML type not handled.");
627 }
628 
635 void PMMLlib::ExportPython(std::string file,
636  std::string functionName,
637  std::string header)
638 {
639  if ( _currentModelType == kANN )
640  ExportNeuralNetworkPython(file,functionName, header);
641  else if ( _currentModelType == kLR )
642  ExportLinearRegressionPython(file,functionName, header);
643  else
644  throw string("ExportPython : PMML type not handled.");
645 }
646 
654 std::string PMMLlib::ExportPyStr(std::string functionName,
655  std::string header)
656 {
657  if ( _currentModelType == kANN )
658  return ExportNeuralNetworkPyStr(functionName, header);
659  else if ( _currentModelType == kLR )
660  return ExportLinearRegressionPyStr(functionName, header);
661  else
662  throw string("ExportPyStr : PMML type not handled.");
663 }
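// --------------------------------------------------------------------------
// Usage sketch (illustration only): exporting the current model as standalone
// code. File and function names are hypothetical; the current model type
// selects the neural-network or the linear-regression exporter.
// --------------------------------------------------------------------------
static void examplePMMLExport()
{
    PMMLlib lib("models.pmml", false);
    lib.SetCurrentModel("myANN", kANN);
    lib.ExportCpp("myann.cpp", "myann_eval", "generated from models.pmml");
    std::string py = lib.ExportPyStr("myann_eval", "generated from models.pmml");
}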
664 
670 std::string PMMLlib::_xmlCharToString(const xmlChar *xs) const
671 {
672  size_t i, L = xmlStrlen(xs);
673  std::string s;
674  s.resize(L);
675  for (i=0; *xs; s[i++] = *xs++);
676  return s;
677 }
678 
684 xmlChar * PMMLlib::_stringToXmlChar(const std::string &s) const
685 {
686  return xmlCharStrdup(s.c_str());
687 }
688 
695 std::string PMMLlib::_getProp(const xmlNodePtr node,
696  std::string const & prop ) const
697 {
698  std::string name("");
699  if (_doc != NULL)
700  {
701  xmlChar *xp = _stringToXmlChar(prop);
702  xmlChar * attr ;
703  attr = xmlGetProp(node, xp );
704  if ( attr )
705  {
706  name = _xmlCharToString(attr );
707  xmlFree(attr);
708  }
709  xmlFree(xp);
710  }
711  return name;
712 }
713 
714 //**************************************************************
715 // *
716 // *
717 // *
718 // methods specific to the NeuralNetwork model *
719 // *
720 // *
721 // *
722 //**************************************************************
723 
729 void PMMLlib::CheckNeuralNetwork()
730 {
731  if ( _currentModelType != kANN )
732  throw string("Use this method with NeuralNetwork models.");
733 }
734 
740 xmlNodePtr PMMLlib::GetNeuralNetPtr(int index)
741 {
742  return GetPtr(index, GetTypeString() );
743 }
744 
750 xmlNodePtr PMMLlib::GetNeuralNetPtr(std::string name)
751 {
752  return GetPtr(name, GetTypeString() );
753 }
754 
760 std::string PMMLlib::ReadNetworkStructure()
761 {
762  CheckNeuralNetwork();
763 
764  string structure("");
765  // Treatment of the input
766  xmlNodePtr inputNodes = GetChildByName(_currentModelNode,"NeuralInputs");
767  if ( inputNodes != NULL )
768  {
769  xmlNodePtr inputNode = GetChildByName(inputNodes,"NeuralInput");
770  if ( inputNode != NULL )
771  {
772  while (inputNode != NULL)
773  {
774  xmlNodePtr child = GetChildByName(inputNode,"DerivedField");
775  if ( child != NULL )
776  {
777  xmlNodePtr fieldName = child->children; // NormContinuous
778  if ( fieldName != NULL )
779  {
780  string field = _getProp(fieldName, string("field"));
781  structure += field;
782  structure += ":";
783  }
784  }
785  inputNode = inputNode->next;
786  }
787  // Remove the trailing ':' separator
788  structure.erase(structure.size()-1);
789  }
790  }
791  // Intermediary layers
792  xmlNodePtr node_layer = GetChildByName(_currentModelNode,"NeuralLayer");
793  if ( node_layer != NULL )
794  {
795  string name = string((const char*)(node_layer->name));
796  structure += ",";
797 
798  while ( node_layer != NULL &&
799  (string((const char*)(node_layer->name)) == "NeuralLayer") &&
800  node_layer->next != NULL &&
801  (string((const char*)(node_layer->next->name)) != "NeuralOutputs") )
802  {
803  // Get the number of neurons of the current layer
804  string nbneurons = _getProp(node_layer, string("numberOfNeurons"));
805  structure += nbneurons;
806  structure += ",";
807  node_layer = node_layer->next;
808  }
809  }
810  // Output layers
811  xmlNodePtr node_outputs = GetChildByName(_currentModelNode,"NeuralOutputs");
812  if ( node_outputs != NULL )
813  {
814  xmlNodePtr node_output = GetChildByName(node_outputs,"NeuralOutput");
815  if ( node_output != NULL )
816  {
817  while (node_output != NULL)
818  {
819  // Get the input of the current layer
820  xmlNodePtr child = GetChildByName(node_output,"DerivedField");
821  if ( child != NULL )
822  {
823  xmlNodePtr fieldName = child->children; // NormContinuous
824  if ( fieldName != NULL )
825  {
826  if (string((const char*)(fieldName->name)) == "NormContinuous")
827  structure += "@";
828 
829  string field = _getProp(fieldName, string("field"));
830  structure += field;
831  structure += ":";
832  }
833  }
834  node_output = node_output->next;
835  }
836  // Remove the trailing ':' separator
837  structure.erase(structure.size()-1);
838  }
839  }
840  return structure;
841 }
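// Example of the string returned above (illustration only): for a network
// with two inputs "x1" and "x2", one hidden layer of 5 neurons and one
// NormContinuous output "y", ReadNetworkStructure() returns "x1:x2,5,@y":
// inputs separated by ':', then one neuron count per hidden layer, then the
// outputs, each prefixed by '@' when a NormContinuous field is present.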
842 
848 int PMMLlib::GetNbInputs()
849 {
850  CheckNeuralNetwork();
851 
852  int nb=0;
853  xmlNodePtr node_inputs = GetChildByName(_currentModelNode,"NeuralInputs");
854  if ( node_inputs == NULL )
855  return nb;
856 
857  node_inputs = node_inputs->children;
858  while (node_inputs != NULL)
859  {
860  nb++;
861  node_inputs = node_inputs->next;
862  }
863 
864  return nb;
865 }
866 
872 int PMMLlib::GetNbOutputs()
873 {
874  CheckNeuralNetwork();
875 
876  int nb=0;
877  xmlNodePtr node_outputs = GetChildByName(_currentModelNode,"NeuralOutputs");
878  if ( node_outputs == NULL )
879  return nb;
880 
881  node_outputs = node_outputs->children;
882 
883  while (node_outputs != NULL)
884  {
885  nb++;
886  node_outputs = node_outputs->next;
887  }
888 
889  return nb;
890 }
891 
898 std::string PMMLlib::GetNameInput(int index)
899 {
900  CheckNeuralNetwork();
901 
902  string name("");
903  xmlNodePtr node_inputs = GetChildByName(_currentModelNode,"NeuralInputs");
904  if ( node_inputs == NULL )
905  return name;
906 
907  node_inputs = node_inputs->children;
908  if ( node_inputs == NULL )
909  return name;
910 
911  for(int i = 0;i<index;i++)
912  {
913  node_inputs = node_inputs->next;
914  if ( node_inputs == NULL )
915  return name;
916  }
917 
918  node_inputs = node_inputs->children;
919  if ( node_inputs == NULL )
920  return name;
921 
922  node_inputs = node_inputs->children;
923  if ( node_inputs == NULL )
924  return name;
925 
926  name = _getProp(node_inputs, string("field"));
927 
928  return name;
929 }
930 
937 std::string PMMLlib::GetNameOutput(int index)
938 {
939  CheckNeuralNetwork();
940 
941  string name("");
942  xmlNodePtr node_outputs = GetChildByName(_currentModelNode,"NeuralOutputs");
943  if ( node_outputs == NULL )
944  return name;
945  node_outputs = node_outputs->children;
946  if ( node_outputs == NULL )
947  return name;
948  for(int i = 0;i<index;i++)
949  {
950  node_outputs = node_outputs->next;
951  if ( node_outputs == NULL )
952  return name;
953  }
954 
955  node_outputs = node_outputs->children;
956  if ( node_outputs == NULL )
957  return name;
958  node_outputs = node_outputs->children;
959  if ( node_outputs == NULL )
960  return name;
961 
962  name = _getProp(node_outputs, string("field") );
963 
964  return name;
965 }
966 
972 int PMMLlib::GetNormalizationType()
973 {
974  CheckNeuralNetwork();
975 
976  xmlNodePtr node_inputs = GetChildByName(_currentModelNode,"NeuralInputs");
977  node_inputs = GetChildByName(node_inputs,"NeuralInput");
978  xmlNodePtr nodeTmp = GetChildByName(node_inputs,"DerivedField");
979  xmlNodePtr node_field = nodeTmp->children;
980  xmlNodePtr node_linearnorm;
981  string str_tmp;
982  double dorig1, dnorm1;
983  double dorig2, dnorm2;
984  if (string((const char*)(node_field->name)) == "NormContinuous")
985  {
986  // Get mean and standard deviation
987  node_linearnorm = node_field->children;
988  str_tmp = _getProp(node_linearnorm, string("orig"));
989  dorig1 = atof(str_tmp.c_str());
990  str_tmp = _getProp(node_linearnorm, string("norm"));
991  dnorm1 = atof(str_tmp.c_str());
992  node_linearnorm = node_linearnorm->next;
993  str_tmp = _getProp(node_linearnorm, string("orig"));
994  dorig2 = atof(str_tmp.c_str());
995  str_tmp = _getProp(node_linearnorm, string("norm"));
996  dnorm2 = atof(str_tmp.c_str());
997  if ( dnorm1 * dnorm2 < -0.5 )
998  { // case of kMinusOneOne
999  return 0;
1000  }
1001  else
1002  { // case of kCR, kZeroOne
1003  return 1;
1004  }
1005  }
1006  string msg("Unable to retrieve the normalization type.");
1007  throw msg;
1008 }
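// Worked example (illustration only, assuming the LinearNorm layout produced
// by this library): a [-1,1]-normalized field stores the two pairs
// (orig=min, norm=-1) and (orig=max, norm=+1), so dnorm1*dnorm2 = -1 < -0.5
// and the method returns 0; for a [0,1] or centered-reduced field the two
// 'norm' values do not multiply to -1 and the method returns 1.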
1009 
1017 void PMMLlib::GetNormalisationInput(int index,
1018  double *dnorm)
1019 {
1020  CheckNeuralNetwork();
1021  dnorm[0] = 0.0;
1022  dnorm[1] = 0.0;
1023  xmlNodePtr node_inputs = GetChildByName(_currentModelNode,"NeuralInputs");
1024  if ( node_inputs == NULL )
1025  return ;
1026  node_inputs = GetChildByName(node_inputs,"NeuralInput");
1027  if ( node_inputs == NULL )
1028  return ;
1029  // Move to the requested input
1030  for(int i=0;i<index;i++)
1031  {
1032  node_inputs = node_inputs->next;
1033  if ( node_inputs == NULL )
1034  return ;
1035  }
1036  xmlNodePtr tmpNode = GetChildByName(node_inputs,"DerivedField");
1037  if ( tmpNode == NULL )
1038  return ;
1039  xmlNodePtr node_field = GetChildByName(tmpNode,"NormContinuous");
1040  if ( node_field == NULL )
1041  return ;
1042  if (string((const char*)(node_field->name)) == "NormContinuous")
1043  {
1044  //Get mean and standard deviation
1045  string str_tmp;
1046  xmlNodePtr node_linearnorm = node_field->children;
1047  str_tmp = _getProp(node_linearnorm, string("orig"));
1048  double dorig1 = atof(str_tmp.c_str());
1049  str_tmp = _getProp(node_linearnorm, string("norm"));
1050  double dnorm1 = atof(str_tmp.c_str());
1051  node_linearnorm = node_linearnorm->next;
1052  str_tmp = _getProp(node_linearnorm, string("orig"));
1053  double dorig2 = atof(str_tmp.c_str());
1054  str_tmp = _getProp(node_linearnorm, string("norm"));
1055  double dnorm2 = atof(str_tmp.c_str());
1056  if ( dnorm1 * dnorm2 < -0.5 ) // <=> GetNormalizationType == 0
1057  {
1058  // case of kMinusOneOne
1059  dnorm[0] = dorig1;
1060  dnorm[1] = dorig2;
1061  }
1062  else // <=> GetNormalizationType == 1
1063  {
1064  // case of kCR, kZeroOne
1065  dnorm[0] = dorig2;
1066  dnorm[1] = -1.0 * dnorm1 * dorig2; //dorig2 / dnorm1;
1067  }
1068  }
1069 }
1070 
1077 void PMMLlib::GetNormalisationOutput(int index,
1078  double *dnorm)
1079 {
1080  CheckNeuralNetwork();
1081  dnorm[0] = 0.0;
1082  dnorm[1] = 0.0;
1083 
1084  xmlNodePtr node_outputs = GetChildByName(_currentModelNode,"NeuralOutputs");
1085  if ( node_outputs == NULL )
1086  return ;
1087  node_outputs = GetChildByName(node_outputs,"NeuralOutput");
1088  if ( node_outputs == NULL )
1089  return ;
1090  // Move to the requested output
1091  for(int i=0;i< index;i++)
1092  {
1093  node_outputs = node_outputs->next;
1094  if ( node_outputs == NULL )
1095  return ;
1096  }
1097  xmlNodePtr tmpNode = GetChildByName(node_outputs,"DerivedField");
1098  if ( tmpNode == NULL )
1099  return ;
1100  xmlNodePtr node_field = GetChildByName(tmpNode,"NormContinuous");
1101  if ( node_field == NULL )
1102  return ;
1103 
1104  if (string((const char*)(node_field->name)) == "NormContinuous")
1105  {
1106  // Get the mean and the standard deviation
1107  string str_tmp;
1108  xmlNodePtr node_linearnorm = node_field->children;
1109  str_tmp = _getProp(node_linearnorm, string("orig"));
1110  double dorig1 = atof(str_tmp.c_str());
1111  str_tmp = _getProp(node_linearnorm, string("norm"));
1112  double dnorm1 = atof(str_tmp.c_str());
1113  node_linearnorm = node_linearnorm->next;
1114  str_tmp = _getProp(node_linearnorm,string("orig"));
1115  double dorig2 = atof(str_tmp.c_str());
1116  str_tmp = _getProp(node_linearnorm, string("norm"));
1117  double dnorm2 = atof(str_tmp.c_str());
1118  if ( dnorm1 * dnorm2 < -0.5 )
1119  {
1120  // case of kMinusOneOne
1121  dnorm[0] = dorig1;
1122  dnorm[1] = dorig2;
1123  }
1124  else
1125  {
1126  // case of kCR, kZeroOne
1127  dnorm[0] = dorig2;
1128  dnorm[1] = -1.0 * dorig2 * dnorm1; //-1.0 * dorig2 / dnorm1;
1129  }
1130  }
1131 }
1132 
1138 int PMMLlib::GetNbHiddenLayers()
1139 {
1140  CheckNeuralNetwork();
1141 
1142  int nb_layers = 0;
1143  xmlNodePtr node_layers = GetChildByName(_currentModelNode,"NeuralLayer");
1144  if ( node_layers == NULL )
1145  return nb_layers;
1146 
1147  while (string((const char*)(node_layers->name)) == "NeuralLayer")
1148  {
1149  nb_layers++;
1150  node_layers = node_layers->next;
1151  if ( node_layers == NULL )
1152  return nb_layers;
1153  }
1154  return nb_layers;
1155 }
1156 
1161 int PMMLlib::GetNbLayers()
1162 {
1163  return (GetNbHiddenLayers() + 2);
1164 }
1165 
1171 int PMMLlib::GetNbNeuronsAtLayer(int index)
1172 {
1173  CheckNeuralNetwork();
1174 
1175  int nb_neurons = 0;
1176  xmlNodePtr node_layers = GetChildByName(_currentModelNode,"NeuralLayer");
1177  if ( node_layers == NULL )
1178  return nb_neurons;
1179 
1180  // Move to the requested layer
1181  for(int i=0;i<index;i++)
1182  {
1183  node_layers = node_layers->next;
1184  if ( node_layers == NULL )
1185  return nb_neurons;
1186  }
1187 
1188  xmlNodePtr node_neurons = GetChildByName(node_layers,"Neuron");
1189  while(node_neurons != NULL)
1190  {
1191  nb_neurons++;
1192  node_neurons = node_neurons->next;
1193  }
1194 
1195  return nb_neurons;
1196 }
1197 
1205 double PMMLlib::GetNeuronBias(int layer_index,
1206  int neu_index)
1207 {
1208  CheckNeuralNetwork();
1209 
1210  double bias = 0.;
1211  xmlNodePtr node_layers = GetChildByName(_currentModelNode,"NeuralLayer");
1212  if ( node_layers == NULL )
1213  return bias;
1214  // Move to the requested layer
1215  for(int i=0;i<layer_index;i++)
1216  {
1217  node_layers = node_layers->next;
1218  if ( node_layers == NULL )
1219  return bias;
1220  }
1221  xmlNodePtr node_neurons = GetChildByName(node_layers,"Neuron");
1222  // Move to the requested neuron
1223  for(int j=0;j<neu_index;j++)
1224  {
1225  node_neurons = node_neurons->next;
1226  if ( node_neurons == NULL )
1227  return bias;
1228  }
1229  string str_tmp = _getProp(node_neurons, string("bias"));
1230  bias = atof(str_tmp.c_str());
1231  return bias;
1232 }
1233 
1242 double PMMLlib::GetPrecNeuronSynapse(int layer_index,
1243  int neu_index,
1244  int prec_index)
1245 {
1246  CheckNeuralNetwork();
1247 
1248  double weight = 0.;
1249  xmlNodePtr node_layers = GetChildByName(_currentModelNode,"NeuralLayer");
1250  if ( node_layers == NULL )
1251  return weight;
1252  // Move to the requested layer
1253  for(int i=0;i<layer_index;i++)
1254  {
1255  node_layers = node_layers->next;
1256  if ( node_layers == NULL )
1257  return weight;
1258  }
1259  xmlNodePtr node_neurons = GetChildByName(node_layers,"Neuron");
1260  // Move to the requested neuron
1261  for(int i=0;i<neu_index;i++)
1262  {
1263  node_neurons = node_neurons->next;
1264  if ( node_neurons == NULL )
1265  return weight;
1266  }
1267  xmlNodePtr node_con = GetChildByName(node_neurons,"Con");
1268  // Move to the requested synapse
1269  for(int i=0;i<prec_index;i++)
1270  {
1271  node_con = node_con->next;
1272  if ( node_con == NULL )
1273  return weight;
1274  }
1275  string str_tmp = _getProp(node_con, string("weight"));
1276  weight = atof(str_tmp.c_str());
1277  return weight;
1278 }
1279 
1286 // LCOV_EXCL_START
1287 void PMMLlib::SetNeuralNetName(int index,
1288  std::string name)
1289 {
1290  CheckNeuralNetwork();
1291 
1292  int i=0;
1293  if (_doc != NULL)
1294  {
1295  xmlNodePtr node_ann = GetChildByName(_rootNode,"NeuralNetwork");
1296  while ((i != index) && (node_ann != NULL))
1297  {
1298  node_ann = node_ann->next;
1299  i++;
1300  }
1301  xmlNewProp(node_ann, (const xmlChar*)"modelName", (const xmlChar*)(name.c_str()));
1302  }
1303  xmlSaveFormatFile( string(_pmmlFile+".pmml").c_str(), _doc, 1);
1304 }
1305 // LCOV_EXCL_STOP
1306 
1318 void PMMLlib::AddDataField(std::string fieldName,
1319  std::string displayName,
1320  std::string optype,
1321  std::string dataType,
1322  std::string closure,
1323  double leftMargin,
1324  double rightMargin,
1325  bool interval)
1326 {
1327  // if 'DataDictionary' node does not exist, create it
1328  xmlNodePtr dataDictNode = GetChildByName(_rootNode, "DataDictionary");
1329  if(!dataDictNode)
1330  {
1331  dataDictNode = xmlNewChild(_rootNode, 0, (const xmlChar*)"DataDictionary", 0);
1332  }
1333 
1334  // then append the node
1335  xmlNodePtr dataFieldNode = xmlNewChild(dataDictNode, 0, (const xmlChar*)"DataField", 0);
1336  xmlNewProp(dataFieldNode, (const xmlChar*)"name", (const xmlChar*)(fieldName.c_str()) );
1337  xmlNewProp(dataFieldNode, (const xmlChar*)"displayName", (const xmlChar*)(displayName.c_str()) );
1338  xmlNewProp(dataFieldNode, (const xmlChar*)"optype", (const xmlChar*)(optype.c_str()) );
1339  xmlNewProp(dataFieldNode, (const xmlChar*)"dataType", (const xmlChar*)(dataType.c_str()) );
1340 
1341  if ( interval )
1342  {
1343  xmlNodePtr intervalNode = xmlNewChild(dataFieldNode, 0, (const xmlChar*)"Interval", 0);
1344  xmlNewProp(intervalNode, (const xmlChar*)"closure", (const xmlChar*)(closure.c_str()) );
1345  stringstream ss;
1346  ss << scientific << leftMargin;
1347  xmlNewProp(intervalNode, (const xmlChar*)"leftMargin", (const xmlChar*)(ss.str().c_str()) );
1348  ss.str("");
1349  ss << scientific << rightMargin;
1350  xmlNewProp(intervalNode, (const xmlChar*)"rightMargin", (const xmlChar*)(ss.str().c_str()) );
1351  }
1352 }
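// --------------------------------------------------------------------------
// Usage sketch (illustration only): declaring a continuous input field with
// its validity interval. The field name and the bounds are hypothetical.
// --------------------------------------------------------------------------
static void examplePMMLDataField(PMMLlib &lib)
{
    // produces:
    //   <DataField name="x1" displayName="x_1" optype="continuous" dataType="float">
    //     <Interval closure="closedClosed" leftMargin="..." rightMargin="..."/>
    //   </DataField>
    lib.AddDataField("x1", "x_1", "continuous", "float", "closedClosed", 0.0, 1.0, true);
}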
1353 
1360 void PMMLlib::AddNeuralNetwork(std::string modelName,
1361  PMMLMiningFunction functionName)
1362 {
1363  _currentModelType = kANN;
1364  _currentModelName = modelName;
1365 
1366  CheckNeuralNetwork();
1367 
1368  string function;
1369  switch(functionName)
1370  {
1371  case kREGRESSION:
1372  function = "regression";
1373  break;
1374  }
1375 
1376  xmlNodePtr netNode = xmlNewChild(_rootNode, 0, (const xmlChar*)"NeuralNetwork", 0);
1377  xmlNewProp(netNode, (const xmlChar*)"modelName", (const xmlChar*)(_currentModelName.c_str()) );
1378  xmlNewProp(netNode, (const xmlChar*)"functionName", (const xmlChar*)(function.c_str()) );
1379  xmlNewProp(netNode, (const xmlChar*)"numberOfLayers", (const xmlChar*)"0" );
1380  _currentModelNode = netNode;
1381 }
1382 
1395 void PMMLlib::AddNeuralInput(int id,
1396  std::string inputName,
1397  std::string optype,
1398  std::string dataType,
1399  double orig1, double norm1,
1400  double orig2, double norm2)
1401 {
1402  CheckNeuralNetwork();
1403 
1404  xmlNodePtr netNode = _currentModelNode;
1405  // if 'NeuralInputs' node does not exist, create it
1406  xmlNodePtr neuralInputsNode = GetChildByName(netNode, "NeuralInputs");
1407  if(!neuralInputsNode)
1408  {
1409  neuralInputsNode = xmlNewChild(netNode, 0, (const xmlChar*)"NeuralInputs", 0);
1410  xmlNewProp(neuralInputsNode, (const xmlChar*)"numberOfInputs", (const xmlChar*)"0" );
1411  }
1412  // increment the number of inputs
1413  string numberOfInputsStr = _getProp(neuralInputsNode, string("numberOfInputs"));
1414  int numberOfInputs;
1415  istringstream( numberOfInputsStr ) >> numberOfInputs;
1416  numberOfInputs++;
1417  stringstream ss;
1418  ss << numberOfInputs;
1419  xmlSetProp(neuralInputsNode, (const xmlChar*)"numberOfInputs", (const xmlChar*)(ss.str().c_str()) );
1420  // then append the node and its children
1421  xmlNodePtr neuralInputNode = xmlNewChild(neuralInputsNode, 0, (const xmlChar*)"NeuralInput", 0);
1422  ss.str(""); ss << id;
1423  xmlNewProp(neuralInputNode, (const xmlChar*)"id", (const xmlChar*)(ss.str().c_str()) );
1424 
1425  xmlNodePtr derivedFieldNode = xmlNewChild(neuralInputNode, 0, (const xmlChar*)"DerivedField", 0);
1426  xmlNewProp(derivedFieldNode, (const xmlChar*)"optype", (const xmlChar*)(optype.c_str()) );
1427  xmlNewProp(derivedFieldNode, (const xmlChar*)"dataType", (const xmlChar*)(dataType.c_str()) );
1428 
1429  xmlNodePtr normcontNode = xmlNewChild(derivedFieldNode, 0, (const xmlChar*)"NormContinuous", 0);
1430  xmlNewProp(normcontNode, (const xmlChar*)"field", (const xmlChar*)(inputName.c_str()) );
1431 
1432  xmlNodePtr node_linearnorm1 = xmlNewChild(normcontNode, 0, (const xmlChar*)"LinearNorm", 0);
1433  ss.str(""); ss << scientific << orig1;
1434  xmlNewProp(node_linearnorm1, (const xmlChar*)"orig", (const xmlChar*)(ss.str().c_str()) );
1435  ss.str(""); ss << scientific << norm1;
1436  xmlNewProp(node_linearnorm1, (const xmlChar*)"norm", (const xmlChar*)(ss.str().c_str()) );
1437  xmlNodePtr node_linearnorm2 = xmlNewChild(normcontNode, 0, (const xmlChar*)"LinearNorm", 0);
1438  ss.str(""); ss << scientific << orig2;
1439  xmlNewProp(node_linearnorm2, (const xmlChar*)"orig", (const xmlChar*)(ss.str().c_str()) );
1440  ss.str(""); ss << scientific << norm2;
1441  xmlNewProp(node_linearnorm2, (const xmlChar*)"norm", (const xmlChar*)(ss.str().c_str()) );
1442 }
1443 
1456 void PMMLlib::AddNeuralOutput(int outputNeuron,
1457  std::string outputName,
1458  std::string optype,
1459  std::string dataType,
1460  double orig1, double norm1,
1461  double orig2, double norm2)
1462 {
1463  CheckNeuralNetwork();
1464 
1465  xmlNodePtr netNode = _currentModelNode;
1466  // if 'NeuralOutputs' node does not exist, create it
1467  xmlNodePtr neuralOutputsNode = GetChildByName(netNode, "NeuralOutputs");
1468  if(!neuralOutputsNode)
1469  {
1470  neuralOutputsNode = xmlNewChild(netNode, 0, (const xmlChar*)"NeuralOutputs", 0);
1471  xmlNewProp(neuralOutputsNode, (const xmlChar*)"numberOfOutputs", (const xmlChar*)"0" );
1472  }
1473  // increment the number of outputs
1474  string numberOfOutputsStr = _getProp(neuralOutputsNode, string("numberOfOutputs"));
1475  int numberOfOutputs;
1476  istringstream( numberOfOutputsStr ) >> numberOfOutputs;
1477  numberOfOutputs++;
1478  stringstream ss;
1479  ss << numberOfOutputs;
1480  xmlSetProp(neuralOutputsNode, (const xmlChar*)"numberOfOutputs", (const xmlChar*)(ss.str().c_str()) );
1481 
1482  // then append the node and its children
1483  xmlNodePtr neuralOutputNode = xmlNewChild(neuralOutputsNode, 0, (const xmlChar*)"NeuralOutput", 0);
1484  ss.str(""); ss << outputNeuron;
1485  xmlNewProp(neuralOutputNode, (const xmlChar*)"outputNeuron", (const xmlChar*)(ss.str().c_str()) );
1486 
1487  xmlNodePtr derivedFieldNode = xmlNewChild(neuralOutputNode, 0, (const xmlChar*)"DerivedField", 0);
1488  xmlNewProp(derivedFieldNode, (const xmlChar*)"optype", (const xmlChar*)(optype.c_str()) );
1489  xmlNewProp(derivedFieldNode, (const xmlChar*)"dataType", (const xmlChar*)(dataType.c_str()) );
1490 
1491  xmlNodePtr normcontNode = xmlNewChild(derivedFieldNode, 0, (const xmlChar*)"NormContinuous", 0);
1492  xmlNewProp(normcontNode, (const xmlChar*)"field", (const xmlChar*)(outputName.c_str()) );
1493 
1494  xmlNodePtr node_linearnorm1 = xmlNewChild(normcontNode, 0, (const xmlChar*)"LinearNorm", 0);
1495  ss.str(""); ss << scientific << orig1;
1496  xmlNewProp(node_linearnorm1, (const xmlChar*)"orig", (const xmlChar*)(ss.str().c_str()) );
1497  ss.str(""); ss << scientific << norm1;
1498  xmlNewProp(node_linearnorm1, (const xmlChar*)"norm", (const xmlChar*)(ss.str().c_str()) );
1499  xmlNodePtr node_linearnorm2 = xmlNewChild(normcontNode, 0, (const xmlChar*)"LinearNorm", 0);
1500  ss.str(""); ss << scientific << orig2;
1501  xmlNewProp(node_linearnorm2, (const xmlChar*)"orig", (const xmlChar*)(ss.str().c_str()) );
1502  ss.str(""); ss << scientific << norm2;
1503  xmlNewProp(node_linearnorm2, (const xmlChar*)"norm", (const xmlChar*)(ss.str().c_str()) );
1504 }
1505 
1511 void PMMLlib::AddNeuralLayer(PMMLActivationFunction activationFunction)
1512 {
1513  CheckNeuralNetwork();
1514 
1515  string functionName;
1516  switch(activationFunction)
1517  {
1518  case kIDENTITY:
1519  functionName = "identity";
1520  break;
1521  case kTANH:
1522  functionName = "tanh";
1523  break;
1524  case kLOGISTIC:
1525  functionName = "logistic";
1526  break;
1527  }
1528  xmlNodePtr netNode = _currentModelNode;
1529  // Increment the number of layers
1530  string numberOfLayersStr = _getProp(_currentModelNode, string("numberOfLayers"));
1531  int numberOfLayers;
1532  istringstream( numberOfLayersStr ) >> numberOfLayers;
1533  numberOfLayers++;
1534  stringstream ss;
1535  ss << numberOfLayers;
1536  xmlSetProp(netNode, (const xmlChar*)"numberOfLayers", (const xmlChar*)(ss.str().c_str()) );
1537  // Add the neural layer node
1538  xmlNodePtr neuralLayerNode = xmlNewChild(netNode, 0, (const xmlChar*)"NeuralLayer", 0);
1539  xmlNewProp(neuralLayerNode, (const xmlChar*)"activationFunction", (const xmlChar*)(functionName.c_str()) );
1540  xmlNewProp(neuralLayerNode, (const xmlChar*)"numberOfNeurons", (const xmlChar*)"0" );
1541  // Save the current layer in the _currentNode attribute
1542  _currentNode = neuralLayerNode;
1543 }
1544 
1554 void PMMLlib::AddNeuron(int id,
1555  double bias,
1556  int conNb,
1557  int firstFrom,
1558  vector<double> weights)
1559 {
1560  CheckNeuralNetwork();
1561 
1562  stringstream ss;
1563 
1564  // increment the number of neurons
1565  string numberOfNeuronsStr = _getProp(_currentNode, string("numberOfNeurons"));
1566  int numberOfNeurons;
1567  istringstream( numberOfNeuronsStr ) >> numberOfNeurons;
1568  numberOfNeurons++;
1569  ss << numberOfNeurons;
1570  xmlSetProp(_currentNode, (const xmlChar*)"numberOfNeurons", (const xmlChar*)(ss.str().c_str()) );
1571 
1572  // append a neuron
1573  xmlNodePtr neuronNode = xmlNewChild(_currentNode, 0, (const xmlChar*)"Neuron", 0);
1574  ss.str(""); ss << id;
1575  xmlNewProp(neuronNode, (const xmlChar*)"id", (const xmlChar*)(ss.str().c_str()) );
1576  ss.str(""); ss << scientific << bias;
1577  xmlNewProp(neuronNode, (const xmlChar*)"bias", (const xmlChar*)(ss.str().c_str()) );
1578 
1579  // append multiple 'Con' to the neuron
1580  for(int k=0 ; k<conNb ; k++)
1581  {
1582  xmlNodePtr conNode = xmlNewChild(neuronNode, 0, (const xmlChar*)"Con", 0);
1583  ss.str(""); ss << firstFrom+k;
1584  xmlNewProp(conNode, (const xmlChar*)"from", (const xmlChar*)(ss.str().c_str()) ); // note: the 'from' id is firstFrom+k, not k
1585  ss.str(""); ss << scientific << weights[k];
1586  xmlNewProp(conNode, (const xmlChar*)"weight", (const xmlChar*)(ss.str().c_str()) );
1587  }
1588 }
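// --------------------------------------------------------------------------
// Usage sketch (illustration only): assembling a minimal 1-input / 1-hidden /
// 1-output network with the methods above. All names and numeric values are
// arbitrary placeholders.
// --------------------------------------------------------------------------
static void examplePMMLBuildNetwork(PMMLlib &lib)
{
    lib.AddNeuralNetwork("myANN", kREGRESSION);
    lib.AddNeuralInput(0, "x1", "continuous", "float", 0.0, -1.0, 1.0, 1.0);
    lib.AddNeuralLayer(kTANH);                          // hidden layer, becomes _currentNode
    lib.AddNeuron(1, /*bias=*/0.1, /*conNb=*/1, /*firstFrom=*/0, std::vector<double>(1, 0.5));
    lib.AddNeuralLayer(kIDENTITY);                      // output layer
    lib.AddNeuron(2, 0.0, 1, 1, std::vector<double>(1, 0.8));
    lib.AddNeuralOutput(2, "y", "continuous", "float", 0.0, -1.0, 1.0, 1.0);
    lib.AddMiningSchema("x1", "active");
}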
1589 
1603 void PMMLlib::fillVectorsForExport(int nInput,
1604  int nOutput,
1605  int nHidden,
1606  int normType,
1607  vector<double> &minInput,
1608  vector<double> &maxInput,
1609  vector<double> &minOutput,
1610  vector<double> &maxOutput,
1611  vector<double> &valW )
1612 {
1613  CheckNeuralNetwork();
1614 
1615  xmlNodePtr netNode = _currentModelNode ;
1616  // Get the different values required
1617  // Build min/max input/output vectors
1618  for(int i=0 ; i<nInput ; i++)
1619  {
1620  xmlNodePtr node_inputs = GetChildByName(netNode,"NeuralInputs");
1621  node_inputs = node_inputs->children;
1622  for(int j = 0;j<i;j++)
1623  {
1624  node_inputs = node_inputs->next;
1625  }
1626  node_inputs = node_inputs->children; // DerivedField
1627  node_inputs = node_inputs->children; // NormContinuous
1628  node_inputs = node_inputs->children; // LinearNorm
1629  string strOrig1 = _getProp(node_inputs, string("orig") );
1630  double orig1 = atof( strOrig1.c_str() );
1631  string strNorm1 = _getProp(node_inputs, string("norm") );
1632  double norm1 = atof( strNorm1.c_str() );
1633  node_inputs = node_inputs->next;
1634  string strOrig2 = _getProp(node_inputs, string("orig") );
1635  double orig2 = atof( strOrig2.c_str() );
1636  string strNorm2 = _getProp(node_inputs, string("norm") );
1637  if( normType==0 )
1638  { // kMinusOneOne
1639  minInput[i] = orig1;
1640  maxInput[i] = orig2;
1641  }
1642  else
1643  { // kCR, kZeroOne
1644  minInput[i] = orig2;
1645  maxInput[i] = -1.0*norm1*orig2;
1646  }
1647  }
1648  xmlNodePtr node_outputs = GetChildByName(netNode,"NeuralOutputs");
1649  node_outputs = node_outputs->children;
1650  node_outputs = node_outputs->children; // DerivedField
1651  node_outputs = node_outputs->children; // NormContinuous
1652  node_outputs = node_outputs->children; // LinearNorm
1653  string strOrig1 = _getProp(node_outputs, string("orig") );
1654  double orig1 = atof( strOrig1.c_str() );
1655  string strNorm1 = _getProp(node_outputs, string("norm") );
1656  double norm1 = atof( strNorm1.c_str() );
1657  node_outputs = node_outputs->next;
1658  string strOrig2 = _getProp(node_outputs, string("orig") );
1659  double orig2 = atof( strOrig2.c_str() );
1660  if( normType==0 )
1661  { // kMinusOneOne
1662  minOutput[0] = orig1;
1663  maxOutput[0] = orig2;
1664  }
1665  else
1666  { // kCR, kZeroOne
1667  minOutput[0] = orig2;
1668  maxOutput[0] = -1.0*norm1*orig2;
1669  }
1670  // Build weight vector
1671  for(int j=0 ; j<nHidden ; j++) // hidden layers
1672  {
1673  valW[j*(nInput+nOutput+1)+2] = GetNeuronBias( 0, j);
1674  for(int i=0 ; i<nInput ; i++)
1675  {
1676  valW[j*(nInput+nOutput+1)+3+i] = GetPrecNeuronSynapse( 0, j, i);
1677  }
1678  }
1679  for(int j=0 ; j<nOutput ; j++) // output layers
1680  {
1681  valW[0] = GetNeuronBias( 1, j);
1682  for(int i=0 ; i<nHidden ; i++)
1683  {
1684  valW[i*(nInput+nOutput+1)+1] = GetPrecNeuronSynapse( 1, j, i);
1685  }
1686  }
1687 }
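// Layout of valW as filled above (single-output case, illustration only):
// each hidden neuron j owns a block of (nInput + 2) entries,
//   valW[0]                     : bias of the output neuron
//   valW[j*(nInput+2) + 1]      : weight from hidden neuron j to the output
//   valW[j*(nInput+2) + 2]      : bias of hidden neuron j
//   valW[j*(nInput+2) + 3 + i]  : weight from input i to hidden neuron j
// e.g. with nInput = 2 the weight from input 1 to hidden neuron 0 sits at
// index 4; the generated C, Fortran and Python code reads the vector back
// with the same indexing.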
1688 
1696 void PMMLlib::ExportNeuralNetworkCpp(std::string file,
1697  std::string functionName,
1698  std::string header)
1699 {
1700  CheckNeuralNetwork();
1701 
1702  // Get the different values required
1703  int nInput = GetNbInputs();
1704  int nOutput = GetNbOutputs();
1705  int nHidden = GetNbNeuronsAtLayer(0);
1706  int nNeurons = nInput+nOutput+nHidden;
1707  int nWeights = nHidden*(nInput+nOutput+1)+nOutput;
1708  int normType = GetNormalizationType();
1709  // Build min/max input/output vectors
1710  vector<double> minInput(nInput);
1711  vector<double> maxInput(nInput);
1712  vector<double> minOutput(nOutput);
1713  vector<double> maxOutput(nOutput);
1714  vector<double> valW(nWeights);
1715  fillVectorsForExport(nInput,nOutput,nHidden,normType,minInput,maxInput,minOutput,maxOutput,valW);
1716  // Write the file
1717  ofstream sourcefile(file.c_str());
1718  // ActivationFunction
1719  if( normType==0 )
1720  { // kMinusOneOne
1721  sourcefile << "#define ActivationFunction(sum) ( tanh(sum) )" << endl;
1722  }
1723  else
1724  { // kCR, kZeroOne
1725  sourcefile << "#define ActivationFunction(sum) ( 1.0 / ( 1.0 + exp( -1.0 * sum )) )" << endl;
1726  }
1727  //
1728  sourcefile << "void " << functionName <<"(double *param, double *res)" << endl;
1729  sourcefile << "{" << endl;
1730  // header
1731  sourcefile << " ////////////////////////////// " << endl;
1732  sourcefile << " //" << endl;
1733  // insert comments in header
1734  header = " // " + header;
1735  size_t pos = 0;
1736  while ((pos = header.find("\n", pos)) != std::string::npos)
1737  {
1738  header.replace(pos, 1, "\n //");
1739  pos += 5;
1740  }
1741  sourcefile << header << endl;
1742  sourcefile << " //" << endl;
1743  sourcefile << " ////////////////////////////// " << endl;
1744  sourcefile << endl;
1745  sourcefile << " int nInput = " << nInput << ";" << endl;
1746  sourcefile << " int nOutput = " << nOutput << ";" << endl;
1747  // sourcefile << " int nWeights = " << _nWeight << ";" << endl;
1748  sourcefile << " int nHidden = " << nHidden << ";" << endl;
1749  sourcefile << " const int nNeurones = " << nNeurons << ";" << endl;
1750  sourcefile << " double " << functionName << "_act[nNeurones];" << endl;
1751  sourcefile << endl;
1752  sourcefile << " // --- Preprocessing of the inputs and outputs" << endl;
1753  sourcefile << " double " << functionName << "_minInput[] = {" << endl << " ";
1754  for(int i=0 ; i<nInput ; i++)
1755  {
1756  sourcefile << minInput[i] << ", ";
1757  if( (i+1)%5==0 )
1758  sourcefile << "\n ";
1759  }
1760  if( nInput%5 != 0 )
1761  sourcefile << endl;
1762  sourcefile << " };" << endl;
1763  //
1764  sourcefile << " double " << functionName << "_minOutput[] = {" << endl << " ";
1765  sourcefile << minOutput[0] << ", ";
1766  sourcefile << " };" << endl;
1767  //
1768  sourcefile << " double " << functionName << "_maxInput[] = {" << endl << " ";
1769  for(int i=0 ; i<nInput ; i++)
1770  {
1771  sourcefile << maxInput[i] << ", ";
1772  if( (i+1)%5==0 )
1773  sourcefile << "\n ";
1774  }
1775  if( nInput%5 != 0 )
1776  sourcefile << endl;
1777  sourcefile << " };" << endl;
1778  //
1779  sourcefile << " double " << functionName << "_maxOutput[] = {" << endl << " ";
1780  sourcefile << maxOutput[0] << ", ";
1781  sourcefile << " };" << endl;
1782  // Weights vector
1783  sourcefile << endl;
1784  sourcefile << " // --- Values of the weights" << endl;
1785  sourcefile << " double " << functionName << "_valW[] = {" << endl << " ";
1786  for(int i=0 ; i<nWeights ; i++)
1787  {
1788  sourcefile << valW[i] << ", ";
1789  if ( (i+1)%5 == 0 )
1790  sourcefile << endl << " ";
1791  }
1792  sourcefile << endl << " };"<<endl;
1793  //
1794  sourcefile << " // --- Constants";
1795  sourcefile << endl;
1796  sourcefile << " int indNeurone = 0;"<<endl;
1797  sourcefile << " int CrtW;"<<endl;
1798  sourcefile << " double sum;"<<endl;
1799 
1800  // input layer
1801  sourcefile << endl;
1802  sourcefile << " // --- Input Layers"<<endl;
1803  sourcefile << " for(int i = 0; i < nInput; i++) {"<<endl;
1804  if( normType==0 )
1805  { // kMinusOneOne
1806  sourcefile << " " << functionName << "_act[indNeurone++] = 2.0 * ( param[i] - "
1807  << functionName << "_minInput[i] ) / ( " << functionName << "_maxInput[i] - "
1808  << functionName << "_minInput[i] ) - 1.0;"<<endl;
1809  }
1810  else
1811  { // kCR, kZeroOne
1812  sourcefile << " " << functionName << "_act[indNeurone++] = ( param[i] - "
1813  << functionName << "_minInput[i] ) / " << functionName << "_maxInput[i];"
1814  << endl;
1815  }
1816  sourcefile << " }"<<endl;
1817 
1818 
1819  // hidden layer
1820  sourcefile << endl;
1821  sourcefile << " // --- Hidden Layers"<<endl;
1822  sourcefile << " for (int member = 0; member < nHidden; member++) {"<<endl;
1823  sourcefile << " int CrtW = member * ( nInput + 2) + 2;" << endl;
1824  sourcefile << " sum = " << functionName << "_valW[CrtW++];" << endl;
1825  sourcefile << " for (int source = 0; source < nInput; source++) {" << endl;
1826  sourcefile << " sum += " << functionName << "_act[source] * " << functionName << "_valW[CrtW++];" << endl;
1827  sourcefile << " }" << endl;
1828  sourcefile << " " << functionName << "_act[indNeurone++] = ActivationFunction(sum);" << endl;
1829  sourcefile << " }"<<endl;
1830  // output layer
1831  sourcefile << endl;
1832  sourcefile << " // --- Output"<<endl;
1833  sourcefile << " for (int member = 0; member < nOutput; member++) {"<<endl;
1834  sourcefile << " sum = " << functionName << "_valW[0];"<<endl;
1835  sourcefile << " for (int source = 0; source < nHidden; source++) {"<<endl;
1836  sourcefile << " CrtW = source * ( nInput + 2) + 1;"<<endl;
1837  sourcefile << " sum += " << functionName << "_act[nInput+source] * " << functionName << "_valW[CrtW];"<<endl;
1838  sourcefile << " }"<<endl;
1839  sourcefile << " " << functionName << "_act[indNeurone++] = sum;"<<endl;
1840  if( normType==0 )
1841  { // kMinusOneOne
1842  sourcefile << " res[member] = " << functionName
1843  << "_minOutput[member] + 0.5 * ( " << functionName
1844  << "_maxOutput[member] - " << functionName
1845  << "_minOutput[member] ) * ( sum + 1.0);" << endl;
1846  }
1847  else
1848  { // kCR, kZeroOne
1849  sourcefile << " res[member] = " << functionName
1850  << "_minOutput[member] + " << functionName
1851  << "_maxOutput[member] * sum;" << endl;
1852  }
1853  sourcefile << " }"<<endl;
1854  //
1855  sourcefile << "}" << endl;
1856  sourcefile.close();
1857 }
1858 
1866 void PMMLlib::ExportNeuralNetworkFortran(std::string file,
1867  std::string functionName,
1868  std::string header)
1869 {
1870  CheckNeuralNetwork();
1871 
1872  // Get the different values required
1873  int nInput = GetNbInputs();
1874  int nOutput = GetNbOutputs();
1875  int nHidden = GetNbNeuronsAtLayer(0);
1876  int nWeights = nHidden*(nInput+nOutput+1)+nOutput;
1877  int normType = GetNormalizationType();
1878  // Build min/max input/output vectors
1879  vector<double> minInput(nInput);
1880  vector<double> maxInput(nInput);
1881  vector<double> minOutput(nOutput);
1882  vector<double> maxOutput(nOutput);
1883  vector<double> valW(nWeights);
1884  fillVectorsForExport(nInput,nOutput,nHidden,normType,minInput,maxInput,minOutput,maxOutput,valW);
1885  // Write the file
1886  ofstream sourcefile(file.c_str());
1887 
1888  sourcefile << " SUBROUTINE " << functionName << "(";
1889  for(int i=0 ; i<GetNbInputs() ; i++)
1890  {
1891  sourcefile << GetNameInput(i) << ",";
1892  }
1893  sourcefile << GetNameOutput(0) << ")" << endl;
1894  // header
1895  sourcefile << "C --- *********************************************" << endl;
1896  sourcefile << "C --- " << endl;
1897  // insert comments in header
1898  header = "C --- " + header;
1899  size_t pos = 0;
1900  while ((pos = header.find("\n", pos)) != std::string::npos)
1901  {
1902  header.replace(pos, 1, "\nC --- ");
1903  pos += 5;
1904  }
1905  sourcefile << header << endl;
1906  sourcefile << "C --- " << endl;
1907  sourcefile << "C --- *********************************************" << endl;
1908 
1909  sourcefile << " IMPLICIT DOUBLE PRECISION (V)" << endl;
1910  for(int i=0 ; i<GetNbInputs() ; i++)
1911  {
1912  sourcefile << " DOUBLE PRECISION " << GetNameInput(i) << endl;
1913  }
1914  sourcefile << " DOUBLE PRECISION " << GetNameOutput(0) << endl;
1915  sourcefile << endl;
1916 
1917  sourcefile << "C --- Preprocessing of the inputs" << endl;
1918  for(int i=0 ; i<GetNbInputs() ; i++)
1919  {
1920  sourcefile << " VXN" << GetNameInput(i) << " = ";
1921 
1922  if( normType==0 )
1923  { // kMinusOneOne
1924  sourcefile << "2.D0 * ( " << GetNameInput(i) << " - " << minInput[i] << "D0 ) / " << maxInput[i] - minInput[i] << "D0 - 1.0" << endl;
1925  }
1926  else
1927  { // kCR, kZeroOne
1928  sourcefile << "( " << GetNameInput(i) << " - " << minInput[i] << "D0 ) / " << maxInput[i] << "D0" << endl;
1929  }
1930  }
1931 
1932  // Weights vector
1933  sourcefile << endl;
1934  sourcefile << "C --- Values of the weights" << endl;
1935  for(int i=0 ; i<nWeights ; i++)
1936  {
1937  sourcefile << " VW" << i+1 << " = " << valW[i] << endl;
1938  }
1939  // Loop on hidden neurons
1940  sourcefile << endl;
1941  for(int member = 0; member < nHidden; member++)
1942  {
1943  sourcefile << "C --- hidden neuron number " << member+1 << endl;
1944  int CrtW = member * ( nInput + 2) + 3;
1945  sourcefile << " VAct" << member+1 << " = VW" << CrtW++ << endl;
1946  for (int source = 0; source < nInput; source++)
1947  {
1948  sourcefile << " 1 + VW"<< CrtW++ << " * VXN" << GetNameInput(source) << endl;
1949  }
1950  sourcefile << endl;
1951 
1952 
1953  if( normType==0 )
1954  { // kMinusOneOne
1955  sourcefile << " VPot" << member+1 << " = 2.D0 / (1.D0 + DEXP(-2.D0 * VAct" << member+1 <<")) - 1.D0" << endl;
1956  }
1957  else
1958  { // kCR, kZeroOne
1959  sourcefile << " VPot" << member+1 << " = 1.D0 / (1.D0 + DEXP(-1.D0 * VAct" << member+1 <<"))" << endl;
1960  }
1961  sourcefile << endl;
1962  }
1963 
1964  // Output of the model
1965  sourcefile << "C --- Output" << endl;
1966  sourcefile << " VOut = VW1" << endl;
1967  for(int source=0 ; source < nHidden ; source++)
1968  {
1969  int CrtW = source * ( nInput + 2) + 2;
1970  sourcefile << " 1 + VW"<< CrtW << " * VPot" << source+1 << endl;
1971  }
1972 
1973  // Denormalize Output
1974  sourcefile << endl;
1975  sourcefile << "C --- Post-processing of the output" << endl;
1976  if( normType==0 )
1977  { // kMinusOneOne
1978  sourcefile << " VDelta = " << 0.5*(maxOutput[0]-minOutput[0]) << "D0 * ( VOut + 1.0D0)" << endl;
1979  sourcefile << " " << GetNameOutput(0) << " = " << minOutput[0] << "D0 + VDelta" << endl;
1980 
1981  }
1982  else
1983  { // kCR, kZeroOne
1984  sourcefile << " " << GetNameOutput(0) << " = "<< minOutput[0] << "D0 + " << maxOutput[0] << "D0 * VOut;" << endl;
1985  }
1986 
1987  sourcefile << endl;
1988  sourcefile << "C --- " << endl;
1989  sourcefile << " RETURN" << endl;
1990  sourcefile << " END" << endl;
1991 
1992  sourcefile.close();
1993 }
1994 
2002 void PMMLlib::ExportNeuralNetworkPython(std::string file,
2003  std::string functionName,
2004  std::string header)
2005 {
2006  string str(ExportNeuralNetworkPyStr(functionName, header));
2007  // Write the file
2008  ofstream exportfile(file.c_str());
2009  exportfile << str;
2010  exportfile.close();
2011 }
2012 
2013 
2021 std::string PMMLlib::ExportNeuralNetworkPyStr(std::string functionName,
2022  std::string header)
2023 {
2024  CheckNeuralNetwork();
2025 
2026  ostringstream out;
2027 
2028  // Get the different values required
2029  int nInput = GetNbInputs();
2030  int nOutput = GetNbOutputs();
2031  int nHidden = GetNbNeuronsAtLayer(0);
2032  int nNeurons = nInput+nOutput+nHidden;
2033  int nWeights = nHidden*(nInput+nOutput+1)+nOutput;
2034  int normType = GetNormalizationType();
2035  // Build min/max input/output vectors
2036  vector<double> minInput(nInput);
2037  vector<double> maxInput(nInput);
2038  vector<double> minOutput(nOutput);
2039  vector<double> maxOutput(nOutput);
2040  vector<double> valW(nWeights);
2041  fillVectorsForExport(nInput,nOutput,nHidden,normType,minInput,maxInput,minOutput,maxOutput,valW);
2042 
2043  // Shebang and imports
2044  out << "#!/usr/bin/env python" << endl;
2045  out << "# -*- coding: utf-8 -*-" << endl;
2046  out << endl;
2047  out << "from math import tanh, exp" << endl;
2048  out << endl;
2049 
2050  // ActivationFunction
2051  if( normType==0 )
2052  { // kMinusOneOne
2053  out << "def ActivationFunction(sum): " << endl;
2054  out << " return tanh(sum); " << endl;
2055  }
2056  else
2057  { // kCR, kZeroOne
2058  out << "def ActivationFunction(sum): " << endl;
2059  out << " return ( 1.0 / ( 1.0 + exp( -1.0 * sum ) ) ); " << endl;
2060  }
2061 
2062  out << endl;
2063  out << "def " << functionName <<"(param):" << endl;
2064  out << endl;
2065 
2066  // header
2067  out << " ############################## " << endl;
2068  out << " #" << endl;
2069  // insert comments in header
2070  header = " # " + header;
2071  size_t pos = 0;
2072  while ((pos = header.find("\n", pos)) != std::string::npos)
2073  {
2074  header.replace(pos, 1, "\n #");
2075  pos += 5;
2076  }
2077  out << header << endl;
2078  out << " #" << endl;
2079  out << " ############################## " << endl;
2080  out << endl;
2081 
2082  // Initializations
2083  out << " nInput = " << nInput << ";" << endl;
2084  out << " nOutput = " << nOutput << ";" << endl;
2085  out << " nHidden = " << nHidden << ";" << endl;
2086  out << " nNeurones = " << nNeurons << ";" << endl;
2087  out << " " << functionName << "_act = [];" << endl;
2088  out << " res = [];" << endl;
2089  out << endl;
2090 
2091  out << " # --- Preprocessing of the inputs and outputs" << endl;
2092  out << " " << functionName << "_minInput = [" << endl << " ";
2093  out << " " ;
2094  for(int i=0 ; i<nInput ; i++)
2095  {
2096  out << minInput[i] << ", ";
2097  if( (i+1)%5==0 )
2098  {
2099  out << endl ;
2100  out << " " ;
2101  }
2102  }
2103  out << endl << " ];" << endl;
2104 
2105  out << " " << functionName << "_minOutput = [" << endl << " ";
2106  out << " " << minOutput[0] ;
2107  out << endl << " ];" << endl;
2108 
2109  out << " " << functionName << "_maxInput = [" << endl << " ";
2110  for(int i=0 ; i<nInput ; i++)
2111  {
2112  out << maxInput[i] << ", ";
2113  if( (i+1)%5==0 )
2114  {
2115  out << endl;
2116  out << " " ;
2117  }
2118  }
2119  out << endl << " ];" << endl;
2120 
2121  out << " " << functionName << "_maxOutput = [" << endl << " ";
2122  out << " " << maxOutput[0] ;
2123  out << endl << " ];" << endl;
2124 
2125  // Weights vector
2126  out << " # --- Values of the weights" << endl;
2127  out << " " << functionName << "_valW = [" << endl << " ";
2128  for(int i=0 ; i<nWeights ; i++)
2129  {
2130  out << valW[i] << ", ";
2131  if ( (i+1)%5 == 0 )
2132  {
2133  out << endl;
2134  out << " " ;
2135  }
2136  }
2137  out << endl << " ];"<<endl;
2138 
2139  out << " # --- Constants" << endl;
2140  out << " indNeurone = 0;" << endl;
2141  out << endl;
2142 
2143  // input layer
2144  out << " # --- Input Layers" << endl;
2145  out << " for i in range(nInput) :" << endl;
2146  if( normType==0 )
2147  { // kMinusOneOne
2148  out << " " << functionName << "_act.append( 2.0 * ( param[i] - "
2149  << functionName << "_minInput[i] ) / ( " << functionName << "_maxInput[i] - "
2150  << functionName << "_minInput[i] ) - 1.0 ) ;"
2151  << endl;
2152  }
2153  else
2154  { // kCR, kZeroOne
2155  out << " " << functionName << "_act.append( ( param[i] - "
2156  << functionName << "_minInput[i] ) / " << functionName << "_maxInput[i] ) ;"
2157  << endl;
2158  }
2159  out << " indNeurone += 1 ;" << endl;
2160  out << " pass" << endl;
2161 
2162  // hidden layer
2163  out << endl;
2164  out << " # --- Hidden Layers" << endl;
2165  out << " for member in range(nHidden):" << endl;
2166  out << " CrtW = member * ( nInput + 2) + 2;" << endl;
2167  out << " sum = " << functionName << "_valW[CrtW];" << endl;
2168  out << " CrtW += 1 ;" << endl;
2169  out << " for source in range(nInput) :" << endl;
2170  out << " sum += " << functionName << "_act[source] * " << functionName << "_valW[CrtW];" << endl;
2171  out << " CrtW += 1 ;" << endl;
2172  out << " pass" << endl;
2173  out << " " << functionName << "_act.append( ActivationFunction(sum) ) ;" << endl;
2174  out << " indNeurone += 1 ;" << endl;
2175  out << " pass" << endl;
2176  out << endl;
2177 
2178  // output layer
2179  out << " # --- Output"<<endl;
2180  out << " for member in range(nOutput):" << endl;
2181  out << " sum = " << functionName << "_valW[0];" << endl;
2182  out << " for source in range(nHidden):" << endl;
2183  out << " CrtW = source * ( nInput + 2) + 1;"<<endl;
2184  out << " sum += " << functionName << "_act[nInput+source] * " << functionName << "_valW[CrtW];" << endl;
2185  out << " pass" << endl;
2186  out << " " << functionName << "_act.append( sum );" << endl;
2187  out << " indNeurone += 1 ;" << endl;
2188  if( normType==0 )
2189  { // kMinusOneOne
2190  out << "        res.append( " << functionName
2191  << "_minOutput[member] + 0.5 * ( " << functionName
2192  << "_maxOutput[member] - " << functionName
2193  << "_minOutput[member] ) * ( sum + 1.0) );" << endl;
2194  }
2195  else
2196  { // kCR, kZeroOne
2197  out << " res.append( " << functionName
2198  << "_minOutput[member] + " << functionName
2199  << "_maxOutput[member] * sum );" << endl;
2200  }
2201  out << " pass" << endl;
2202  out << endl;
2203 
2204  // return result
2205  out << " return res;" << endl << endl;
2206  out << endl;
2207 
2208  return out.str();
2209 }
2210 
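// Overview (a sketch, not a literal transcript of the generated script): the
// Python string assembled above evaluates the network in three passes,
// mirroring the PMML NeuralNetwork layout:
//   - input layer  : each param[i] is normalized with <functionName>_minInput /
//                    <functionName>_maxInput and appended to <functionName>_act;
//   - hidden layer : each hidden neuron applies ActivationFunction() to its bias
//                    plus the weighted sum of the input activations, the weights
//                    being read from <functionName>_valW in the order filled by
//                    fillVectorsForExport();
//   - output layer : the weighted sum of the hidden activations is denormalized
//                    with <functionName>_minOutput / <functionName>_maxOutput and
//                    returned in res.
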
2211 //**************************************************************
2212 // *
2213 // *
2214 // *
2215 //            methods specific to the RegressionModel             *
2216 // *
2217 // *
2218 // *
2219 //**************************************************************
2220 
2226 void PMMLlib::CheckRegression()
2227 {
2228  if ( _currentModelType != kLR )
2229  throw string("Use this method with Regression models.");
2230 }
2231 
2237 xmlNodePtr PMMLlib::GetRegressionPtr(std::string name)
2238 {
2239  return GetPtr(name, GetTypeString() );
2240 }
2241 
2249 void PMMLlib::AddRegressionModel(std::string modelName,
2250  PMMLMiningFunction functionName,
2251  std::string targetFieldName)
2252 {
2253  _currentModelType = kLR;
2254  _currentModelName = modelName;
2255  // Check regression after setting model type!
2256  CheckRegression();
2257 
2258  string function;
2259  switch(functionName)
2260  {
2261  case kREGRESSION:
2262  function = "regression";
2263  break;
2264  }
2265  xmlNodePtr netNode = xmlNewChild(_rootNode, 0, (const xmlChar*)"RegressionModel", 0);
2266  xmlNewProp(netNode, (const xmlChar*)"functionName", (const xmlChar*)(function.c_str()) );
2267  xmlNewProp(netNode, (const xmlChar*)"modelName", (const xmlChar*)(_currentModelName.c_str()) );
2268  xmlNewProp(netNode, (const xmlChar*)"targetFieldName", (const xmlChar*)(targetFieldName.c_str()) );
2269  _currentModelNode = netNode ;
2270 }
2271 
2277 void PMMLlib::AddRegressionTable()
2278 {
2279  CheckRegression();
2280  xmlNodePtr tableNode = xmlNewChild(_currentModelNode, 0, (const xmlChar*)"RegressionTable", 0);
2281  _currentNode = tableNode;
2282 }
2283 
2289 void PMMLlib::AddRegressionTable(double intercept)
2290 {
2291  CheckRegression();
2292 
2293  stringstream ss;
2294  xmlNodePtr tableNode = xmlNewChild(_currentModelNode, 0, (const xmlChar*)"RegressionTable", 0);
2295  if(intercept!=0.0)
2296  {
2297  ss << scientific << intercept;
2298  xmlNewProp(tableNode, (const xmlChar*)"intercept", (const xmlChar*)(ss.str().c_str()) );
2299  }
2300  _currentNode = tableNode;
2301 }
2302 
2310 void PMMLlib::AddNumericPredictor(std::string neuronName,
2311  int exponent,
2312  double coefficient)
2313 {
2314  CheckRegression();
2315  stringstream ss;
2316  xmlNodePtr numPrecNode = xmlNewChild(_currentNode, 0, (const xmlChar*)"NumericPredictor", 0);
2317  xmlNewProp(numPrecNode, (const xmlChar*)"name", (const xmlChar*)(neuronName.c_str()) );
2318  ss.str(""); ss << exponent;
2319  xmlNewProp(numPrecNode, (const xmlChar*)"exponent", (const xmlChar*)(ss.str().c_str()) );
2320  ss.str(""); ss << scientific << coefficient;
2321  xmlNewProp(numPrecNode, (const xmlChar*)"coefficient", (const xmlChar*)(ss.str().c_str()) );
2322 }
2323 
2330 void PMMLlib::AddPredictorTerm(double coefficient,
2331  std::vector<std::string> fieldRef)
2332 {
2333  CheckRegression();
2334  stringstream ss;
2335  xmlNodePtr predTermNode = xmlNewChild(_currentNode, 0, (const xmlChar*)"PredictorTerm", 0);
2336  ss.str(""); ss << scientific << coefficient;
2337  xmlNewProp(predTermNode, (const xmlChar*)"coefficient", (const xmlChar*)(ss.str().c_str()) );
2338  vector<string>::iterator it;
2339  for(it=fieldRef.begin() ; it!=fieldRef.end() ; it++)
2340  {
2341  xmlNodePtr fieldRefNode = xmlNewChild(predTermNode, 0, (const xmlChar*)"FieldRef", 0);
2342  ss.str(""); ss << (*it);
2343  xmlNewProp(fieldRefNode, (const xmlChar*)"field", (const xmlChar*)(ss.str().c_str()) );
2344  }
2345 }
2346 
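// Usage sketch (illustrative only, compiled out): the helpers above are meant
// to be chained to build a RegressionModel fragment. The model, field and
// coefficient values below are invented for the example; the in-memory
// constructor PMMLlib(bool log) is assumed.
#if 0
static void exampleBuildRegression()
{
    PMMLlib doc(false);                          // empty in-memory PMML document
    doc.AddRegressionModel("lrModel", kREGRESSION, "target");
    doc.AddRegressionTable(1.5);                 // RegressionTable carrying an intercept
    doc.AddNumericPredictor("x0", 1, 0.75);      // contributes 0.75 * x0^1
    std::vector<std::string> refs;
    refs.push_back("x0");
    refs.push_back("x1");
    doc.AddPredictorTerm(0.1, refs);             // contributes 0.1 * x0 * x1 (FieldRef children)
}
#endif
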
2352 bool PMMLlib::HasIntercept()
2353 {
2354  CheckRegression();
2355  bool b = false;
2356  xmlNodePtr tableNode = GetChildByName(_currentModelNode,"RegressionTable");
2357  if ( tableNode == NULL )
2358  return b;
2359  xmlChar *xp = _stringToXmlChar("intercept");
2360  xmlChar * attr ;
2361  attr = xmlGetProp(tableNode, xp);
2362  if ( attr )
2363  {
2364  xmlFree(attr);
2365  xmlFree(xp);
2366  return true;
2367  }
2368  xmlFree(xp);
2369  return false;
2370 }
2371 
2377 double PMMLlib::GetRegressionTableIntercept()
2378 {
2379  CheckRegression();
2380  double reg = 0.;
2381  xmlNodePtr tableNode = GetChildByName(_currentModelNode,"RegressionTable");
2382  if ( tableNode == NULL )
2383  return reg;
2384  string strValue = _getProp(tableNode, string("intercept") );
2385  return atof(strValue.c_str());
2386 }
2387 
2393 int PMMLlib::GetNumericPredictorNb()
2394 {
2395  CheckRegression();
2396 
2397  int nb=0;
2398  xmlNodePtr tableNode = GetChildByName(_currentModelNode,"RegressionTable");
2399  if ( tableNode == NULL )
2400  return nb;
2401  xmlNodePtr numPredNodes = tableNode->children;
2402  while (numPredNodes != NULL )
2403  {
2404  if ( string((const char*)(numPredNodes->name)) == "NumericPredictor" )
2405  nb++;
2406  numPredNodes = numPredNodes->next;
2407  }
2408  return nb;
2409 }
2410 
2416 int PMMLlib::GetPredictorTermNb()
2417 {
2418  CheckRegression();
2419  int nb=0;
2420  xmlNodePtr tableNode = GetChildByName(_currentModelNode,"RegressionTable");
2421  if ( tableNode == NULL )
2422  return nb;
2423  xmlNodePtr numPredNodes = tableNode->children;
2424  while ( numPredNodes != NULL )
2425  {
2426  if ( string((const char*)(numPredNodes->name)) == "PredictorTerm" )
2427  nb++;
2428  numPredNodes = numPredNodes->next;
2429  }
2430  return nb;
2431 }
2432 
2439 std::string PMMLlib::GetNumericPredictorName(int num_pred_index)
2440 {
2441  CheckRegression();
2442  string strName("");
2443  xmlNodePtr numPredNodes = GetChildByName(_currentModelNode,"RegressionTable");
2444  if ( numPredNodes == NULL )
2445  return strName;
2446 
2447  numPredNodes = GetChildByName(numPredNodes,"NumericPredictor");
2448  if ( numPredNodes == NULL )
2449  return strName;
2450  // Move to the requested NumericPredictor node
2451  for(int i=0;i<num_pred_index;i++)
2452  {
2453  numPredNodes = numPredNodes->next;
2454  if ( numPredNodes == NULL ||
2455  string((const char*)(numPredNodes->name)) != "NumericPredictor" )
2456  return strName;
2457  }
2458  strName = _getProp(numPredNodes, string("name"));
2459  return strName;
2460 }
2461 
2468 std::string PMMLlib::GetPredictorTermName(int pred_term_index)
2469 {
2470  CheckRegression();
2471  string strName("");
2472  xmlNodePtr fieldRefNodes = GetChildByName(_currentModelNode,"RegressionTable");
2473  if ( fieldRefNodes == NULL )
2474  return strName;
2475 
2476  fieldRefNodes = GetChildByName(fieldRefNodes,"PredictorTerm");
2477  if ( fieldRefNodes == NULL )
2478  return strName;
2479  // Move to the requested PredictorTerm node
2480  for(int i=0;i<pred_term_index;i++)
2481  {
2482  fieldRefNodes = fieldRefNodes->next;
2483  if ( fieldRefNodes == NULL ||
2484  string((const char*)(fieldRefNodes->name)) != "PredictorTerm" )
2485  return strName;
2486  }
2487 
2488  fieldRefNodes = fieldRefNodes->children;
2489  while (fieldRefNodes != NULL)
2490  {
2491  strName += _getProp(fieldRefNodes, string("field"));
2492  fieldRefNodes = fieldRefNodes->next;
2493  }
2494  return strName;
2495 }
2496 
2504 double PMMLlib::GetNumericPredictorCoefficient(int num_pred_index)
2505 {
2506  CheckRegression();
2507 
2508  double coef = 0.;
2509  xmlNodePtr numPredNodes = GetChildByName(_currentModelNode,"RegressionTable");
2510  if ( numPredNodes == NULL )
2511  return coef;
2512  numPredNodes = GetChildByName(numPredNodes,"NumericPredictor");
2513  if ( numPredNodes == NULL )
2514  return coef;
2515  // Move to the requested NumericPredictor node
2516  for(int i=0;i<num_pred_index;i++)
2517  {
2518  numPredNodes = numPredNodes->next;
2519  if ( numPredNodes == NULL ||
2520  string((const char*)(numPredNodes->name)) != "NumericPredictor" )
2521  return coef;
2522  }
2523  string strValue = _getProp(numPredNodes, string("coefficient"));
2524  coef = atof(strValue.c_str());
2525  return coef;
2526 }
2527 
2535 double PMMLlib::GetPredictorTermCoefficient(int pred_term_index)
2536 {
2537  CheckRegression();
2538 
2539  double coef = 0.;
2540  xmlNodePtr predTermNodes = GetChildByName(_currentModelNode,"RegressionTable");
2541  if ( predTermNodes == NULL )
2542  return coef;
2543  predTermNodes = GetChildByName(predTermNodes,"PredictorTerm");
2544  if ( predTermNodes == NULL )
2545  return coef;
2546  // Move to the requested PredictorTerm node
2547  for(int i=0;i<pred_term_index;i++)
2548  {
2549  predTermNodes = predTermNodes->next;
2550  if ( predTermNodes == NULL ||
2551  string((const char*)(predTermNodes->name)) != "PredictorTerm" )
2552  return coef;
2553  }
2554  string strValue = _getProp(predTermNodes, string("coefficient"));
2555  coef = atof(strValue.c_str());
2556  return coef;
2557 }
2558 
2565 int PMMLlib::GetPredictorTermFieldRefNb(int index)
2566 {
2567  CheckRegression();
2568 
2569  int nb=0;
2570  xmlNodePtr fieldRefNodes = GetChildByName(_currentModelNode,"RegressionTable");
2571  if ( fieldRefNodes == NULL )
2572  return nb;
2573  fieldRefNodes = GetChildByName(fieldRefNodes,"PredictorTerm");
2574  if ( fieldRefNodes == NULL )
2575  return nb;
2576  // Move to the requested PredictorTerm node
2577  for(int i=0;i<index;i++)
2578  {
2579  fieldRefNodes = fieldRefNodes->next;
2580  if ( fieldRefNodes == NULL ||
2581  string((const char*)(fieldRefNodes->name)) != "PredictorTerm" )
2582  return nb;
2583  }
2584  fieldRefNodes = fieldRefNodes->children;
2585  while (fieldRefNodes != NULL)
2586  {
2587  nb++;
2588  fieldRefNodes = fieldRefNodes->next;
2589  }
2590  return nb;
2591 }
2592 
2601 std::string PMMLlib::GetPredictorTermFieldRefName(int pred_term_index, int field_index)
2602 {
2603  CheckRegression();
2604 
2605  string strName("");
2606  xmlNodePtr fieldRefNodes = GetChildByName(_currentModelNode,"RegressionTable");
2607  if ( fieldRefNodes == NULL )
2608  return strName;
2609  fieldRefNodes = GetChildByName(fieldRefNodes,"PredictorTerm");
2610  if ( fieldRefNodes == NULL )
2611  return strName;
2612  // Move to the requested PredictorTerm node
2613  for(int i=0;i<pred_term_index;i++)
2614  {
2615  fieldRefNodes = fieldRefNodes->next;
2616  if ( fieldRefNodes == NULL ||
2617  string((const char*)(fieldRefNodes->name)) != "PredictorTerm" )
2618  return strName;
2619  }
2620  fieldRefNodes = fieldRefNodes->children;
2621  if ( fieldRefNodes == NULL )
2622  return strName;
2623  // Move to the requested FieldRef node
2624  for(int i=0;i<field_index;i++)
2625  {
2626  fieldRefNodes = fieldRefNodes->next;
2627  if ( fieldRefNodes == NULL )
2628  return strName;
2629  }
2630  strName = _getProp(fieldRefNodes, string("field"));
2631  return strName;
2632 }
2633 
2641 void PMMLlib::ExportLinearRegressionCpp(std::string file,
2642  std::string functionName,
2643  std::string header)
2644 {
2645  CheckRegression();
2646 
2647  // Write the file
2648  ofstream exportfile(file.c_str());
2649 
2650  exportfile << "void " << functionName <<"(double *param, double *res)" << endl;
2651  exportfile << "{" << endl;
2652  // header
2653  exportfile << " ////////////////////////////// " << endl;
2654  exportfile << " //" << endl;
2655  // insert comments in header
2656  header = " // " + header;
2657  size_t pos = 0;
2658  while ((pos = header.find("\n", pos)) != std::string::npos)
2659  {
2660  header.replace(pos, 1, "\n //");
2661  pos += 5;
2662  }
2663  exportfile << header << endl;
2664  exportfile << " //" << endl;
2665  exportfile << " ////////////////////////////// " << endl << endl;
2666 
2667  double intercept = 0.0;
2668  if ( HasIntercept() )
2669  {
2670  exportfile << " // Intercept"<< endl;
2671  intercept = GetRegressionTableIntercept();
2672  }
2673  else
2674  exportfile << " // No Intercept"<< endl;
2675  exportfile << " double y = " << intercept << ";";
2676  exportfile << endl << endl;
2677 
2678  int nPred = GetNumericPredictorNb();
2679  for (int i=0; i<nPred; i++)
2680  {
2681  exportfile << " // Attribute : " << GetNumericPredictorName(i) << endl;
2682  exportfile << " y += param["<<i<<"]*" << GetNumericPredictorCoefficient(i) << ";";
2683  exportfile << endl << endl;
2684  }
2685  int nTerm = GetPredictorTermNb();
2686  for (int i=0; i<nTerm; i++)
2687  {
2688  exportfile << " // Attribute : " << GetPredictorTermName(i) << endl;
2689  exportfile << " y += param["<<(i+nPred)<<"]*" << GetPredictorTermCoefficient(i) << ";";
2690  exportfile << endl << endl;
2691  }
2692 
2693  exportfile << " // Return the value"<< endl;
2694  exportfile << " res[0] = y;" << endl;
2695  exportfile << "}" << endl;
2696  exportfile.close();
2697 }
2698 
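// For reference, the file produced by ExportLinearRegressionCpp for a model
// with intercept 1.5 and a single NumericPredictor of coefficient 0.75 looks
// roughly like this (function, field and coefficient names are illustrative):
//
//   void myFunction(double *param, double *res)
//   {
//       //////////////////////////////
//       // ... user supplied header ...
//       //////////////////////////////
//
//       // Intercept
//       double y = 1.5;
//
//       // Attribute : x0
//       y += param[0]*0.75;
//
//       // Return the value
//       res[0] = y;
//   }
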
2706 void PMMLlib::ExportLinearRegressionFortran(std::string file,
2707  std::string functionName,
2708  std::string header)
2709 {
2710  CheckRegression();
2711 
2712  int nNumPred = GetNumericPredictorNb();
2713  int nPredTerm = GetPredictorTermNb();
2714  vector<string>strParam(nNumPred+nPredTerm);
2715  for(int i=0; i<(nNumPred+nPredTerm); i++)
2716  {
2717  strParam[i] = "P" + NumberToString(i) ;
2718  }
2719 
2720  // Write the file
2721  ofstream exportfile(file.c_str());
2722 
2723  exportfile << " SUBROUTINE " << functionName <<"(";
2724  for(int i=0; i<(nNumPred+nPredTerm); i++)
2725  {
2726  exportfile << strParam[i] << ", ";
2727  }
2728  exportfile << "RES)" << endl;
2729 
2730  // header
2731  exportfile << "C --- *********************************************" << endl;
2732  exportfile << "C --- " << endl;
2733  // insert comments in header
2734  header = "C --- " + header;
2735  size_t pos = 0;
2736  while ((pos = header.find("\n", pos)) != std::string::npos)
2737  {
2738  header.replace(pos, 1, "\nC --- ");
2739  pos += 5;
2740  }
2741  exportfile << header << endl;
2742  exportfile << "C --- " << endl;
2743  exportfile << "C --- *********************************************" << endl << endl;
2744 
2745  exportfile << " IMPLICIT DOUBLE PRECISION (P)" << endl;
2746  exportfile << " DOUBLE PRECISION RES" << endl;
2747  exportfile << " DOUBLE PRECISION Y" << endl;
2748  exportfile << endl;
2749 
2750  double intercept = 0.0;
2751  if ( HasIntercept() )
2752  {
2753  exportfile << "C --- Intercept"<< endl;
2754  intercept = GetRegressionTableIntercept();
2755  }
2756  else
2757  exportfile << "C --- No Intercept"<< endl;
2758  exportfile << "       Y = " << intercept;
2759  exportfile << endl << endl;
2760 
2761  for (int i=0; i<nNumPred; i++)
2762  {
2763  exportfile << "C --- Attribute : " << GetNumericPredictorName(i) << endl;
2764  exportfile << "       Y = Y + " << strParam[i] << " * " << GetNumericPredictorCoefficient(i);
2765  exportfile << endl << endl;
2766  }
2767 
2768  for (int i=0; i<nPredTerm; i++)
2769  {
2770  exportfile << "C --- Attribute : " << GetPredictorTermName(i) << endl;
2771  exportfile << "       Y = Y + " << strParam[i+nNumPred] << " * " << GetPredictorTermCoefficient(i);
2772  exportfile << endl << endl;
2773  }
2774 
2775  exportfile << "C --- Return the value"<< endl;
2776  exportfile << " RES = Y " << endl;
2777  exportfile << " RETURN" << endl;
2778  exportfile << " END" << endl;
2779  exportfile.close();
2780 }
2781 
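// For reference, ExportLinearRegressionFortran emits the same model as a
// fixed-form subroutine with one P<i> argument per predictor (sketch only,
// illustrative names and values):
//
//         SUBROUTINE MYFUNC(P0, RES)
//   C --- ... user supplied header ...
//         IMPLICIT DOUBLE PRECISION (P)
//         DOUBLE PRECISION RES
//         DOUBLE PRECISION Y
//   C --- Intercept
//         Y = 1.5
//   C --- Attribute : x0
//         Y = Y + P0 * 0.75
//   C --- Return the value
//         RES = Y
//         RETURN
//         END
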
2789 void PMMLlib::ExportLinearRegressionPython(std::string file,
2790  std::string functionName,
2791  std::string header)
2792 {
2793  string str(ExportLinearRegressionPyStr(functionName, header));
2794  // Write the file
2795  ofstream exportfile(file.c_str());
2796  exportfile << str;
2797  exportfile.close();
2798 }
2799 
2806 std::string PMMLlib::ExportLinearRegressionPyStr(std::string functionName,
2807  std::string header)
2808 {
2809  CheckRegression();
2810 
2811  ostringstream out;
2812 
2813  // Shebang and imports
2814  out << "#!/usr/bin/env python" << endl;
2815  out << "# -*- coding: utf-8 -*-" << endl;
2816  out << endl;
2817 
2818  // Function
2819  out << "def " << functionName <<"(param):" << endl;
2820  out << endl;
2821 
2822  // header
2823  out << " ############################## " << endl;
2824  out << " # " << endl;
2825  // insert comments in header
2826  header = " # " + header;
2827  size_t pos = 0;
2828  while ((pos = header.find("\n", pos)) != std::string::npos)
2829  {
2830  header.replace(pos, 1, "\n #");
2831  pos += 5;
2832  }
2833  out << header << endl;
2834  out << " # " << endl;
2835  out << " ############################## " << endl << endl;
2836 
2837  double intercept = 0.0;
2838  if ( HasIntercept() )
2839  {
2840  out << " # Intercept"<< endl;
2841  intercept = GetRegressionTableIntercept();
2842  }
2843  else
2844  out << " # No Intercept"<< endl;
2845  out << " y = " << intercept << ";";
2846  out << endl << endl;
2847 
2848  int nPred = GetNumericPredictorNb();
2849  for (int i=0; i<nPred; i++)
2850  {
2851  out << " # Attribute : " << GetNumericPredictorName(i) << endl;
2852  out << " y += param["<<i<<"]*" << GetNumericPredictorCoefficient(i) << ";";
2853  out << endl << endl;
2854  }
2855  int nTerm = GetPredictorTermNb();
2856  for (int i=0; i<nTerm; i++)
2857  {
2858  out << " # Attribute : " << GetPredictorTermName(i) << endl;
2859  out << " y += param["<<(i+nPred)<<"]*" << GetPredictorTermCoefficient(i) << ";";
2860  out << endl << endl;
2861  }
2862 
2863  out << " # Return the value"<< endl;
2864  out << " return [y];" << endl;
2865 
2866  return out.str() ;
2867 }
2868 
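// For reference, ExportLinearRegressionPyStr returns the same model as a small
// Python function (sketch only, illustrative names and values):
//
//   #!/usr/bin/env python
//   # -*- coding: utf-8 -*-
//
//   def myFunction(param):
//       # ... user supplied header ...
//       # Intercept
//       y = 1.5;
//
//       # Attribute : x0
//       y += param[0]*0.75;
//
//       # Return the value
//       return [y];
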
2874 std::string PMMLlib::ReadRegressionStructure()
2875 {
2876  CheckRegression();
2877 
2878  string structure("");
2879  string structureActive("");
2880  string structurePredicted("@");
2881  int nPred = 0;
2882  xmlNodePtr mNode = GetChildByName(_currentModelNode,"MiningSchema");
2883  if ( mNode != NULL )
2884  {
2885  xmlNodePtr dNode = GetChildByName(mNode,"MiningField");
2886  while (dNode != NULL)
2887  {
2888  string name = _getProp(dNode, string("name"));
2889  string usage = _getProp(dNode, string("usageType"));
2890  if ( usage == "active" )
2891  {
2892  structureActive += name;
2893  structureActive += ":";
2894  }
2895  else if ( usage == "predicted" )
2896  {
2897  structurePredicted += name;
2898  structurePredicted += ":";
2899  nPred++;
2900  }
2901  dNode = dNode->next;
2902  }
2903  // Delete the last ":"
2904  if ( structureActive.length() > 0 )
2905  structureActive.erase(structureActive.size()-1);
2906  structurePredicted.erase(structurePredicted.size()-1);
2907  }
2908  std::ostringstream oss;
2909  oss << nPred;
2910  structure = structureActive + "," + oss.str() + "," + structurePredicted;
2911  return structure;
2912 }
2913 
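// Example (derived from the loop above): for a MiningSchema declaring
// MiningFields x0 and x1 with usageType="active" and y with
// usageType="predicted", ReadRegressionStructure() returns "x0:x1,1,@y",
// i.e. the ':'-separated active fields, the number of predicted fields,
// and '@' followed by the predicted field names.
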
2914 } // end of namespace
2915 
2916