00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <iostream>
00019 #include <fstream>
00020 #include <string>
00021 #include <sstream>
00022 #include <cmath>
00023 #include <algorithm>
00024 #include <cstdlib>
00025 #include <stdexcept>
00026 #include <ctime>
00027 #include <exception>
00028
00029
00030
00031 #include "instances_information.h"
00032
00033
00034
00035 #include "../../parsers/tinyxml/tinyxml.h"
00036
00037 namespace OpenNN
00038 {
00039
00040
00041
00042
00046
00047 InstancesInformation::InstancesInformation(void)
00048 {
00049 set();
00050 }
00051
00052
00053
00054
00059
00060 InstancesInformation::InstancesInformation(const unsigned int& new_instances_number)
00061 {
00062 set(new_instances_number);
00063 }
00064
00065
00066
00067
00071
00072 InstancesInformation::InstancesInformation(TiXmlElement* instances_information_element)
00073 {
00074 set(instances_information_element);
00075 }
00076
00077
00078
00079
00083
00084 InstancesInformation::InstancesInformation(const InstancesInformation& other_instances_information)
00085 {
00086 instances_number = other_instances_information.instances_number;
00087
00088 training_indices = other_instances_information.training_indices;
00089 generalization_indices = other_instances_information.generalization_indices;
00090 testing_indices = other_instances_information.testing_indices;
00091
00092 display = other_instances_information.display;
00093 }
00094
00095
00096
00097
00099
00100 InstancesInformation::~InstancesInformation(void)
00101 {
00102 }
00103
00104
00105
00106
00110
00111 InstancesInformation& InstancesInformation::operator=(const InstancesInformation& other_instances_information)
00112 {
00113 if(this != &other_instances_information)
00114 {
00115 instances_number = other_instances_information.instances_number;
00116
00117
00118
00119 training_indices = other_instances_information.training_indices;
00120 generalization_indices = other_instances_information.generalization_indices;
00121 testing_indices = other_instances_information.testing_indices;
00122
00123
00124
00125 display = other_instances_information.display;
00126 }
00127
00128 return(*this);
00129 }
00130
00131
00132
00133
00134
00135
00140
00141 bool InstancesInformation::operator == (const InstancesInformation& other_instances_information) const
00142 {
00143 if(instances_number == other_instances_information.instances_number
00144 && training_indices == other_instances_information.training_indices
00145 && generalization_indices == other_instances_information.generalization_indices
00146 && testing_indices == other_instances_information.testing_indices
00147 && display == other_instances_information.display)
00148 {
00149 return(true);
00150 }
00151 else
00152 {
00153 return(false);
00154 }
00155 }
00156
00157
00158
00159
00160
00162
00163 const Vector<unsigned int>& InstancesInformation::get_training_indices(void) const
00164 {
00165 return(training_indices);
00166 }
00167
00168
00169
00170
00172
00173 const Vector<unsigned int>& InstancesInformation::get_generalization_indices(void) const
00174 {
00175 return(generalization_indices);
00176 }
00177
00178
00179
00180
00182
00183 const Vector<unsigned int>& InstancesInformation::get_testing_indices(void) const
00184 {
00185 return(testing_indices);
00186 }
00187
00188
00189
00190
00193
00194 const bool& InstancesInformation::get_display(void) const
00195 {
00196 return(display);
00197 }
00198
00199
00200
00201
00203
00204 void InstancesInformation::set(void)
00205 {
00206 set_instances_number(0);
00207
00208 display = true;
00209 }
00210
00211
00212
00213
00217
00218 void InstancesInformation::set(const unsigned int& new_instances_number)
00219 {
00220 set_instances_number(new_instances_number);
00221
00222 display = true;
00223 }
00224
00225
00226
00227
00230
00231 void InstancesInformation::set(TiXmlElement* instances_information_element)
00232 {
00233 set();
00234 from_XML(instances_information_element);
00235 }
00236
00237
00238
00239
00244
00245 void InstancesInformation::set_training_indices(const Vector<unsigned int>& new_training_indices)
00246 {
00247
00248
00249 #ifdef _DEBUG
00250
00251 unsigned int size = new_training_indices.size();
00252
00253 if(size > instances_number)
00254 {
00255 std::ostringstream buffer;
00256
00257 buffer << "OpenNN Exception: InstancesInformation class.\n"
00258 << "void set_training_indices(const Vector<double>&) method.\n"
00259 << "Size must be less or equal than number of instances.\n";
00260
00261 throw std::logic_error(buffer.str());
00262 }
00263
00264 #endif
00265
00266 training_indices = new_training_indices;
00267 }
00268
00269
00270
00271
00276
00277 void InstancesInformation::set_generalization_indices(const Vector<unsigned int>& new_generalization_indices)
00278 {
00279
00280
00281 #ifdef _DEBUG
00282
00283 unsigned int size = new_generalization_indices.size();
00284
00285 if(size > instances_number)
00286 {
00287 std::ostringstream buffer;
00288
00289 buffer << "OpenNN Exception: InstancesInformation class.\n"
00290 << "void set_generalization_indices(const Vector<double>&) method.\n"
00291 << "Size must be less or equal than number of instances.\n";
00292
00293 throw std::logic_error(buffer.str());
00294 }
00295
00296 #endif
00297
00298 generalization_indices = new_generalization_indices;
00299 }
00300
00301
00302
00303
00308
00309 void InstancesInformation::set_testing_indices(const Vector<unsigned int>& new_testing_indices)
00310 {
00311
00312
00313 #ifdef _DEBUG
00314
00315 unsigned int size = new_testing_indices.size();
00316
00317 if(size > instances_number)
00318 {
00319 std::ostringstream buffer;
00320
00321 buffer << "OpenNN Exception: InstancesInformation class.\n"
00322 << "void set_testing_indices(const Vector<double>&) method.\n"
00323 << "Size must be less or equal than number of instances.\n";
00324
00325 throw std::logic_error(buffer.str());
00326 }
00327
00328 #endif
00329
00330 testing_indices = new_testing_indices;
00331 }
00332
00333
00334
00335
00337
00338 void InstancesInformation::set_training(void)
00339 {
00340 training_indices.set(0, 1, instances_number-1);
00341 generalization_indices.set(0);
00342 testing_indices.set(0);
00343 }
00344
00345
00346
00347
00349
00350 void InstancesInformation::set_generalization(void)
00351 {
00352 training_indices.set(0);
00353 generalization_indices.set(0, 1, instances_number-1);
00354 testing_indices.set(0);
00355 }
00356
00357
00358
00359
00361
00362 void InstancesInformation::set_testing(void)
00363 {
00364 training_indices.set(0);
00365 generalization_indices.set(0);
00366 testing_indices.set(0, 1, instances_number-1);
00367 }
00368
00369
00370
00371
00373
00374 void InstancesInformation::set_default_instances_indices(void)
00375 {
00376 split_sequential_indices();
00377 }
00378
00379
00380
00381
00386
00387 void InstancesInformation::set_display(const bool& new_display)
00388 {
00389 display = new_display;
00390 }
00391
00392
00393
00394
00398
00399 void InstancesInformation::set_instances_number(const unsigned int& new_instances_number)
00400 {
00401 instances_number = new_instances_number;
00402
00403 training_indices.set(new_instances_number);
00404 training_indices.initialize_sequential();
00405
00406 generalization_indices.set();
00407
00408 testing_indices.set();
00409 }
00410
00411
00412
00413
00416
00417 TiXmlElement* InstancesInformation::to_XML(void) const
00418 {
00419 std::ostringstream buffer;
00420
00421
00422
00423 TiXmlElement* instances_information_element = new TiXmlElement("InstancesInformation");
00424 instances_information_element->SetAttribute("Version", 4);
00425
00426
00427
00428 TiXmlElement* instances_number_element = new TiXmlElement("InstancesNumber");
00429 instances_information_element->LinkEndChild(instances_number_element);
00430
00431 buffer.str("");
00432 buffer << instances_number;
00433
00434 TiXmlText* instances_number_text = new TiXmlText(buffer.str().c_str());
00435 instances_number_element->LinkEndChild(instances_number_text);
00436
00437
00438
00439 if(!training_indices.empty())
00440 {
00441 TiXmlElement* training_indices_element = new TiXmlElement("TrainingIndices");
00442 instances_information_element->LinkEndChild(training_indices_element);
00443
00444 Vector<unsigned int> new_training_indices = training_indices + 1;
00445
00446 buffer.str("");
00447 buffer << new_training_indices;
00448
00449 TiXmlText* training_indices_text = new TiXmlText(buffer.str().c_str());
00450 training_indices_element->LinkEndChild(training_indices_text);
00451 }
00452
00453
00454
00455 if(!generalization_indices.empty())
00456 {
00457 TiXmlElement* generalization_indices_element = new TiXmlElement("GeneralizationIndices");
00458 instances_information_element->LinkEndChild(generalization_indices_element);
00459
00460 Vector<unsigned int> new_generalization_indices = generalization_indices + 1;
00461
00462 buffer.str("");
00463 buffer << new_generalization_indices;
00464
00465 TiXmlText* generalization_indices_text = new TiXmlText(buffer.str().c_str());
00466 generalization_indices_element->LinkEndChild(generalization_indices_text);
00467 }
00468
00469
00470
00471 if(!testing_indices.empty())
00472 {
00473 TiXmlElement* testing_indices_element = new TiXmlElement("TestingIndices");
00474 instances_information_element->LinkEndChild(testing_indices_element);
00475
00476 Vector<unsigned int> new_testing_indices = testing_indices + 1;
00477
00478 buffer.str("");
00479 buffer << new_testing_indices;
00480
00481 TiXmlText* testing_indices_text = new TiXmlText(buffer.str().c_str());
00482 testing_indices_element->LinkEndChild(testing_indices_text);
00483 }
00484
00485 return(instances_information_element);
00486 }
00487
00488
00489
00490
00493
00494 void InstancesInformation::from_XML(TiXmlElement* instances_information_element)
00495 {
00496 if(!instances_information_element)
00497 {
00498 std::ostringstream buffer;
00499
00500 buffer << "OpenNN Exception: InstancesInformation class.\n"
00501 << "void from_XML(TiXmlElement*) method.\n"
00502 << "Pointer to instances information element is NULL.\n";
00503
00504 throw std::logic_error(buffer.str());
00505 }
00506
00507
00508
00509 TiXmlElement* instances_number_element = instances_information_element->FirstChildElement("InstancesNumber");
00510
00511 if(instances_number_element)
00512 {
00513 unsigned int new_instances_number = atoi(instances_number_element->GetText());
00514
00515 try
00516 {
00517 set_instances_number(new_instances_number);
00518 }
00519 catch(std::exception& e)
00520 {
00521 std::cout << e.what() << std::endl;
00522 }
00523 }
00524
00525
00526
00527 TiXmlElement* training_indices_element = instances_information_element->FirstChildElement("TrainingIndices");
00528
00529 if(training_indices_element)
00530 {
00531 Vector<unsigned int> new_training_indices;
00532 new_training_indices.parse(training_indices_element->GetText());
00533 new_training_indices -= 1;
00534
00535 try
00536 {
00537 set_training_indices(new_training_indices);
00538 }
00539 catch(std::exception& e)
00540 {
00541 std::cout << e.what() << std::endl;
00542 }
00543 }
00544
00545
00546
00547 TiXmlElement* generalization_indices_element = instances_information_element->FirstChildElement("GeneralizationIndices");
00548
00549 if(generalization_indices_element)
00550 {
00551 Vector<unsigned int> new_generalization_indices;
00552 new_generalization_indices.parse(generalization_indices_element->GetText());
00553 new_generalization_indices -= 1;
00554
00555 try
00556 {
00557 set_generalization_indices(new_generalization_indices);
00558 }
00559 catch(std::exception& e)
00560 {
00561 std::cout << e.what() << std::endl;
00562 }
00563 }
00564
00565
00566
00567 TiXmlElement* testing_indices_element = instances_information_element->FirstChildElement("TestingIndices");
00568
00569 if(testing_indices_element)
00570 {
00571 Vector<unsigned int> new_testing_indices;
00572 new_testing_indices.parse(testing_indices_element->GetText());
00573 new_testing_indices -= 1;
00574
00575 try
00576 {
00577 set_testing_indices(new_testing_indices);
00578 }
00579 catch(std::exception& e)
00580 {
00581 std::cout << e.what() << std::endl;
00582 }
00583 }
00584 }
00585
00586
00587
00588
00593
00594 void InstancesInformation::split_given_indices
00595 (const Vector<unsigned int>& new_training_indices, const Vector<unsigned int>& new_generalization_indices, const Vector<unsigned int>& new_testing_indices)
00596 {
00597 const unsigned int new_training_instances_number = new_training_indices.size();
00598 const unsigned int new_generalization_instances_number = new_generalization_indices.size();
00599 const unsigned int new_testing_instances_number = new_testing_indices.size();
00600
00601 const unsigned int new_instances_number = new_training_instances_number+new_generalization_instances_number+new_testing_instances_number;
00602
00603 if(display && new_instances_number != instances_number)
00604 {
00605 std::cout << "OpenNN Warning: InstancesInformation class.\n"
00606 << "void split_given_indices(const Vector<double>&, const Vector<double>&, const Vector<double>&) method.\n"
00607 << "New number of instances is not equal to number of instances.\n";
00608 }
00609
00610 training_indices = new_training_indices;
00611 generalization_indices = new_generalization_indices;
00612 testing_indices = new_testing_indices;
00613 }
00614
00615
00616
00617
00622
00623 void InstancesInformation::split_random_indices
00624 (const double& training_instances_ratio, const double& generalization_instances_ratio, const double& testing_instances_ratio)
00625 {
00626 const double total_ratio = training_instances_ratio + generalization_instances_ratio + testing_instances_ratio;
00627
00628
00629
00630 const unsigned int generalization_instances_number = (unsigned int)(generalization_instances_ratio*instances_number/total_ratio);
00631 const unsigned int testing_instances_number = (unsigned int)(testing_instances_ratio*instances_number/total_ratio);
00632 const unsigned int training_instances_number = instances_number - generalization_instances_number - testing_instances_number;
00633
00634 const unsigned int sum_instances_number = training_instances_number + generalization_instances_number + testing_instances_number;
00635
00636 if(sum_instances_number != instances_number)
00637 {
00638 std::ostringstream buffer;
00639
00640 buffer << "OpenNN Warning: InstancesInformation class.\n"
00641 << "void split_random_indices(double, double, double) method.\n"
00642 << "Sum of numbers of training, generalization and testing instances is not equal to number of instances.\n";
00643 }
00644
00645
00646
00647 generalization_indices.set(generalization_instances_number);
00648 testing_indices.set(testing_instances_number);
00649 training_indices.set(training_instances_number);
00650
00651 Vector<int> indices(0, 1, instances_number-1);
00652 std::random_shuffle(indices.begin(), indices.end());
00653
00654
00655
00656 for(unsigned int i = 0; i < training_instances_number; i++)
00657 {
00658 training_indices[i] = indices[i];
00659 }
00660
00661 std::sort(training_indices.begin(), training_indices.end());
00662
00663
00664
00665 for(unsigned int i = 0; i < generalization_instances_number; i++)
00666 {
00667 generalization_indices[i] = indices[training_instances_number+i];
00668 }
00669
00670 std::sort(generalization_indices.begin(), generalization_indices.end());
00671
00672
00673
00674 for(unsigned int i = 0; i < testing_instances_number; i++)
00675 {
00676 testing_indices[i] = indices[training_instances_number+generalization_instances_number+i];
00677 }
00678
00679 std::sort(testing_indices.begin(), testing_indices.end());
00680 }
00681
00682
00683
00684
00692
00693 void InstancesInformation::split_random_indices(void)
00694 {
00695 split_random_indices(0.6, 0.2, 0.2);
00696 }
00697
00698
00699
00700
00705
00706 void InstancesInformation::split_sequential_indices(const double& training_instances_ratio, const double& generalization_instances_ratio, const double& testing_instances_ratio)
00707 {
00708 const double total_ratio = training_instances_ratio + generalization_instances_ratio + testing_instances_ratio;
00709
00710
00711
00712 const unsigned int generalization_instances_number = (unsigned int)(generalization_instances_ratio*instances_number/total_ratio);
00713 const unsigned int testing_instances_number = (unsigned int)(testing_instances_ratio*instances_number/total_ratio);
00714 const unsigned int training_instances_number = instances_number - generalization_instances_number - testing_instances_number;
00715
00716 const unsigned int sum_instances_number = training_instances_number + generalization_instances_number + testing_instances_number;
00717
00718 if(sum_instances_number != instances_number)
00719 {
00720 std::ostringstream buffer;
00721
00722 buffer << "OpenNN Warning: InstancesInformation class.\n"
00723 << "void split_random_indices(double, double, double) method.\n"
00724 << "Sum of numbers of training, generalization and testing instances is not equal to number of instances.\n";
00725
00726 }
00727
00728
00729
00730 training_indices.set(0, 1, training_instances_number-1);
00731
00732
00733
00734 generalization_indices.set(training_instances_number, 1, training_instances_number+ generalization_instances_number-1);
00735
00736
00737
00738 testing_indices.set(training_instances_number+generalization_instances_number, 1, instances_number-1);
00739 }
00740
00741
00742
00743
00751
00752 void InstancesInformation::split_sequential_indices(void)
00753 {
00754 split_sequential_indices(0.6, 0.2, 0.2);
00755 }
00756
00757
00758
00759
00761
00762 std::string InstancesInformation::to_string(void) const
00763 {
00764 std::ostringstream buffer;
00765
00766 buffer << "InstancesInformation\n"
00767 << "Training indices: " << training_indices << "\n"
00768 << "Generalization indices: " << generalization_indices << "\n"
00769 << "Testing indices: " << testing_indices << "\n"
00770 << "Display: " << display << "\n";
00771
00772 return(buffer.str());
00773 }
00774
00775
00776 }
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794