00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <iostream>
00019 #include <fstream>
00020 #include <string>
00021 #include <sstream>
00022 #include <cmath>
00023 #include <algorithm>
00024 #include <cstdlib>
00025 #include <stdexcept>
00026 #include <ctime>
00027 #include <exception>
00028
00029
00030
00031 #include "data_set.h"
00032
00033
00034
00035 #include "../../parsers/tinyxml/tinyxml.h"
00036
00037 namespace OpenNN
00038 {
00039
00040
00041
00042
00045
00046 DataSet::DataSet(void)
00047 {
00048 set();
00049
00050 set_default();
00051 }
00052
00053
00054
00055
00062
00063 DataSet::DataSet(const unsigned int& new_instances_number, const unsigned int& new_variables_number)
00064 {
00065 set(new_instances_number, new_variables_number);
00066
00067 set_default();
00068 }
00069
00070
00071
00072
00079
00080 DataSet::DataSet(const unsigned int& new_instances_number, const unsigned int& new_inputs_number, const unsigned int& new_targets_number)
00081 {
00082 set(new_instances_number, new_inputs_number, new_targets_number);
00083
00084 set_default();
00085 }
00086
00087
00088
00089
00092
00093 DataSet::DataSet(TiXmlElement* data_set_element)
00094 {
00095 set_default();
00096
00097 from_XML(data_set_element);
00098 }
00099
00100
00101
00102
00106
00107 DataSet::DataSet(const std::string& filename)
00108 {
00109 set();
00110
00111 set_default();
00112
00113 load(filename);
00114 }
00115
00116
00117
00118
00122
00123 DataSet::DataSet(const DataSet& other_data_set)
00124 {
00125 set_default();
00126
00127 set(other_data_set);
00128 }
00129
00130
00131
00132
00134
00135 DataSet::~DataSet(void)
00136 {
00137 }
00138
00139
00140
00141
00145
00146 DataSet& DataSet::operator = (const DataSet& other_data_set)
00147 {
00148 if(this != &other_data_set)
00149 {
00150 data_filename = other_data_set.data_filename;
00151
00152
00153
00154 data = other_data_set.data;
00155
00156
00157
00158 variables_information = other_data_set.variables_information;
00159
00160
00161
00162 instances_information = other_data_set.instances_information;
00163
00164
00165
00166 display = other_data_set.display;
00167 }
00168
00169 return(*this);
00170 }
00171
00172
00173
00174
00175
00176
00181
00182 bool DataSet::operator == (const DataSet& other_data_set) const
00183 {
00184 if(data_filename == other_data_set.data_filename
00185 && data == other_data_set.data
00186 && variables_information == other_data_set.variables_information
00187 && instances_information == other_data_set.instances_information
00188 && display == other_data_set.display)
00189 {
00190 return(true);
00191 }
00192 else
00193 {
00194 return(false);
00195 }
00196
00197 }
00198
00199
00200
00201
00202
00203
00205
00206 const VariablesInformation& DataSet::get_variables_information(void) const
00207 {
00208 return(variables_information);
00209 }
00210
00211
00212
00213
00215
00216 VariablesInformation* DataSet::get_variables_information_pointer(void)
00217 {
00218 return(&variables_information);
00219 }
00220
00221
00222
00223
00225
00226 const InstancesInformation& DataSet::get_instances_information(void) const
00227 {
00228 return(instances_information);
00229 }
00230
00231
00232
00233
00235
00236 InstancesInformation* DataSet::get_instances_information_pointer(void)
00237 {
00238 return(&instances_information);
00239 }
00240
00241
00242
00243
00245
00246 const DataSet::ScalingUnscalingMethod& DataSet::get_scaling_unscaling_method(void) const
00247 {
00248 return(scaling_unscaling_method);
00249 }
00250
00251
00252
00253
00255
00256 std::string DataSet::write_scaling_unscaling_method(void) const
00257 {
00258 if(scaling_unscaling_method == MinimumMaximum)
00259 {
00260 return("MinimumMaximum");
00261 }
00262 else if(scaling_unscaling_method == MeanStandardDeviation)
00263 {
00264 return("MeanStandardDeviation");
00265 }
00266 else
00267 {
00268 std::ostringstream buffer;
00269
00270 buffer << "OpenNN Exception: DataSet class.\n"
00271 << "std::string write_scaling_unscaling_method(void) const method.\n"
00272 << "Unknown scaling and unscaling method.\n";
00273
00274 throw std::logic_error(buffer.str());
00275 }
00276 }
00277
00278
00279
00280
00283
00284 const bool& DataSet::get_display(void) const
00285 {
00286 return(display);
00287 }
00288
00289
00290
00291
00295
00296 const Matrix<double>& DataSet::get_data(void) const
00297 {
00298 return(data);
00299 }
00300
00301
00302
00303
00305
00306 const std::string& DataSet::get_data_filename(void) const
00307 {
00308 return(data_filename);
00309 }
00310
00311
00312
00313
00317
00318 Matrix<double> DataSet::arrange_training_data(void) const
00319 {
00320 const unsigned int variables_number = get_variables_number();
00321
00322 Vector<unsigned int> variables_indices(0, 1, variables_number-1);
00323
00324 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
00325
00326 return(data.arrange_submatrix(training_indices, variables_indices));
00327 }
00328
00329
00330
00331
00335
00336 Matrix<double> DataSet::arrange_generalization_data(void) const
00337 {
00338 const unsigned int variables_number = get_variables_number();
00339
00340 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
00341
00342 Vector<unsigned int> variables_indices(0, 1, variables_number-1);
00343
00344 return(data.arrange_submatrix(generalization_indices, variables_indices));
00345 }
00346
00347
00348
00349
00353
00354 Matrix<double> DataSet::arrange_testing_data(void) const
00355 {
00356 const unsigned int variables_number = get_variables_number();
00357 Vector<unsigned int> variables_indices(0, 1, variables_number-1);
00358
00359 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
00360
00361 return(data.arrange_submatrix(testing_indices, variables_indices));
00362 }
00363
00364
00365
00366
00370
00371 Matrix<double> DataSet::arrange_input_data(void) const
00372 {
00373 const unsigned int instances_number = get_instances_number();
00374 Vector<unsigned int> instances_indices(0, 1, instances_number-1);
00375
00376 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00377
00378 return(data.arrange_submatrix(instances_indices, inputs_indices));
00379 }
00380
00381
00382
00383
00387
00388 Matrix<double> DataSet::arrange_target_data(void) const
00389 {
00390 const unsigned int instances_number = get_instances_number();
00391 Vector<unsigned int> instances_indices(0, 1, instances_number-1);
00392
00393 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00394
00395 return(data.arrange_submatrix(instances_indices, targets_indices));
00396 }
00397
00398
00399
00400
00404
00405 Matrix<double> DataSet::arrange_training_input_data(void) const
00406 {
00407 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00408
00409 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
00410
00411 return(data.arrange_submatrix(training_indices, inputs_indices));
00412 }
00413
00414
00415
00416
00420
00421 Matrix<double> DataSet::arrange_training_target_data(void) const
00422 {
00423 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
00424
00425 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00426
00427 return(data.arrange_submatrix(training_indices, targets_indices));
00428 }
00429
00430
00431
00432
00436
00437 Matrix<double> DataSet::get_generalization_input_data(void) const
00438 {
00439 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
00440
00441 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00442
00443 return(data.arrange_submatrix(generalization_indices, inputs_indices));
00444 }
00445
00446
00447
00448
00452
00453 Matrix<double> DataSet::get_generalization_target_data(void) const
00454 {
00455 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
00456
00457 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00458
00459 return(data.arrange_submatrix(generalization_indices, targets_indices));
00460 }
00461
00462
00463
00464
00468
00469 Matrix<double> DataSet::arrange_testing_input_data(void) const
00470 {
00471 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00472
00473 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
00474
00475 return(data.arrange_submatrix(testing_indices, inputs_indices));
00476 }
00477
00478
00479
00480
00484
00485 Matrix<double> DataSet::arrange_testing_target_data(void) const
00486 {
00487 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00488
00489 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
00490
00491 return(data.arrange_submatrix(testing_indices, targets_indices));
00492 }
00493
00494
00495
00496
00499
00500 Vector<double> DataSet::get_instance(const unsigned int& i) const
00501 {
00502
00503
00504 #ifdef _DEBUG
00505
00506 unsigned int instances_number = get_instances_number();
00507
00508 if(i >= instances_number)
00509 {
00510 std::ostringstream buffer;
00511
00512 buffer << "OpenNN Exception: DataSet class.\n"
00513 << "Vector<double> get_instance(const unsigned int&) const method.\n"
00514 << "Index of instance must be less than number of instances.\n";
00515
00516 throw std::logic_error(buffer.str());
00517 }
00518
00519 #endif
00520
00521
00522
00523 return(data.arrange_row(i));
00524 }
00525
00526
00527
00528
00531
00532 Vector<double> DataSet::get_training_instance(const unsigned int& training_instance_index) const
00533 {
00534
00535
00536 #ifdef _DEBUG
00537
00538 const unsigned int training_instances_number = instances_information.count_training_instances_number();
00539
00540 if(training_instance_index >= training_instances_number)
00541 {
00542 std::ostringstream buffer;
00543
00544 buffer << "OpenNN Exception: DataSet class.\n"
00545 << "Vector<double> get_training_instance(const unsigned int&) const method.\n"
00546 << "Index of training instance must be less than number of training instances.\n";
00547
00548 throw std::logic_error(buffer.str());
00549 }
00550
00551 #endif
00552
00553 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
00554
00555 return(data.arrange_row(training_indices[training_instance_index]));
00556 }
00557
00558
00559
00560
00563
00564 Vector<double> DataSet::get_generalization_instance(const unsigned int& generalization_instance_index) const
00565 {
00566
00567
00568 #ifdef _DEBUG
00569
00570 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
00571
00572 if(generalization_instance_index >= generalization_instances_number)
00573 {
00574 std::ostringstream buffer;
00575
00576 buffer << "OpenNN Exception: DataSet class.\n"
00577 << "Vector<double> get_generalization_instance(const unsigned int&) const method.\n"
00578 << "Index of generalization instance must be less than number of generalization instances.\n";
00579
00580 throw std::logic_error(buffer.str());
00581 }
00582
00583 #endif
00584
00585 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
00586
00587 return(data.arrange_row(generalization_indices[generalization_instance_index]));
00588 }
00589
00590
00591
00592
00595
00596 Vector<double> DataSet::get_testing_instance(const unsigned int& testing_instance_index) const
00597 {
00598
00599
00600 #ifdef _DEBUG
00601
00602 const unsigned int testing_instances_number = instances_information.count_testing_instances_number();
00603
00604 if(testing_instance_index >= testing_instances_number)
00605 {
00606 std::ostringstream buffer;
00607
00608 buffer << "OpenNN Exception: DataSet class.\n"
00609 << "Vector<double> get_testing_instance(const unsigned int&) const method.\n"
00610 << "Index of testing instance must be less than number of testing instances.\n";
00611
00612 throw std::logic_error(buffer.str());
00613 }
00614
00615 #endif
00616
00617 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
00618
00619 return(data.arrange_row(testing_indices[testing_instance_index]));
00620 }
00621
00622
00623
00624
00627
00628 Vector<double> DataSet::get_input_instance(const unsigned int& instance_index) const
00629 {
00630
00631
00632 #ifdef _DEBUG
00633
00634 unsigned int instances_number = get_instances_number();
00635
00636 if(instance_index >= instances_number)
00637 {
00638 std::ostringstream buffer;
00639
00640 buffer << "OpenNN Exception: DataSet class.\n"
00641 << "Vector<double> get_input_instance(const unsigned int&) const method.\n"
00642 << "Index of instance must be less than number of instances.\n";
00643
00644 throw std::logic_error(buffer.str());
00645 }
00646
00647 #endif
00648
00649 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00650
00651 return(data.arrange_row(instance_index, inputs_indices));
00652 }
00653
00654
00655
00656
00659
00660 Vector<double> DataSet::get_target_instance(const unsigned int& instance_index) const
00661 {
00662
00663
00664 #ifdef _DEBUG
00665
00666 unsigned int instances_number = get_instances_number();
00667
00668 if(instance_index >= instances_number)
00669 {
00670 std::ostringstream buffer;
00671
00672 buffer << "OpenNN Exception: DataSet class.\n"
00673 << "Vector<double> get_target_instance(const unsigned int&) const method.\n"
00674 << "Index of instance must be less than number of instances.\n";
00675
00676 throw std::logic_error(buffer.str());
00677 }
00678
00679 #endif
00680
00681 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00682
00683 return(data.arrange_row(instance_index, targets_indices));
00684 }
00685
00686
00687
00688
00689
00692
00693 Vector<double> DataSet::get_training_input_instance(const unsigned int& training_instance_index) const
00694 {
00695
00696
00697 #ifdef _DEBUG
00698
00699 unsigned int training_instances_number = instances_information.count_training_instances_number();
00700
00701 if(training_instance_index >= training_instances_number)
00702 {
00703 std::ostringstream buffer;
00704
00705 buffer << "OpenNN Exception: DataSet class.\n"
00706 << "Vector<double> get_training_input_instance(const unsigned int&) const method.\n"
00707 << "Index of training instance (" << training_instance_index << ") must be less than number of training instances (" << training_instances_number << ").\n";
00708
00709 throw std::logic_error(buffer.str());
00710 }
00711
00712 #endif
00713
00714 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00715
00716 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
00717
00718 return(data.arrange_row(training_indices[training_instance_index], inputs_indices));
00719 }
00720
00721
00722
00723
00726
00727 Vector<double> DataSet::get_generalization_input_instance(const unsigned int& generalization_instance_index) const
00728 {
00729
00730
00731 #ifdef _DEBUG
00732
00733 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
00734
00735 if(generalization_instance_index >= generalization_instances_number)
00736 {
00737 std::ostringstream buffer;
00738
00739 buffer << "OpenNN Exception: DataSet class.\n"
00740 << "Vector<double> get_generalization_input_instance(const unsigned int&) const method.\n"
00741 << "Index of generalization instance must be less than number of generalization instances.\n";
00742
00743 throw std::logic_error(buffer.str());
00744 }
00745
00746 #endif
00747
00748 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00749
00750 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
00751
00752 return(data.arrange_row(generalization_indices[generalization_instance_index], inputs_indices));
00753 }
00754
00755
00756
00757
00760
00761 Vector<double> DataSet::get_testing_input_instance(const unsigned int& testing_instance_index) const
00762 {
00763
00764
00765 #ifdef _DEBUG
00766
00767 const unsigned int testing_instances_number = instances_information.count_testing_instances_number();
00768
00769 if(testing_instance_index >= testing_instances_number)
00770 {
00771 std::ostringstream buffer;
00772
00773 buffer << "OpenNN Exception: DataSet class.\n"
00774 << "Vector<double> get_testing_input_instance(const unsigned int&) const method.\n"
00775 << "Index of testing instance must be less than number of testing instances.\n";
00776
00777 throw std::logic_error(buffer.str());
00778 }
00779
00780 #endif
00781
00782 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
00783
00784 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
00785
00786 return(data.arrange_row(testing_indices[testing_instance_index], inputs_indices));
00787 }
00788
00789
00790
00791
00794
00795 Vector<double> DataSet::get_training_target_instance(const unsigned int& training_instance_index) const
00796 {
00797
00798
00799 #ifdef _DEBUG
00800
00801 unsigned int training_instances_number = instances_information.count_training_instances_number();
00802
00803 if(training_instance_index >= training_instances_number)
00804 {
00805 std::ostringstream buffer;
00806
00807 buffer << "OpenNN Exception: DataSet class.\n"
00808 << "Vector<double> get_training_target_instance(const unsigned int&) const method.\n"
00809 << "Index of training instance must be less than number of training instances.\n";
00810
00811 throw std::logic_error(buffer.str());
00812 }
00813
00814 #endif
00815
00816 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00817
00818 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
00819
00820 return(data.arrange_row(training_indices[training_instance_index], targets_indices));
00821 }
00822
00823
00824
00825
00828
00829 Vector<double> DataSet::get_generalization_target_instance(const unsigned int& generalization_instance_index) const
00830 {
00831
00832
00833 #ifdef _DEBUG
00834
00835 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
00836
00837 if(generalization_instance_index >= generalization_instances_number)
00838 {
00839 std::ostringstream buffer;
00840
00841 buffer << "OpenNN Exception: DataSet class.\n"
00842 << "Vector<double> get_generalization_target_instance(const unsigned int&) const method.\n"
00843 << "Index of generalization instance must be less than number of generalization instances.\n";
00844
00845 throw std::logic_error(buffer.str());
00846 }
00847
00848 #endif
00849
00850 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00851
00852 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
00853
00854 return(data.arrange_row(generalization_indices[generalization_instance_index], targets_indices));
00855 }
00856
00857
00858
00859
00862
00863 Vector<double> DataSet::get_testing_target_instance(const unsigned int& testing_instance_index) const
00864 {
00865
00866
00867 #ifdef _DEBUG
00868
00869 const unsigned int testing_instances_number = instances_information.count_testing_instances_number();
00870
00871 if(testing_instance_index >= testing_instances_number)
00872 {
00873 std::ostringstream buffer;
00874
00875 buffer << "OpenNN Exception: DataSet class.\n"
00876 << "Vector<double> get_testing_target_instance(const unsigned int&) const method.\n"
00877 << "Index of testing instance must be less than number of testing instances.\n";
00878
00879 throw std::logic_error(buffer.str());
00880 }
00881
00882 #endif
00883
00884 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
00885
00886 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
00887
00888 return(data.arrange_row(testing_indices[testing_instance_index], targets_indices));
00889 }
00890
00891
00892
00893
00896
00897 Vector<double> DataSet::get_variable(const unsigned int& i) const
00898 {
00899
00900
00901 #ifdef _DEBUG
00902
00903 unsigned int variables_number = get_variables_number();
00904
00905 if(i >= variables_number)
00906 {
00907 std::ostringstream buffer;
00908
00909 buffer << "OpenNN Exception: DataSet class.\n"
00910 << "Vector<double> get_variable(const unsigned int&) const method.\n"
00911 << "Index of variable must be less than number of instances.\n";
00912
00913 throw std::logic_error(buffer.str());
00914 }
00915
00916 #endif
00917
00918
00919
00920 return(data.arrange_column(i));
00921 }
00922
00923
00924
00925
00927
00928 void DataSet::set(void)
00929 {
00930 data_filename = "";
00931
00932 data.set();
00933
00934 instances_information.set();
00935 variables_information.set();
00936
00937 display = true;
00938 }
00939
00940
00941
00942
00948
00949 void DataSet::set(const unsigned int& new_instances_number, const unsigned int& new_variables_number)
00950 {
00951 data_filename = "";
00952
00953 data.set(new_instances_number, new_variables_number);
00954
00955 instances_information.set(new_instances_number);
00956
00957 variables_information.set(new_variables_number);
00958
00959 display = true;
00960 }
00961
00962
00963
00964
00974
00975 void DataSet::set(const unsigned int& new_instances_number, const unsigned int& new_inputs_number, const unsigned int& new_targets_number)
00976 {
00977 data_filename = "";
00978
00979 const unsigned int new_variables_number = new_inputs_number + new_targets_number;
00980
00981 data.set(new_instances_number, new_variables_number);
00982
00983 instances_information.set(new_instances_number);
00984
00985 variables_information.set(new_inputs_number, new_targets_number);
00986
00987 display = true;
00988 }
00989
00990
00991
00992
00995
00996 void DataSet::set(const DataSet& other_data_set)
00997 {
00998 data_filename = other_data_set.data_filename;
00999
01000 data = other_data_set.data;
01001
01002 variables_information = other_data_set.variables_information;
01003
01004 instances_information = other_data_set.instances_information;
01005
01006 display = other_data_set.display;
01007 }
01008
01009
01010
01011
01014
01015 void DataSet::set(TiXmlElement* data_set_element)
01016 {
01017 from_XML(data_set_element);
01018 }
01019
01020
01021
01022
01025
01026 void DataSet::set(const std::string& filename)
01027 {
01028 load(filename);
01029 }
01030
01031
01032
01033
01036
01037 void DataSet::set_scaling_unscaling_method(const ScalingUnscalingMethod& new_scaling_unscaling_method)
01038 {
01039 scaling_unscaling_method = new_scaling_unscaling_method;
01040 }
01041
01042
01043
01044
01047
01048 void DataSet::set_scaling_unscaling_method(const std::string& new_scaling_unscaling_method)
01049 {
01050 if(new_scaling_unscaling_method == "MinimumMaximum")
01051 {
01052 set_scaling_unscaling_method(MinimumMaximum);
01053 }
01054 else if(new_scaling_unscaling_method == "MeanStandardDeviation")
01055 {
01056 set_scaling_unscaling_method(MinimumMaximum);
01057 }
01058 else
01059 {
01060 std::ostringstream buffer;
01061
01062 buffer << "OpenNN Exception: DataSet class.\n"
01063 << "void set_conditions_method(const std::string&) method.\n"
01064 << "Unknown scaling and unscaling method: " << new_scaling_unscaling_method << ".\n";
01065
01066 throw std::logic_error(buffer.str());
01067 }
01068 }
01069
01070
01071
01072
01077
01078 void DataSet::set_display(const bool& new_display)
01079 {
01080 display = new_display;
01081 }
01082
01083
01084
01085
01091
01092 void DataSet::set_default(void)
01093 {
01094 scaling_unscaling_method = MinimumMaximum;
01095
01096 display = true;
01097 }
01098
01099
01100
01101
01107
01108 void DataSet::set_data(const Matrix<double>& new_data)
01109 {
01110
01111
01112 #ifdef _DEBUG
01113
01114 const unsigned int rows_number = new_data.get_rows_number();
01115 const unsigned int instances_number = get_instances_number();
01116
01117 if(rows_number != instances_number)
01118 {
01119 std::ostringstream buffer;
01120
01121 buffer << "OpenNN Exception: DataSet class.\n"
01122 << "void set_data(const Matrix<double>&) method.\n"
01123 << "Number of rows must be equal to number of instances.\n";
01124
01125 throw std::logic_error(buffer.str());
01126 }
01127
01128 const unsigned int columns_number = new_data.get_columns_number();
01129 const unsigned int variables_number = get_variables_number();
01130
01131 if(columns_number != variables_number)
01132 {
01133 std::ostringstream buffer;
01134
01135 buffer << "OpenNN Exception: DataSet class.\n"
01136 << "void set_data(const Matrix<double>&) method.\n"
01137 << "Number of columns must be equal to number of variables.\n";
01138
01139 throw std::logic_error(buffer.str());
01140 }
01141
01142 #endif
01143
01144
01145
01146 data = new_data;
01147 }
01148
01149
01150
01151
01156
01157 void DataSet::set_data_filename(const std::string& new_data_filename)
01158 {
01159 data_filename = new_data_filename;
01160
01161 try
01162 {
01163 data.load(data_filename);
01164
01165 const unsigned int variables_number = get_variables_number();
01166
01167 const unsigned int instances_number = get_instances_number();
01168
01169 variables_information.set(variables_number);
01170 instances_information.set(instances_number);
01171 }
01172 catch(std::exception& e)
01173 {
01174 std::cout << e.what() << std::endl;
01175 }
01176 }
01177
01178
01179
01180
01185
01186 void DataSet::set_instances_number(const unsigned int& new_instances_number)
01187 {
01188 const unsigned int variables_number = get_variables_number();
01189
01190 data.set(new_instances_number, variables_number);
01191
01192 instances_information.set(new_instances_number);
01193 }
01194
01195
01196
01197
01202
01203 void DataSet::set_variables_number(const unsigned int& new_variables_number)
01204 {
01205 const unsigned int instances_number = get_instances_number();
01206
01207 data.set(instances_number, new_variables_number);
01208
01209 variables_information.set(new_variables_number);
01210 }
01211
01212
01213
01214
01218
01219 void DataSet::set_instance(const unsigned int& instance_index, const Vector<double>& instance)
01220 {
01221
01222
01223 #ifdef _DEBUG
01224
01225 const unsigned int instances_number = get_instances_number();
01226
01227 if(instance_index >= instances_number)
01228 {
01229 std::ostringstream buffer;
01230
01231 buffer << "OpenNN Exception: DataSet class.\n"
01232 << "void set_instance(const unsigned int&, const Vector<double>&) method.\n"
01233 << "Index of instance must be less than number of instances.\n";
01234
01235 throw std::logic_error(buffer.str());
01236 }
01237
01238 const unsigned int size = instance.size();
01239 const unsigned int variables_number = get_variables_number();
01240
01241 if(size != variables_number)
01242 {
01243 std::ostringstream buffer;
01244
01245 buffer << "OpenNN Exception: DataSet class.\n"
01246 << "void set_instance(const unsigned int&, const Vector<double>&) method.\n"
01247 << "Size (" << size << ") must be equal to number of variables (" << variables_number << ").\n";
01248
01249 throw std::logic_error(buffer.str());
01250 }
01251
01252 #endif
01253
01254
01255
01256 data.set_row(instance_index, instance);
01257 }
01258
01259
01260
01261
01265
01266 void DataSet::set_training_instance(const unsigned int& i, const Vector<double>& new_training_instance)
01267 {
01268
01269
01270 #ifdef _DEBUG
01271
01272 const unsigned int training_instances_number = instances_information.count_training_instances_number();
01273
01274 if(i >= training_instances_number)
01275 {
01276 std::ostringstream buffer;
01277
01278 buffer << "OpenNN Exception: DataSet class.\n"
01279 << "void set_training_instance(const unsigned int&, const Vector<double>&) method.\n"
01280 << "Training instance index must be less than number of training instances.\n";
01281
01282 throw std::logic_error(buffer.str());
01283 }
01284
01285 #endif
01286
01287 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
01288
01289 const unsigned int index = training_indices[i];
01290
01291 set_instance(index, new_training_instance);
01292 }
01293
01294
01295
01296
01300
01301 void DataSet::set_generalization_instance(const unsigned int& i, const Vector<double>& new_generalization_instance)
01302 {
01303
01304
01305 #ifdef _DEBUG
01306
01307 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
01308
01309 if(i >= generalization_instances_number)
01310 {
01311 std::ostringstream buffer;
01312
01313 buffer << "OpenNN Exception: DataSet class.\n"
01314 << "void set_generalization_instance(const unsigned int&, const Vector<double>&) method.\n"
01315 << "Generalization instance index must be less than number of generalization instances.\n";
01316
01317 throw std::logic_error(buffer.str());
01318 }
01319
01320 #endif
01321
01322 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
01323
01324 const unsigned int index = generalization_indices[i];
01325
01326 set_instance(index, new_generalization_instance);
01327 }
01328
01329
01330
01331
01335
01336 void DataSet::set_testing_instance(const unsigned int& i, const Vector<double>& new_testing_instance)
01337 {
01338
01339
01340 #ifdef _DEBUG
01341
01342 const unsigned int testing_instances_number = instances_information.count_testing_instances_number();
01343
01344 if(i >= testing_instances_number)
01345 {
01346 std::ostringstream buffer;
01347
01348 buffer << "OpenNN Exception: DataSet class.\n"
01349 << "void set_testing_instance(const unsigned int&, const Vector<double>&) method.\n"
01350 << "Testing instance index must be less than number of testing instances.\n";
01351
01352 throw std::logic_error(buffer.str());
01353 }
01354
01355 #endif
01356
01357 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
01358
01359 const unsigned int index = testing_indices[i];
01360
01361 set_instance(index, new_testing_instance);
01362 }
01363
01364
01365
01366
01370
01371 void DataSet::set_input_instance(const unsigned int& instance_index, const Vector<double>& input_instance)
01372 {
01373
01374
01375 #ifdef _DEBUG
01376
01377 const unsigned int instances_number = get_instances_number();
01378
01379 if(instance_index >= instances_number)
01380 {
01381 std::ostringstream buffer;
01382
01383 buffer << "OpenNN Exception: DataSet class.\n"
01384 << "void set_input_instance(const unsigned int&, const Vector<double>&) method.\n"
01385 << "Index of instance must be less than number of instances.\n";
01386
01387 throw std::logic_error(buffer.str());
01388 }
01389
01390 #endif
01391
01392 const unsigned int inputs_number = variables_information.count_inputs_number();
01393
01394 #ifdef _DEBUG
01395
01396 const unsigned int size = input_instance.size();
01397
01398 if(size != inputs_number)
01399 {
01400 std::ostringstream buffer;
01401
01402 buffer << "OpenNN Exception: DataSet class.\n"
01403 << "void set_input_instance(const unsigned int&, const Vector<double>&) method.\n"
01404 << "Size of inputs instance must be equal to number of input variables.\n";
01405
01406 throw std::logic_error(buffer.str());
01407 }
01408
01409 #endif
01410
01411 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
01412
01413 unsigned int input_variable_index;
01414
01415 for(unsigned int i = 0; i < inputs_number; i++)
01416 {
01417 input_variable_index = inputs_indices[i];
01418
01419 data[instance_index][input_variable_index] = input_instance[i];
01420 }
01421 }
01422
01423
01424
01425
01429
01430 void DataSet::set_target_instance(const unsigned int& instance_index, const Vector<double>& target_instance)
01431 {
01432
01433
01434 #ifdef _DEBUG
01435
01436 const unsigned int instances_number = get_instances_number();
01437
01438 if(instance_index >= instances_number)
01439 {
01440 std::ostringstream buffer;
01441
01442 buffer << "OpenNN Exception: DataSet class.\n"
01443 << "void set_target_instance(const unsigned int&, const Vector<double>&) method.\n"
01444 << "Index of instance must be less than number of instances.\n";
01445
01446 throw std::logic_error(buffer.str());
01447 }
01448
01449 #endif
01450
01451 const unsigned int targets_number = variables_information.count_targets_number();
01452
01453 #ifdef _DEBUG
01454
01455 const unsigned int size = target_instance.size();
01456
01457 if(size != targets_number)
01458 {
01459 std::ostringstream buffer;
01460
01461 buffer << "OpenNN Exception: DataSet class.\n"
01462 << "void set_target_instance(const unsigned int&, const Vector<double>&) method.\n"
01463 << "Size of targets instance must be equal to number of target variables.\n";
01464
01465 throw std::logic_error(buffer.str());
01466 }
01467
01468 #endif
01469
01470 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
01471
01472 unsigned int target_variable_index;
01473
01474 for(unsigned int i = 0; i < targets_number; i++)
01475 {
01476 target_variable_index = targets_indices[i];
01477
01478 data[instance_index][target_variable_index] = target_instance[i];
01479 }
01480 }
01481
01482
01483
01484
01488
01489 void DataSet::set_training_input_instance(const unsigned int& i, const Vector<double>& new_training_input_instance)
01490 {
01491
01492
01493 #ifdef _DEBUG
01494
01495 const unsigned int training_instances_number = instances_information.count_training_instances_number();
01496
01497 if(i >= training_instances_number)
01498 {
01499 std::ostringstream buffer;
01500
01501 buffer << "OpenNN Exception: DataSet class.\n"
01502 << "void set_training_input_instance(const unsigned int&, const Vector<double>&) method.\n"
01503 << "Training instance index must be less than number of training instances.\n";
01504
01505 throw std::logic_error(buffer.str());
01506 }
01507
01508 #endif
01509
01510 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
01511
01512 const unsigned int index = training_indices[i];
01513
01514 set_input_instance(index, new_training_input_instance);
01515 }
01516
01517
01518
01519
01523
01524 void DataSet::set_generalization_input_instance(const unsigned int& i, const Vector<double>& new_generalization_input_instance)
01525 {
01526
01527
01528 #ifdef _DEBUG
01529
01530 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
01531
01532 if(i >= generalization_instances_number)
01533 {
01534 std::ostringstream buffer;
01535
01536 buffer << "OpenNN Exception: DataSet class.\n"
01537 << "void set_generalization_input_instance(const unsigned int&, const Vector<double>&) method.\n"
01538 << "Generalization instance index must be less than number of generalization instances.\n";
01539
01540 throw std::logic_error(buffer.str());
01541 }
01542
01543 #endif
01544
01545 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
01546
01547 const unsigned int index = generalization_indices[i];
01548
01549 set_input_instance(index, new_generalization_input_instance);
01550 }
01551
01552
01553
01554
01558
01559 void DataSet::set_testing_input_instance(const unsigned int& i, const Vector<double>& new_testing_input_instance)
01560 {
01561
01562
01563 #ifdef _DEBUG
01564
01565 const unsigned int testing_instances_number = instances_information.count_testing_instances_number();
01566
01567 if(i >= testing_instances_number)
01568 {
01569 std::ostringstream buffer;
01570
01571 buffer << "OpenNN Exception: DataSet class.\n"
01572 << "void set_testing_input_instance(const unsigned int&, const Vector<double>&) method.\n"
01573 << "Testing instance index must be less than number of testing instances.\n";
01574
01575 throw std::logic_error(buffer.str());
01576 }
01577
01578 #endif
01579
01580 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
01581
01582 const unsigned int index = testing_indices[i];
01583
01584 set_input_instance(index, new_testing_input_instance);
01585 }
01586
01587
01588
01589
01593
01594 void DataSet::set_training_target_instance(const unsigned int& i, const Vector<double>& new_training_target_instance)
01595 {
01596
01597
01598 #ifdef _DEBUG
01599
01600 const unsigned int training_instances_number = instances_information.count_training_instances_number();
01601
01602 if(i >= training_instances_number)
01603 {
01604 std::ostringstream buffer;
01605
01606 buffer << "OpenNN Exception: DataSet class.\n"
01607 << "void set_training_target_instance(const unsigned int&, const Vector<double>&) method.\n"
01608 << "Training instance index must be less than number of training instances.\n";
01609
01610 throw std::logic_error(buffer.str());
01611 }
01612
01613 #endif
01614
01615 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
01616
01617 const unsigned int index = training_indices[i];
01618
01619 set_target_instance(index, new_training_target_instance);
01620 }
01621
01622
01623
01624
01628
01629 void DataSet::set_generalization_target_instance(const unsigned int& i, const Vector<double>& new_generalization_target_instance)
01630 {
01631
01632
01633 #ifdef _DEBUG
01634
01635 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
01636
01637 if(i >= generalization_instances_number)
01638 {
01639 std::ostringstream buffer;
01640
01641 buffer << "OpenNN Exception: DataSet class.\n"
01642 << "void set_generalization_target_instance(const unsigned int&, const Vector<double>&) method.\n"
01643 << "Generalization instance index must be less than number of generalization instances.\n";
01644
01645 throw std::logic_error(buffer.str());
01646 }
01647
01648 #endif
01649
01650 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
01651
01652 const unsigned int index = generalization_indices[i];
01653
01654 set_target_instance(index, new_generalization_target_instance);
01655 }
01656
01657
01658
01659
01663
01664 void DataSet::set_testing_target_instance(const unsigned int& i, const Vector<double>& new_testing_target_instance)
01665 {
01666
01667
01668 #ifdef _DEBUG
01669
01670 const unsigned int testing_instances_number = instances_information.count_testing_instances_number();
01671
01672 if(i >= testing_instances_number)
01673 {
01674 std::ostringstream buffer;
01675
01676 buffer << "OpenNN Exception: DataSet class.\n"
01677 << "void set_testing_target_instance(const unsigned int&, const Vector<double>&) method.\n"
01678 << "Testing instance index must be less than number of testing instances.\n";
01679
01680 throw std::logic_error(buffer.str());
01681 }
01682
01683 #endif
01684
01685 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
01686
01687 const unsigned int index = testing_indices[i];
01688
01689 set_target_instance(index, new_testing_target_instance);
01690 }
01691
01692
01693
01694
01700
01701 void DataSet::add_instance(const Vector<double>& instance)
01702 {
01703
01704
01705 #ifdef _DEBUG
01706
01707 const unsigned int size = instance.size();
01708 const unsigned int variables_number = get_variables_number();
01709
01710 if(size != variables_number)
01711 {
01712 std::ostringstream buffer;
01713
01714 buffer << "OpenNN Exception: DataSet class.\n"
01715 << "void add_instance(const Vector<double>&) method.\n"
01716 << "Size of instance must be equal to number of variables.\n";
01717
01718 throw std::logic_error(buffer.str());
01719 }
01720
01721 #endif
01722
01723 data.append_row(instance);
01724
01725 instances_information.set_training();
01726 }
01727
01728
01729
01730
01735
01736 void DataSet::subtract_instance(const unsigned int& instance_index)
01737 {
01738
01739
01740 #ifdef _DEBUG
01741
01742 const unsigned int instances_number = get_instances_number();
01743
01744 if(instance_index >= instances_number)
01745 {
01746 std::ostringstream buffer;
01747
01748 buffer << "OpenNN Exception: DataSet class.\n"
01749 << "void subtract_instance(unsigned int) method.\n"
01750 << "Index of instance must be less than number of instances.\n";
01751
01752 throw std::logic_error(buffer.str());
01753 }
01754
01755 #endif
01756
01757 data.subtract_row(instance_index);
01758
01759 instances_information.set_training();
01760 }
01761
01762
01763
01764
01767
01768 void DataSet::append_variable(const Vector<double>& variable)
01769 {
01770
01771
01772 #ifdef _DEBUG
01773
01774 const unsigned int size = variable.size();
01775 const unsigned int instances_number = get_instances_number();
01776
01777 if(size != instances_number)
01778 {
01779 std::ostringstream buffer;
01780
01781 buffer << "OpenNN Exception: DataSet class.\n"
01782 << "void append_variable(const Vector<double>&) method.\n"
01783 << "Size of variable must be equal to number of instances.\n";
01784
01785 throw std::logic_error(buffer.str());
01786 }
01787
01788 #endif
01789
01790 const unsigned int variables_number = get_variables_number();
01791
01792 data.append_column(variable);
01793
01794 Matrix<double> new_data(data);
01795
01796 const unsigned int new_variables_number = variables_number + 1;
01797
01798 set_variables_number(new_variables_number);
01799
01800 set_data(new_data);
01801 }
01802
01803
01804
01805
01808
01809 void DataSet::subtract_variable(const unsigned int& variable_index)
01810 {
01811 const unsigned int variables_number = get_variables_number();
01812
01813
01814
01815 #ifdef _DEBUG
01816
01817 if(variable_index >= variables_number)
01818 {
01819 std::ostringstream buffer;
01820
01821 buffer << "OpenNN Exception: DataSet class.\n"
01822 << "void subtract_variable(unsigned int) method.\n"
01823 << "Index of variable must be less than number of variables.\n";
01824
01825 throw std::logic_error(buffer.str());
01826 }
01827
01828 #endif
01829
01830 data.subtract_column(variable_index);
01831
01832 Matrix<double> new_data(data);
01833
01834 const unsigned int new_variables_number = variables_number - 1;
01835
01836 set_variables_number(new_variables_number);
01837
01838 set_data(new_data);
01839 }
01840
01841
01842
01843
01845
01846 void DataSet::subtract_constant_variables(void)
01847 {
01848 }
01849
01850
01851
01852
01854
01855 void DataSet::subtract_repeated_instances(void)
01856 {
01857
01858 }
01859
01860
01861
01862
01867
01868 Vector< Vector< Vector<double> > > DataSet::calculate_data_histogram(const unsigned int& bins_number) const
01869 {
01870 return(data.calculate_histogram(bins_number));
01871 }
01872
01873
01874
01875
01879
01880 Vector< Vector< Vector<double> > > DataSet::calculate_data_histogram(void) const
01881 {
01882 return(data.calculate_histogram());
01883 }
01884
01885
01886
01887
01896
01897 Vector< Vector<double> > DataSet::calculate_data_statistics(void) const
01898 {
01899 Vector< Vector<double> > minimum_maximum = data.calculate_minimum_maximum();
01900 Vector< Vector<double> > mean_standard_deviation = data.calculate_mean_standard_deviation();
01901
01902 Vector< Vector<double> > statistics(4);
01903
01904 statistics[0] = minimum_maximum[0];
01905 statistics[1] = minimum_maximum[1];
01906 statistics[2] = mean_standard_deviation[0];
01907 statistics[3] = mean_standard_deviation[1];
01908
01909 return(statistics);
01910 }
01911
01912
01913
01914
01919
01924
01925 Vector< Vector<double> > DataSet::calculate_training_instances_statistics(void) const
01926 {
01927 const unsigned int variables_number = get_variables_number();
01928 const Vector<unsigned int> variables_indices(0, 1, variables_number-1);
01929
01930 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
01931
01932 const Vector< Vector<double> > mean_standard_deviation = data.calculate_mean_standard_deviation(training_indices, variables_indices);
01933
01934 const Vector< Vector<double> > minimum_maximum = data.calculate_minimum_maximum(training_indices, variables_indices);
01935
01936 Vector< Vector<double> > statistics(4);
01937
01938 statistics[0] = mean_standard_deviation[0];
01939 statistics[1] = mean_standard_deviation[1];
01940 statistics[2] = minimum_maximum[0];
01941 statistics[3] = minimum_maximum[1];
01942
01943 return(statistics);
01944 }
01945
01946
01947
01948
01957
01958 Vector< Vector<double> > DataSet::calculate_generalization_instances_statistics(void) const
01959 {
01960 const unsigned int variables_number = get_variables_number();
01961 const Vector<unsigned int> variables_indices(0, 1, variables_number-1);
01962
01963 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
01964
01965 const Vector< Vector<double> > mean_standard_deviation = data.calculate_mean_standard_deviation(generalization_indices, variables_indices);
01966
01967 const Vector< Vector<double> > minimum_maximum = data.calculate_minimum_maximum(generalization_indices, variables_indices);
01968
01969 Vector< Vector<double> > statistics(4);
01970
01971 statistics[0] = mean_standard_deviation[0];
01972 statistics[1] = mean_standard_deviation[1];
01973 statistics[2] = minimum_maximum[0];
01974 statistics[3] = minimum_maximum[1];
01975
01976 return(statistics);
01977 }
01978
01979
01980
01981
01990
01991 Vector< Vector<double> > DataSet::calculate_testing_instances_statistics(void) const
01992 {
01993 const unsigned int variables_number = get_variables_number();
01994 const Vector<unsigned int> variables_indices(0, 1, variables_number-1);
01995
01996 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
01997
01998 const Vector< Vector<double> > mean_standard_deviation = data.calculate_mean_standard_deviation(testing_indices, variables_indices);
01999
02000 const Vector< Vector<double> > minimum_maximum = data.calculate_minimum_maximum(testing_indices, variables_indices);
02001
02002 Vector< Vector<double> > statistics(4);
02003
02004 statistics[0] = mean_standard_deviation[0];
02005 statistics[1] = mean_standard_deviation[1];
02006 statistics[2] = minimum_maximum[0];
02007 statistics[3] = minimum_maximum[1];
02008
02009 return(statistics);
02010 }
02011
02012
02013
02014
02036
02037 Vector< Vector<double> > DataSet::calculate_instances_statistics(void) const
02038 {
02039 Vector< Vector<double> > data_statistics = calculate_data_statistics();
02040 Vector< Vector<double> > training_instances_statistics = calculate_training_instances_statistics();
02041 Vector< Vector<double> > generalization_instances_statistics = calculate_generalization_instances_statistics();
02042 Vector< Vector<double> > testing_instances_statistics = calculate_testing_instances_statistics();
02043
02044 return(data_statistics.get_assembly(training_instances_statistics).get_assembly(generalization_instances_statistics).get_assembly(testing_instances_statistics));
02045 }
02046
02047
02048
02049
02058
02059 Vector< Vector<double> > DataSet::calculate_inputs_statistics(void) const
02060 {
02061 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
02062
02063 const Vector< Vector<double> > mean_standard_deviation = data.calculate_mean_standard_deviation(inputs_indices);
02064 const Vector< Vector<double> > minimum_maximum = data.calculate_minimum_maximum(inputs_indices);
02065
02066 Vector< Vector<double> > statistics(4);
02067
02068 statistics[0] = mean_standard_deviation[0];
02069 statistics[1] = mean_standard_deviation[1];
02070 statistics[2] = minimum_maximum[0];
02071 statistics[3] = minimum_maximum[1];
02072
02073 return(statistics);
02074 }
02075
02076
02077
02078
02087
02088 Vector< Vector<double> > DataSet::calculate_targets_statistics(void) const
02089 {
02090 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02091
02092 const Vector< Vector<double> > mean_standard_deviation = data.calculate_mean_standard_deviation(targets_indices);
02093 const Vector< Vector<double> > minimum_maximum = data.calculate_minimum_maximum(targets_indices);
02094
02095 Vector< Vector<double> > statistics(4);
02096
02097 statistics[0] = mean_standard_deviation[0];
02098 statistics[1] = mean_standard_deviation[1];
02099 statistics[2] = minimum_maximum[0];
02100 statistics[3] = minimum_maximum[1];
02101
02102 return(statistics);
02103 }
02104
02105
02106
02107
02117
02118 Vector< Vector<double> > DataSet::calculate_inputs_targets_minimum_maximum(void) const
02119 {
02120 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
02121 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02122
02123 const Vector< Vector<double> > inputs_minimum_maximum = data.calculate_minimum_maximum(inputs_indices);
02124 const Vector< Vector<double> > targets_minimum_maximum = data.calculate_minimum_maximum(targets_indices);
02125
02126 return(inputs_minimum_maximum.get_assembly(targets_minimum_maximum));
02127 }
02128
02129
02130
02131
02140
02141 Vector< Vector<double> > DataSet::calculate_inputs_targets_mean_standard_deviation(void) const
02142 {
02143 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
02144 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02145
02146 const Vector< Vector<double> > inputs_mean_standard_deviation = data.calculate_mean_standard_deviation(inputs_indices);
02147 const Vector< Vector<double> > targets_mean_standard_deviation = data.calculate_mean_standard_deviation(targets_indices);
02148
02149 return(inputs_mean_standard_deviation.get_assembly(targets_mean_standard_deviation));
02150 }
02151
02152
02153
02154
02167
02168 Vector< Vector<double> > DataSet::calculate_inputs_targets_statistics(void) const
02169 {
02170 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
02171 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02172
02173 Vector< Vector<double> > statistics(8);
02174
02175
02176
02177 const Vector< Vector<double> > inputs_minimum_maximum = data.calculate_minimum_maximum(inputs_indices);
02178 const Vector< Vector<double> > inputs_mean_standard_deviation = data.calculate_mean_standard_deviation(inputs_indices);
02179
02180 statistics[0] = inputs_minimum_maximum[0];
02181 statistics[1] = inputs_minimum_maximum[1];
02182 statistics[2] = inputs_mean_standard_deviation[0];
02183 statistics[3] = inputs_mean_standard_deviation[1];
02184
02185
02186
02187 const Vector< Vector<double> > targets_minimum_maximum = data.calculate_minimum_maximum(targets_indices);
02188 const Vector< Vector<double> > targets_mean_standard_deviation = data.calculate_mean_standard_deviation(targets_indices);
02189
02190 statistics[4] = targets_minimum_maximum[0];
02191 statistics[5] = targets_minimum_maximum[1];
02192 statistics[6] = targets_mean_standard_deviation[0];
02193 statistics[7] = targets_mean_standard_deviation[1];
02194
02195 return(statistics);
02196 }
02197
02198
02199
02200
02202
02203 Vector<double> DataSet::calculate_training_target_data_mean(void) const
02204 {
02205 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02206
02207 const Vector<unsigned int>& training_indices = instances_information.get_training_indices();
02208
02209 return(data.calculate_mean(training_indices, targets_indices));
02210 }
02211
02212
02213
02214
02216
02217 Vector<double> DataSet::calculate_generalization_target_data_mean(void) const
02218 {
02219 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02220
02221 const Vector<unsigned int>& generalization_indices = instances_information.get_generalization_indices();
02222
02223 return(data.calculate_mean(generalization_indices, targets_indices));
02224 }
02225
02226
02227
02228
02230
02231 Vector<double> DataSet::calculate_testing_target_data_mean(void) const
02232 {
02233 const Vector<unsigned int>& testing_indices = instances_information.get_testing_indices();
02234
02235 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02236
02237 return(data.calculate_mean(testing_indices, targets_indices));
02238 }
02239
02240
02241
02242
02244
02245 Matrix<double> DataSet::calculate_variables_correlation(void) const
02246 {
02247 Matrix<double> variables_correlation;
02248
02249 return(variables_correlation);
02250 }
02251
02252
02253
02254
02259
02260 void DataSet::scale_data_mean_standard_deviation(const Vector<double>& means, const Vector<double>& standard_deviations)
02261 {
02262
02263
02264 #ifdef _DEBUG
02265
02266 std::ostringstream buffer;
02267
02268 const unsigned int columns_number = data.get_columns_number();
02269
02270 const unsigned int means_size = means.size();
02271 const unsigned int standard_deviations_size = standard_deviations.size();
02272
02273 if(means_size != columns_number)
02274 {
02275 buffer << "OpenNN Exception: DataSet class.\n"
02276 << "void scale_data_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02277 << "Size of means must be equal to number of columns.\n";
02278
02279 throw std::logic_error(buffer.str());
02280 }
02281
02282 if(standard_deviations_size != columns_number)
02283 {
02284 buffer << "OpenNN Exception: DataSet class.\n"
02285 << "void scale_data_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02286 << "Size of standard deviations must be equal to number of columns.\n";
02287
02288 throw std::logic_error(buffer.str());
02289 }
02290
02291 #endif
02292
02293 data.scale_mean_standard_deviation(means, standard_deviations);
02294 }
02295
02296
02297
02298
02303
02304 void DataSet::scale_data_minimum_maximum(const Vector<double>& minimums, const Vector<double>& maximums)
02305 {
02306
02307
02308 #ifdef _DEBUG
02309
02310 std::ostringstream buffer;
02311
02312 const unsigned int columns_number = data.get_columns_number();
02313
02314 const unsigned int minimums_size = minimums.size();
02315 const unsigned int maximums_size = maximums.size();
02316
02317 if(minimums_size != columns_number)
02318 {
02319 buffer << "OpenNN Exception: DataSet class.\n"
02320 << "void scale_data_minimum_maximum(const Vector<double>&, const Vector<double>&) method.\n"
02321 << "Size of minimums must be equal to number of columns.\n";
02322
02323 throw std::logic_error(buffer.str());
02324 }
02325
02326 if(maximums_size != columns_number)
02327 {
02328 std::ostringstream buffer;
02329
02330 buffer << "OpenNN Exception: DataSet class.\n"
02331 << "void scale_data_minimumn_maximum(const Vector<double>&, const Vector<double>&) method.\n"
02332 << "Size of maximums must be equal to number of columns.\n";
02333
02334 throw std::logic_error(buffer.str());
02335 }
02336
02337 #endif
02338
02339 data.scale_minimum_maximum(minimums, maximums);
02340 }
02341
02342
02343
02344
02348
02349 void DataSet::scale_data(const Vector< Vector<double> >& statistics)
02350 {
02351 const Vector<double>& minimums = statistics[0];
02352 const Vector<double>& maximums = statistics[0];
02353
02354 const Vector<double>& means = statistics[0];
02355 const Vector<double>& standard_deviations = statistics[0];
02356
02357 switch(scaling_unscaling_method)
02358 {
02359 case MinimumMaximum:
02360 {
02361 scale_data_minimum_maximum(minimums, maximums);
02362 }
02363 break;
02364
02365 case MeanStandardDeviation:
02366 {
02367 scale_data_mean_standard_deviation(means, standard_deviations);
02368 }
02369 break;
02370
02371 default:
02372 {
02373 std::ostringstream buffer;
02374
02375 buffer << "OpenNN Exception: DataSet class\n"
02376 << "void scale_data(const Vector< Vector<double> >&) method.\n"
02377 << "Unknown scaling and unscaling method.\n";
02378
02379 throw std::logic_error(buffer.str());
02380 }
02381 break;
02382 }
02383 }
02384
02385
02386
02387
02390
02391 Vector< Vector<double> > DataSet::scale_data(void)
02392 {
02393 const Vector< Vector<double> >& statistics = data.calculate_statistics();
02394
02395 const Vector<double>& minimums = statistics[0];
02396 const Vector<double>& maximums = statistics[0];
02397
02398 const Vector<double>& means = statistics[0];
02399 const Vector<double>& standard_deviations = statistics[0];
02400
02401 switch(scaling_unscaling_method)
02402 {
02403 case MinimumMaximum:
02404 {
02405 scale_data_minimum_maximum(minimums, maximums);
02406 }
02407 break;
02408
02409 case MeanStandardDeviation:
02410 {
02411 scale_data_mean_standard_deviation(means, standard_deviations);
02412 }
02413 break;
02414
02415 default:
02416 {
02417 std::ostringstream buffer;
02418
02419 buffer << "OpenNN Exception: DataSet class\n"
02420 << "const Vector< Vector<double> > scale_data(void) method.\n"
02421 << "Unknown scaling and unscaling method.\n";
02422
02423 throw std::logic_error(buffer.str());
02424 }
02425 break;
02426 }
02427
02428 return(statistics);
02429 }
02430
02431
02432
02433
02438
02439 void DataSet::scale_inputs_mean_standard_deviation(const Vector<double>& inputs_mean, const Vector<double>& inputs_standard_deviation)
02440 {
02441 const unsigned int inputs_number = variables_information.count_inputs_number();
02442
02443
02444
02445 #ifdef _DEBUG
02446
02447 const unsigned int inputs_mean_size = inputs_mean.size();
02448 const unsigned int inputs_standard_deviation_size = inputs_standard_deviation.size();
02449
02450 if(inputs_mean_size != inputs_number)
02451 {
02452 std::ostringstream buffer;
02453
02454 buffer << "OpenNN Exception: DataSet class.\n"
02455 << "void scale_inputs_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02456 << "Size of input variables mean must be equal to number of input variables.\n";
02457
02458 throw std::logic_error(buffer.str());
02459
02460 return;
02461 }
02462
02463 if(inputs_standard_deviation_size != inputs_number)
02464 {
02465 std::ostringstream buffer;
02466
02467 buffer << "OpenNN Exception: DataSet class.\n"
02468 << "void scale_inputs_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02469 << "Size of input variables standard deviation must be equal to number of input variables.\n";
02470
02471 throw std::logic_error(buffer.str());
02472
02473 return;
02474 }
02475
02476 #endif
02477
02478 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
02479
02480 const unsigned int instances_number = get_instances_number();
02481
02482 unsigned int variable_index;
02483
02484
02485
02486 for(unsigned int j = 0; j < inputs_number; j++)
02487 {
02488 variable_index = inputs_indices[j];
02489
02490 if(inputs_standard_deviation[j] < 1e-99)
02491 {
02492 if(display)
02493 {
02494 std::cout << "OpenNN Warning: DataSet class.\n"
02495 << "void scale_inputs_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02496 << "Standard deviation of input variable " << j << " is zero.\n"
02497 << "Those inputs won't be scaled.\n";
02498 }
02499
02500
02501 }
02502 else
02503 {
02504 for(unsigned int i = 0; i < instances_number; i++)
02505 {
02506 data[i][variable_index] = (data[i][variable_index] - inputs_mean[j])/inputs_standard_deviation[j];
02507 }
02508 }
02509 }
02510 }
02511
02512
02513
02514
02518
02519 Vector< Vector<double> > DataSet::scale_inputs_mean_standard_deviation(void)
02520 {
02521 const Vector< Vector<double> > inputs_targets_statistics = calculate_inputs_targets_statistics();
02522
02523 const Vector<double>& inputs_means = inputs_targets_statistics[2];
02524 const Vector<double>& inputs_standard_deviations = inputs_targets_statistics[3];
02525
02526 scale_inputs_mean_standard_deviation(inputs_means, inputs_standard_deviations);
02527
02528 return(inputs_targets_statistics);
02529 }
02530
02531
02532
02533
02538
02539 void DataSet::scale_inputs_minimum_maximum(const Vector<double>& inputs_minimum, const Vector<double>& inputs_maximum)
02540 {
02541 const Vector<unsigned int>& inputs_indices = variables_information.get_inputs_indices();
02542
02543 const unsigned int instances_number = get_instances_number();
02544 const unsigned int inputs_number = variables_information.count_inputs_number();
02545
02546 unsigned int variable_index;
02547
02548 for(unsigned int j = 0; j < inputs_number; j++)
02549 {
02550 variable_index = inputs_indices[j];
02551
02552 if(inputs_maximum[j] - inputs_minimum[j] < 1e-99)
02553 {
02554 if(display)
02555 {
02556 std::cout << "OpenNN Warning: DataSet class.\n"
02557 << "void scale_inputs_minimum_maximum(const Vector<double>&, const Vector<double>&) method.\n"
02558 << "Minimum and maximum values of input variable " << j << " are equal. "
02559 << "Those inputs won't be scaled.\n";
02560 }
02561
02562
02563 }
02564 else
02565 {
02566 for(unsigned int i = 0; i < instances_number; i++)
02567 {
02568 data[i][variable_index] = 2.0*(data[i][variable_index] - inputs_minimum[j])/(inputs_maximum[j]-inputs_minimum[j])-1.0;
02569 }
02570 }
02571 }
02572 }
02573
02574
02575
02576
02580
02581 Vector< Vector<double> > DataSet::scale_inputs_minimum_maximum(void)
02582 {
02583 const Vector< Vector<double> > inputs_targets_statistics = calculate_inputs_targets_statistics();
02584
02585 const Vector<double> & inputs_minimums = inputs_targets_statistics[0];
02586 const Vector<double> & inputs_maximums = inputs_targets_statistics[1];
02587
02588 scale_inputs_minimum_maximum(inputs_minimums, inputs_maximums);
02589
02590 return(inputs_targets_statistics);
02591 }
02592
02593
02594
02595
02600
02601 Vector< Vector<double> > DataSet::scale_inputs(void)
02602 {
02603 switch(scaling_unscaling_method)
02604 {
02605 case MinimumMaximum:
02606 {
02607 return(scale_inputs_minimum_maximum());
02608 }
02609 break;
02610
02611 case MeanStandardDeviation:
02612 {
02613 return(scale_inputs_mean_standard_deviation());
02614 }
02615 break;
02616
02617 default:
02618 {
02619 std::ostringstream buffer;
02620
02621 buffer << "OpenNN Exception: DataSet class\n"
02622 << "Vector< Vector<double> > scale_inputs(void) method.\n"
02623 << "Unknown scaling and unscaling method.\n";
02624
02625 throw std::logic_error(buffer.str());
02626 }
02627 break;
02628 }
02629 }
02630
02631
02632
02633
02639
02640 void DataSet::scale_targets_mean_standard_deviation(const Vector<double>& targets_mean, const Vector<double>& targets_standard_deviation)
02641 {
02642 const unsigned int targets_number = variables_information.count_targets_number();
02643
02644
02645
02646 #ifdef _DEBUG
02647
02648 const unsigned int targets_mean_size = targets_mean.size();
02649 const unsigned int targets_standard_deviation_size = targets_standard_deviation.size();
02650
02651 if(targets_mean_size != targets_number)
02652 {
02653 std::ostringstream buffer;
02654
02655 buffer << "OpenNN Exception: DataSet class.\n"
02656 << "void scale_targets_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02657 << "Size of target variables mean must be equal to number of target variables.\n";
02658
02659 throw std::logic_error(buffer.str());
02660 }
02661
02662 if(targets_standard_deviation_size != targets_number)
02663 {
02664 std::ostringstream buffer;
02665
02666 buffer << "OpenNN Exception: DataSet class.\n"
02667 << "void scale_targets_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02668 << "Size of target variables standard deviation must be equal to number of target variables.\n";
02669
02670 throw std::logic_error(buffer.str());
02671 }
02672
02673 #endif
02674
02675 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02676
02677 const unsigned int instances_number = get_instances_number();
02678
02679 unsigned int variable_index;
02680
02681
02682
02683 for(unsigned int j = 0; j < targets_number; j++)
02684 {
02685 variable_index = targets_indices[j];
02686
02687 if(targets_standard_deviation[j] < 1e-99)
02688 {
02689 if(display)
02690 {
02691 std::cout << "OpenNN Warning: DataSet class.\n"
02692 << "void scale_targets_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
02693 << "Standard deviation of target variable " << j << " is zero.\n"
02694 << "Those targets won't be scaled.\n";
02695 }
02696
02697
02698 }
02699 else
02700 {
02701 for(unsigned int i = 0; i < instances_number; i++)
02702 {
02703 data[i][variable_index] = (data[i][variable_index] - targets_mean[j])/targets_standard_deviation[j];
02704 }
02705 }
02706 }
02707 }
02708
02709
02710
02711
02725
02726 Vector< Vector<double> > DataSet::scale_targets_mean_standard_deviation(void)
02727 {
02728 Vector< Vector<double> > inputs_targets_statistics = calculate_inputs_targets_statistics();
02729
02730 const Vector<double>& targets_means = inputs_targets_statistics[4];
02731 const Vector<double>& targets_standard_deviations = inputs_targets_statistics[5];
02732
02733 scale_targets_mean_standard_deviation(targets_means, targets_standard_deviations);
02734
02735 return(inputs_targets_statistics);
02736 }
02737
02738
02739
02740
02746
02747 void DataSet::scale_targets_minimum_maximum(const Vector<double>& targets_minimum, const Vector<double>& targets_maximum)
02748 {
02749 const unsigned int instances_number = get_instances_number();
02750 const unsigned int targets_number = variables_information.count_targets_number();
02751
02752 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
02753
02754 unsigned int variable_index;
02755
02756
02757
02758 for(unsigned int j = 0; j < targets_number; j++)
02759 {
02760 variable_index = targets_indices[j];
02761
02762 if(targets_maximum[j] - targets_minimum[j] < 1e-99)
02763 {
02764 if(display)
02765 {
02766 std::cout << "OpenNN Warning: DataSet class.\n"
02767 << "void scale_targets_minimum_maximum(const Vector<double>&, const Vector<double>&) method.\n"
02768 << "Minimum and maximum values of target variable " << j << " are equal. "
02769 << "Those targets won't be scaled.\n"
02770 << "Minimum: " << targets_minimum[j] << "\n"
02771 << "Maximum: " << targets_maximum[j] << std::endl;
02772
02773 }
02774
02775
02776 }
02777 else
02778 {
02779 for(unsigned int i = 0; i < instances_number; i++)
02780 {
02781 data[i][variable_index] = 2.0*(data[i][variable_index] - targets_minimum[j])/(targets_maximum[j]-targets_minimum[j]) - 1.0;
02782 }
02783 }
02784 }
02785 }
02786
02787
02788
02789
02793
02794 Vector< Vector<double> > DataSet::scale_targets_minimum_maximum(void)
02795 {
02796 const Vector< Vector<double> > inputs_targets_statistics = calculate_inputs_targets_statistics();
02797
02798 const Vector<double>& targets_minimums = inputs_targets_statistics[4];
02799 const Vector<double>& targets_maximums = inputs_targets_statistics[5];
02800
02801 scale_targets_minimum_maximum(targets_minimums, targets_maximums);
02802
02803 return(inputs_targets_statistics);
02804 }
02805
02806
02807
02808
02813
02814 Vector< Vector<double> > DataSet::scale_targets(void)
02815 {
02816 switch(scaling_unscaling_method)
02817 {
02818 case MinimumMaximum:
02819 {
02820 return(scale_targets_minimum_maximum());
02821 }
02822 break;
02823
02824 case MeanStandardDeviation:
02825 {
02826 return(scale_targets_mean_standard_deviation());
02827 }
02828 break;
02829
02830 default:
02831 {
02832 std::ostringstream buffer;
02833
02834 buffer << "OpenNN Exception: DataSet class\n"
02835 << "Vector< Vector<double> > scale_targets(void) method.\n"
02836 << "Unknown scaling and unscaling method.\n";
02837
02838 throw std::logic_error(buffer.str());
02839 }
02840 break;
02841 }
02842 }
02843
02844
02845
02846
02854
02855 void DataSet::scale_inputs_targets_mean_standard_deviation(const Vector<double>& inputs_means, const Vector<double>& inputs_standard_deviations, const Vector<double>& targets_means, const Vector<double>& targets_standard_deviations)
02856 {
02857 scale_inputs_mean_standard_deviation(inputs_means, inputs_standard_deviations);
02858
02859 scale_targets_mean_standard_deviation(targets_means, targets_standard_deviations);
02860 }
02861
02862
02863
02864
02879
02880 Vector< Vector<double> > DataSet::scale_inputs_targets_mean_standard_deviation(void)
02881 {
02882 const Vector< Vector<double> > inputs_targets_statistics = calculate_inputs_targets_statistics();
02883
02884 const Vector<double>& inputs_means = inputs_targets_statistics[2];
02885 const Vector<double>& inputs_standard_deviations = inputs_targets_statistics[3];
02886
02887 const Vector<double>& targets_means = inputs_targets_statistics[6];
02888 const Vector<double>& targets_standard_deviations = inputs_targets_statistics[7];
02889
02890 scale_inputs_targets_mean_standard_deviation(inputs_means, inputs_standard_deviations, targets_means, targets_standard_deviations);
02891
02892 return(inputs_targets_statistics);
02893 }
02894
02895
02896
02897
02905
02906 void DataSet::scale_inputs_targets_minimum_maximum(const Vector<double>& inputs_minimums, const Vector<double>& inputs_maximums, const Vector<double>& targets_minimums, const Vector<double>& targets_maximums)
02907 {
02908 scale_inputs_minimum_maximum(inputs_minimums, inputs_maximums);
02909 scale_targets_minimum_maximum(targets_minimums, targets_maximums);
02910 }
02911
02912
02913
02914
02928
02929 Vector< Vector<double> > DataSet::scale_inputs_targets_minimum_maximum(void)
02930 {
02931 const Vector< Vector<double> > inputs_targets_statistics = calculate_inputs_targets_statistics();
02932
02933 const Vector<double> inputs_minimums = inputs_targets_statistics[0];
02934 const Vector<double> inputs_maximums = inputs_targets_statistics[1];
02935
02936 const Vector<double> targets_minimums = inputs_targets_statistics[4];
02937 const Vector<double> targets_maximums = inputs_targets_statistics[5];
02938
02939 scale_inputs_targets_minimum_maximum(inputs_minimums, inputs_maximums, targets_minimums, targets_maximums);
02940
02941 return(inputs_targets_statistics);
02942 }
02943
02944
02945
02946
02951
02952 Vector< Vector<double> > DataSet::scale_inputs_targets(void)
02953 {
02954 switch(scaling_unscaling_method)
02955 {
02956 case MinimumMaximum:
02957 {
02958 return(scale_inputs_targets_minimum_maximum());
02959 }
02960 break;
02961
02962 case MeanStandardDeviation:
02963 {
02964 return(scale_inputs_targets_mean_standard_deviation());
02965 }
02966 break;
02967
02968 default:
02969 {
02970 std::ostringstream buffer;
02971
02972 buffer << "OpenNN Exception: DataSet class\n"
02973 << "Vector< Vector<double> > scale_inputs_targets(void) method.\n"
02974 << "Unknown scaling and unscaling method.\n";
02975
02976 throw std::logic_error(buffer.str());
02977 }
02978 break;
02979 }
02980 }
02981
02982
02983
02984
02989
02990 void DataSet::unscale_data_mean_standard_deviation(const Vector<double>& mean, const Vector<double>& standard_deviation)
02991 {
02992 data.unscale_mean_standard_deviation(mean, standard_deviation);
02993 }
02994
02995
02996
02997
03003
03004 void DataSet::unscale_data_minimum_maximum(const Vector<double>& minimum, const Vector<double>& maximum)
03005 {
03006 data.unscale_minimum_maximum(minimum, maximum);
03007 }
03008
03009
03010
03011
03016
03017 void DataSet::unscale_inputs_mean_standard_deviation(const Vector<double>& inputs_mean,
03018 const Vector<double>& inputs_standard_deviation)
03019 {
03020 const unsigned int instances_number = get_instances_number();
03021 const unsigned int inputs_number = variables_information.count_inputs_number();
03022
03023
03024
03025 for(unsigned int j = 0; j < inputs_number; j++)
03026 {
03027 if(inputs_standard_deviation[j] < 1e-99)
03028 {
03029 if(display)
03030 {
03031 std::cout << "OpenNN Warning: DataSet class.\n"
03032 << "void unscale_inputs_mean_standard_deviation(const Vector<double>&, const Vector<double>&) method.\n"
03033 << "Standard deviation of input variable " << j << " is zero.\n"
03034 << "Those inputs won't be scaled.\n";
03035 }
03036
03037
03038 }
03039 else
03040 {
03041 for(unsigned int i = 0; i < instances_number; i++)
03042 {
03043 data[i][j] = data[i][j]*inputs_standard_deviation[j] + inputs_mean[j];
03044 }
03045 }
03046 }
03047 }
03048
03049
03050
03051
03057
03058 void DataSet::unscale_inputs_minimum_maximum(const Vector<double>& inputs_minimum, const Vector<double>& inputs_maximum)
03059 {
03060 const unsigned int instances_number = get_instances_number();
03061 const unsigned int inputs_number = variables_information.count_inputs_number();
03062
03063
03064
03065 for(unsigned int j = 0; j < inputs_number; j++)
03066 {
03067 if(inputs_maximum[j] - inputs_minimum[j] < 1e-99)
03068 {
03069 if(display)
03070 {
03071 std::cout << "OpenNN Warning: DataSet class.\n"
03072 << "void unscale_inputs_minimum_maximum(const Vector<double>&, const Vector<double>&) method.\n"
03073 << "Minimum and maximum values of input variable " << j << " are equal.\n"
03074 << "Those inputs won't be unscaled.\n";
03075 }
03076
03077
03078 }
03079 else
03080 {
03081 for(unsigned int i = 0; i < instances_number; i++)
03082 {
03083 data[i][j] = 0.5*(data[i][j] + 1.0)*(inputs_maximum[j]-inputs_minimum[j])
03084 + inputs_minimum[j];
03085 }
03086 }
03087 }
03088 }
03089
03090
03091
03092
03097
03098 void DataSet::unscale_targets_mean_standard_deviation(const Vector<double>& targets_mean,
03099 const Vector<double>& targets_standard_deviation)
03100 {
03101 const unsigned int instances_number = get_instances_number();
03102 const unsigned int targets_number = variables_information.count_targets_number();
03103
03104
03105
03106 for(unsigned int j = 0; j < targets_number; j++)
03107 {
03108 if(targets_standard_deviation[j] < 1e-99)
03109 {
03110 if(display)
03111 {
03112 std::cout << "OpenNN Warning: DataSet class.\n"
03113 << "void unscale_targets_mean_standard_deviation(const Vector<double>&) method.\n"
03114 << "Standard deviation of target variable " << j << " is zero.\n"
03115 << "Those targets won't be scaled.\n";
03116 }
03117
03118
03119 }
03120 else
03121 {
03122 for(unsigned int i = 0; i < instances_number; i++)
03123 {
03124 data[i][j] = data[i][j]*targets_standard_deviation[j] + targets_mean[j];
03125 }
03126 }
03127 }
03128 }
03129
03130
03131
03132
03138
03139 void DataSet::unscale_targets_minimum_maximum(const Vector<double>& targets_minimum,
03140 const Vector<double>& targets_maximum)
03141 {
03142 unsigned int instances_number = get_instances_number();
03143 unsigned int targets_number = variables_information.count_targets_number();
03144
03145
03146
03147 for(unsigned int j = 0; j < targets_number; j++)
03148 {
03149 if(targets_maximum[j] - targets_minimum[j] < 1e-99)
03150 {
03151 if(display)
03152 {
03153 std::cout << "OpenNN Warning: DataSet class.\n"
03154 << "void unscale_targets_minimum_maximum(const Vector<double>&, const Vector<double>&) method.\n"
03155 << "Minimum and maximum values of target variable " << j << " are equal.\n"
03156 << "Those targets won't be unscaled.\n";
03157 }
03158
03159
03160 }
03161 else
03162 {
03163 for(unsigned int i = 0; i < instances_number; i++)
03164 {
03165 data[i][j] = 0.5*(data[i][j] + 1.0)*(targets_maximum[j]-targets_minimum[j])
03166 + targets_minimum[j];
03167 }
03168 }
03169 }
03170 }
03171
03172
03173
03174
03180
03181 void DataSet::unscale_inputs_targets_mean_standard_deviation(const Vector< Vector<double> >& variables_statistics)
03182 {
03183
03184
03185 #ifdef _DEBUG
03186
03187 unsigned int size = variables_statistics.size();
03188
03189 if(size != 8)
03190 {
03191 std::ostringstream buffer;
03192
03193 buffer << "OpenNN Exception: DataSet class.\n"
03194 << "void unscale_inputs_targets_mean_standard_deviation(const Vector< Vector<double> >&) method.\n"
03195 << "Size of statistics must be 8.\n";
03196
03197 throw std::logic_error(buffer.str());
03198
03199 }
03200
03201 for(unsigned int i = 0; i < 8; i++)
03202 {
03203 size = variables_statistics[i].size();
03204
03205 if(size != 8)
03206 {
03207 std::ostringstream buffer;
03208
03209 buffer << "OpenNN Exception: DataSet class.\n"
03210 << "void unscale_inputs_targets_mean_standard_deviation(const Vector< Vector<double> >&) method.\n"
03211 << "Size of statistics element " << i << " must be equal to number of variables.\n";
03212
03213 throw std::logic_error(buffer.str());
03214 }
03215 }
03216
03217 #endif
03218
03219 unscale_inputs_mean_standard_deviation(variables_statistics[2], variables_statistics[3]);
03220 unscale_targets_mean_standard_deviation(variables_statistics[6], variables_statistics[7]);
03221 }
03222
03223
03224
03225
03231
03232 void DataSet::unscale_inputs_targets_minimum_maximum(const Vector< Vector<double> >& variables_statistics)
03233 {
03234
03235
03236 #ifdef _DEBUG
03237
03238 unsigned int size = variables_statistics.size();
03239
03240 if(size != 8)
03241 {
03242 std::ostringstream buffer;
03243
03244 buffer << "OpenNN Exception: DataSet class.\n"
03245 << "void unscale_inputs_targets_minimum_maximum(const Vector< Vector<double> >&) method.\n"
03246 << "Size of statistics must be 8.\n";
03247
03248 throw std::logic_error(buffer.str());
03249 }
03250
03251 #endif
03252
03253 unscale_inputs_minimum_maximum(variables_statistics[0], variables_statistics[1]);
03254
03255 unscale_targets_minimum_maximum(variables_statistics[4], variables_statistics[5]);
03256 }
03257
03258
03259
03260
03263
03264 void DataSet::initialize_data(const double& new_value)
03265 {
03266 data.initialize(new_value);
03267 }
03268
03269
03270
03271
03274
03275 void DataSet::initialize_data_normal(void)
03276 {
03277 data.initialize_normal();
03278 }
03279
03280
03281
03282
03284
03285 TiXmlElement* DataSet::to_XML(void) const
03286 {
03287 std::ostringstream buffer;
03288
03289
03290
03291 TiXmlElement* data_set_element = new TiXmlElement("DataSet");
03292 data_set_element->SetAttribute("Version", 4);
03293
03294
03295
03296 if(!data_filename.empty())
03297 {
03298 TiXmlElement* data_filename_element = new TiXmlElement("DataFilename");
03299 data_set_element->LinkEndChild(data_filename_element);
03300
03301 TiXmlText* data_filename_text = new TiXmlText(data_filename.c_str());
03302 data_filename_element->LinkEndChild(data_filename_text);
03303 }
03304
03305
03306
03307 TiXmlElement* variables_information_element = variables_information.to_XML();
03308 data_set_element->LinkEndChild(variables_information_element);
03309
03310
03311
03312 TiXmlElement* instances_information_element = instances_information.to_XML();
03313 data_set_element->LinkEndChild(instances_information_element);
03314
03315 return(data_set_element);
03316 }
03317
03318
03319
03320
03323
03324 void DataSet::from_XML(TiXmlElement* data_set_element)
03325 {
03326 if(!data_set_element)
03327 {
03328 return;
03329 }
03330
03331
03332
03333 TiXmlElement* data_filename_element = data_set_element->FirstChildElement("DataFilename");
03334
03335 if(data_filename_element)
03336 {
03337 std::string new_data_filename = data_filename_element->GetText();
03338
03339 try
03340 {
03341 set_data_filename(new_data_filename);
03342 load_data(new_data_filename);
03343 }
03344 catch(std::exception& e)
03345 {
03346 std::cout << e.what() << std::endl;
03347 }
03348 }
03349
03350
03351
03352 TiXmlElement* variables_information_element = data_set_element->FirstChildElement("VariablesInformation");
03353
03354 if(variables_information_element)
03355 {
03356 variables_information.from_XML(variables_information_element);
03357 }
03358
03359
03360
03361 TiXmlElement* instances_information_element = data_set_element->FirstChildElement("InstancesInformation");
03362
03363 if(instances_information_element)
03364 {
03365 instances_information.from_XML(instances_information_element);
03366 }
03367
03368 }
03369
03370
03371
03372
03374
03375 std::string DataSet::to_string(void) const
03376 {
03377 std::ostringstream buffer;
03378
03379 buffer << "Data filename: " << data_filename << "\n"
03380 << variables_information.to_string()
03381 << instances_information.to_string()
03382 << "Display: " << display << "\n"
03383 << "Data:\n" << data;
03384
03385 return(buffer.str());
03386 }
03387
03388
03389
03391
03392 void DataSet::print(void) const
03393 {
03394 if(display)
03395 {
03396 std::cout << to_string();
03397 }
03398 }
03399
03400
03401
03402
03405
03406 void DataSet::save(const std::string& filename) const
03407 {
03408 TiXmlDocument document;
03409
03410 TiXmlDeclaration* declaration = new TiXmlDeclaration("1.0", "", "");
03411 document.LinkEndChild(declaration);
03412
03413 TiXmlElement* data_set_element = to_XML();
03414 document.LinkEndChild(data_set_element);
03415
03416 document.SaveFile(filename.c_str());
03417 }
03418
03419
03420
03421
03428
03445
03446 void DataSet::load(const std::string& filename)
03447 {
03448 std::ostringstream buffer;
03449
03450 TiXmlDocument document(filename.c_str());
03451
03452 if(!document.LoadFile())
03453 {
03454 std::ostringstream buffer;
03455
03456 buffer << "OpenNN Exception: DataSet class.\n"
03457 << "void load(const std::string&) method.\n"
03458 << "Cannot load XML file " << filename << ".\n";
03459
03460 throw std::logic_error(buffer.str());
03461 }
03462
03463
03464
03465 TiXmlElement* data_set_element = document.FirstChildElement("DataSet");
03466
03467 if(!data_set_element)
03468 {
03469 buffer << "OpenNN Exception: DataSet class.\n"
03470 << "void load(const std::string&) method.\n"
03471 << "File " << filename << " is not a valid data set file.\n";
03472
03473 throw std::logic_error(buffer.str());
03474 }
03475
03476 from_XML(data_set_element);
03477 }
03478
03479
03480
03481
03483
03484 void DataSet::print_data(void) const
03485 {
03486 if(display)
03487 {
03488 std::cout << data;
03489 }
03490 }
03491
03492
03493
03494
03497
03498 void DataSet::save_data(const std::string& filename) const
03499 {
03500 std::ofstream file(filename.c_str());
03501
03502 if(!file.is_open())
03503 {
03504 std::ostringstream buffer;
03505
03506 buffer << "OpenNN Exception: DataSet class.\n"
03507 << "void save_data(const std::string&) const method.\n"
03508 << "Cannot open data file.\n";
03509
03510 throw std::logic_error(buffer.str().c_str());
03511 }
03512
03513
03514
03515 file << data;
03516
03517
03518
03519 file.close();
03520 }
03521
03522
03523
03524
03528
03529 void DataSet::load_data(const std::string& new_data_filename)
03530 {
03531 data_filename = new_data_filename;
03532
03533 data.load(data_filename);
03534
03535 unsigned int variables_number = data.get_columns_number();
03536
03537 variables_information.set(variables_number);
03538
03539 unsigned int instances_number = data.get_rows_number();
03540
03541 instances_information.set(instances_number);
03542 }
03543
03544
03545
03546
03551
03552 Vector<unsigned int> DataSet::calculate_target_class_distribution(void) const
03553 {
03554
03555
03556 const unsigned int instances_number = get_instances_number();
03557 const unsigned int targets_number = variables_information.count_targets_number();
03558 const Vector<unsigned int>& targets_indices = variables_information.get_targets_indices();
03559
03560 Vector<unsigned int> class_distribution;
03561
03562 if(targets_number == 1)
03563 {
03564 class_distribution.set(2, 0);
03565
03566 int target_index = targets_indices[0];
03567
03568 for(unsigned int instance_index = 0; instance_index < instances_number; instance_index++)
03569 {
03570 if(data[instance_index][target_index] < 0.5)
03571 {
03572 class_distribution[0]++;
03573 }
03574 else
03575 {
03576 class_distribution[1]++;
03577 }
03578 }
03579 }
03580 else
03581 {
03582 class_distribution.set(targets_number, 0);
03583
03584 for(unsigned int i = 0; i < instances_number; i++)
03585 {
03586 for(unsigned int j = 0; j < targets_number; j++)
03587 {
03588 if(data[i][targets_indices[j]] > 0.5)
03589 {
03590 class_distribution[j]++;
03591 }
03592 }
03593 }
03594 }
03595
03596
03597
03598 if(class_distribution.calculate_sum() != instances_number)
03599 {
03600 std::ostringstream buffer;
03601
03602 buffer << "OpenNN Exception: DataSet class.\n"
03603 << "Vector<int> calculate_target_class_distribution(void) const method.\n"
03604 << "Sum of class distributions is not equal to number of instances.\n"
03605 << "Class distribution: " << class_distribution << std::endl;
03606 throw std::logic_error(buffer.str().c_str());
03607 }
03608
03609 return(class_distribution);
03610 }
03611
03612
03613
03614
03615
03616
03617
03618
03619
03620
03621
03622
03623
03624
03625
03626
03627
03628
03629
03630
03631
03632
03633
03634
03635
03636
03637
03638
03639
03640
03641
03642
03643
03644
03645
03646
03647
03648
03650
03651
03652
03653
03654
03655
03656
03657
03658
03659
03660
03661
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671
03672
03673
03674
03675
03676
03677
03678
03679
03680
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690
03691
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701
03702
03703
03704
03706
03707
03708
03709
03710
03711
03712
03713
03714
03715
03716
03717
03718
03719
03720
03721
03722
03723
03724
03725
03726
03727
03728
03729
03730
03731
03732
03733
03734
03735
03736
03738
03739
03740
03741
03742
03743
03744
03745
03746
03747
03748
03749
03750
03751
03752
03753
03754
03755
03756
03757
03758 }
03759
03760
03761
03762
03763
03764
03765
03766
03767
03768
03769
03770
03771
03772
03773
03774
03775
03776