#include <string>
#include <sstream>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <functional>
#include <limits>
#include <cmath>
#include <ctime>

#include "quasi_newton_method.h"

namespace OpenNN
{

/// Default constructor.
/// It creates a quasi-Newton method training algorithm not associated to any performance functional.
/// It also initializes the class members to their default values.

QuasiNewtonMethod::QuasiNewtonMethod(void)
 : TrainingAlgorithm()
{
   set_default();
}


/// Performance functional constructor.
/// It creates a quasi-Newton method training algorithm associated to a performance functional.
/// It also initializes the class members to their default values.
/// @param new_performance_functional_pointer Pointer to a performance functional object.

QuasiNewtonMethod::QuasiNewtonMethod(PerformanceFunctional* new_performance_functional_pointer)
 : TrainingAlgorithm(new_performance_functional_pointer)
{
   training_rate_algorithm.set_performance_functional_pointer(new_performance_functional_pointer);

   set_default();
}
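
// A minimal usage sketch (hedged: how the performance functional is built
// depends on the rest of the application, so the first line is an assumption):
//
//    PerformanceFunctional performance_functional(&neural_network);
//    QuasiNewtonMethod quasi_Newton_method(&performance_functional);
//    quasi_Newton_method.set_maximum_epochs_number(100);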


/// XML constructor.
/// It creates a quasi-Newton method training algorithm from a TinyXML element.
/// It also initializes the class members to their default values.
/// @param quasi_Newton_method_element Pointer to a TinyXML element with the quasi-Newton method members.

QuasiNewtonMethod::QuasiNewtonMethod(TiXmlElement* quasi_Newton_method_element)
 : TrainingAlgorithm(quasi_Newton_method_element)
{
   set_default();
}


/// Destructor.

QuasiNewtonMethod::~QuasiNewtonMethod(void)
{
}


/// Returns a constant reference to the training rate algorithm object inside the quasi-Newton method object.

const TrainingRateAlgorithm& QuasiNewtonMethod::get_training_rate_algorithm(void) const
{
   return(training_rate_algorithm);
}


/// Returns a pointer to the training rate algorithm object inside the quasi-Newton method object.

TrainingRateAlgorithm* QuasiNewtonMethod::get_training_rate_algorithm_pointer(void)
{
   return(&training_rate_algorithm);
}


/// Returns the method used for approximating the inverse Hessian matrix.

const QuasiNewtonMethod::InverseHessianApproximationMethod& QuasiNewtonMethod::get_inverse_Hessian_approximation_method(void) const
{
   return(inverse_Hessian_approximation_method);
}


/// Returns the name of the method used for approximating the inverse Hessian matrix ("DFP" or "BFGS").

std::string QuasiNewtonMethod::write_inverse_Hessian_approximation_method(void) const
{
   switch(inverse_Hessian_approximation_method)
   {
      case DFP:
      {
         return("DFP");
      }

      case BFGS:
      {
         return("BFGS");
      }

      default:
      {
         std::ostringstream buffer;

         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
                << "std::string write_inverse_Hessian_approximation_method(void) const method.\n"
                << "Unknown inverse Hessian approximation method.\n";

         throw std::logic_error(buffer.str());
      }
   }
}


/// Returns the minimum value for the norm of the parameters vector at which a warning message is written to the screen.

const double& QuasiNewtonMethod::get_warning_parameters_norm(void) const
{
   return(warning_parameters_norm);
}


/// Returns the minimum value for the norm of the gradient vector at which a warning message is written to the screen.

const double& QuasiNewtonMethod::get_warning_gradient_norm(void) const
{
   return(warning_gradient_norm);
}


/// Returns the training rate value at which a warning message is written to the screen during line minimization.

const double& QuasiNewtonMethod::get_warning_training_rate(void) const
{
   return(warning_training_rate);
}


/// Returns the value for the norm of the parameters vector at which the training process is assumed to fail.

const double& QuasiNewtonMethod::get_error_parameters_norm(void) const
{
   return(error_parameters_norm);
}


/// Returns the value for the norm of the gradient vector at which the training process is assumed to fail.

const double& QuasiNewtonMethod::get_error_gradient_norm(void) const
{
   return(error_gradient_norm);
}


/// Returns the training rate value at which the line minimization algorithm is assumed to fail when bracketing a minimum.

const double& QuasiNewtonMethod::get_error_training_rate(void) const
{
   return(error_training_rate);
}


/// Returns the minimum norm of the parameters increment vector used as a stopping criterion when training.

const double& QuasiNewtonMethod::get_minimum_parameters_increment_norm(void) const
{
   return(minimum_parameters_increment_norm);
}


/// Returns the minimum performance improvement during training.

const double& QuasiNewtonMethod::get_minimum_performance_increase(void) const
{
   return(minimum_performance_increase);
}


/// Returns the goal value for the performance.
/// This is used as a stopping criterion when training a neural network.

const double& QuasiNewtonMethod::get_performance_goal(void) const
{
   return(performance_goal);
}


/// Returns the goal value for the norm of the performance function gradient.
/// This is used as a stopping criterion when training a neural network.

const double& QuasiNewtonMethod::get_gradient_norm_goal(void) const
{
   return(gradient_norm_goal);
}


/// Returns the maximum number of generalization evaluation decreases during the training process.
/// This is used as an early stopping criterion.

const unsigned int& QuasiNewtonMethod::get_maximum_generalization_evaluation_decreases(void) const
{
   return(maximum_generalization_evaluation_decreases);
}


/// Returns the maximum number of epochs for training.

const unsigned int& QuasiNewtonMethod::get_maximum_epochs_number(void) const
{
   return(maximum_epochs_number);
}


/// Returns the maximum training time.

const double& QuasiNewtonMethod::get_maximum_time(void) const
{
   return(maximum_time);
}


/// Returns true if the parameters history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_parameters_history(void) const
{
   return(reserve_parameters_history);
}


/// Returns true if the parameters norm history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_parameters_norm_history(void) const
{
   return(reserve_parameters_norm_history);
}


/// Returns true if the evaluation history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_evaluation_history(void) const
{
   return(reserve_evaluation_history);
}


/// Returns true if the gradient history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_gradient_history(void) const
{
   return(reserve_gradient_history);
}


/// Returns true if the gradient norm history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_gradient_norm_history(void) const
{
   return(reserve_gradient_norm_history);
}


/// Returns true if the training direction history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_training_direction_history(void) const
{
   return(reserve_training_direction_history);
}


/// Returns true if the training rate history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_training_rate_history(void) const
{
   return(reserve_training_rate_history);
}


/// Returns true if the elapsed time history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_elapsed_time_history(void) const
{
   return(reserve_elapsed_time_history);
}


/// Returns true if the inverse Hessian history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_inverse_Hessian_history(void) const
{
   return(reserve_inverse_Hessian_history);
}


/// Returns true if the generalization evaluation history is to be reserved, and false otherwise.

const bool& QuasiNewtonMethod::get_reserve_generalization_evaluation_history(void) const
{
   return(reserve_generalization_evaluation_history);
}


/// Returns the number of epochs between two consecutive displays of training progress.

const unsigned int& QuasiNewtonMethod::get_display_period(void) const
{
   return(display_period);
}


/// Sets a new inverse Hessian approximation method.
/// @param new_inverse_Hessian_approximation_method Inverse Hessian approximation method (DFP or BFGS).

void QuasiNewtonMethod::set_inverse_Hessian_approximation_method(
const QuasiNewtonMethod::InverseHessianApproximationMethod& new_inverse_Hessian_approximation_method)
{
   inverse_Hessian_approximation_method = new_inverse_Hessian_approximation_method;
}


/// Sets a new inverse Hessian approximation method from a string.
/// Possible values are "DFP" and "BFGS"; any other name throws an exception.
/// @param new_inverse_Hessian_approximation_method_name Name of the inverse Hessian approximation method.

void QuasiNewtonMethod::set_inverse_Hessian_approximation_method(const std::string& new_inverse_Hessian_approximation_method_name)
{
   if(new_inverse_Hessian_approximation_method_name == "DFP")
   {
      inverse_Hessian_approximation_method = DFP;
   }
   else if(new_inverse_Hessian_approximation_method_name == "BFGS")
   {
      inverse_Hessian_approximation_method = BFGS;
   }
   else
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_inverse_Hessian_approximation_method(const std::string&) method.\n"
             << "Unknown inverse Hessian approximation method: " << new_inverse_Hessian_approximation_method_name << ".\n";

      throw std::logic_error(buffer.str());
   }
}
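
// For example, both of these calls select the BFGS update:
//
//    quasi_Newton_method.set_inverse_Hessian_approximation_method(QuasiNewtonMethod::BFGS);
//    quasi_Newton_method.set_inverse_Hessian_approximation_method("BFGS");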


/// Makes the training history of all variables to be reserved or not in memory.
/// @param new_reserve_all_training_history True if all training history variables are to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_all_training_history(const bool& new_reserve_all_training_history)
{
   reserve_elapsed_time_history = new_reserve_all_training_history;
   reserve_parameters_history = new_reserve_all_training_history;
   reserve_parameters_norm_history = new_reserve_all_training_history;
   reserve_evaluation_history = new_reserve_all_training_history;
   reserve_generalization_evaluation_history = new_reserve_all_training_history;
   reserve_gradient_history = new_reserve_all_training_history;
   reserve_gradient_norm_history = new_reserve_all_training_history;
   reserve_inverse_Hessian_history = new_reserve_all_training_history;
   reserve_training_direction_history = new_reserve_all_training_history;
   reserve_training_rate_history = new_reserve_all_training_history;
}


/// Sets the members of the quasi-Newton method object to their default values.

void QuasiNewtonMethod::set_default(void)
{
   inverse_Hessian_approximation_method = BFGS;

   training_rate_algorithm.set_default();

   // Training parameters

   warning_parameters_norm = 1.0e6;
   warning_gradient_norm = 1.0e6;
   warning_training_rate = 1.0e6;

   error_parameters_norm = 1.0e9;
   error_gradient_norm = 1.0e9;
   error_training_rate = 1.0e9;

   // Stopping criteria

   minimum_parameters_increment_norm = 0.0;

   minimum_performance_increase = 0.0;
   performance_goal = -1.0e99;
   gradient_norm_goal = 0.0;
   maximum_generalization_evaluation_decreases = 1000000;

   maximum_epochs_number = 1000;
   maximum_time = 1000.0;

   // Training history

   reserve_parameters_history = false;
   reserve_parameters_norm_history = false;

   reserve_evaluation_history = true;
   reserve_gradient_history = false;
   reserve_gradient_norm_history = false;
   reserve_generalization_evaluation_history = false;
   reserve_inverse_Hessian_history = false;

   reserve_training_direction_history = false;
   reserve_training_rate_history = false;
   reserve_elapsed_time_history = false;

   // User interface

   display = true;
   display_period = 100;
}


/// Sets a new value for the parameters vector norm at which a warning message is written to the screen.
/// @param new_warning_parameters_norm Warning norm of parameters vector value.

void QuasiNewtonMethod::set_warning_parameters_norm(const double& new_warning_parameters_norm)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_warning_parameters_norm < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_warning_parameters_norm(const double&) method.\n"
             << "Warning parameters norm must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   warning_parameters_norm = new_warning_parameters_norm;
}


/// Sets a new value for the gradient vector norm at which a warning message is written to the screen.
/// @param new_warning_gradient_norm Warning norm of gradient vector value.

void QuasiNewtonMethod::set_warning_gradient_norm(const double& new_warning_gradient_norm)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_warning_gradient_norm < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_warning_gradient_norm(const double&) method.\n"
             << "Warning gradient norm must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   warning_gradient_norm = new_warning_gradient_norm;
}


/// Sets a new training rate value at which a warning message is written to the screen.
/// @param new_warning_training_rate Warning training rate value.

void QuasiNewtonMethod::set_warning_training_rate(const double& new_warning_training_rate)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_warning_training_rate < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_warning_training_rate(const double&) method.\n"
             << "Warning training rate must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   warning_training_rate = new_warning_training_rate;
}


/// Sets a new value for the parameters vector norm at which the training process is assumed to fail.
/// @param new_error_parameters_norm Error norm of parameters vector value.

void QuasiNewtonMethod::set_error_parameters_norm(const double& new_error_parameters_norm)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_error_parameters_norm < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_error_parameters_norm(const double&) method.\n"
             << "Error parameters norm must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   error_parameters_norm = new_error_parameters_norm;
}


/// Sets a new value for the gradient vector norm at which the training process is assumed to fail.
/// @param new_error_gradient_norm Error norm of gradient vector value.

void QuasiNewtonMethod::set_error_gradient_norm(const double& new_error_gradient_norm)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_error_gradient_norm < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_error_gradient_norm(const double&) method.\n"
             << "Error gradient norm must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   error_gradient_norm = new_error_gradient_norm;
}


/// Sets a new training rate value at which the line minimization algorithm is assumed to fail when bracketing a minimum.
/// @param new_error_training_rate Error training rate value.

void QuasiNewtonMethod::set_error_training_rate(const double& new_error_training_rate)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_error_training_rate < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_error_training_rate(const double&) method.\n"
             << "Error training rate must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   error_training_rate = new_error_training_rate;
}


/// Sets a new value for the minimum parameters increment norm stopping criterion.
/// @param new_minimum_parameters_increment_norm Norm of the parameters increment vector used to stop training.

void QuasiNewtonMethod::set_minimum_parameters_increment_norm(const double& new_minimum_parameters_increment_norm)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_minimum_parameters_increment_norm < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_minimum_parameters_increment_norm(const double&) method.\n"
             << "Minimum parameters increment norm must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   minimum_parameters_increment_norm = new_minimum_parameters_increment_norm;
}


/// Sets a new minimum performance improvement during training.
/// @param new_minimum_performance_increase Minimum improvement in the performance between two epochs.

void QuasiNewtonMethod::set_minimum_performance_increase(const double& new_minimum_performance_increase)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_minimum_performance_increase < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_minimum_performance_increase(const double&) method.\n"
             << "Minimum performance improvement must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   minimum_performance_increase = new_minimum_performance_increase;
}


/// Sets a new goal value for the performance.
/// This is used as a stopping criterion when training a neural network.
/// @param new_performance_goal Goal value for the performance.

void QuasiNewtonMethod::set_performance_goal(const double& new_performance_goal)
{
   performance_goal = new_performance_goal;
}


/// Sets a new goal value for the norm of the performance function gradient.
/// This is used as a stopping criterion when training a neural network.
/// @param new_gradient_norm_goal Goal value for the norm of the performance function gradient.

void QuasiNewtonMethod::set_gradient_norm_goal(const double& new_gradient_norm_goal)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_gradient_norm_goal < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_gradient_norm_goal(const double&) method.\n"
             << "Gradient norm goal must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   gradient_norm_goal = new_gradient_norm_goal;
}


/// Sets a new maximum number of generalization evaluation decreases.
/// This is used as an early stopping criterion.
/// Note that the argument is unsigned, so no sign check is needed.
/// @param new_maximum_generalization_evaluation_decreases Maximum number of epochs in which the generalization evaluation decreases.

void QuasiNewtonMethod::set_maximum_generalization_evaluation_decreases(const unsigned int& new_maximum_generalization_evaluation_decreases)
{
   maximum_generalization_evaluation_decreases = new_maximum_generalization_evaluation_decreases;
}


/// Sets a new maximum number of epochs for training.
/// Note that the argument is unsigned, so no sign check is needed.
/// @param new_maximum_epochs_number Maximum number of epochs.

void QuasiNewtonMethod::set_maximum_epochs_number(const unsigned int& new_maximum_epochs_number)
{
   maximum_epochs_number = new_maximum_epochs_number;
}


/// Sets a new maximum training time.
/// @param new_maximum_time Maximum training time.

void QuasiNewtonMethod::set_maximum_time(const double& new_maximum_time)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_maximum_time < 0.0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_maximum_time(const double&) method.\n"
             << "Maximum time must be equal to or greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   maximum_time = new_maximum_time;
}


/// Makes the parameters history vector of vectors to be reserved or not in memory.
/// @param new_reserve_parameters_history True if the parameters history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_parameters_history(const bool& new_reserve_parameters_history)
{
   reserve_parameters_history = new_reserve_parameters_history;
}


/// Makes the parameters norm history vector to be reserved or not in memory.
/// @param new_reserve_parameters_norm_history True if the parameters norm history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_parameters_norm_history(const bool& new_reserve_parameters_norm_history)
{
   reserve_parameters_norm_history = new_reserve_parameters_norm_history;
}


/// Makes the evaluation history vector to be reserved or not in memory.
/// @param new_reserve_evaluation_history True if the evaluation history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_evaluation_history(const bool& new_reserve_evaluation_history)
{
   reserve_evaluation_history = new_reserve_evaluation_history;
}


/// Makes the gradient history vector of vectors to be reserved or not in memory.
/// @param new_reserve_gradient_history True if the gradient history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_gradient_history(const bool& new_reserve_gradient_history)
{
   reserve_gradient_history = new_reserve_gradient_history;
}


/// Makes the gradient norm history vector to be reserved or not in memory.
/// @param new_reserve_gradient_norm_history True if the gradient norm history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_gradient_norm_history(const bool& new_reserve_gradient_norm_history)
{
   reserve_gradient_norm_history = new_reserve_gradient_norm_history;
}


/// Makes the inverse Hessian history vector of matrices to be reserved or not in memory.
/// @param new_reserve_inverse_Hessian_history True if the inverse Hessian history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_inverse_Hessian_history(const bool& new_reserve_inverse_Hessian_history)
{
   reserve_inverse_Hessian_history = new_reserve_inverse_Hessian_history;
}


/// Makes the training direction history vector of vectors to be reserved or not in memory.
/// @param new_reserve_training_direction_history True if the training direction history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_training_direction_history(const bool& new_reserve_training_direction_history)
{
   reserve_training_direction_history = new_reserve_training_direction_history;
}


/// Makes the training rate history vector to be reserved or not in memory.
/// @param new_reserve_training_rate_history True if the training rate history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_training_rate_history(const bool& new_reserve_training_rate_history)
{
   reserve_training_rate_history = new_reserve_training_rate_history;
}


/// Makes the elapsed time history vector to be reserved or not in memory.
/// @param new_reserve_elapsed_time_history True if the elapsed time history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_elapsed_time_history(const bool& new_reserve_elapsed_time_history)
{
   reserve_elapsed_time_history = new_reserve_elapsed_time_history;
}


/// Makes the generalization evaluation history vector to be reserved or not in memory.
/// @param new_reserve_generalization_evaluation_history True if the generalization evaluation history is to be reserved, false otherwise.

void QuasiNewtonMethod::set_reserve_generalization_evaluation_history(const bool& new_reserve_generalization_evaluation_history)
{
   reserve_generalization_evaluation_history = new_reserve_generalization_evaluation_history;
}


/// Sets a new number of epochs between two consecutive displays of training progress.
/// @param new_display_period Number of epochs between displays.

void QuasiNewtonMethod::set_display_period(const unsigned int& new_display_period)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   if(new_display_period == 0)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "void set_display_period(const unsigned int&) method.\n"
             << "Display period must be greater than 0.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   display_period = new_display_period;
}


/// Calculates an approximation of the inverse Hessian, according to the method used.
/// @param old_parameters Another point of the performance function.
/// @param parameters Current point of the performance function.
/// @param old_gradient Gradient at the other point.
/// @param gradient Gradient at the current point.
/// @param old_inverse_Hessian Inverse Hessian at the other point of the performance function.

Matrix<double> QuasiNewtonMethod::calculate_inverse_Hessian_approximation(
const Vector<double>& old_parameters, const Vector<double>& parameters,
const Vector<double>& old_gradient, const Vector<double>& gradient,
const Matrix<double>& old_inverse_Hessian) const
{
   switch(inverse_Hessian_approximation_method)
   {
      case DFP:
      {
         return(calculate_DFP_inverse_Hessian(old_parameters, parameters, old_gradient, gradient, old_inverse_Hessian));
      }

      case BFGS:
      {
         return(calculate_BFGS_inverse_Hessian(old_parameters, parameters, old_gradient, gradient, old_inverse_Hessian));
      }

      default:
      {
         std::ostringstream buffer;

         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
                << "Matrix<double> calculate_inverse_Hessian_approximation(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
                << "Unknown inverse Hessian approximation method.\n";

         throw std::logic_error(buffer.str());
      }
   }
}

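// A reference sketch of the direction computed below (standard quasi-Newton
// notation, not OpenNN code): given gradient g and inverse Hessian
// approximation H, the training direction is the normalized Newton-like step
//
//    d = -H g,   d <- d / ||d||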
/// Returns the quasi-Newton method training direction, which has been previously normalized.
/// @param gradient Gradient vector.
/// @param inverse_Hessian_approximation Inverse Hessian approximation matrix.

Vector<double> QuasiNewtonMethod::calculate_training_direction(const Vector<double>& gradient, const Matrix<double>& inverse_Hessian_approximation) const
{
   Vector<double> training_direction = inverse_Hessian_approximation.dot(gradient)*(-1.0);

   double training_direction_norm = training_direction.calculate_norm();

   return(training_direction/training_direction_norm);
}


/// Returns the normalized gradient descent training direction.
/// It is used whenever the quasi-Newton direction fails to be a descent direction.
/// @param gradient Gradient vector.

Vector<double> QuasiNewtonMethod::calculate_gradient_descent_training_direction(const Vector<double>& gradient) const
{
   double gradient_norm = gradient.calculate_norm();

   return(gradient/(-1.0*gradient_norm));
}

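// A reference sketch of the DFP update implemented below (standard notation,
// with s = parameters_difference, y = gradient_difference, H = old inverse
// Hessian; ' denotes transpose):
//
//    H_new = H + (s s')/(s'y) - (H y)(H y)'/(y'H y)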
/// Returns an approximation of the inverse Hessian matrix according to the Davidon-Fletcher-Powell (DFP) algorithm.
/// @param old_parameters Another point of the performance function.
/// @param parameters Current point of the performance function.
/// @param old_gradient Gradient at the other point.
/// @param gradient Gradient at the current point.
/// @param old_inverse_Hessian Inverse Hessian at the other point of the performance function.

Matrix<double> QuasiNewtonMethod::calculate_DFP_inverse_Hessian(
const Vector<double>& old_parameters, const Vector<double>& parameters, const Vector<double>& old_gradient, const Vector<double>& gradient, const Matrix<double>& old_inverse_Hessian) const
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();

   unsigned int parameters_number = neural_network_pointer->count_parameters_number();

   unsigned int old_parameters_size = old_parameters.size();
   unsigned int parameters_size = parameters.size();

   if(old_parameters_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of old parameters vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }
   else if(parameters_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of parameters vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }

   unsigned int old_gradient_size = old_gradient.size();
   unsigned int gradient_size = gradient.size();

   if(old_gradient_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of old gradient vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }
   else if(gradient_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of gradient vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }

   unsigned int rows_number = old_inverse_Hessian.get_rows_number();
   unsigned int columns_number = old_inverse_Hessian.get_columns_number();

   if(rows_number != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Number of rows in old inverse Hessian must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }
   else if(columns_number != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Number of columns in old inverse Hessian must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // Parameters difference vector

   Vector<double> parameters_difference = parameters - old_parameters;

   // Control sentence (if debug): all components numerically zero

   #ifdef _DEBUG

   if(parameters_difference > -1.0e-50 && parameters_difference < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Parameters difference vector is zero.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // Gradient difference vector

   Vector<double> gradient_difference = gradient - old_gradient;

   // Control sentence (if debug)

   #ifdef _DEBUG

   if(gradient_difference > -1.0e-50 && gradient_difference < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Gradient difference vector is zero.\n";

      throw std::logic_error(buffer.str());
   }

   if(old_inverse_Hessian > -1.0e-50 && old_inverse_Hessian < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Old inverse Hessian matrix is zero.\n";

      throw std::logic_error(buffer.str());
   }

   if(fabs(parameters_difference.dot(gradient_difference)) < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Denominator of first term is zero.\n";

      throw std::logic_error(buffer.str());
   }
   else if(fabs(gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference)) < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_DFP_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Denominator of second term is zero.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // DFP update

   Matrix<double> DFP_inverse_Hessian = old_inverse_Hessian
   + parameters_difference.direct(parameters_difference)
   /parameters_difference.dot(gradient_difference)
   - (old_inverse_Hessian.dot(gradient_difference)).direct(old_inverse_Hessian.dot(gradient_difference))
   /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);

   return(DFP_inverse_Hessian);
}


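// A reference sketch of the BFGS update implemented below (standard notation,
// with s = parameters_difference, y = gradient_difference, H = old inverse
// Hessian; ' denotes transpose):
//
//    u     = s/(s'y) - (H y)/(y'H y)
//    H_new = H + (s s')/(s'y) - (H y)(y'H)/(y'H y) + (y'H y)(u u')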
/// Returns an approximation of the inverse Hessian matrix according to the Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm.
/// @param old_parameters Another point of the performance function.
/// @param parameters Current point of the performance function.
/// @param old_gradient Gradient at the other point.
/// @param gradient Gradient at the current point.
/// @param old_inverse_Hessian Inverse Hessian at the other point of the performance function.

Matrix<double> QuasiNewtonMethod::calculate_BFGS_inverse_Hessian(
const Vector<double>& old_parameters, const Vector<double>& parameters, const Vector<double>& old_gradient, const Vector<double>& gradient, const Matrix<double>& old_inverse_Hessian) const
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();

   unsigned int parameters_number = neural_network_pointer->count_parameters_number();

   unsigned int old_parameters_size = old_parameters.size();
   unsigned int parameters_size = parameters.size();

   if(old_parameters_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of old parameters vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }
   else if(parameters_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of parameters vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }

   unsigned int old_gradient_size = old_gradient.size();
   unsigned int gradient_size = gradient.size();

   if(old_gradient_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of old gradient vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }
   else if(gradient_size != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Size of gradient vector must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }

   unsigned int rows_number = old_inverse_Hessian.get_rows_number();
   unsigned int columns_number = old_inverse_Hessian.get_columns_number();

   if(rows_number != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Number of rows in old inverse Hessian must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }
   else if(columns_number != parameters_number)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Number of columns in old inverse Hessian must be equal to number of parameters.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // Parameters difference vector

   Vector<double> parameters_difference = parameters - old_parameters;

   // Control sentence (if debug): all components numerically zero

   #ifdef _DEBUG

   if(parameters_difference > -1.0e-50 && parameters_difference < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Parameters difference vector is zero.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // Gradient difference vector

   Vector<double> gradient_difference = gradient - old_gradient;

   // Control sentence (if debug)

   #ifdef _DEBUG

   if(gradient_difference > -1.0e-50 && gradient_difference < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Gradient difference vector is zero.\n";

      throw std::logic_error(buffer.str());
   }

   if(old_inverse_Hessian > -1.0e-50 && old_inverse_Hessian < 1.0e-50)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
             << "Matrix<double> calculate_BFGS_inverse_Hessian(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&, const Matrix<double>&) method.\n"
             << "Old inverse Hessian matrix is zero.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // BFGS vector

   Vector<double> BFGS = parameters_difference/parameters_difference.dot(gradient_difference)
   - old_inverse_Hessian.dot(gradient_difference)
   /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference);

   // BFGS update

   Matrix<double> BFGS_inverse_Hessian = old_inverse_Hessian
   + parameters_difference.direct(parameters_difference)
   /parameters_difference.dot(gradient_difference)
   - (old_inverse_Hessian.dot(gradient_difference)).direct(gradient_difference.dot(old_inverse_Hessian))
   /gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference)
   + (BFGS.direct(BFGS))*(gradient_difference.dot(old_inverse_Hessian).dot(gradient_difference));

   return(BFGS_inverse_Hessian);
}


/// Resizes the training history variables which are to be reserved by the training algorithm.
/// @param new_size Size of training history variables.

void QuasiNewtonMethod::QuasiNetwonMethodResults::resize_training_history(const unsigned int& new_size)
{
   if(!parameters_history.empty())
   {
      parameters_history.resize(new_size);
   }

   if(!parameters_norm_history.empty())
   {
      parameters_norm_history.resize(new_size);
   }

   if(!evaluation_history.empty())
   {
      evaluation_history.resize(new_size);
   }

   if(!generalization_evaluation_history.empty())
   {
      generalization_evaluation_history.resize(new_size);
   }

   if(!gradient_history.empty())
   {
      gradient_history.resize(new_size);
   }

   if(!gradient_norm_history.empty())
   {
      gradient_norm_history.resize(new_size);
   }

   if(!inverse_Hessian_history.empty())
   {
      inverse_Hessian_history.resize(new_size);
   }

   if(!training_direction_history.empty())
   {
      training_direction_history.resize(new_size);
   }

   if(!training_rate_history.empty())
   {
      training_rate_history.resize(new_size);
   }

   if(!elapsed_time_history.empty())
   {
      elapsed_time_history.resize(new_size);
   }
}


/// Returns a string representation of the current quasi-Newton method results structure.

std::string QuasiNewtonMethod::QuasiNetwonMethodResults::to_string(void) const
{
   std::ostringstream buffer;

   // Parameters history

   if(!parameters_history.empty())
   {
      if(!parameters_history[0].empty())
      {
         buffer << "% Parameters history:\n"
                << parameters_history << "\n";
      }
   }

   // Parameters norm history

   if(!parameters_norm_history.empty())
   {
      buffer << "% Parameters norm history:\n"
             << parameters_norm_history << "\n";
   }

   // Performance history

   if(!evaluation_history.empty())
   {
      buffer << "% Performance history:\n"
             << evaluation_history << "\n";
   }

   // Generalization evaluation history

   if(!generalization_evaluation_history.empty())
   {
      buffer << "% Generalization evaluation history:\n"
             << generalization_evaluation_history << "\n";
   }

   // Gradient history

   if(!gradient_history.empty())
   {
      if(!gradient_history[0].empty())
      {
         buffer << "% Gradient history:\n"
                << gradient_history << "\n";
      }
   }

   // Gradient norm history

   if(!gradient_norm_history.empty())
   {
      buffer << "% Gradient norm history:\n"
             << gradient_norm_history << "\n";
   }

   // Inverse Hessian history

   if(!inverse_Hessian_history.empty())
   {
      if(!inverse_Hessian_history[0].empty())
      {
         buffer << "% Inverse Hessian history:\n"
                << inverse_Hessian_history << "\n";
      }
   }

   // Training direction history

   if(!training_direction_history.empty())
   {
      if(!training_direction_history[0].empty())
      {
         buffer << "% Training direction history:\n"
                << training_direction_history << "\n";
      }
   }

   // Training rate history

   if(!training_rate_history.empty())
   {
      buffer << "% Training rate history:\n"
             << training_rate_history << "\n";
   }

   // Elapsed time history

   if(!elapsed_time_history.empty())
   {
      buffer << "% Elapsed time history:\n"
             << elapsed_time_history << "\n";
   }

   return(buffer.str());
}


/// Trains a neural network with an associated performance functional according to the quasi-Newton method.
/// Training occurs according to the training parameters, and stops when a stopping criterion is met.
/// The caller takes ownership of the returned results object.

QuasiNewtonMethod::QuasiNetwonMethodResults* QuasiNewtonMethod::perform_training(void)
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   check();

   #endif

   // Start training

   if(display)
   {
      std::cout << "Training with quasi-Newton method...\n";
   }

   QuasiNetwonMethodResults* results_pointer = new QuasiNetwonMethodResults;

   // Reserve the requested training history

   if(reserve_parameters_history)
   {
      results_pointer->parameters_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_parameters_norm_history)
   {
      results_pointer->parameters_norm_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_evaluation_history)
   {
      results_pointer->evaluation_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_generalization_evaluation_history)
   {
      results_pointer->generalization_evaluation_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_gradient_history)
   {
      results_pointer->gradient_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_gradient_norm_history)
   {
      results_pointer->gradient_norm_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_inverse_Hessian_history)
   {
      results_pointer->inverse_Hessian_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_training_direction_history)
   {
      results_pointer->training_direction_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_training_rate_history)
   {
      results_pointer->training_rate_history.resize(1 + maximum_epochs_number);
   }
   if(reserve_elapsed_time_history)
   {
      results_pointer->elapsed_time_history.resize(1 + maximum_epochs_number);
   }

   // Neural network stuff

   NeuralNetwork* neural_network_pointer = performance_functional_pointer->get_neural_network_pointer();

   const unsigned int parameters_number = neural_network_pointer->count_parameters_number();

   Vector<double> parameters(parameters_number);
   Vector<double> old_parameters(parameters_number);
   double parameters_norm;

   Vector<double> parameters_increment(parameters_number);
   double parameters_increment_norm;

   // Performance functional stuff

   double performance = 0.0;
   double old_performance = 0.0;
   double performance_increase = 0.0;

   Vector<double> gradient(parameters_number);
   Vector<double> old_gradient(parameters_number);
   double gradient_norm;

   Matrix<double> inverse_Hessian(parameters_number, parameters_number);
   Matrix<double> old_inverse_Hessian(parameters_number, parameters_number);

   double generalization_evaluation = 0.0;
   double old_generalization_evaluation = 0.0;

   // Training algorithm stuff

   Vector<double> training_direction(parameters_number);

   double training_slope;

   const double& first_training_rate = training_rate_algorithm.get_first_training_rate();

   double initial_training_rate = 0.0;
   double training_rate = 0.0;
   double old_training_rate = 0.0;

   Vector<double> directional_point(2);
   directional_point[0] = 0.0;
   directional_point[1] = 0.0;

   bool stop_training = false;

   unsigned int generalization_evaluation_decreases_count = 0;

   time_t beginning_time, current_time;
   time(&beginning_time);
   double elapsed_time;

   // Main loop
   for(unsigned int epoch = 0; epoch <= maximum_epochs_number; epoch++)
   {
      // Neural network

      parameters = neural_network_pointer->arrange_parameters();

      parameters_norm = parameters.calculate_norm();

      if(display && parameters_norm >= warning_parameters_norm)
      {
         std::cout << "OpenNN Warning: Parameters norm is " << parameters_norm << ".\n";
      }

      // Performance functional

      if(epoch == 0)
      {
         performance = performance_functional_pointer->calculate_evaluation();
         performance_increase = 0.0;
      }
      else
      {
         performance = directional_point[1];
         performance_increase = old_performance - performance;
      }

      gradient = performance_functional_pointer->calculate_gradient();

      gradient_norm = gradient.calculate_norm();

      if(display && gradient_norm >= warning_gradient_norm)
      {
         std::cout << "OpenNN Warning: Gradient norm is " << gradient_norm << ".\n";
      }

      if(epoch == 0)
      {
         inverse_Hessian.initialize_identity();
      }
      else
      {
         inverse_Hessian = calculate_inverse_Hessian_approximation(old_parameters, parameters, old_gradient, gradient, old_inverse_Hessian);
      }

      generalization_evaluation = performance_functional_pointer->calculate_generalization_evaluation();

      if(epoch != 0 && generalization_evaluation > old_generalization_evaluation)
      {
         generalization_evaluation_decreases_count++;
      }

      // Training algorithm

      training_direction = calculate_training_direction(gradient, inverse_Hessian);

      // Calculate performance training slope

      training_slope = (gradient/gradient_norm).dot(training_direction);

      // Check for a descent direction

      if(training_slope >= 0.0)
      {
         // Reset training direction to gradient descent

         training_direction = calculate_gradient_descent_training_direction(gradient);
      }

      // Get initial training rate

      if(epoch == 0)
      {
         initial_training_rate = first_training_rate;
      }
      else
      {
         initial_training_rate = old_training_rate;
      }

      directional_point = training_rate_algorithm.calculate_directional_point(performance, training_direction, initial_training_rate);

      training_rate = directional_point[0];

      if(epoch != 0 && training_rate < 1.0e-99)
      {
         // Reset training direction to gradient descent

         training_direction = calculate_gradient_descent_training_direction(gradient);

         directional_point = training_rate_algorithm.calculate_directional_point(performance, training_direction, first_training_rate);

         training_rate = directional_point[0];
      }

      parameters_increment = training_direction*training_rate;
      parameters_increment_norm = parameters_increment.calculate_norm();

      // Elapsed time

      time(&current_time);
      elapsed_time = difftime(current_time, beginning_time);

      // Training history: neural network

      if(reserve_parameters_history)
      {
         results_pointer->parameters_history[epoch] = parameters;
      }

      if(reserve_parameters_norm_history)
      {
         results_pointer->parameters_norm_history[epoch] = parameters_norm;
      }

      // Training history: performance functional

      if(reserve_evaluation_history)
      {
         results_pointer->evaluation_history[epoch] = performance;
      }

      if(reserve_generalization_evaluation_history)
      {
         results_pointer->generalization_evaluation_history[epoch] = generalization_evaluation;
      }

      if(reserve_gradient_history)
      {
         results_pointer->gradient_history[epoch] = gradient;
      }

      if(reserve_gradient_norm_history)
      {
         results_pointer->gradient_norm_history[epoch] = gradient_norm;
      }

      if(reserve_inverse_Hessian_history)
      {
         results_pointer->inverse_Hessian_history[epoch] = inverse_Hessian;
      }

      // Training history: training algorithm

      if(reserve_training_direction_history)
      {
         results_pointer->training_direction_history[epoch] = training_direction;
      }

      if(reserve_training_rate_history)
      {
         results_pointer->training_rate_history[epoch] = training_rate;
      }

      if(reserve_elapsed_time_history)
      {
         results_pointer->elapsed_time_history[epoch] = elapsed_time;
      }

      // Stopping criteria

      if(parameters_increment_norm <= minimum_parameters_increment_norm)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Minimum parameters increment norm reached.\n"
                      << "Parameters increment norm: " << parameters_increment_norm << std::endl;
         }

         stop_training = true;
      }

      if(epoch != 0 && performance_increase <= minimum_performance_increase)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Minimum performance increase reached.\n"
                      << "Performance increase: " << performance_increase << std::endl;
         }

         stop_training = true;
      }

      else if(performance <= performance_goal)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Performance goal reached.\n";
         }

         stop_training = true;
      }

      else if(gradient_norm <= gradient_norm_goal)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Gradient norm goal reached.\n";
         }

         stop_training = true;
      }

      else if(generalization_evaluation_decreases_count >= maximum_generalization_evaluation_decreases)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Maximum generalization performance decreases reached.\n"
                      << "Generalization performance decreases: " << generalization_evaluation_decreases_count << std::endl;
         }

         stop_training = true;
      }

      else if(epoch == maximum_epochs_number)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Maximum number of epochs reached.\n";
         }

         stop_training = true;
      }

      else if(elapsed_time >= maximum_time)
      {
         if(display)
         {
            std::cout << "Epoch " << epoch << ": Maximum training time reached.\n";
         }

         stop_training = true;
      }

      if(stop_training)
      {
         results_pointer->final_parameters = parameters;
         results_pointer->final_parameters_norm = parameters_norm;

         results_pointer->final_evaluation = performance;
         results_pointer->final_generalization_evaluation = generalization_evaluation;

         results_pointer->final_gradient = gradient;
         results_pointer->final_gradient_norm = gradient_norm;

         results_pointer->final_training_direction = training_direction;
         results_pointer->final_training_rate = training_rate;
         results_pointer->elapsed_time = elapsed_time;

         results_pointer->resize_training_history(epoch+1);

         if(display)
         {
            std::cout << "Parameters norm: " << parameters_norm << "\n"
                      << "Performance: " << performance << "\n"
                      << "Gradient norm: " << gradient_norm << "\n"
                      << performance_functional_pointer->write_information()
                      << "Training rate: " << training_rate << "\n"
                      << "Elapsed time: " << elapsed_time << std::endl;

            if(generalization_evaluation != 0)
            {
               std::cout << "Generalization performance: " << generalization_evaluation << std::endl;
            }
         }

         break;
      }
      else if(display && epoch % display_period == 0)
      {
         std::cout << "Epoch " << epoch << ";\n"
                   << "Parameters norm: " << parameters_norm << "\n"
                   << "Performance: " << performance << "\n"
                   << "Gradient norm: " << gradient_norm << "\n"
                   << performance_functional_pointer->write_information()
                   << "Training rate: " << training_rate << "\n"
                   << "Elapsed time: " << elapsed_time << std::endl;

         if(generalization_evaluation != 0)
         {
            std::cout << "Generalization performance: " << generalization_evaluation << std::endl;
         }
      }

      // Update stuff

      old_parameters = parameters;

      old_performance = performance;

      old_gradient = gradient;

      old_inverse_Hessian = inverse_Hessian;

      old_generalization_evaluation = generalization_evaluation;

      old_training_rate = training_rate;

      // Set new parameters

      parameters += parameters_increment;

      neural_network_pointer->set_parameters(parameters);
   }

   return(results_pointer);
}
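
// Note (hedged): perform_training() returns a heap-allocated results object,
// so the caller owns the pointer. A minimal sketch of consuming it:
//
//    QuasiNewtonMethod::QuasiNetwonMethodResults* results = quasi_Newton_method.perform_training();
//    std::cout << results->to_string();
//    delete results;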


/// Returns a string with the type of training algorithm: "QUASI_NEWTON_METHOD".

std::string QuasiNewtonMethod::write_training_algorithm_type(void) const
{
   return("QUASI_NEWTON_METHOD");
}


/// Serializes the quasi-Newton method object into a XML element of the TinyXML library.

TiXmlElement* QuasiNewtonMethod::to_XML(void) const
{
   std::ostringstream buffer;

   // Quasi-Newton method

   TiXmlElement* quasi_Newton_method_element = new TiXmlElement("QuasiNewtonMethod");
   quasi_Newton_method_element->SetAttribute("Version", 4);

   // Training rate algorithm

   TiXmlElement* training_rate_algorithm_element = training_rate_algorithm.to_XML();
   quasi_Newton_method_element->LinkEndChild(training_rate_algorithm_element);

   // Warning parameters norm

   TiXmlElement* warning_parameters_norm_element = new TiXmlElement("WarningParametersNorm");
   quasi_Newton_method_element->LinkEndChild(warning_parameters_norm_element);

   buffer.str("");
   buffer << warning_parameters_norm;

   TiXmlText* warning_parameters_norm_text = new TiXmlText(buffer.str().c_str());
   warning_parameters_norm_element->LinkEndChild(warning_parameters_norm_text);

   // Warning gradient norm

   TiXmlElement* warning_gradient_norm_element = new TiXmlElement("WarningGradientNorm");
   quasi_Newton_method_element->LinkEndChild(warning_gradient_norm_element);

   buffer.str("");
   buffer << warning_gradient_norm;

   TiXmlText* warning_gradient_norm_text = new TiXmlText(buffer.str().c_str());
   warning_gradient_norm_element->LinkEndChild(warning_gradient_norm_text);

   // Warning training rate

   TiXmlElement* warning_training_rate_element = new TiXmlElement("WarningTrainingRate");
   quasi_Newton_method_element->LinkEndChild(warning_training_rate_element);

   buffer.str("");
   buffer << warning_training_rate;

   TiXmlText* warning_training_rate_text = new TiXmlText(buffer.str().c_str());
   warning_training_rate_element->LinkEndChild(warning_training_rate_text);

   // Error parameters norm

   TiXmlElement* error_parameters_norm_element = new TiXmlElement("ErrorParametersNorm");
   quasi_Newton_method_element->LinkEndChild(error_parameters_norm_element);

   buffer.str("");
   buffer << error_parameters_norm;

   TiXmlText* error_parameters_norm_text = new TiXmlText(buffer.str().c_str());
   error_parameters_norm_element->LinkEndChild(error_parameters_norm_text);

   // Error gradient norm

   TiXmlElement* error_gradient_norm_element = new TiXmlElement("ErrorGradientNorm");
   quasi_Newton_method_element->LinkEndChild(error_gradient_norm_element);

   buffer.str("");
   buffer << error_gradient_norm;

   TiXmlText* error_gradient_norm_text = new TiXmlText(buffer.str().c_str());
   error_gradient_norm_element->LinkEndChild(error_gradient_norm_text);

   // Error training rate

   TiXmlElement* error_training_rate_element = new TiXmlElement("ErrorTrainingRate");
   quasi_Newton_method_element->LinkEndChild(error_training_rate_element);

   buffer.str("");
   buffer << error_training_rate;

   TiXmlText* error_training_rate_text = new TiXmlText(buffer.str().c_str());
   error_training_rate_element->LinkEndChild(error_training_rate_text);

   // Minimum parameters increment norm

   TiXmlElement* minimum_parameters_increment_norm_element = new TiXmlElement("MinimumParametersIncrement");
   quasi_Newton_method_element->LinkEndChild(minimum_parameters_increment_norm_element);

   buffer.str("");
   buffer << minimum_parameters_increment_norm;

   TiXmlText* minimum_parameters_increment_norm_text = new TiXmlText(buffer.str().c_str());
   minimum_parameters_increment_norm_element->LinkEndChild(minimum_parameters_increment_norm_text);

   // Minimum performance increase

   TiXmlElement* minimum_performance_increase_element = new TiXmlElement("MinimumPerformanceIncrease");
   quasi_Newton_method_element->LinkEndChild(minimum_performance_increase_element);

   buffer.str("");
   buffer << minimum_performance_increase;

   TiXmlText* minimum_performance_increase_text = new TiXmlText(buffer.str().c_str());
   minimum_performance_increase_element->LinkEndChild(minimum_performance_increase_text);

   // Performance goal

   TiXmlElement* performance_goal_element = new TiXmlElement("PerformanceGoal");
   quasi_Newton_method_element->LinkEndChild(performance_goal_element);

   buffer.str("");
   buffer << performance_goal;

   TiXmlText* performance_goal_text = new TiXmlText(buffer.str().c_str());
   performance_goal_element->LinkEndChild(performance_goal_text);

   // Gradient norm goal

   TiXmlElement* gradient_norm_goal_element = new TiXmlElement("GradientNormGoal");
   quasi_Newton_method_element->LinkEndChild(gradient_norm_goal_element);

   buffer.str("");
   buffer << gradient_norm_goal;

   TiXmlText* gradient_norm_goal_text = new TiXmlText(buffer.str().c_str());
   gradient_norm_goal_element->LinkEndChild(gradient_norm_goal_text);

   // Maximum generalization evaluation decreases

   TiXmlElement* maximum_generalization_evaluation_decreases_element = new TiXmlElement("MaximumGeneralizationEvaluationDecreases");
   quasi_Newton_method_element->LinkEndChild(maximum_generalization_evaluation_decreases_element);

   buffer.str("");
   buffer << maximum_generalization_evaluation_decreases;

   TiXmlText* maximum_generalization_evaluation_decreases_text = new TiXmlText(buffer.str().c_str());
   maximum_generalization_evaluation_decreases_element->LinkEndChild(maximum_generalization_evaluation_decreases_text);

   // Maximum epochs number

   TiXmlElement* maximum_epochs_number_element = new TiXmlElement("MaximumEpochsNumber");
   quasi_Newton_method_element->LinkEndChild(maximum_epochs_number_element);

   buffer.str("");
   buffer << maximum_epochs_number;

   TiXmlText* maximum_epochs_number_text = new TiXmlText(buffer.str().c_str());
   maximum_epochs_number_element->LinkEndChild(maximum_epochs_number_text);

   // Maximum time

   TiXmlElement* maximum_time_element = new TiXmlElement("MaximumTime");
   quasi_Newton_method_element->LinkEndChild(maximum_time_element);

   buffer.str("");
   buffer << maximum_time;

   TiXmlText* maximum_time_text = new TiXmlText(buffer.str().c_str());
   maximum_time_element->LinkEndChild(maximum_time_text);

   // Reserve parameters history

   TiXmlElement* reserve_parameters_history_element = new TiXmlElement("ReserveParametersHistory");
   quasi_Newton_method_element->LinkEndChild(reserve_parameters_history_element);

   buffer.str("");
   buffer << reserve_parameters_history;

   TiXmlText* reserve_parameters_history_text = new TiXmlText(buffer.str().c_str());
   reserve_parameters_history_element->LinkEndChild(reserve_parameters_history_text);

   // Reserve parameters norm history

   TiXmlElement* reserve_parameters_norm_history_element = new TiXmlElement("ReserveParametersNormHistory");
   quasi_Newton_method_element->LinkEndChild(reserve_parameters_norm_history_element);

   buffer.str("");
   buffer << reserve_parameters_norm_history;

   TiXmlText* reserve_parameters_norm_history_text = new TiXmlText(buffer.str().c_str());
   reserve_parameters_norm_history_element->LinkEndChild(reserve_parameters_norm_history_text);

   // Reserve evaluation history

   TiXmlElement* reserve_evaluation_history_element = new TiXmlElement("ReservePerformanceHistory");
   quasi_Newton_method_element->LinkEndChild(reserve_evaluation_history_element);

   buffer.str("");
   buffer << reserve_evaluation_history;

   TiXmlText* reserve_evaluation_history_text = new TiXmlText(buffer.str().c_str());
   reserve_evaluation_history_element->LinkEndChild(reserve_evaluation_history_text);

   // Reserve gradient history

   TiXmlElement* reserve_gradient_history_element = new TiXmlElement("ReserveGradientHistory");
   quasi_Newton_method_element->LinkEndChild(reserve_gradient_history_element);

   buffer.str("");
   buffer << reserve_gradient_history;

   TiXmlText* reserve_gradient_history_text = new TiXmlText(buffer.str().c_str());
   reserve_gradient_history_element->LinkEndChild(reserve_gradient_history_text);

   // Reserve gradient norm history

   TiXmlElement* reserve_gradient_norm_history_element = new TiXmlElement("ReserveGradientNormHistory");
   quasi_Newton_method_element->LinkEndChild(reserve_gradient_norm_history_element);

   buffer.str("");
   buffer << reserve_gradient_norm_history;

   TiXmlText* reserve_gradient_norm_history_text = new TiXmlText(buffer.str().c_str());
   reserve_gradient_norm_history_element->LinkEndChild(reserve_gradient_norm_history_text);

   // Reserve inverse Hessian history

   TiXmlElement* reserve_inverse_Hessian_history_element = new TiXmlElement("ReserveInverseHessianHistory");
   quasi_Newton_method_element->LinkEndChild(reserve_inverse_Hessian_history_element);

   buffer.str("");
   buffer << reserve_inverse_Hessian_history;
02302
02303 TiXmlText* reserve_inverse_Hessian_history_text = new TiXmlText(buffer.str().c_str());
02304 reserve_inverse_Hessian_history_element->LinkEndChild(reserve_inverse_Hessian_history_text);
02305
02306
02307
02308 TiXmlElement* reserve_training_direction_history_element = new TiXmlElement("ReserveTrainingDirectionHistory");
02309 quasi_Newton_method_element->LinkEndChild(reserve_training_direction_history_element);
02310
02311 buffer.str("");
02312 buffer << reserve_training_direction_history;
02313
02314 TiXmlText* reserve_training_direction_history_text = new TiXmlText(buffer.str().c_str());
02315 reserve_training_direction_history_element->LinkEndChild(reserve_training_direction_history_text);
02316
02317
02318
02319 TiXmlElement* reserve_training_rate_history_element = new TiXmlElement("ReserveTrainingRateHistory");
02320 quasi_Newton_method_element->LinkEndChild(reserve_training_rate_history_element);
02321
02322 buffer.str("");
02323 buffer << reserve_training_rate_history;
02324
02325 TiXmlText* reserve_training_rate_history_text = new TiXmlText(buffer.str().c_str());
02326 reserve_training_rate_history_element->LinkEndChild(reserve_training_rate_history_text);
02327
02328
02329
02330 TiXmlElement* reserve_elapsed_time_history_element = new TiXmlElement("ReserveElapsedTimeHistory");
02331 quasi_Newton_method_element->LinkEndChild(reserve_elapsed_time_history_element);
02332
02333 buffer.str("");
02334 buffer << reserve_elapsed_time_history;
02335
02336 TiXmlText* reserve_elapsed_time_history_text = new TiXmlText(buffer.str().c_str());
02337 reserve_elapsed_time_history_element->LinkEndChild(reserve_elapsed_time_history_text);
02338
02339
02340
02341 TiXmlElement* reserve_generalization_evaluation_history_element = new TiXmlElement("ReserveGeneralizationPerformanceHistory");
02342 quasi_Newton_method_element->LinkEndChild(reserve_generalization_evaluation_history_element);
02343
02344 buffer.str("");
02345 buffer << reserve_generalization_evaluation_history;
02346
02347 TiXmlText* reserve_generalization_evaluation_history_text = new TiXmlText(buffer.str().c_str());
02348 reserve_generalization_evaluation_history_element->LinkEndChild(reserve_generalization_evaluation_history_text);
02349
02350
02351
02352 TiXmlElement* display_period_element = new TiXmlElement("DisplayPeriod");
02353 quasi_Newton_method_element->LinkEndChild(display_period_element);
02354
02355 buffer.str("");
02356 buffer << display_period;
02357
02358 TiXmlText* display_period_text = new TiXmlText(buffer.str().c_str());
02359 display_period_element->LinkEndChild(display_period_text);
02360
02361
02362
02363 TiXmlElement* display_element = new TiXmlElement("Display");
02364 quasi_Newton_method_element->LinkEndChild(display_element);
02365
02366 buffer.str("");
02367 buffer << display;
02368
02369 TiXmlText* display_text = new TiXmlText(buffer.str().c_str());
02370 display_element->LinkEndChild(display_text);
02371
02372 return(quasi_Newton_method_element);
02373 }
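
// The twenty-odd blocks above all repeat the same create-element / print-value /
// link-text pattern. A minimal refactoring sketch (the helper name
// append_text_element is hypothetical, not part of the OpenNN API) that would
// factor it out:

template <class T>
void append_text_element(TiXmlElement* parent, const char* name, const T& value)
{
   // Create the child element and hand ownership to the parent.
   TiXmlElement* element = new TiXmlElement(name);
   parent->LinkEndChild(element);

   // Print the value and attach it as the element's text node.
   std::ostringstream buffer;
   buffer << value;

   TiXmlText* text = new TiXmlText(buffer.str().c_str());
   element->LinkEndChild(text);
}

// Usage inside to_XML, e.g.:
//
//    append_text_element(quasi_Newton_method_element, "MaximumTime", maximum_time);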
02374
02375
02376 /// Loads the quasi-Newton method members from a XML element.
02377 /// @param quasi_Newton_method_element Pointer to the XML element holding the member data.
02378 void QuasiNewtonMethod::from_XML(TiXmlElement* quasi_Newton_method_element)
02379 {
02380
02381
02382 TiXmlElement* inverse_Hessian_approximation_method_element = quasi_Newton_method_element->FirstChildElement("InverseHessianApproximationMethod");
02383
02384 if(inverse_Hessian_approximation_method_element)
02385 {
02386 std::string new_inverse_Hessian_approximation_method = inverse_Hessian_approximation_method_element->GetText();
02387
02388 try
02389 {
02390 set_inverse_Hessian_approximation_method(new_inverse_Hessian_approximation_method);
02391 }
02392 catch(std::exception& e)
02393 {
02394 std::cout << e.what() << std::endl;
02395 }
02396 }
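
// For reference, the element parsed here has the shape produced by to_XML
// above; assuming the root tag is "QuasiNewtonMethod" (its creation falls
// outside this listing), an instance looks like (values illustrative):
//
//    <QuasiNewtonMethod>
//       <InverseHessianApproximationMethod>BFGS</InverseHessianApproximationMethod>
//       <WarningParametersNorm>1e6</WarningParametersNorm>
//       ...
//       <Display>1</Display>
//    </QuasiNewtonMethod>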
02397
02398
02399
02400 TiXmlElement* training_rate_algorithm_element = quasi_Newton_method_element->FirstChildElement("TrainingRateAlgorithm");
02401
02402 if(training_rate_algorithm_element)
02403 {
02404 try
02405 {
02406 training_rate_algorithm.from_XML(training_rate_algorithm_element);
02407 }
02408 catch(std::exception& e)
02409 {
02410 std::cout << e.what() << std::endl;
02411 }
02412 }
02413
02414
02415
02416 TiXmlElement* warning_parameters_norm_element = quasi_Newton_method_element->FirstChildElement("WarningParametersNorm");
02417
02418 if(warning_parameters_norm_element)
02419 {
02420 double new_warning_parameters_norm = atof(warning_parameters_norm_element->GetText());
02421
02422 try
02423 {
02424 set_warning_parameters_norm(new_warning_parameters_norm);
02425 }
02426 catch(std::exception& e)
02427 {
02428 std::cout << e.what() << std::endl;
02429 }
02430 }
02431
02432
02433
02434 TiXmlElement* warning_gradient_norm_element = quasi_Newton_method_element->FirstChildElement("WarningGradientNorm");
02435
02436 if(warning_gradient_norm_element)
02437 {
02438 double new_warning_gradient_norm = atof(warning_gradient_norm_element->GetText());
02439
02440 try
02441 {
02442 set_warning_gradient_norm(new_warning_gradient_norm);
02443 }
02444 catch(std::exception& e)
02445 {
02446 std::cout << e.what() << std::endl;
02447 }
02448 }
02449
02450
02451
02452 TiXmlElement* warning_training_rate_element = quasi_Newton_method_element->FirstChildElement("WarningTrainingRate");
02453
02454 if(warning_training_rate_element)
02455 {
02456 double new_warning_training_rate = atof(warning_training_rate_element->GetText());
02457
02458 try
02459 {
02460 set_warning_training_rate(new_warning_training_rate);
02461 }
02462 catch(std::exception& e)
02463 {
02464 std::cout << e.what() << std::endl;
02465 }
02466 }
02467
02468
02469
02470 TiXmlElement* error_parameters_norm_element = quasi_Newton_method_element->FirstChildElement("ErrorParametersNorm");
02471
02472 if(error_parameters_norm_element)
02473 {
02474 double new_error_parameters_norm = atof(error_parameters_norm_element->GetText());
02475
02476 try
02477 {
02478 set_error_parameters_norm(new_error_parameters_norm);
02479 }
02480 catch(std::exception& e)
02481 {
02482 std::cout << e.what() << std::endl;
02483 }
02484 }
02485
02486
02487
02488 TiXmlElement* error_gradient_norm_element = quasi_Newton_method_element->FirstChildElement("ErrorGradientNorm");
02489
02490 if(error_gradient_norm_element)
02491 {
02492 double new_error_gradient_norm = atof(error_gradient_norm_element->GetText());
02493
02494 try
02495 {
02496 set_error_gradient_norm(new_error_gradient_norm);
02497 }
02498 catch(std::exception& e)
02499 {
02500 std::cout << e.what() << std::endl;
02501 }
02502 }
02503
02504
02505
02506 TiXmlElement* error_training_rate_element = quasi_Newton_method_element->FirstChildElement("ErrorTrainingRate");
02507
02508 if(error_training_rate_element)
02509 {
02510 double new_error_training_rate = atof(error_training_rate_element->GetText());
02511
02512 try
02513 {
02514 set_error_training_rate(new_error_training_rate);
02515 }
02516 catch(std::exception& e)
02517 {
02518 std::cout << e.what() << std::endl;
02519 }
02520 }
02521
02522
02523
02524 TiXmlElement* minimum_parameters_increment_norm_element = quasi_Newton_method_element->FirstChildElement("MinimumParametersIncrementNorm");
02525
02526 if(minimum_parameters_increment_norm_element)
02527 {
02528 double new_minimum_parameters_increment_norm = atof(minimum_parameters_increment_norm_element->GetText());
02529
02530 try
02531 {
02532 set_minimum_parameters_increment_norm(new_minimum_parameters_increment_norm);
02533 }
02534 catch(std::exception& e)
02535 {
02536 std::cout << e.what() << std::endl;
02537 }
02538 }
02539
02540
02541
02542 TiXmlElement* minimum_performance_increase_element = quasi_Newton_method_element->FirstChildElement("MinimumPerformanceIncrease");
02543
02544 if(minimum_performance_increase_element)
02545 {
02546 double new_minimum_performance_increase = atof(minimum_performance_increase_element->GetText());
02547
02548 try
02549 {
02550 set_minimum_performance_increase(new_minimum_performance_increase);
02551 }
02552 catch(std::exception& e)
02553 {
02554 std::cout << e.what() << std::endl;
02555 }
02556 }
02557
02558
02559
02560 TiXmlElement* performance_goal_element = quasi_Newton_method_element->FirstChildElement("PerformanceGoal");
02561
02562 if(performance_goal_element)
02563 {
02564 double new_performance_goal = atof(performance_goal_element->GetText());
02565
02566 try
02567 {
02568 set_performance_goal(new_performance_goal);
02569 }
02570 catch(std::exception& e)
02571 {
02572 std::cout << e.what() << std::endl;
02573 }
02574 }
02575
02576
02577
02578 TiXmlElement* gradient_norm_goal_element = quasi_Newton_method_element->FirstChildElement("GradientNormGoal");
02579
02580 if(gradient_norm_goal_element)
02581 {
02582 double new_gradient_norm_goal = atof(gradient_norm_goal_element->GetText());
02583
02584 try
02585 {
02586 set_gradient_norm_goal(new_gradient_norm_goal);
02587 }
02588 catch(std::exception& e)
02589 {
02590 std::cout << e.what() << std::endl;
02591 }
02592 }
02593
02594
02595
02596 TiXmlElement* maximum_generalization_evaluation_decreases_element = quasi_Newton_method_element->FirstChildElement("MaximumGeneralizationEvaluationDecreases");
02597
02598 if(maximum_generalization_evaluation_decreases_element)
02599 {
02600 unsigned int new_maximum_generalization_evaluation_decreases = atoi(maximum_generalization_evaluation_decreases_element->GetText());
02601
02602 try
02603 {
02604 set_maximum_generalization_evaluation_decreases(new_maximum_generalization_evaluation_decreases);
02605 }
02606 catch(std::exception& e)
02607 {
02608 std::cout << e.what() << std::endl;
02609 }
02610 }
02611
02612
02613
02614 TiXmlElement* maximum_epochs_number_element = quasi_Newton_method_element->FirstChildElement("MaximumEpochsNumber");
02615
02616 if(maximum_epochs_number_element)
02617 {
02618 unsigned int new_maximum_epochs_number = atoi(maximum_epochs_number_element->GetText());
02619
02620 try
02621 {
02622 set_maximum_epochs_number(new_maximum_epochs_number);
02623 }
02624 catch(std::exception& e)
02625 {
02626 std::cout << e.what() << std::endl;
02627 }
02628 }
02629
02630
02631
02632 TiXmlElement* maximum_time_element = quasi_Newton_method_element->FirstChildElement("MaximumTime");
02633
02634 if(maximum_time_element)
02635 {
02636 double new_maximum_time = atof(maximum_time_element->GetText());
02637
02638 try
02639 {
02640 set_maximum_time(new_maximum_time);
02641 }
02642 catch(std::exception& e)
02643 {
02644 std::cout << e.what() << std::endl;
02645 }
02646 }
02647
02648
02649
02650 TiXmlElement* reserve_parameters_history_element = quasi_Newton_method_element->FirstChildElement("ReserveParametersHistory");
02651
02652 if(reserve_parameters_history_element)
02653 {
02654 std::string new_reserve_parameters_history = reserve_parameters_history_element->GetText();
02655
02656 try
02657 {
02658 set_reserve_parameters_history(new_reserve_parameters_history != "0");
02659 }
02660 catch(std::exception& e)
02661 {
02662 std::cout << e.what() << std::endl;
02663 }
02664 }
02665
02666
02667
02668 TiXmlElement* reserve_parameters_norm_history_element = quasi_Newton_method_element->FirstChildElement("ReserveParametersNormHistory");
02669
02670 if(reserve_parameters_norm_history_element)
02671 {
02672 std::string new_reserve_parameters_norm_history = reserve_parameters_norm_history_element->GetText();
02673
02674 try
02675 {
02676 set_reserve_parameters_norm_history(new_reserve_parameters_norm_history != "0");
02677 }
02678 catch(std::exception& e)
02679 {
02680 std::cout << e.what() << std::endl;
02681 }
02682 }
02683
02684
02685
02686 TiXmlElement* reserve_evaluation_history_element = quasi_Newton_method_element->FirstChildElement("ReservePerformanceHistory");
02687
02688 if(reserve_evaluation_history_element)
02689 {
02690 std::string new_reserve_evaluation_history = reserve_evaluation_history_element->GetText();
02691
02692 try
02693 {
02694 set_reserve_evaluation_history(new_reserve_evaluation_history != "0");
02695 }
02696 catch(std::exception& e)
02697 {
02698 std::cout << e.what() << std::endl;
02699 }
02700 }
02701
02702
02703
02704 TiXmlElement* reserve_gradient_history_element = quasi_Newton_method_element->FirstChildElement("ReserveGradientHistory");
02705
02706 if(reserve_gradient_history_element)
02707 {
02708 std::string new_reserve_gradient_history = reserve_gradient_history_element->GetText();
02709
02710 try
02711 {
02712 set_reserve_gradient_history(new_reserve_gradient_history != "0");
02713 }
02714 catch(std::exception& e)
02715 {
02716 std::cout << e.what() << std::endl;
02717 }
02718 }
02719
02720
02721
02722 TiXmlElement* reserve_gradient_norm_history_element = quasi_Newton_method_element->FirstChildElement("ReserveGradientNormHistory");
02723
02724 if(reserve_gradient_norm_history_element)
02725 {
02726 std::string new_reserve_gradient_norm_history = reserve_gradient_norm_history_element->GetText();
02727
02728 try
02729 {
02730 set_reserve_gradient_norm_history(new_reserve_gradient_norm_history != "0");
02731 }
02732 catch(std::exception& e)
02733 {
02734 std::cout << e.what() << std::endl;
02735 }
02736 }
02737
02738
02739
02740 TiXmlElement* reserve_inverse_Hessian_history_element = quasi_Newton_method_element->FirstChildElement("ReserveInverseHessianHistory");
02741
02742 if(reserve_inverse_Hessian_history_element)
02743 {
02744 std::string new_reserve_inverse_Hessian_history = reserve_inverse_Hessian_history_element->GetText();
02745
02746 try
02747 {
02748 set_reserve_inverse_Hessian_history(new_reserve_inverse_Hessian_history != "0");
02749 }
02750 catch(std::exception& e)
02751 {
02752 std::cout << e.what() << std::endl;
02753 }
02754 }
02755
02756
02757
02758 TiXmlElement* reserve_training_direction_history_element = quasi_Newton_method_element->FirstChildElement("ReserveTrainingDirectionHistory");
02759
02760 if(reserve_training_direction_history_element)
02761 {
02762 std::string new_reserve_training_direction_history = reserve_training_direction_history_element->GetText();
02763
02764 try
02765 {
02766 set_reserve_training_direction_history(new_reserve_training_direction_history != "0");
02767 }
02768 catch(std::exception& e)
02769 {
02770 std::cout << e.what() << std::endl;
02771 }
02772 }
02773
02774
02775
02776 TiXmlElement* reserve_training_rate_history_element = quasi_Newton_method_element->FirstChildElement("ReserveTrainingRateHistory");
02777
02778 if(reserve_training_rate_history_element)
02779 {
02780 std::string new_reserve_training_rate_history = reserve_training_rate_history_element->GetText();
02781
02782 try
02783 {
02784 set_reserve_training_rate_history(new_reserve_training_rate_history != "0");
02785 }
02786 catch(std::exception& e)
02787 {
02788 std::cout << e.what() << std::endl;
02789 }
02790 }
02791
02792
02793
02794 TiXmlElement* reserve_elapsed_time_history_element = quasi_Newton_method_element->FirstChildElement("ReserveElapsedTimeHistory");
02795
02796 if(reserve_elapsed_time_history_element)
02797 {
02798 std::string new_reserve_elapsed_time_history = reserve_elapsed_time_history_element->GetText();
02799
02800 try
02801 {
02802 set_reserve_elapsed_time_history(new_reserve_elapsed_time_history != "0");
02803 }
02804 catch(std::exception& e)
02805 {
02806 std::cout << e.what() << std::endl;
02807 }
02808 }
02809
02810
02811
02812 TiXmlElement* reserve_generalization_evaluation_history_element = quasi_Newton_method_element->FirstChildElement("ReserveGeneralizationPerformanceHistory");
02813
02814 if(reserve_generalization_evaluation_history_element)
02815 {
02816 std::string new_reserve_generalization_evaluation_history = reserve_generalization_evaluation_history_element->GetText();
02817
02818 try
02819 {
02820 set_reserve_generalization_evaluation_history(new_reserve_generalization_evaluation_history != "0");
02821 }
02822 catch(std::exception& e)
02823 {
02824 std::cout << e.what() << std::endl;
02825 }
02826 }
02827
02828
02829
02830 TiXmlElement* display_period_element = quasi_Newton_method_element->FirstChildElement("DisplayPeriod");
02831
02832 if(display_period_element)
02833 {
02834 unsigned int new_display_period = atoi(display_period_element->GetText());
02835
02836 try
02837 {
02838 set_display_period(new_display_period);
02839 }
02840 catch(std::exception& e)
02841 {
02842 std::cout << e.what() << std::endl;
02843 }
02844 }
02845
02846
02847
02848 TiXmlElement* display_element = quasi_Newton_method_element->FirstChildElement("Display");
02849
02850 if(display_element)
02851 {
02852 std::string new_display = display_element->GetText();
02853
02854 try
02855 {
02856 set_display(new_display != "0");
02857 }
02858 catch(std::exception& e)
02859 {
02860 std::cout << e.what() << std::endl;
02861 }
02862 }
02863 }
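
// The numeric blocks above likewise repeat "find child element, convert text,
// call setter". A hypothetical helper (not part of OpenNN; it assumes setters
// of the form void set_x(const double&)) that factors the double-valued case,
// with a null-text guard the blocks above omit:

template <class Object>
void set_double_from_element(TiXmlElement* parent, const char* name,
                             Object& object, void (Object::*setter)(const double&))
{
   TiXmlElement* element = parent->FirstChildElement(name);

   // GetText() returns NULL for an empty element; guard before converting.
   if(element && element->GetText())
   {
      const double value = atof(element->GetText());

      try
      {
         (object.*setter)(value);
      }
      catch(std::exception& e)
      {
         std::cout << e.what() << std::endl;
      }
   }
}

// Usage inside from_XML, e.g.:
//
//    set_double_from_element(quasi_Newton_method_element, "MaximumTime",
//                            *this, &QuasiNewtonMethod::set_maximum_time);
//
// A boolean analogue would compare element->GetText() against "0", as the
// reserve-history blocks above do.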
02864
02865 }
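
// A minimal round-trip sketch as a standalone program (the file name and the
// "tinyxml.h" header path are assumptions; error handling elided): serialize
// the training algorithm with to_XML, then restore it with from_XML.

#include <iostream>

#include "tinyxml.h"
#include "quasi_newton_method.h"

int main(void)
{
   OpenNN::QuasiNewtonMethod quasi_Newton_method;

   // Save: wrap the element returned by to_XML in a document and write it out.
   // LinkEndChild takes ownership of the element.
   TiXmlDocument output_document;
   output_document.LinkEndChild(quasi_Newton_method.to_XML());
   output_document.SaveFile("quasi_newton_method.xml");

   // Load: parse the file back and hand the root element to from_XML.
   TiXmlDocument input_document("quasi_newton_method.xml");

   if(input_document.LoadFile())
   {
      quasi_Newton_method.from_XML(input_document.FirstChildElement());
   }

   return(0);
}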