00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <string>
00019 #include <sstream>
00020 #include <iostream>
00021 #include <fstream>
00022 #include <limits>
00023 #include <math.h>
00024
00025
00026
00027 #include "mean_squared_error.h"
00028
00029
00030
00031
00032 #include "../../parsers/tinyxml/tinyxml.h"
00033
00034
00035 namespace OpenNN
00036 {
00037
00038
00043
/// Default constructor.
/// Delegates to the PerformanceTerm default constructor; no neural network or
/// data set is associated with the term.

MeanSquaredError::MeanSquaredError(void) : PerformanceTerm()
{
}
00047
00048
00049
00050
00056
/// Neural network constructor.
/// Associates the given neural network with this performance term (no data set yet).
/// @param new_neural_network_pointer Pointer to the neural network to be measured.

MeanSquaredError::MeanSquaredError(NeuralNetwork* new_neural_network_pointer)
: PerformanceTerm(new_neural_network_pointer)
{
}
00061
00062
00063
00064
00070
/// Data set constructor.
/// Associates the given data set with this performance term (no neural network yet).
/// @param new_data_set_pointer Pointer to the data set the error will be measured on.

MeanSquaredError::MeanSquaredError(DataSet* new_data_set_pointer)
: PerformanceTerm(new_data_set_pointer)
{
}
00075
00076
00077
00078
00085
/// Neural network and data set constructor.
/// Associates both the neural network and the data set with this performance term.
/// @param new_neural_network_pointer Pointer to the neural network to be measured.
/// @param new_data_set_pointer Pointer to the data set the error will be measured on.

MeanSquaredError::MeanSquaredError(NeuralNetwork* new_neural_network_pointer, DataSet* new_data_set_pointer)
: PerformanceTerm(new_neural_network_pointer, new_data_set_pointer)
{
}
00090
00091
00092
00093
00099
/// XML constructor.
/// Builds the term from a TinyXML element; all parsing is delegated to the
/// PerformanceTerm base class constructor.
/// @param mean_squared_error_element XML element holding the member values.

MeanSquaredError::MeanSquaredError(TiXmlElement* mean_squared_error_element)
: PerformanceTerm(mean_squared_error_element)
{
}
00104
00105
00106
00107
00111
/// Copy constructor.
/// Copies all base-class members (pointers and flags) from another mean squared
/// error object via the PerformanceTerm copy constructor.
/// @param other_mean_squared_error Object to be copied.

MeanSquaredError::MeanSquaredError(const MeanSquaredError& other_mean_squared_error)
: PerformanceTerm(other_mean_squared_error)
{
}
00116
00117
00118
00119
00121
/// Destructor.
/// Nothing to release here; the base class does not own the neural network or
/// the data set it points to.

MeanSquaredError::~MeanSquaredError(void)
{
}
00125
00126
00127
00128
00129
00130
00134
00135 void MeanSquaredError::check(void) const
00136 {
00137 std::ostringstream buffer;
00138
00139
00140
00141 if(!neural_network_pointer)
00142 {
00143 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00144 << "void check(void) const method.\n"
00145 << "Pointer to neural network is NULL.\n";
00146
00147 throw std::logic_error(buffer.str().c_str());
00148 }
00149
00150 const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
00151
00152 if(!multilayer_perceptron_pointer)
00153 {
00154 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00155 << "void check(void) const method.\n"
00156 << "Pointer to multilayer perceptron is NULL.\n";
00157
00158 throw std::logic_error(buffer.str().c_str());
00159 }
00160
00161 const unsigned int inputs_number = multilayer_perceptron_pointer->count_inputs_number();
00162 const unsigned int outputs_number = multilayer_perceptron_pointer->count_outputs_number();
00163
00164 if(inputs_number == 0)
00165 {
00166 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00167 << "void check(void) const method.\n"
00168 << "Number of inputs in multilayer perceptron object is zero.\n";
00169
00170 throw std::logic_error(buffer.str().c_str());
00171 }
00172
00173 if(outputs_number == 0)
00174 {
00175 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00176 << "void check(void) const method.\n"
00177 << "Number of outputs in multilayer perceptron object is zero.\n";
00178
00179 throw std::logic_error(buffer.str().c_str());
00180 }
00181
00182
00183
00184 if(!data_set_pointer)
00185 {
00186 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00187 << "void check(void) const method.\n"
00188 << "Pointer to data set is NULL.\n";
00189
00190 throw std::logic_error(buffer.str().c_str());
00191 }
00192
00193
00194
00195 const VariablesInformation& variables_information = data_set_pointer->get_variables_information();
00196
00197 const unsigned int targets_number = variables_information.count_targets_number();
00198
00199 if(inputs_number != inputs_number)
00200 {
00201 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00202 << "void check(void) const method.\n"
00203 << "Number of inputs in multilayer perceptron must be equal to number of inputs in data set.\n";
00204
00205 throw std::logic_error(buffer.str().c_str());
00206 }
00207
00208 if(outputs_number != targets_number)
00209 {
00210 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00211 << "void check(void) const method.\n"
00212 << "Number of outputs in multilayer perceptron must be equal to number of targets in data set.\n";
00213
00214 throw std::logic_error(buffer.str().c_str());
00215 }
00216 }
00217
00218
00219
00220
00223
00224 double MeanSquaredError::calculate_evaluation(void) const
00225 {
00226
00227
00228 #ifdef _DEBUG
00229
00230 check();
00231
00232 #endif
00233
00234
00235
00236 const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
00237
00238 const unsigned int inputs_number = multilayer_perceptron_pointer->count_inputs_number();
00239 const unsigned int outputs_number = multilayer_perceptron_pointer->count_outputs_number();
00240
00241
00242
00243 const InstancesInformation& instances_information = data_set_pointer->get_instances_information();
00244
00245 const unsigned int training_instances_number = instances_information.count_training_instances_number();
00246
00247
00248
00249 Vector<double> inputs(inputs_number);
00250 Vector<double> outputs(outputs_number);
00251 Vector<double> targets(outputs_number);
00252
00253 double sum_squared_error = 0.0;
00254
00255 for(unsigned int i = 0; i < training_instances_number; i++)
00256 {
00257
00258
00259 inputs = data_set_pointer->get_training_input_instance(i);
00260
00261
00262
00263 outputs = multilayer_perceptron_pointer->calculate_outputs(inputs);
00264
00265
00266
00267 targets = data_set_pointer->get_training_target_instance(i);
00268
00269
00270
00271 sum_squared_error += outputs.calculate_sum_squared_error(targets);
00272 }
00273
00274 return(sum_squared_error/(double)training_instances_number);
00275 }
00276
00277
00278
00279
00283
00284 double MeanSquaredError::calculate_evaluation(const Vector<double>& parameters) const
00285 {
00286
00287
00288 #ifdef _DEBUG
00289
00290 check();
00291
00292 #endif
00293
00294
00295 #ifdef _DEBUG
00296
00297 unsigned int size = parameters.size();
00298
00299 unsigned int parameters_number = neural_network_pointer->count_parameters_number();
00300
00301 if(size != parameters_number)
00302 {
00303 std::ostringstream buffer;
00304
00305 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00306 << "double calculate_evaluation(const Vector<double>&) const method.\n"
00307 << "Size (" << size << ") must be equal to number of parameters (" << parameters_number << ").\n";
00308
00309 throw std::logic_error(buffer.str().c_str());
00310 }
00311
00312 #endif
00313
00314 NeuralNetwork neural_network_copy(*neural_network_pointer);
00315
00316 neural_network_copy.set_parameters(parameters);
00317
00318 MeanSquaredError mean_squared_error_copy(*this);
00319
00320 mean_squared_error_copy.set_neural_network_pointer(&neural_network_copy);
00321
00322 return(mean_squared_error_copy.calculate_evaluation());
00323 }
00324
00325
00326
00327
00330
00331 double MeanSquaredError::calculate_generalization_evaluation(void) const
00332 {
00333
00334
00335 #ifdef _DEBUG
00336
00337 check();
00338
00339 #endif
00340
00341 const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
00342
00343 const unsigned int inputs_number = multilayer_perceptron_pointer->count_inputs_number();
00344 const unsigned int outputs_number = multilayer_perceptron_pointer->count_outputs_number();
00345
00346 const InstancesInformation& instances_information = data_set_pointer->get_instances_information();
00347
00348 const unsigned int generalization_instances_number = instances_information.count_generalization_instances_number();
00349
00350 if(generalization_instances_number == 0)
00351 {
00352 return(0.0);
00353 }
00354 else
00355 {
00356 Vector<double> inputs(inputs_number);
00357 Vector<double> outputs(outputs_number);
00358 Vector<double> targets(outputs_number);
00359
00360 double generalization_objective = 0.0;
00361
00362 for(unsigned int i = 0; i < generalization_instances_number; i++)
00363 {
00364
00365
00366 inputs = data_set_pointer->get_generalization_input_instance(i);
00367
00368
00369
00370 outputs = multilayer_perceptron_pointer->calculate_outputs(inputs);
00371
00372
00373
00374 targets = data_set_pointer->get_generalization_target_instance(i);
00375
00376
00377
00378 generalization_objective += outputs.calculate_sum_squared_error(targets);
00379 }
00380
00381 return(generalization_objective/(double)generalization_instances_number);
00382 }
00383 }
00384
00385
00386
00387
00390
/// Returns the gradient of the mean squared error with respect to the network
/// parameters, computed by back-propagation over all training instances.
/// For each instance, the output-layer error derivative 2*(outputs-targets)/N
/// is propagated backwards through the layer deltas, and the resulting
/// per-instance parameter gradients are accumulated.

Vector<double> MeanSquaredError::calculate_gradient(void) const
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   check();

   #endif

   // Neural network stuff

   const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();

   const unsigned int inputs_number = multilayer_perceptron_pointer->count_inputs_number();
   const unsigned int outputs_number = multilayer_perceptron_pointer->count_outputs_number();

   const unsigned int layers_number = multilayer_perceptron_pointer->count_layers_number();

   const unsigned int parameters_number = multilayer_perceptron_pointer->count_parameters_number();

   // Holds [0] layer activations and [1] layer activation derivatives,
   // refreshed for every training instance inside the main loop.
   Vector< Vector< Vector<double> > > first_order_forward_propagation(2);

   const ConditionsLayer* conditions_layer_pointer = neural_network_pointer->get_conditions_layer_pointer();

   const bool& conditions_layer_flag = neural_network_pointer->get_conditions_layer_flag();

   Vector<double> particular_solution;
   Vector<double> homogeneous_solution;

   // Data set stuff

   const InstancesInformation& instances_information = data_set_pointer->get_instances_information();

   const unsigned int training_instances_number = instances_information.count_training_instances_number();

   Vector<double> inputs(inputs_number);
   Vector<double> targets(outputs_number);

   // Back-propagation workspace

   Vector< Vector<double> > layers_delta;

   Vector<double> output_objective_gradient(outputs_number);

   Vector<double> point_gradient(parameters_number, 0.0);

   Vector<double> objective_gradient(parameters_number, 0.0);

   // Main loop over training instances

   for(unsigned int i = 0; i < training_instances_number; i++)
   {
      inputs = data_set_pointer->get_training_input_instance(i);

      targets = data_set_pointer->get_training_target_instance(i);

      first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);

      const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
      const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];

      if(!conditions_layer_flag)
      {
         // d(MSE)/d(outputs) for this instance: 2*(outputs - targets)/N.
         output_objective_gradient = (layers_activation[layers_number-1]-targets)*(2.0/(double)training_instances_number);

         layers_delta = calculate_layers_delta(layers_activation_derivative, output_objective_gradient);
      }
      else
      {
         // With a conditions layer, the network output is
         // particular_solution + homogeneous_solution * raw_output.
         particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
         homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);

         output_objective_gradient = (particular_solution+homogeneous_solution*layers_activation[layers_number-1] - targets)*(2.0/(double)training_instances_number);

         layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_objective_gradient);
      }

      // Per-instance parameter gradient, accumulated into the total gradient.
      point_gradient = calculate_point_gradient(inputs, layers_activation, layers_delta);

      objective_gradient += point_gradient;
   }

   return(objective_gradient);
}
00476
00477
00478
00479
00481
00482 Matrix<double> MeanSquaredError::calculate_Hessian(void) const
00483 {
00484 Matrix<double> H;
00485
00486 return(H);
00487 }
00488
00489
00490
00491
00493
00494 PerformanceTerm::FirstOrderEvaluation MeanSquaredError::calculate_first_order_evaluation(void) const
00495 {
00496 FirstOrderEvaluation first_order_evaluation;
00497
00498 first_order_evaluation.evaluation = calculate_evaluation();
00499 first_order_evaluation.gradient = calculate_gradient();
00500
00501 return(first_order_evaluation);
00502 }
00503
00504
00505
00506
00508
00509 PerformanceTerm::SecondOrderEvaluation MeanSquaredError::calculate_second_order_evaluation(void) const
00510 {
00511 SecondOrderEvaluation second_order_evaluation;
00512
00513 second_order_evaluation.evaluation = calculate_evaluation();
00514 second_order_evaluation.gradient = calculate_gradient();
00515 second_order_evaluation.Hessian = calculate_Hessian();
00516
00517 return(second_order_evaluation);
00518 }
00519
00520
00521
00522
00525
00526 Vector<double> MeanSquaredError::calculate_evaluation_terms(void) const
00527 {
00528
00529
00530 #ifdef _DEBUG
00531
00532 check();
00533
00534 #endif
00535
00536
00537
00538 const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();
00539
00540 const unsigned int inputs_number = multilayer_perceptron_pointer->count_inputs_number();
00541 const unsigned int outputs_number = multilayer_perceptron_pointer->count_outputs_number();
00542
00543
00544
00545 const InstancesInformation& instances_information = data_set_pointer->get_instances_information();
00546
00547 const unsigned int training_instances_number = instances_information.count_training_instances_number();
00548
00549
00550
00551 Vector<double> evaluation_terms(training_instances_number);
00552
00553 Vector<double> inputs(inputs_number);
00554 Vector<double> outputs(outputs_number);
00555 Vector<double> targets(outputs_number);
00556
00557 for(unsigned int i = 0; i < training_instances_number; i++)
00558 {
00559
00560
00561 inputs = data_set_pointer->get_training_input_instance(i);
00562
00563
00564
00565 outputs = multilayer_perceptron_pointer->calculate_outputs(inputs);
00566
00567
00568
00569 targets = data_set_pointer->get_training_target_instance(i);
00570
00571
00572
00573 evaluation_terms[i] = outputs.calculate_distance(targets);
00574 }
00575
00576 return(evaluation_terms/sqrt((double)training_instances_number));
00577 }
00578
00579
00580
00581
00585
00586 Vector<double> MeanSquaredError::calculate_evaluation_terms(const Vector<double>& network_parameters) const
00587 {
00588
00589
00590 #ifdef _DEBUG
00591
00592 check();
00593
00594 #endif
00595
00596 #ifdef _DEBUG
00597
00598 std::ostringstream buffer;
00599
00600 const unsigned int& size = network_parameters.size();
00601
00602 const unsigned int parameters_number = neural_network_pointer->count_parameters_number();
00603
00604 if(size != parameters_number)
00605 {
00606 buffer << "OpenNN Exception: MeanSquaredError class.\n"
00607 << "double calculate_evaluation_terms(const Vector<double>&) const method.\n"
00608 << "Size (" << size << ") must be equal to number of multilayer_perceptron_pointer parameters (" << parameters_number << ").\n";
00609
00610 throw std::logic_error(buffer.str().c_str());
00611 }
00612
00613 #endif
00614
00615 NeuralNetwork neural_network_copy(*neural_network_pointer);
00616
00617 neural_network_copy.set_parameters(network_parameters);
00618
00619 MeanSquaredError mean_squared_error_copy(*this);
00620
00621 mean_squared_error_copy.set_neural_network_pointer(&neural_network_copy);
00622
00623 return(mean_squared_error_copy.calculate_evaluation_terms());
00624 }
00625
00626
00627
00628
00631
/// Returns the Jacobian of the error-terms vector: one row per training
/// instance, one column per network parameter. Each row is the back-propagated
/// gradient of that instance's term (the normalized residual direction), and
/// the whole matrix is scaled by 1/sqrt(N) to match calculate_evaluation_terms.

Matrix<double> MeanSquaredError::calculate_Jacobian_terms(void) const
{
   // Control sentence (if debug)

   #ifdef _DEBUG

   check();

   #endif

   // Neural network stuff

   const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();

   const unsigned int inputs_number = multilayer_perceptron_pointer->count_inputs_number();
   const unsigned int outputs_number = multilayer_perceptron_pointer->count_outputs_number();

   const unsigned int layers_number = multilayer_perceptron_pointer->count_layers_number();

   const unsigned int network_parameters_number = multilayer_perceptron_pointer->count_parameters_number();

   // Holds [0] layer activations and [1] layer activation derivatives,
   // refreshed for every training instance inside the main loop.
   Vector< Vector< Vector<double> > > first_order_forward_propagation(2);

   Vector<double> particular_solution;
   Vector<double> homogeneous_solution;

   const ConditionsLayer* conditions_layer_pointer = neural_network_pointer->get_conditions_layer_pointer();

   const bool& conditions_layer_flag = neural_network_pointer->get_conditions_layer_flag();

   // Data set stuff

   const InstancesInformation& instances_information = data_set_pointer->get_instances_information();

   const unsigned int training_instances_number = instances_information.count_training_instances_number();

   Vector<double> inputs(inputs_number);
   Vector<double> targets(outputs_number);

   // Back-propagation workspace

   Vector<double> term(outputs_number);
   double term_norm;

   Vector<double> output_objective_gradient(outputs_number);

   Vector< Vector<double> > layers_delta(layers_number);
   Vector<double> point_gradient(network_parameters_number);

   Matrix<double> Jacobian_terms(training_instances_number, network_parameters_number);

   // Main loop over training instances

   for(unsigned int i = 0; i < training_instances_number; i++)
   {
      inputs = data_set_pointer->get_training_input_instance(i);

      targets = data_set_pointer->get_training_target_instance(i);

      first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);

      const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
      const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];

      if(!conditions_layer_flag)
      {
         const Vector<double>& outputs = first_order_forward_propagation[0][layers_number-1];

         term = (outputs-targets);
         term_norm = term.calculate_norm();

         // Zero residual: the normalized direction is undefined, use zero.
         if(term_norm == 0.0)
         {
            output_objective_gradient.initialize(0.0);
         }
         else
         {
            output_objective_gradient = term/term_norm;
         }

         layers_delta = calculate_layers_delta(layers_activation_derivative, output_objective_gradient);
      }
      else
      {
         // With a conditions layer, the network output is
         // particular_solution + homogeneous_solution * raw_output.
         particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
         homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);

         // NOTE(review): unlike the branch above, this residual is pre-divided
         // by sqrt(N). The factor cancels in term/term_norm below, so the
         // propagated direction is unaffected — but the term_norm == 0.0
         // comparison and the asymmetry with the other branch look accidental;
         // confirm against the reference implementation.
         term = (particular_solution+homogeneous_solution*layers_activation[layers_number-1] - targets)/sqrt((double)training_instances_number);
         term_norm = term.calculate_norm();

         if(term_norm == 0.0)
         {
            output_objective_gradient.initialize(0.0);
         }
         else
         {
            output_objective_gradient = term/term_norm;
         }

         layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_objective_gradient);
      }

      // Row i of the Jacobian: gradient of instance i's term w.r.t. parameters.
      point_gradient = calculate_point_gradient(inputs, layers_activation, layers_delta);

      Jacobian_terms.set_row(i, point_gradient);
   }

   return(Jacobian_terms/sqrt((double)training_instances_number));
}
00741
00742
00743
00744
00746
00748
00749 MeanSquaredError::FirstOrderEvaluationTerms MeanSquaredError::calculate_first_order_evaluation_terms(void)
00750 {
00751 FirstOrderEvaluationTerms first_order_evaluation_terms;
00752
00753 first_order_evaluation_terms.evaluation_terms = calculate_evaluation_terms();
00754
00755 first_order_evaluation_terms.Jacobian_terms = calculate_Jacobian_terms();
00756
00757 return(first_order_evaluation_terms);
00758 }
00759
00760
00761
00762
00764
00765 std::string MeanSquaredError::write_performance_term_type(void) const
00766 {
00767 return("MEAN_SQUARED_ERROR");
00768 }
00769
00770
00771
00772
00775
00776 TiXmlElement* MeanSquaredError::to_XML(void) const
00777 {
00778 std::ostringstream buffer;
00779
00780
00781
00782 TiXmlElement* mean_squared_error_element = new TiXmlElement("MeanSquaredError");
00783 mean_squared_error_element->SetAttribute("Version", 4);
00784
00785
00786
00787 {
00788 TiXmlElement* element = new TiXmlElement("Display");
00789 mean_squared_error_element->LinkEndChild(element);
00790
00791 buffer.str("");
00792 buffer << display;
00793
00794 TiXmlText* text = new TiXmlText(buffer.str().c_str());
00795 element->LinkEndChild(text);
00796 }
00797
00798 return(mean_squared_error_element);
00799 }
00800
00801 }
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818