00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef __DATASET_H__
00017 #define __DATASET_H__
00018
00019
00020
00021 #include <string>
00022
00023
00024
00025 #include "../utilities/vector.h"
00026 #include "../utilities/matrix.h"
00027
00028 #include "variables_information.h"
00029 #include "instances_information.h"
00030
00031
00032
00033 #include "../../parsers/tinyxml/tinyxml.h"
00034
00035
00036 namespace OpenNN
00037 {
00038
00042
00043 class DataSet
00044 {
00045
00046 public:
00047
00048
00049
00050 explicit DataSet(void);
00051
00052
00053
00054 explicit DataSet(const unsigned int&, const unsigned int&);
00055
00056
00057
00058 explicit DataSet(const unsigned int&, const unsigned int&, const unsigned int&);
00059
00060
00061
00062 explicit DataSet(TiXmlElement*);
00063
00064
00065
00066 explicit DataSet(const std::string&);
00067
00068
00069
00070 DataSet(const DataSet&);
00071
00072
00073
00074 virtual ~DataSet(void);
00075
00076
00077
00078 DataSet& operator = (const DataSet&);
00079
00080
00081
00082 bool operator == (const DataSet&) const;
00083
00084
00085
00087
00088 enum ScalingUnscalingMethod{MinimumMaximum, MeanStandardDeviation};
00089
00090
00091
00092
00093
00094 const std::string& get_data_filename(void) const;
00095
00097
00098 inline unsigned int get_instances_number(void) const
00099 {
00100 return(data.get_rows_number());
00101 }
00102
00104
00105 inline unsigned int get_variables_number(void) const
00106 {
00107 return(data.get_columns_number());
00108 }
00109
00110 const VariablesInformation& get_variables_information(void) const;
00111 VariablesInformation* get_variables_information_pointer(void);
00112
00113 const InstancesInformation& get_instances_information(void) const;
00114 InstancesInformation* get_instances_information_pointer(void);
00115
00116 const ScalingUnscalingMethod& get_scaling_unscaling_method(void) const;
00117 std::string write_scaling_unscaling_method(void) const;
00118
00119 const bool& get_display(void) const;
00120
00121
00122
00123 const Matrix<double>& get_data(void) const;
00124
00125 Matrix<double> arrange_training_data(void) const;
00126 Matrix<double> arrange_generalization_data(void) const;
00127 Matrix<double> arrange_testing_data(void) const;
00128
00129 Matrix<double> arrange_input_data(void) const;
00130 Matrix<double> arrange_target_data(void) const;
00131
00132 Matrix<double> arrange_training_input_data(void) const;
00133 Matrix<double> arrange_training_target_data(void) const;
00134 Matrix<double> get_generalization_input_data(void) const;
00135 Matrix<double> get_generalization_target_data(void) const;
00136 Matrix<double> arrange_testing_input_data(void) const;
00137 Matrix<double> arrange_testing_target_data(void) const;
00138
00139
00140
00141 Vector<double> get_instance(const unsigned int&) const;
00142
00143 Vector<double> get_training_instance(const unsigned int&) const;
00144 Vector<double> get_generalization_instance(const unsigned int&) const;
00145 Vector<double> get_testing_instance(const unsigned int&) const;
00146
00147 Vector<double> get_input_instance(const unsigned int&) const;
00148 Vector<double> get_target_instance(const unsigned int&) const;
00149
00150 Vector<double> get_training_input_instance(const unsigned int&) const;
00151 Vector<double> get_training_target_instance(const unsigned int&) const;
00152
00153 Vector<double> get_generalization_input_instance(const unsigned int&) const;
00154 Vector<double> get_generalization_target_instance(const unsigned int&) const;
00155
00156 Vector<double> get_testing_input_instance(const unsigned int&) const;
00157 Vector<double> get_testing_target_instance(const unsigned int&) const;
00158
00159
00160
00161 Vector<double> get_variable(const unsigned int&) const;
00162
00163
00164
00165 void set(void);
00166 void set(const unsigned int&, const unsigned int&);
00167 void set(const unsigned int&, const unsigned int&, const unsigned int&);
00168 void set(const DataSet&);
00169 void set(TiXmlElement*);
00170 void set(const std::string&);
00171
00172
00173
00174 void set_data(const Matrix<double>&);
00175
00176 void set_instances_number(const unsigned int&);
00177 void set_variables_number(const unsigned int&);
00178
00179 void set_data_filename(const std::string&);
00180
00181 void set_scaling_unscaling_method(const ScalingUnscalingMethod&);
00182 void set_scaling_unscaling_method(const std::string&);
00183
00184
00185
00186 void set_display(const bool&);
00187
00188 void set_default(void);
00189
00190
00191
00192 void set_instance(const unsigned int&, const Vector<double>&);
00193
00194 void set_training_instance(const unsigned int&, const Vector<double>&);
00195 void set_generalization_instance(const unsigned int&, const Vector<double>&);
00196 void set_testing_instance(const unsigned int&, const Vector<double>&);
00197
00198 void set_input_instance(const unsigned int&, const Vector<double>&);
00199 void set_target_instance(const unsigned int&, const Vector<double>&);
00200
00201 void set_training_input_instance(const unsigned int&, const Vector<double>&);
00202 void set_training_target_instance(const unsigned int&, const Vector<double>&);
00203
00204 void set_generalization_input_instance(const unsigned int&, const Vector<double>&);
00205 void set_generalization_target_instance(const unsigned int&, const Vector<double>&);
00206
00207 void set_testing_input_instance(const unsigned int&, const Vector<double>&);
00208 void set_testing_target_instance(const unsigned int&, const Vector<double>&);
00209
00210
00211
00212 void add_instance(const Vector<double>&);
00213 void subtract_instance(const unsigned int&);
00214
00215 void append_variable(const Vector<double>&);
00216 void subtract_variable(const unsigned int&);
00217
00218 void subtract_constant_variables(void);
00219 void subtract_repeated_instances(void);
00220
00221
00222
00223 void initialize_data(const double&);
00224
00225 void initialize_data_normal(void);
00226
00227
00228
00229 Vector< Vector<double> > calculate_data_statistics(void) const;
00230
00231 Vector< Vector<double> > calculate_training_instances_statistics(void) const;
00232 Vector< Vector<double> > calculate_generalization_instances_statistics(void) const;
00233 Vector< Vector<double> > calculate_testing_instances_statistics(void) const;
00234
00235 Vector< Vector<double> > calculate_instances_statistics(void) const;
00236
00237 Vector< Vector<double> > calculate_inputs_statistics(void) const;
00238 Vector< Vector<double> > calculate_targets_statistics(void) const;
00239
00240 Vector< Vector<double> > calculate_inputs_targets_minimum_maximum(void) const;
00241 Vector< Vector<double> > calculate_inputs_targets_mean_standard_deviation(void) const;
00242 Vector< Vector<double> > calculate_inputs_targets_statistics(void) const;
00243
00244 Vector<double> calculate_training_target_data_mean(void) const;
00245 Vector<double> calculate_generalization_target_data_mean(void) const;
00246 Vector<double> calculate_testing_target_data_mean(void) const;
00247
00248
00249
00250 Matrix<double> calculate_variables_correlation(void) const;
00251
00252
00253
00254 Vector< Vector< Vector<double> > > calculate_data_histogram(const unsigned int&) const;
00255 Vector< Vector< Vector<double> > > calculate_data_histogram(void) const;
00256
00257
00258
00259 void scale_data_minimum_maximum(const Vector<double>&, const Vector<double>&);
00260 void scale_data_mean_standard_deviation(const Vector<double>&, const Vector<double>&);
00261
00262 void scale_data(const Vector< Vector<double> >&);
00263
00264 Vector< Vector<double> > scale_data(void);
00265
00266
00267
00268 void scale_inputs_minimum_maximum(const Vector<double>&, const Vector<double>&);
00269 Vector< Vector<double> > scale_inputs_minimum_maximum(void);
00270
00271 void scale_inputs_mean_standard_deviation(const Vector<double>&, const Vector<double>&);
00272 Vector< Vector<double> > scale_inputs_mean_standard_deviation(void);
00273
00274 Vector< Vector<double> > scale_inputs(void);
00275
00276
00277
00278 void scale_targets_minimum_maximum(const Vector<double>&, const Vector<double>&);
00279 Vector< Vector<double> > scale_targets_minimum_maximum(void);
00280
00281 void scale_targets_mean_standard_deviation(const Vector<double>&, const Vector<double>&);
00282 Vector< Vector<double> > scale_targets_mean_standard_deviation(void);
00283
00284 Vector< Vector<double> > scale_targets(void);
00285
00286
00287
00288 void scale_inputs_targets_minimum_maximum(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&);
00289 Vector< Vector<double> > scale_inputs_targets_minimum_maximum(void);
00290
00291 void scale_inputs_targets_mean_standard_deviation(const Vector<double>&, const Vector<double>&, const Vector<double>&, const Vector<double>&);
00292 Vector< Vector<double> > scale_inputs_targets_mean_standard_deviation(void);
00293
00294 Vector< Vector<double> > scale_inputs_targets(void);
00295
00296
00297
00298 void unscale_data_minimum_maximum(const Vector<double>&, const Vector<double>&);
00299 void unscale_data_mean_standard_deviation(const Vector<double>&, const Vector<double>&);
00300
00301
00302
00303 void unscale_inputs_minimum_maximum(const Vector<double>&, const Vector<double>&);
00304 void unscale_inputs_mean_standard_deviation(const Vector<double>&, const Vector<double>&);
00305
00306
00307
00308 void unscale_targets_minimum_maximum(const Vector<double>&, const Vector<double>&);
00309 void unscale_targets_mean_standard_deviation(const Vector<double>&, const Vector<double>&);
00310
00311
00312
00313 void unscale_inputs_targets_minimum_maximum(const Vector< Vector<double> >&);
00314 void unscale_inputs_targets_mean_standard_deviation(const Vector< Vector<double> >&);
00315
00316
00317
00318 Vector<unsigned int> calculate_target_class_distribution(void) const;
00319
00320
00321
00322 std::string to_string(void) const;
00323
00324 void print(void) const;
00325
00326 TiXmlElement* to_XML(void) const;
00327 void from_XML(TiXmlElement*);
00328
00329 void save(const std::string&) const;
00330 void load(const std::string&);
00331
00332 void print_data(void) const;
00333
00334 void save_data(const std::string&) const;
00335 void load_data(const std::string&);
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356 private:
00357
00358
00359
00361
00362 std::string data_filename;
00363
00364
00365
00366
00367
00369
00370 Matrix<double> data;
00371
00373
00374 VariablesInformation variables_information;
00375
00377
00378 InstancesInformation instances_information;
00379
00381
00382 ScalingUnscalingMethod scaling_unscaling_method;
00383
00385
00386 bool display;
00387 };
00388
00389 }
00390
00391 #endif
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409