diff --git a/docs/source/benchmarks/amlb_res.csv b/docs/source/benchmarks/amlb_res.csv
index 569f98431d..d8e0b56ece 100644
--- a/docs/source/benchmarks/amlb_res.csv
+++ b/docs/source/benchmarks/amlb_res.csv
@@ -1,33 +1,39 @@
-Dataset name,Metric name,AutoGluon,FEDOT,H2O,LAMA
-APSFailure,auc,0.99,0.991,0.992,0.992
-Amazon_employee_access,auc,0.857,0.865,0.873,0.879
-Australian,auc,0.94,0.939,0.939,0.945
-Covertype,neg_logloss,-0.071,-0.117,-0.265,
-Fashion-MNIST,neg_logloss,-0.329,-0.373,-0.38,-0.248
-Jannis,neg_logloss,-0.728,-0.737,-0.691,-0.664
-KDDCup09_appetency,auc,0.804,0.822,0.829,0.85
-MiniBooNE,auc,0.982,0.981,,0.988
-Shuttle,neg_logloss,-0.001,-0.001,-0.0,-0.001
-Volkert,neg_logloss,-0.917,-1.097,-0.976,-0.806
-adult,auc,0.91,0.925,0.931,0.932
-bank-marketing,auc,0.931,0.935,0.939,0.94
-blood-transfusion,auc,0.69,0.759,0.765,0.75
-car,neg_logloss,-0.117,-0.011,-0.004,-0.002
-christine,auc,0.804,0.812,0.823,0.83
-cnae-9,neg_logloss,-0.332,-0.211,-0.175,-0.156
-connect-4,neg_logloss,-0.502,-0.456,-0.338,-0.337
-credit-g,auc,0.795,0.778,0.789,0.796
-dilbert,neg_logloss,-0.148,-0.159,-0.05,-0.033
-fabert,neg_logloss,-0.788,-0.895,-0.752,-0.766
-guillermo,auc,0.9,0.891,,0.926
-jasmine,auc,0.883,0.888,0.887,0.88
-jungle chess,neg_logloss,-0.431,-0.193,-0.24,-0.149
-kc1,auc,0.822,0.843,,0.831
-kr-vs-kp,auc,0.999,1.0,,1.0
-mfeat-factors,neg_logloss,-0.161,-0.094,,-0.082
-nomao,auc,0.995,0.994,0.996,0.997
-numerai28_6,auc,0.517,0.529,0.531,0.531
-phoneme,auc,0.965,0.965,,0.965
-segment,neg_logloss,-0.094,-0.062,,-0.061
-sylvine,auc,0.985,0.988,,0.988
-vehicle,neg_logloss,-0.515,-0.354,,-0.404
+Dataset,Metric,AutoGluon,FEDOT,H2O,TPOT
+adult,auc,0.9100126,0.91529255,0.9307700000000001,0.9272897999999999
+airlines,auc,0.7249085714285715,0.6537803999999999,0.7303896,0.693676
+albert,auc,0.739028,0.7276503,,
+amazon_employee_access,auc,0.8571479999999999,0.8591113,0.8728077000000001,0.8662471
+apsfailure,auc,0.9906209,0.9899874210526317,0.9925172,0.990437
+australian,auc,0.9395274,0.9378541,0.93857085,0.9360440999999999
+bank-marketing,auc,0.9312558,0.93245125,0.9385977000000001,0.9346086
+blood-transfusion,auc,0.6895855,0.72444385,0.75949435,0.7401904
+christine,auc,0.8042872000000001,0.8044556500000001,0.8193608421052632,0.8066902
+credit-g,auc,0.7952859,0.7845833,0.79357155,0.7938096
+guillermo,auc,0.8996748,0.89125215,,0.7833095714285714
+jasmine,auc,0.8831222000000001,0.88548405,0.8873440499999999,0.8903762000000001
+kc1,auc,0.8222621,0.8385662,,0.8448118000000001
+kddcup09_appetency,auc,0.8044676000000001,0.7877767,0.8291237,0.825562
+kr-vs-kp,auc,0.9988583999999999,0.9992477,0.9997232,0.9997627
+miniboone,auc,0.9821717,0.98101815,,0.9834643333333334
+nomao,auc,0.9948282,0.99419515,0.9959996,0.9953825
+numerai28_6,auc,0.5165548,0.5216116000000001,0.5305179,
+phoneme,auc,0.9654223,0.9644835,0.9675107000000001,0.970699
+riccardo,auc,0.9997026,0.9979384,,
+sylvine,auc,0.9847037999999999,0.9849627999999999,0.9893596,0.9933923
+car,neg_logloss,-0.11658660000000001,-0.088851992,-0.003471899925,-0.64257486468
+cnae-9,neg_logloss,-0.332075,-0.270096135,-0.21849159,-0.15368975
+connect-4,neg_logloss,-0.5015701,-0.47033240000000004,-0.33770059999999996,-0.3734921
+covertype,neg_logloss,-0.07139724444444445,-0.1409624,-0.2642175,
+dilbert,neg_logloss,-0.14967388235294118,-0.24454559000000003,-0.07642755500000001,-0.168390625
+dionis,neg_logloss,-2.157603,,,
+fabert,neg_logloss,-0.7878137,-0.9015242000000001,-0.77193945,-0.8915912
+fashion-mnist,neg_logloss,-0.3325671,-0.38379342857142856,-0.3832832,-0.535493
+helena,neg_logloss,-2.784965,-6.348634,-2.9801966666666666,-2.98157375
+jannis,neg_logloss,-0.7283778,-0.7619161,-0.691228,-0.703102
+jungle_chess_2pcs_raw_endgame_complete,neg_logloss,-0.43063529999999994,-0.270741845,-0.23951890000000003,-0.21872090000000002
+mfeat-factors,neg_logloss,-0.1611791,-0.17412199,-0.09295753,-0.10726150999999999
+robert,neg_logloss,-1.6843139999999999,-1.745091,,
+segment,neg_logloss,-0.09418663,-0.096434561,-0.05962082,-0.07710542000000001
+shuttle,neg_logloss,-0.0008124975,-0.0010121353499999998,-0.00035519797666666667,
+vehicle,neg_logloss,-0.5154588,-0.42775929999999995,-0.3313683,-0.3915049
+volkert,neg_logloss,-0.9200727000000001,-1.0448454545454544,-0.9779738888888888,
diff --git a/docs/source/benchmarks/amlb_res.html b/docs/source/benchmarks/amlb_res.html
deleted file mode 100644
index d0c976963d..0000000000
--- a/docs/source/benchmarks/amlb_res.html
+++ /dev/null
@@ -1,719 +0,0 @@
-
-
-
-
- Classification statistics
-
-
-
-
- |
-
- framework
- |
-
- AutoGluon
- |
-
- FEDOT
- |
-
- H2O
- |
-
- LAMA
- |
-
-
-
- Dataset name
- |
-
- Metric name
- |
-
- |
-
- |
-
- |
-
- |
-
-
-
-
-
- APSFailure
- |
-
- auc
- |
-
- 0.990
- |
-
- 0.991
- |
-
- 0.992
- |
-
- 0.992
- |
-
-
-
- Amazon_employee_access
- |
-
- auc
- |
-
- 0.857
- |
-
- 0.865
- |
-
- 0.873
- |
-
- 0.879
- |
-
-
-
- Australian
- |
-
- auc
- |
-
- 0.940
- |
-
- 0.939
- |
-
- 0.938
- |
-
- 0.945
- |
-
-
-
- Covertype
- |
-
- neg_logloss
- |
-
- -0.071
- |
-
- -0.117
- |
-
- -0.265
- |
-
- nan
- |
-
-
-
- Fashion-MNIST
- |
-
- neg_logloss
- |
-
- -0.329
- |
-
- -0.373
- |
-
- -0.380
- |
-
- -0.248
- |
-
-
-
- Jannis
- |
-
- neg_logloss
- |
-
- -0.728
- |
-
- -0.737
- |
-
- -0.691
- |
-
- -0.664
- |
-
-
-
- KDDCup09_appetency
- |
-
- auc
- |
-
- 0.804
- |
-
- 0.822
- |
-
- 0.829
- |
-
- 0.850
- |
-
-
-
- MiniBooNE
- |
-
- auc
- |
-
- 0.982
- |
-
- 0.981
- |
-
- nan
- |
-
- 0.988
- |
-
-
-
- Shuttle
- |
-
- neg_logloss
- |
-
- -0.001
- |
-
- -0.001
- |
-
- -0.000
- |
-
- -0.001
- |
-
-
-
- Volkert
- |
-
- neg_logloss
- |
-
- -0.917
- |
-
- -1.097
- |
-
- -0.976
- |
-
- -0.806
- |
-
-
-
- adult
- |
-
- auc
- |
-
- 0.910
- |
-
- 0.925
- |
-
- 0.931
- |
-
- 0.932
- |
-
-
-
- bank-marketing
- |
-
- auc
- |
-
- 0.931
- |
-
- 0.935
- |
-
- 0.939
- |
-
- 0.940
- |
-
-
-
- blood-transfusion
- |
-
- auc
- |
-
- 0.690
- |
-
- 0.759
- |
-
- 0.754
- |
-
- 0.750
- |
-
-
-
- car
- |
-
- neg_logloss
- |
-
- -0.117
- |
-
- -0.011
- |
-
- -0.003
- |
-
- -0.002
- |
-
-
-
- christine
- |
-
- auc
- |
-
- 0.804
- |
-
- 0.812
- |
-
- 0.815
- |
-
- 0.830
- |
-
-
-
- cnae-9
- |
-
- neg_logloss
- |
-
- -0.332
- |
-
- -0.211
- |
-
- -0.262
- |
-
- -0.156
- |
-
-
-
- connect-4
- |
-
- neg_logloss
- |
-
- -0.502
- |
-
- -0.456
- |
-
- -0.338
- |
-
- -0.337
- |
-
-
-
- credit-g
- |
-
- auc
- |
-
- 0.795
- |
-
- 0.778
- |
-
- 0.798
- |
-
- 0.796
- |
-
-
-
- dilbert
- |
-
- neg_logloss
- |
-
- -0.148
- |
-
- -0.159
- |
-
- -0.103
- |
-
- -0.033
- |
-
-
-
- fabert
- |
-
- neg_logloss
- |
-
- -0.788
- |
-
- -0.895
- |
-
- -0.792
- |
-
- -0.766
- |
-
-
-
- guillermo
- |
-
- auc
- |
-
- 0.900
- |
-
- 0.891
- |
-
- nan
- |
-
- 0.926
- |
-
-
-
- jasmine
- |
-
- auc
- |
-
- 0.883
- |
-
- 0.888
- |
-
- 0.888
- |
-
- 0.880
- |
-
-
-
- jungle chess
- |
-
- neg_logloss
- |
-
- -0.431
- |
-
- -0.193
- |
-
- -0.240
- |
-
- -0.149
- |
-
-
-
- kc1
- |
-
- auc
- |
-
- 0.822
- |
-
- 0.843
- |
-
- nan
- |
-
- 0.831
- |
-
-
-
- kr-vs-kp
- |
-
- auc
- |
-
- 0.999
- |
-
- 1.000
- |
-
- 1.000
- |
-
- 1.000
- |
-
-
-
- mfeat-factors
- |
-
- neg_logloss
- |
-
- -0.161
- |
-
- -0.094
- |
-
- -0.093
- |
-
- -0.082
- |
-
-
-
- nomao
- |
-
- auc
- |
-
- 0.995
- |
-
- 0.994
- |
-
- 0.996
- |
-
- 0.997
- |
-
-
-
- numerai28_6
- |
-
- auc
- |
-
- 0.517
- |
-
- 0.529
- |
-
- 0.531
- |
-
- 0.531
- |
-
-
-
- phoneme
- |
-
- auc
- |
-
- 0.965
- |
-
- 0.965
- |
-
- 0.968
- |
-
- 0.965
- |
-
-
-
- segment
- |
-
- neg_logloss
- |
-
- -0.094
- |
-
- -0.062
- |
-
- -0.060
- |
-
- -0.061
- |
-
-
-
- sylvine
- |
-
- auc
- |
-
- 0.985
- |
-
- 0.988
- |
-
- 0.989
- |
-
- 0.988
- |
-
-
-
- vehicle
- |
-
- neg_logloss
- |
-
- -0.515
- |
-
- -0.354
- |
-
- -0.331
- |
-
- -0.404
- |
-
-
-
-
diff --git a/docs/source/benchmarks/forecasting.rst b/docs/source/benchmarks/forecasting.rst
index d83a2880ae..35e696647e 100644
--- a/docs/source/benchmarks/forecasting.rst
+++ b/docs/source/benchmarks/forecasting.rst
@@ -55,3 +55,94 @@ Timeout for Fedot and other frameworks was set by 2 minutes on each series. For
Additionally you can examine papers about Fedot performance on different time series forecasting tasks `[1] `__ , `[2] `__, `[3] `__,
`[4] `__, `[5] `__, `[6] `__, `[7] `__.
+
+
+More M4 benchmarking
+~~~~~~~~~~~~~~~~~~~~
+
+This benchmark is based on a unified benchmarking interface provided by the `pytsbe framework `__ (a tool for benchmarking automated time-series forecasting algorithms).
+The `pytsbe` tool uses a `subsample `__ from the `M4 competition `__ (the sample contains 998 series with daily, weekly, monthly, quarterly, yearly intervals).
+The forecasting horizons for each series type are: 6 for yearly series, 8 for quarterly series, 18 for monthly series, 13 for weekly series, and 14 for daily series.
+The estimation metric used is Symmetric Mean Absolute Percentage Error (SMAPE).
+
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+ | Library | Quantile | Intervals |
+ + + +--------+--------+--------+-----------+--------+---------+
+ | | | Daily | Weekly | Monthly | Quarterly | Yearly | Overall |
+ +=============+==========+========+========+========+===========+========+=========+
+ | LagLlama | 10 | 1.457 | 3.258 | 5.303 | 5.713 | 11.665 | 2.64 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 50 | 4.513 | 11.167 | 18.534 | 20.027 | 33.141 | 13.036 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 90 | 13.123 | 28.268 | 62.091 | 48.793 | 73.565 | 48.056 |
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+ | NBEATS | 10 | 0.732 | 1.021 | 1.173 | 1.818 | 3.038 | 1.036 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 50 | 1.948 | 4.384 | 7.628 | 8.193 | 12.648 | 4.643 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 90 | 4.57 | 19.665 | 38.343 | 49.764 | 36.045 | 28.567 |
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+ | TimeGPT | 10 | 1.687 | 1.272 | 1.134 | 2.459 | 4.179 | 1.536 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 50 | 5.586 | 7.17 | 6.235 | 7.058 | 8.982 | 6.565 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 90 | 15.716 | 23.337 | 35.786 | 28.056 | 32.902 | 26.387 |
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+ | autogluon | 10 | 0.93 | 0.744 | 1.26 | 2.159 | 2.624 | 1.131 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 50 | 2.37 | 5.96 | 7.402 | 6.168 | 7.598 | 4.704 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 90 | 6.189 | 20.888 | 33.51 | 24.909 | 40.516 | 25.026 |
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+ | Fedot | 10 | 0.97 | 0.733 | 1.342 | 1.771 | 2.892 | 1.064 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 50 | 2.326 | 4.95 | 7.123 | 6.786 | 8.682 | 4.655 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 90 | 5.398 | 19.131 | 43.519 | 36.36 | 41.147 | 30.29 |
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+ | repeat_last | 10 | 0.795 | 1.059 | 1.477 | 2.534 | 4.242 | 1.146 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 50 | 2.008 | 5.365 | 7.796 | 7.379 | 9.066 | 5.158 |
+ + +----------+--------+--------+--------+-----------+--------+---------+
+ | | 90 | 4.66 | 22.38 | 37.294 | 27.215 | 33.074 | 25.79 |
+ +-------------+----------+--------+--------+--------+-----------+--------+---------+
+
+For a clearer understanding, please refer to the mean values of the SMAPE metrics.
+Here, as usual, the best value is indicated in bold for each row (for each seasonal period).
+
+ +-------------+---------+---------+---------+-----------+---------+---------+
+ | Library | Intervals |
+ + +---------+---------+---------+-----------+---------+---------+
+ | | Daily | Weekly | Monthly | Quarterly | Yearly | Overall |
+ +=============+=========+=========+=========+===========+=========+=========+
+ | LagLlama | 4.513 | 11.167 | 18.534 | 20.027 | 33.141 | 13.036 |
+ +-------------+---------+---------+---------+-----------+---------+---------+
+ | NBEATS |**1.948**|**4.384**| 7.628 | 8.193 | 12.648 |**4.643**|
+ +-------------+---------+---------+---------+-----------+---------+---------+
+ | TimeGPT | 5.586 | 7.17 |**6.235**| 7.058 | 8.982 | 6.565 |
+ +-------------+---------+---------+---------+-----------+---------+---------+
+ | autogluon | 2.37 | 5.96 | 7.402 |**6.168** |**7.598**| 4.704 |
+ +-------------+---------+---------+---------+-----------+---------+---------+
+ | Fedot | 2.326 | 4.95 | 7.123 | 6.786 | 8.682 | 4.655 |
+ +-------------+---------+---------+---------+-----------+---------+---------+
+ | repeat_last | 2.008 | 5.365 | 7.796 | 7.379 | 9.066 | 5.158 |
+ +-------------+---------+---------+---------+-----------+---------+---------+
+
+
+The statistical analysis of the SMAPE metrics was conducted using the Friedman test.
+The results confirm that FEDOT's time series forecasting ability is statistically indistinguishable from
+that of the field-leading methods (represented by autogluon and NBEATS).
+
+ +------------+--------+----------+--------+---------+-----------+
+ | | FEDOT | LAGLLAMA | NBEATS | TimeGPT | autogluon |
+ +============+========+==========+========+=========+===========+
+ | FEDOT | | 0.044 | 0.613 | 0.613 | 0.971 |
+ +------------+--------+----------+--------+---------+-----------+
+ | LAGLLAMA | 0.044 | | 0.121 | 0.121 | 0.048 |
+ +------------+--------+----------+--------+---------+-----------+
+ | NBEATS | 0.613 | 0.121 | | 1.000 | 0.639 |
+ +------------+--------+----------+--------+---------+-----------+
+ | TimeGPT | 0.613 | 0.121 | 1.000 | | 0.639 |
+ +------------+--------+----------+--------+---------+-----------+
+ | autogluon | 0.971 | 0.048 | 0.639 | 0.639 | |
+ +------------+--------+----------+--------+---------+-----------+
diff --git a/docs/source/benchmarks/tabular.rst b/docs/source/benchmarks/tabular.rst
index 8e7801c7ba..0ff6dd00a7 100644
--- a/docs/source/benchmarks/tabular.rst
+++ b/docs/source/benchmarks/tabular.rst
@@ -2,20 +2,53 @@ Tabular data
------------
Here are overall classification problem results across state-of-the-art AutoML frameworks
-using `AutoMlBenchmark `__ test suite:
+using the `AMLB `__ test suite:
-.. raw:: html
- :file: amlb_res.html
+.. csv-table::
+ :header: Dataset, Metric, AutoGluon, FEDOT, H2O, TPOT
-The results are obtained using sever based on Xeon Cascadelake (2900MHz)
-with 12 cores and 24GB memory for experiments with the local infrastructure. 1h8c configuration was used for AMLB.
-
-Despite the obtained metrics being a bit different from AMLB's `paper `__
-the results confirm that FEDOT is competitive with SOTA solutions.
+ adult, auc, 0.91001, 0.91529, **0.93077**, 0.92729
+ airlines, auc, 0.72491, 0.65378, **0.73039**, 0.69368
+ albert, auc, **0.73903**, 0.72765, nan, nan
+ amazon_employee_access, auc, 0.85715, 0.85911, **0.87281**, 0.86625
+ apsfailure, auc, 0.99062, 0.98999, **0.99252**, 0.99044
+ australian, auc, **0.93953**, 0.93785, 0.93857, 0.93604
+ bank-marketing, auc, 0.93126, 0.93245, **0.93860**, 0.93461
+ blood-transfusion, auc, 0.68959, 0.72444, **0.75949**, 0.74019
+ christine, auc, 0.80429, 0.80446, **0.81936**, 0.80669
+ credit-g, auc, **0.79529**, 0.78458, 0.79357, 0.79381
+ guillermo, auc, **0.89967**, 0.89125, nan, 0.78331
+ jasmine, auc, 0.88312, 0.88548, 0.88734, **0.89038**
+ kc1, auc, 0.82226, 0.83857, nan, **0.84481**
+ kddcup09_appetency, auc, 0.80447, 0.78778, **0.82912**, 0.82556
+ kr-vs-kp, auc, 0.99886, 0.99925, 0.99972, **0.99976**
+ miniboone, auc, 0.98217, 0.98102, nan, **0.98346**
+ nomao, auc, 0.99483, 0.99420, **0.99600**, 0.99538
+ numerai28_6, auc, 0.51655, 0.52161, **0.53052**, nan
+ phoneme, auc, 0.96542, 0.96448, 0.96751, **0.97070**
+ riccardo, auc, **0.99970**, 0.99794, nan, nan
+ sylvine, auc, 0.98470, 0.98496, 0.98936, **0.99339**
+ car, neg_logloss, -0.11659, -0.08885, **-0.00347**, -0.64257
+ cnae-9, neg_logloss, -0.33208, -0.27010, -0.21849, **-0.15369**
+ connect-4, neg_logloss, -0.50157, -0.47033, **-0.33770**, -0.37349
+ covertype, neg_logloss, **-0.07140**, -0.14096, -0.26422, nan
+ dilbert, neg_logloss, -0.14967, -0.24455, **-0.07643**, -0.16839
+ dionis, neg_logloss, **-2.15760**, nan, nan, nan
+ fabert, neg_logloss, -0.78781, -0.90152, **-0.77194**, -0.89159
+ fashion-mnist, neg_logloss, **-0.33257**, -0.38379, -0.38328, -0.53549
+ helena, neg_logloss, **-2.78497**, -6.34863, -2.98020, -2.98157
+ jannis, neg_logloss, -0.72838, -0.76192, **-0.69123**, -0.70310
+ jungle_chess, neg_logloss, -0.43064, -0.27074, -0.23952, **-0.21872**
+ mfeat-factors, neg_logloss, -0.16118, -0.17412, **-0.09296**, -0.10726
+ robert, neg_logloss, **-1.68431**, -1.74509, nan, nan
+ segment, neg_logloss, -0.09419, -0.09643, **-0.05962**, -0.07711
+ shuttle, neg_logloss, -0.00081, -0.00101, **-0.00036**, nan
+ vehicle, neg_logloss, -0.51546, -0.42776, **-0.33137**, -0.39150
+ volkert, neg_logloss, **-0.92007**, -1.04485, -0.97797, nan
The statistical analysis was conducted using the Friedman t-test.
The results of experiments and analysis confirm that FEDOT results are statistically indistinguishable
-from SOTA competitors H2O, AutoGluon and LAMA (see below).
+from SOTA competitors H2O, AutoGluon and TPOT (see below).
-.. image:: img_benchmarks/stats.png
+.. image:: img_benchmarks/stats.png
\ No newline at end of file