From afcc5ffda57cc318b01f80c4b28e612e1282d865 Mon Sep 17 00:00:00 2001 From: MMenchero Date: Tue, 11 Jun 2024 14:49:48 -0600 Subject: [PATCH 1/4] Fix: Changes how bitcoin price data is downloaded --- .../2_bitcoin_price_prediction.ipynb | 234 ++---------------- 1 file changed, 21 insertions(+), 213 deletions(-) diff --git a/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb b/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb index a01c261d..a9501106 100644 --- a/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb +++ b/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb @@ -118,46 +118,7 @@ "source": [ "Bitcoin (₿) is the first decentralized digital currency and is one of the most popular cryptocurrencies. Transactions are managed and recorded on a public ledger known as the blockchain. Bitcoins are created as a reward for mining, a process that involves solving complex cryptographic tasks to verify transactions. This digital currency can be used as payment for goods and services, traded for other currencies, or held as a store of value.\n", "\n", - "In this tutorial, we will first download the historical Bitcoin price data with `cryptocmd`, a Python package for downloading data from [CoinMarketCap](https://coinmarketcap.com/). To start, we need to define a `scraper`, selecting our cryptocurrency of interest and the start and end dates in format dd-mm-yyyy." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "::: {.callout-note}\n", - "You can install `cryptocmd` with `pip`:\n", - " \n", - "```python\n", - "pip install cryptocmd\n", - "```\n", - ":::" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd \n", - "from cryptocmd import CmcScraper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scraper = CmcScraper('BTC', '01-01-2020', '31-12-2023')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next we create a `pandas` DataFrame with the data. Note that it is important to sort the data by date in ascending order. " + "In this tutorial, we will first download the historical Bitcoin price data in USD as a `pandas` DataFrame. " ] }, { @@ -187,121 +148,46 @@ " \n", " \n", " Date\n", - " Open\n", - " High\n", - " Low\n", " Close\n", - " Volume\n", - " Market Cap\n", - " Time Open\n", - " Time High\n", - " Time Low\n", - " Time Close\n", " \n", " \n", " \n", " \n", - " 1460\n", + " 0\n", " 2020-01-01\n", - " 7194.891971\n", - " 7254.330611\n", - " 7174.944153\n", - " 7200.174393\n", - " 1.856566e+10\n", - " 1.305808e+11\n", - " 2020-01-01T00:00:00.000Z\n", - " 2020-01-01T15:42:01.000Z\n", - " 2020-01-01T01:06:01.000Z\n", - " 2020-01-01T23:59:59.999Z\n", + " 7200.174316\n", " \n", " \n", - " 1459\n", + " 1\n", " 2020-01-02\n", - " 7202.551122\n", - " 7212.155253\n", - " 6935.269972\n", - " 6985.470001\n", - " 2.080208e+10\n", - " 1.266994e+11\n", - " 2020-01-02T00:00:00.000Z\n", - " 2020-01-02T01:30:00.000Z\n", - " 2020-01-02T23:02:01.000Z\n", - " 2020-01-02T23:59:59.999Z\n", + " 6985.470215\n", " \n", " \n", - " 1458\n", + " 2\n", " 2020-01-03\n", - " 6984.428612\n", - " 7413.715099\n", - " 6914.995908\n", - " 7344.884183\n", - " 2.811148e+10\n", - " 1.332334e+11\n", - " 2020-01-03T00:00:00.000Z\n", - " 
2020-01-03T17:04:00.000Z\n", - " 2020-01-03T02:10:01.000Z\n", - " 2020-01-03T23:59:59.999Z\n", + " 7344.884277\n", " \n", " \n", - " 1457\n", + " 3\n", " 2020-01-04\n", - " 7345.375275\n", - " 7427.385794\n", - " 7309.514012\n", - " 7410.656566\n", - " 1.844427e+10\n", - " 1.344425e+11\n", - " 2020-01-04T00:00:00.000Z\n", - " 2020-01-04T18:44:02.000Z\n", - " 2020-01-04T00:39:02.000Z\n", - " 2020-01-04T23:59:59.999Z\n", + " 7410.656738\n", " \n", " \n", - " 1456\n", + " 4\n", " 2020-01-05\n", - " 7410.451694\n", - " 7544.496872\n", - " 7400.535561\n", - " 7411.317327\n", - " 1.972507e+10\n", - " 1.344695e+11\n", - " 2020-01-05T00:00:00.000Z\n", - " 2020-01-05T18:57:00.000Z\n", - " 2020-01-05T23:18:00.000Z\n", - " 2020-01-05T23:59:59.999Z\n", + " 7411.317383\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Date Open High Low Close \\\n", - "1460 2020-01-01 7194.891971 7254.330611 7174.944153 7200.174393 \n", - "1459 2020-01-02 7202.551122 7212.155253 6935.269972 6985.470001 \n", - "1458 2020-01-03 6984.428612 7413.715099 6914.995908 7344.884183 \n", - "1457 2020-01-04 7345.375275 7427.385794 7309.514012 7410.656566 \n", - "1456 2020-01-05 7410.451694 7544.496872 7400.535561 7411.317327 \n", - "\n", - " Volume Market Cap Time Open \\\n", - "1460 1.856566e+10 1.305808e+11 2020-01-01T00:00:00.000Z \n", - "1459 2.080208e+10 1.266994e+11 2020-01-02T00:00:00.000Z \n", - "1458 2.811148e+10 1.332334e+11 2020-01-03T00:00:00.000Z \n", - "1457 1.844427e+10 1.344425e+11 2020-01-04T00:00:00.000Z \n", - "1456 1.972507e+10 1.344695e+11 2020-01-05T00:00:00.000Z \n", - "\n", - " Time High Time Low \\\n", - "1460 2020-01-01T15:42:01.000Z 2020-01-01T01:06:01.000Z \n", - "1459 2020-01-02T01:30:00.000Z 2020-01-02T23:02:01.000Z \n", - "1458 2020-01-03T17:04:00.000Z 2020-01-03T02:10:01.000Z \n", - "1457 2020-01-04T18:44:02.000Z 2020-01-04T00:39:02.000Z \n", - "1456 2020-01-05T18:57:00.000Z 2020-01-05T23:18:00.000Z \n", - "\n", - " Time Close \n", - "1460 2020-01-01T23:59:59.999Z \n", - 
"1459 2020-01-02T23:59:59.999Z \n", - "1458 2020-01-03T23:59:59.999Z \n", - "1457 2020-01-04T23:59:59.999Z \n", - "1456 2020-01-05T23:59:59.999Z " + " Date Close\n", + "0 2020-01-01 7200.174316\n", + "1 2020-01-02 6985.470215\n", + "2 2020-01-03 7344.884277\n", + "3 2020-01-04 7410.656738\n", + "4 2020-01-05 7411.317383" ] }, "execution_count": null, @@ -310,8 +196,9 @@ } ], "source": [ - "df = scraper.get_dataframe()\n", - "df = df.sort_values('Date', ascending=True)\n", + "import pandas as pd \n", + "\n", + "df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/bitcoin_price_usd.csv', sep=',') \n", "df.head()" ] }, @@ -319,86 +206,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `scraper` provides different details regarding the price of Bitcoin. Here, we will use the `Close` column as our target variable, although any other column could also be used. It's important to note that unlike traditional financial assets, Bitcoin trades 24/7. Therefore, the closing price represents the price of Bitcoin at a specific time each day, rather than at the end of a trading day." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DateClose
14602020-01-017200.174393
14592020-01-026985.470001
14582020-01-037344.884183
14572020-01-047410.656566
14562020-01-057411.317327
\n", - "
" - ], - "text/plain": [ - " Date Close\n", - "1460 2020-01-01 7200.174393\n", - "1459 2020-01-02 6985.470001\n", - "1458 2020-01-03 7344.884183\n", - "1457 2020-01-04 7410.656566\n", - "1456 2020-01-05 7411.317327" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = df[['Date', 'Close']]\n", - "df.head()" + "This dataset contains the closing price of Bitcoin in USD from 2020-01-01 to 2023-12-31. It's important to note that unlike traditional financial assets, Bitcoin trades 24/7. Therefore, the closing price represents the price of Bitcoin at a specific time each day, rather than at the end of a trading day." ] }, { From 9efe95b5798677e7d4707734a8af59152110898e Mon Sep 17 00:00:00 2001 From: MMenchero Date: Tue, 11 Jun 2024 16:41:13 -0600 Subject: [PATCH 2/4] Removed cryptocmd from envionment.yml --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index 1cc0523a..103674d9 100644 --- a/environment.yml +++ b/environment.yml @@ -22,4 +22,3 @@ dependencies: - fire - tabulate - tenacity - - cryptocmd From 5263063008e7ed7965b47f264cc3783025569d56 Mon Sep 17 00:00:00 2001 From: MMenchero Date: Tue, 11 Jun 2024 17:02:35 -0600 Subject: [PATCH 3/4] Expanded on the model's limitations --- nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb b/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb index a9501106..bbd88af0 100644 --- a/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb +++ b/nbs/docs/use-cases/2_bitcoin_price_prediction.ipynb @@ -856,7 +856,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As stated in the introduction, predicting the future prices of financial assets is a challenging task, especially for assets like Bitcoin. 
However, for those who need or want to forecast these assets, `TimeGPT` can be a powerful tool that simplifies the forecasting process. With just a couple of lines of code, `TimeGPT` can help you: \n", + "As stated in the introduction, predicting the future prices of financial assets is a challenging task, especially for assets like Bitcoin. The predictions in this tutorial seem very accurate because we are doing historical forecasting; the real challenge is forecasting the price of Bitcoin for the upcoming days, not its historical price. For those who need or want to try to forecast these assets, `TimeGPT` can be an option that simplifies the forecasting process. With just a couple of lines of code, `TimeGPT` can help you: \n", "\n", "- Produce point forecasts \n", "- Quantify the uncertainty of your predictions \n", From b2615946e644dd6fcb349ca3d29524982b87dbd2 Mon Sep 17 00:00:00 2001 From: MMenchero Date: Tue, 11 Jun 2024 17:07:16 -0600 Subject: [PATCH 4/4] Fix for failing tests --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 90c0cf5c..6aeb0d3a 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ "neuralforecast", "hierarchicalforecast", "jupyterlab", + "setuptools<70", ] distributed = ["dask[dataframe]", "fugue[ray]>=0.8.7", "pyspark", "ray[serve-grpc]"] plotting = ["utilsforecast[plotting]>=0.1.7"]