From a3f939979a6be6fc5b38df78e3df7627f57820c1 Mon Sep 17 00:00:00 2001 From: Pablo Olivares Date: Mon, 1 Apr 2024 19:49:38 +0200 Subject: [PATCH] Added dataset version control advances #2 This was achieved using DVC --- .dvc/.gitignore | 3 ++ .dvc/config | 4 ++ .dvcignore | 3 ++ .gitignore | 1 + data.dvc | 6 +++ environment.yaml | 102 ++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 .dvc/.gitignore create mode 100644 .dvc/config create mode 100644 .dvcignore create mode 100644 data.dvc diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..96b2e3f --- /dev/null +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = mygdrive +['remote "mygdrive"'] + url = gdrive://1m6dIzlNmUCJN3uK-LYvkinXlTlzNMVPl diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.gitignore b/.gitignore index 0a452a7..cbc9c4e 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,4 @@ data/ outputs/ logs/ +/data diff --git a/data.dvc b/data.dvc new file mode 100644 index 0000000..0a3208c --- /dev/null +++ b/data.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 8e9341cdead1a76a960dfbfe54fc9cba.dir + size: 2323069815 + nfiles: 9249 + hash: md5 + path: data diff --git a/environment.yaml b/environment.yaml index d604c86..caf7110 100644 --- a/environment.yaml +++ b/environment.yaml @@ -7,18 +7,41 @@ channels: dependencies: - _libgcc_mutex=0.1 - _openmp_mutex=4.5 + - aiohttp=3.9.3 + - aiohttp-retry=2.8.3 + - aiosignal=1.3.1 + - amqp=5.2.0 + - antlr-python-runtime=4.9.3 + - appdirs=1.4.4 - asttokens=2.4.1 + - asyncssh=2.14.1 + - atpublic=3.0.1 + - attrs=23.2.0 + - backports.zoneinfo=0.2.1 + - billiard=4.2.0 - blas=1.0 + - boto3=1.34.74 + - botocore=1.34.74 - bottleneck=1.3.7 - brotli=1.1.0 - brotli-bin=1.1.0 - brotli-python=1.0.9 - bzip2=1.0.8 - - ca-certificates=2024.3.11 + - ca-certificates=2024.2.2 + - cachetools=5.3.3 + - celery=5.3.6 - certifi=2024.2.2 + - cffi=1.16.0 - charset-normalizer=2.0.4 + - click=8.1.7 + - click-didyoumean=0.3.1 + - click-plugins=1.1.1 + - click-repl=0.3.0 + - colorama=0.4.6 - comm=0.2.2 + - configobj=5.0.8 - contourpy=1.2.0 + - cryptography=42.0.5 - cuda-cudart=12.1.105 - cuda-cupti=12.1.105 - cuda-libraries=12.1.0 @@ -30,41 +53,75 @@ dependencies: - dbus=1.13.18 - debugpy=1.8.1 - decorator=5.1.1 + - dictdiffer=0.9.0 + - diskcache=5.6.3 + - distro=1.9.0 + - dpath=2.1.6 + - dulwich=0.21.7 + - dvc=2.46.0 + - dvc-data=0.42.3 + - dvc-gdrive=3.0.1 + - dvc-http=2.32.0 + - dvc-objects=0.25.0 + - dvc-render=1.0.1 + - dvc-studio-client=0.20.0 + - dvc-task=0.4.0 - exceptiongroup=1.2.0 - executing=2.0.1 - expat=2.6.2 - ffmpeg=4.3 - filelock=3.13.1 + - flatten-dict=0.4.2 + - flufl.lock=7.1 - fontconfig=2.14.2 - fonttools=4.50.0 - freetype=2.12.1 + - frozenlist=1.4.1 + - fsspec=2024.3.1 + - funcy=1.18 + - future=1.0.0 + - gitdb=4.0.11 + - gitpython=3.1.42 - glib=2.78.4 - glib-tools=2.78.4 - gmp=6.2.1 - gmpy2=2.1.2 - gnutls=3.6.15 + - google-api-core=2.18.0 + - google-api-python-client=2.124.0 + - google-auth=2.29.0 + - google-auth-httplib2=0.2.0 + - googleapis-common-protos=1.63.0 + - grandalf=0.7 - gst-plugins-base=1.14.1 - gstreamer=1.14.1 + - httplib2=0.22.0 + - hydra-core=1.3.2 - icu=58.2 - idna=3.4 - importlib-metadata=7.1.0 - importlib_metadata=7.1.0 + - importlib_resources=6.4.0 - iniconfig=1.1.1 - intel-openmp=2023.1.0 - ipykernel=6.29.3 - ipython=8.22.2 + - iterative-telemetry=0.0.8 - jedi=0.19.1 - jinja2=3.1.3 + - jmespath=1.0.1 - jpeg=9e - jupyter_client=8.6.1 - jupyter_core=5.7.2 - keyutils=1.6.1 - kiwisolver=1.4.5 + - kombu=5.3.6 - krb5=1.20.1 - lame=3.100 - lcms2=2.12 - ld_impl_linux-64=2.40 - lerc=3.0 + - libabseil=20240116.1 - libbrotlicommon=1.1.0 - libbrotlidec=1.1.0 - libbrotlienc=1.1.0 @@ -81,6 +138,7 @@ dependencies: - libexpat=2.6.2 - libffi=3.4.2 - libgcc-ng=13.2.0 + - libgit2=1.7.2 - libglib=2.78.4 - libgomp=13.2.0 - libiconv=1.16 @@ -93,8 +151,10 @@ dependencies: - libnvjpeg=12.1.1.14 - libpng=1.6.39 - libpq=12.17 + - libprotobuf=4.25.3 - libsodium=1.0.18 - libsqlite=3.45.2 + - libssh2=1.11.0 - libstdcxx-ng=13.2.0 - libtasn1=4.19.0 - libtiff=4.5.1 @@ -108,10 +168,12 @@ dependencies: - libzlib=1.2.13 - llvm-openmp=14.0.6 - lz4-c=1.9.4 + - markdown-it-py=3.0.0 - markupsafe=2.1.3 - matplotlib=3.8.0 - matplotlib-base=3.8.0 - matplotlib-inline=0.1.6 + - mdurl=0.1.2 - mkl=2023.1.0 - mkl-service=2.4.0 - mkl_fft=1.3.8 @@ -119,7 +181,9 @@ dependencies: - mpc=1.1.0 - mpfr=4.0.2 - mpmath=1.3.0 + - multidict=6.0.5 - munkres=1.1.4 + - nanotime=0.5.2 - ncurses=6.4.20240210 - nest-asyncio=1.6.0 - nettle=3.7.3 @@ -129,12 +193,17 @@ dependencies: - numexpr=2.8.7 - numpy=1.26.4 - numpy-base=1.26.4 + - oauth2client=4.1.3 + - omegaconf=2.3.0 - openh264=2.1.1 - openjpeg=2.4.0 - openssl=3.2.1 + - orjson=3.9.15 - packaging=24.0 - pandas=2.2.1 - parso=0.8.3 + - pathlib2=2.3.7.post1 + - pathspec=0.12.1 - pcre2=10.42 - pexpect=4.9.0 - pickleshare=0.7.5 @@ -144,11 +213,22 @@ dependencies: - pluggy=1.0.0 - ply=3.11 - prompt-toolkit=3.0.42 + - prompt_toolkit=3.0.42 + - proto-plus=1.23.0 + - protobuf=4.25.3 - psutil=5.9.8 - pthread-stubs=0.4 - ptyprocess=0.7.0 - pure_eval=0.2.2 + - pyasn1=0.5.1 + - pyasn1-modules=0.3.0 + - pycparser=2.22 + - pydot=1.2.4 + - pydrive2=1.19.0 + - pygit2=1.14.1 - pygments=2.17.2 + - pygtrie=2.5.0 + - pyopenssl=24.0.0 - pyparsing=3.0.9 - pyqt=5.15.10 - pyqt5-sip=12.13.0 @@ -156,27 +236,42 @@ dependencies: - pytest=7.4.0 - python=3.11.8 - python-dateutil=2.9.0 + - python-gssapi=1.8.2 - python-tzdata=2023.3 - python_abi=3.11 - pytorch=2.2.1 - pytorch-cuda=12.1 - pytorch-mutex=1.0 - pytz=2023.3.post1 + - pyu2f=0.1.5 + - pywin32-on-windows=0.1.0 - pyyaml=6.0.1 - pyzmq=25.1.2 - qt-main=5.15.2 - readline=8.2 - requests=2.31.0 + - rich=13.7.1 + - rsa=4.9 + - ruamel.yaml=0.18.6 + - ruamel.yaml.clib=0.2.8 + - s3transfer=0.10.1 + - scmrepo=0.2.1 - seaborn=0.12.2 - setuptools=69.2.0 + - shortuuid=1.0.13 + - shtab=1.7.1 - sip=6.7.12 - six=1.16.0 + - smmap=5.0.0 - sqlite=3.45.2 + - sqltrie=0.11.0 - stack_data=0.6.2 - sympy=1.12 + - tabulate=0.9.0 - tbb=2021.8.0 - tk=8.6.13 - tomli=2.0.1 + - tomlkit=0.12.4 - torchaudio=2.2.1 - torchtriton=2.2.0 - torchvision=0.17.1 @@ -185,13 +280,18 @@ dependencies: - traitlets=5.14.2 - typing_extensions=4.10.0 - tzdata=2024a + - uritemplate=4.1.1 - urllib3=2.1.0 + - vine=5.1.0 + - voluptuous=0.14.2 - wcwidth=0.2.13 - wheel=0.43.0 - xorg-libxau=1.0.11 - xorg-libxdmcp=1.1.3 - xz=5.4.6 - yaml=0.2.5 + - yarl=1.9.4 + - zc.lockfile=3.0.post1 - zeromq=4.3.5 - zipp=3.17.0 - zlib=1.2.13