Skip to content

Commit

Permalink
Rust fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
AyushGupta-0 committed Apr 3, 2024
1 parent 214856a commit ac72740
Showing 1 changed file with 93 additions and 40 deletions.
133 changes: 93 additions & 40 deletions docs/tools/vdb_table/fetch-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import os
from datetime import datetime, timedelta
from urllib.parse import urlparse
from tqdm.auto import tqdm

import requests
import pypistats
import requests
from tqdm.auto import tqdm

# Constants for the script
DIRECTORY = "docs/tools/vdb_table/data"
Expand All @@ -14,6 +14,7 @@
DOCKER_HUB_API_URL = "https://hub.docker.com/v2/repositories/"
NPM_API_URL = "https://api.npmjs.org/downloads/point/"
PYPI_API_URL = "https://api.pepy.tech/api/v2/projects"
RUST_CRATES_API_URL = "https://crates.io/api/v1/crates/"


def get_github_stars(github_url, headers=None):
Expand Down Expand Up @@ -46,7 +47,7 @@ def get_npm_package_info(npm_package):
response = requests.get(f"https://registry.npmjs.org/{npm_package}")
if response.status_code == 200:
data = response.json()
first_release_date = data.get("time")['created']
first_release_date = data.get("time")["created"]
return first_release_date
else:
print(
Expand All @@ -61,51 +62,90 @@ def get_npm_downloads(npm_package, headers=None, start_date=None):
release_date = get_npm_package_info(npm_package)
if release_date:
# Subtract one day from the release date for the start range
start_date = datetime.strptime(release_date, "%Y-%m-%dT%H:%M:%S.%fZ") - timedelta(days=1)
start_date = datetime.strptime(
release_date, "%Y-%m-%dT%H:%M:%S.%fZ"
) - timedelta(days=1)

start_date = start_date.strftime("%Y-%m-%d")
end_date = (datetime.now() + + timedelta(days=1)).strftime("%Y-%m-%d")
response = requests.get(f"{NPM_API_URL}{start_date}:{end_date}/{npm_package}", headers)
end_date = (datetime.now() + +timedelta(days=1)).strftime("%Y-%m-%d")
response = requests.get(
f"{NPM_API_URL}{start_date}:{end_date}/{npm_package}", headers
)
if response.status_code == 200:
return response.json()["downloads"]
else:
print(
f"Failed to fetch npm downloads for {npm_package}: {response.status_code}"
)
return None


def get_pypi_downloads(pypi_package, headers=None):
global PYPI_API_URL
headers['X-Api-Key'] = os.getenv('PYPI_API_KEY', '')
response = requests.get(f"{PYPI_API_URL}/{pypi_package}", headers=headers)
if response.status_code == 200:
return response.json()["total_downloads"]
else:
print(
f"Failed to fetch pypi downloads for {pypi_package}: {response.status_code}"
)
return None



def get_pypi_downloads_last_90(pypi_package, headers=None, start_date=None, end_date=None):
stats = json.loads(pypistats.overall(pypi_package, mirrors=True, format="json", start_date=start_date, end_date=end_date))
if stats and 'data' in stats:
return stats['data'][0]['downloads']
global PYPI_API_URL
headers["X-Api-Key"] = os.getenv("PYPI_API_KEY", "")
response = requests.get(f"{PYPI_API_URL}/{pypi_package}", headers=headers)
if response.status_code == 200:
return response.json()["total_downloads"]
else:
print(
f"Failed to fetch pypi downloads for {pypi_package}"
f"Failed to fetch pypi downloads for {pypi_package}: {response.status_code}"
)
return None


def get_pypi_downloads_last_90(
    pypi_package, headers=None, start_date=None, end_date=None
):
    """Return the download count pypistats reports for a date window.

    Args:
        pypi_package: Name of the package to query.
        headers: Unused; accepted so the signature matches the other
            fetch helpers in this script.
        start_date: Start of the window, forwarded to ``pypistats.overall``.
        end_date: End of the window, forwarded to ``pypistats.overall``.

    Returns:
        The ``downloads`` value of the first row in the response's ``data``
        list, or ``None`` (after printing a message) when the response has
        no usable rows.
    """
    stats = json.loads(
        pypistats.overall(
            pypi_package,
            mirrors=True,
            format="json",
            start_date=start_date,
            end_date=end_date,
        )
    )
    rows = stats["data"] if stats and "data" in stats else None
    # An empty "data" list is treated like a missing one so that indexing
    # the first row cannot raise IndexError.
    if rows:
        return rows[0]["downloads"]

    print(f"Failed to fetch pypi downloads for {pypi_package}")
    return None


def get_rust_downloads(crate_name, headers=None, timeout=30):
    """Return the total download count for a crate from the crates.io API.

    Args:
        crate_name: Name of the crate, appended to ``RUST_CRATES_API_URL``.
        headers: Optional HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole update run.

    Returns:
        The ``crate.downloads`` value from the JSON response, or ``None``
        (after printing a message) when the request fails or the server
        answers with a non-200 status.
    """
    try:
        response = requests.get(
            f"{RUST_CRATES_API_URL}{crate_name}", headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        # Transport errors (including the timeout above) follow the same
        # "report and return None" path as a bad status code.
        print(f"Failed to fetch total downloads for Rust crate {crate_name}: {exc}")
        return None

    if response.status_code != 200:
        print(
            f"Failed to fetch total downloads for Rust crate {crate_name}: {response.status_code}"
        )
        return None

    return response.json()["crate"]["downloads"]


def get_rust_downloads_last_90(crate_name, headers=None, timeout=30):
    """Return a crate's download count over the last 90 days.

    Queries ``{RUST_CRATES_API_URL}{crate_name}/downloads`` and sums the
    daily rows dated on or after the day 90 days before today.

    Args:
        crate_name: Name of the crate, appended to ``RUST_CRATES_API_URL``.
        headers: Optional HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The summed ``downloads`` of all rows inside the window, or ``None``
        (after printing a message) when the request fails or the server
        answers with a non-200 status.
    """
    try:
        response = requests.get(
            f"{RUST_CRATES_API_URL}{crate_name}/downloads",
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Transport errors (including the timeout above) follow the same
        # "report and return None" path as a bad status code.
        print(
            f"Failed to fetch downloads for the last 90 days for Rust crate {crate_name}: {exc}"
        )
        return None

    if response.status_code != 200:
        print(
            f"Failed to fetch downloads for the last 90 days for Rust crate {crate_name}: {response.status_code}"
        )
        return None

    payload = response.json()
    # NOTE(review): the endpoint appears to split daily counts between
    # per-version rows ("version_downloads") and an aggregate for all other
    # versions ("meta.extra_downloads"); both are summed so the total is not
    # undercounted. Confirm against the crates.io API before relying on it.
    daily_rows = list(payload.get("version_downloads", []))
    daily_rows.extend(payload.get("meta", {}).get("extra_downloads", []))

    # Compare calendar dates so the current time of day does not shift the
    # boundary of the 90-day window.
    cutoff = (datetime.now() - timedelta(days=90)).date()
    return sum(
        row["downloads"]
        for row in daily_rows
        if datetime.strptime(row["date"], "%Y-%m-%d").date() >= cutoff
    )


def update_json_files(directory, headers=None):
if headers is None:
headers = {}

sources = ['github_stars', 'docker_pulls', 'npm_downloads', 'pypi_downloads']
sources = ["github_stars", "docker_pulls", "npm_downloads", "pypi_downloads"]

for filename in tqdm(os.listdir(directory)):
if filename.endswith(".json"):
Expand All @@ -119,8 +159,8 @@ def update_json_files(directory, headers=None):
rust_url = data.get("rust_downloads", {}).get("source_url", "")

for source in sources:
if 'value_90_days' not in data[source]:
data[source]['value_90_days'] = 0
if "value_90_days" not in data[source]:
data[source]["value_90_days"] = 0

if dockerhub_url:
parsed_dockerhub_path = str(urlparse(dockerhub_url).path)
Expand All @@ -142,7 +182,9 @@ def update_json_files(directory, headers=None):
data["github_stars"]["value"] = stars

if npm_url:
npm_package_name = list(npm_url.split('https://www.npmjs.com/package/'))[1].strip()
npm_package_name = list(
npm_url.split("https://www.npmjs.com/package/")
)[1].strip()
downloads = get_npm_downloads(npm_package_name, headers)
if downloads is not None:
data["npm_downloads"]["value"] = downloads
Expand All @@ -151,30 +193,41 @@ def update_json_files(directory, headers=None):
downloads = get_npm_downloads(npm_package_name, headers, start_date)
if downloads is not None:
data["npm_downloads"]["value_90_days"] = downloads

if pypi_url:
pypi_package_name = list(pypi_url.split('https://pypi.org/project/'))[1].strip().strip('/')
pypi_package_name = (
list(pypi_url.split("https://pypi.org/project/"))[1]
.strip()
.strip("/")
)
downloads = get_pypi_downloads(pypi_package_name, headers)
if downloads is not None:
data["pypi_downloads"]["value"] = downloads

end_date = datetime.now()
start_date = end_date - timedelta(days=90)
downloads = get_pypi_downloads_last_90(pypi_package_name, headers, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))
downloads = get_pypi_downloads_last_90(
pypi_package_name,
headers,
start_date.strftime("%Y-%m-%d"),
end_date.strftime("%Y-%m-%d"),
)
if downloads is not None:
data["pypi_downloads"]["value_90_days"] = downloads

if rust_url:
pypi_package_name = list(pypi_url.split('https://pypi.org/project/'))[1].strip().strip('/')
downloads = get_pypi_downloads(pypi_package_name, headers)
rust_crate_name = (
list(rust_url.split("https://crates.io/crates/"))[1]
.strip()
.strip("/")
)
downloads = get_rust_downloads(rust_crate_name)
if downloads is not None:
data["rust_downloads"]["value"] = downloads

end_date = datetime.now()
start_date = end_date - timedelta(days=90)
downloads = get_pypi_downloads_last_90(pypi_package_name, headers, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))
if downloads is not None:
data["rust_downloads"]["value_90_days"] = downloads
downloads_last_90 = get_rust_downloads_last_90(rust_crate_name)
if downloads_last_90 is not None:
data["rust_downloads"]["value_90_days"] = downloads_last_90

# Write the updated data back to the file
json_file.seek(0) # Rewind to the start of the file
Expand Down

0 comments on commit ac72740

Please sign in to comment.