Showing 7 changed files with 356 additions and 0 deletions.
@@ -0,0 +1,22 @@
name: Upload blogs to Strapi

on:
  push:
    branches:
      - vh-redesign

jobs:
  run-script:
    if: ${{ github.repository == 'superlinked/VectorHub' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.x
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          pip install -r blog/requirements.txt
      - name: Run script
        run: python blog/main.py --directories blog/directories.json
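Note that `blog/main.py` (further down in this commit) reads its Strapi credentials from the `STRAPI_URL` and `STRAPI_API_KEY` environment variables, and the workflow step above does not export them explicitly. A minimal sketch of a pre-flight guard one could run before the upload step; the guard itself is not part of this commit, only the variable names come from `main.py`:

```python
import os
import sys

# blog/main.py falls back to empty strings when these are unset, which would
# silently target an empty BASE_URL; failing fast makes the misconfiguration explicit.
missing = [name for name in ("STRAPI_URL", "STRAPI_API_KEY") if not os.getenv(name)]
if missing:
    sys.exit(f"Missing required environment variables: {', '.join(missing)}")
```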
@@ -0,0 +1,8 @@
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
__pycache__/
@@ -0,0 +1,17 @@
# Blog Uploads

## Setup
```bash
cd blog
python3 -m venv env
source env/bin/activate
pip install -r requirements.txt
```

## Run
```bash
export STRAPI_URL="https://mystrapi.strapiapp.com"
export STRAPI_API_KEY="mystrapi-api-key"
source blog/env/bin/activate
python blog/main.py --directories blog/directories.json
```
@@ -0,0 +1,42 @@
[
    {
        "type": "folder",
        "name": "VectorHub",
        "path": "docs",
        "has_blogs": false,
        "children": [
            {
                "type": "folder",
                "name": "Building Blocks",
                "path": "building_blocks",
                "has_blogs": false,
                "children": [
                    {
                        "type": "folder",
                        "name": "Vector Compute",
                        "path": "vector_compute",
                        "has_blogs": true
                    },
                    {
                        "type": "folder",
                        "name": "Vector Search",
                        "path": "vector_search",
                        "has_blogs": true
                    },
                    {
                        "type": "folder",
                        "name": "Data Sources",
                        "path": "data_sources",
                        "has_blogs": true
                    }
                ]
            },
            {
                "type": "folder",
                "name": "Use Cases",
                "path": "use_cases",
                "has_blogs": true
            }
        ]
    }
]
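For reference, `blog/main.py` walks this tree with `fetch_paths` and collects the path of every folder whose `has_blogs` flag is true, joining each node's `path` onto its parent's. A minimal sketch of that traversal and its expected output for the JSON above; `collect_blog_paths` is an illustrative stand-in, not part of the commit, and it assumes the file is saved as `blog/directories.json` and the script is run from the repository root:

```python
import json

# Walk the directory tree the same way fetch_paths in blog/main.py does:
# accumulate the parent path and keep any folder flagged with has_blogs.
def collect_blog_paths(node, current_path=""):
    current_path = f"{current_path}/{node['path']}" if current_path else node["path"]
    paths = [current_path] if node.get("has_blogs") else []
    for child in node.get("children", []):
        paths.extend(collect_blog_paths(child, current_path))
    return paths

with open("blog/directories.json") as f:
    tree = json.load(f)

for root in tree:
    print(collect_blog_paths(root))
# Expected for the JSON above:
# ['docs/building_blocks/vector_compute', 'docs/building_blocks/vector_search',
#  'docs/building_blocks/data_sources', 'docs/use_cases']
```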
@@ -0,0 +1,88 @@
import os
import enum

GIT_REPO = 'https://github.com/superlinked/VectorHub'

class ItemType(enum.Enum):
    FOLDER = "folder"
    FILE = "file"

class Item:
    def __init__(self, type, name, path, has_blogs=False, children=None):
        self.type = type
        self.name = name
        self.path = path
        self.has_blogs = has_blogs
        self.children = []

        if children:
            self.add_children(children)

    def __str__(self) -> str:
        return self.path

    def add_children(self, children):
        for child in children:
            # Recursively create Item objects for nested folders/files
            self.children.append(Item.from_dict(child))

    @classmethod
    def from_dict(cls, data):
        """
        Create an Item instance from a dictionary.
        """
        return cls(
            type=data.get("type", ItemType.FOLDER),
            name=data.get("name", ""),
            path=data.get("path", ""),
            has_blogs=data.get("has_blogs", False),
            children=data.get("children", [])
        )

    def to_dict(self):
        """
        Convert the Item object to a dictionary.
        """
        return {
            "type": self.type,
            "path": self.path,
            "name": self.name,
            "has_blogs": self.has_blogs,
            "children": [child.to_dict() for child in self.children],
        }


class StrapiBlog:
    def __init__(self, content, filepath, last_updated):
        self.content = content
        self.filepath = filepath
        self.last_updated = last_updated

    def get_title(self) -> str:
        # Derive a human-readable title from the file name, e.g. "my_blog_post.md" -> "my blog post"
        return os.path.basename(self.filepath).replace('-', ' ').replace('_', ' ').replace('.md', '')

    def __str__(self) -> str:
        return self.get_title()

    def get_github_url(self):
        return f'{GIT_REPO}/blob/main/{self.filepath}'

    def get_slug(self):
        # Build a URL slug from the repo path, dropping the leading "docs/" prefix
        slug = self.filepath.replace('.md', '').replace('_', '-').replace(' ', '-').replace('docs/', '').replace('&', '').replace('--', '-')
        return slug.lower()

    def get_json(self):
        return {
            "github_url": self.get_github_url(),
            "content": self.content,
            "github_last_updated_date": self.last_updated,
            "title": self.get_title(),
            "slug_url": self.get_slug()
        }

    def get_post_json(self):
        return {"data": self.get_json()}

    def __eq__(self, other) -> bool:
        # Two blogs are the same post if they resolve to the same slug
        return self.get_slug() == other.get_slug()
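As a quick illustration of the title, slug, and payload rules above, here is a hedged usage sketch of `StrapiBlog`. The markdown path and content are hypothetical and chosen only to show the derivation; real paths come from the folders listed in `directories.json`, and the snippet assumes it is run from the `blog` directory so `helpers` is importable:

```python
from datetime import datetime

from helpers import StrapiBlog

# Hypothetical file path, used only to illustrate the title/slug derivation.
blog = StrapiBlog(
    content="# Hello\nSome markdown body...",
    filepath="docs/use_cases/Personalized_Search.md",
    last_updated=datetime.now().strftime("%Y-%m-%d"),
)

print(blog.get_title())       # "Personalized Search"
print(blog.get_slug())        # "use-cases/personalized-search"
print(blog.get_github_url())  # "https://github.com/superlinked/VectorHub/blob/main/docs/use_cases/Personalized_Search.md"
print(blog.get_post_json())   # {"data": {"github_url": ..., "content": ..., "github_last_updated_date": ..., "title": ..., "slug_url": ...}}
```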
@@ -0,0 +1,173 @@
import os
import json
import argparse
from json.decoder import JSONDecodeError
import requests
from urllib.parse import urljoin
from helpers import Item, ItemType, StrapiBlog
from tqdm.auto import tqdm
from datetime import datetime
from pathlib import Path

args = None

BASE_URL = os.getenv('STRAPI_URL', "")
API_KEY = os.getenv('STRAPI_API_KEY', "")

paths_to_search = []
existing_slugs_discovered = {}

headers = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json'
}

def arg_parse():
    global args
    parser = argparse.ArgumentParser(description="VectorHub Strapi Upload")
    parser.add_argument('--directories', help='Path to json which describes the directories to parse')
    args = parser.parse_args()

def load_items_from_json(directories: str) -> list:
    if os.path.exists(directories):
        items = []
        try:
            with open(directories, 'r') as file:
                data = json.load(file)
                for item_data in data:
                    items.append(Item.from_dict(item_data))
        except JSONDecodeError as e:
            print('JSON structure is invalid.')
            exit(1)
        except Exception as e:
            print('An unknown error occurred.')
            print(e)
            exit(1)
        return items
    else:
        print(f"{directories} does not exist.")
        exit(1)


def load_existing_blogs(page_num=1):
    """Page through the Strapi blog collection and record every existing slug."""
    global existing_slugs_discovered
    base_url = urljoin(BASE_URL, 'api/blogs')
    search_url = base_url + f"?pagination[page]={page_num}"

    session = requests.Session()

    response = session.get(search_url, headers=headers)
    if response.status_code == 200:
        data = json.loads(response.text)['data']
        if len(data) > 0:
            for item in data:
                # Track each existing slug together with its Strapi id, so
                # delete_old_blogs() can remove entries no longer in the repo.
                existing_slugs_discovered[item['attributes']['slug_url']] = {'discovered': False, 'id': item['id']}
            load_existing_blogs(page_num + 1)


def fetch_paths(node: Item, current_path=""):
    global paths_to_search
    # Update the current path with the node's path
    current_path = f"{current_path}/{node.path}" if current_path else node.path

    # Record folders that contain blogs, then recurse on each child
    if node.has_blogs:
        paths_to_search.append(current_path)
    if node.children and len(node.children) > 0:
        for child in node.children:
            fetch_paths(child, current_path)


def find_files_to_upload(items: list):
    global paths_to_search

    for item in items:
        fetch_paths(item)

    files = []

    extension = 'md'

    for path in paths_to_search:
        folder_path = Path(path)
        folder_files = folder_path.glob(f"*.{extension}")
        for file in folder_files:
            if 'readme.md' not in str(file).lower():
                files.append({
                    'path': str(file),
                    'time': datetime.fromtimestamp(os.path.getmtime(file)).strftime("%Y-%m-%d")
                })

    return files


def build_blog_object(file_obj: dict) -> StrapiBlog:
    filepath = file_obj['path']
    with open(filepath, 'r') as file:
        content = file.read()
    blog = StrapiBlog(content, filepath, file_obj['time'])
    return blog

def upload_blog(blog: StrapiBlog):
    base_url = urljoin(BASE_URL, 'api/blogs')
    slug = blog.get_slug()
    search_url = base_url + f"?filters[slug_url][$eq]={slug}"
    session = requests.Session()

    if slug in existing_slugs_discovered:
        existing_slugs_discovered[slug]['discovered'] = True

    response = session.get(search_url, headers=headers)

    if response.status_code == 200:
        responses = json.loads(response.text)['data']
        print(f'Uploading slug: {blog.get_slug()}')
        if len(responses) > 0:
            # A blog already exists at this slug, so update it in place
            blog_id = responses[0]['id']

            url = f"{base_url}/{blog_id}"
            create_response = session.put(url, headers=headers, data=json.dumps(blog.get_post_json()))
        else:
            # It's a new blog
            url = base_url
            create_response = session.post(url, headers=headers, data=json.dumps(blog.get_post_json()))

        if create_response.status_code == 200:
            if slug in existing_slugs_discovered:
                create_response_text = json.loads(create_response.text)
                existing_slugs_discovered[slug]['id'] = create_response_text['data']['id']
        else:
            print(f'Error in parsing blog: {slug}')
            print(create_response.text)
            exit(1)


def delete_old_blogs():
    """Delete any blog that exists in Strapi but was not rediscovered in this run."""
    global existing_slugs_discovered

    base_url = urljoin(BASE_URL, 'api/blogs')
    session = requests.Session()

    for slug in existing_slugs_discovered:
        if not existing_slugs_discovered[slug]['discovered']:
            print(f"Deleting slug: {slug}")
            if existing_slugs_discovered[slug]['id'] > 0:
                url = f"{base_url}/{existing_slugs_discovered[slug]['id']}"
                response = session.delete(url, headers=headers)


if __name__ == "__main__":
    arg_parse()
    items = load_items_from_json(args.directories)

    load_existing_blogs()

    files = find_files_to_upload(items)

    print('Uploading blogs')
    for file in tqdm(files):
        blog = build_blog_object(file)
        upload_blog(blog)

    print('Deleting blogs')
    delete_old_blogs()
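Since the upload and delete helpers above key off specific fields in the Strapi responses, the following hedged sketch spells out the request and response shapes the script assumes. The field names are inferred from how the code indexes the JSON; the concrete ids, dates, and text are invented purely for illustration:

```python
# Shapes blog/main.py assumes from the Strapi REST API (inferred from the code;
# not an exhaustive description of Strapi itself).

# GET {STRAPI_URL}/api/blogs?pagination[page]=N -> paged listing
example_list_response = {
    "data": [
        {"id": 42, "attributes": {"slug_url": "use-cases/personalized-search"}},
        # ... more entries, one page at a time
    ],
}

# POST {STRAPI_URL}/api/blogs        (create) and
# PUT  {STRAPI_URL}/api/blogs/{id}   (update) both take the payload
# produced by StrapiBlog.get_post_json():
example_request_body = {
    "data": {
        "github_url": "https://github.com/superlinked/VectorHub/blob/main/docs/...",
        "content": "<markdown body>",
        "github_last_updated_date": "2024-03-01",
        "title": "Personalized Search",
        "slug_url": "use-cases/personalized-search",
    }
}
```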
@@ -0,0 +1,6 @@
certifi==2024.2.2
charset-normalizer==3.3.2
idna==3.6
requests==2.31.0
tqdm==4.66.2
urllib3==2.2.1