Skip to content

Commit

Permalink
Should be ready - remember to pull in data
Browse files Browse the repository at this point in the history
  • Loading branch information
thesteve0 committed Apr 4, 2024
1 parent faeb210 commit 3f6deaf
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 154 deletions.
38 changes: 38 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Image for the dev container's "app" service: a Python base image plus the
# PostgreSQL client libraries, data/embedding packages and PyTorch used here.

# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
# The default below selects the 3.10 image on Debian bookworm.
ARG VARIANT=3.10-bookworm
FROM mcr.microsoft.com/devcontainers/python:${VARIANT}

# Use the key=value form; the space-separated "ENV key value" form is a legacy
# syntax that current Docker builds warn about (LegacyKeyValueFormat).
ENV PYTHONUNBUFFERED=1

# [Choice] Node.js version: none, lts/*, 16, 14, 12, 10
# Node is only installed (via nvm, as the vscode user) when NODE_VERSION is
# overridden to something other than "none".
ARG NODE_VERSION="none"
RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi

# Add a "postgres" user account to the image.
RUN useradd postgres

# Python packages baked into the image.
# [Optional] To install from a requirements file instead, uncomment the COPY
# below and point pip at /tmp/pip-tmp/requirements.txt.
# COPY requirements.txt /tmp/pip-tmp/
RUN pip3 --disable-pip-version-check --no-cache-dir install \
    psycopg2-binary \
    pgvector \
    pyarrow \
    thingsvision \
    InstructorEmbedding \
    beautifulsoup4 \
    pandas

# PyTorch packages requested from the CPU-only wheel index.
# NOTE(review): if one of the packages installed above already pulled torch in
# from PyPI, pip may treat torch/torchvision as satisfied here and the CPU
# index would have no effect - confirm, and consider running this step first.
RUN pip3 --disable-pip-version-check --no-cache-dir install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Additional OS packages: the PostgreSQL command-line client and the nano
# editor. The apt lists are removed afterwards to keep the layer small.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
    && apt-get -y install --no-install-recommends \
        postgresql-client \
        nano \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/*




46 changes: 46 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Dev container definition: attaches VS Code to the "app" service of the
// Compose file named in "dockerComposeFile".
// To pick a Python version, update the VARIANT build arg in
// docker-compose.withdockerfile.yml; docker-compose.yml (used below) pulls a
// prebuilt image and has no VARIANT arg.
{
"name": "Tech Raven Consulting PostgreSQL Workshop",
"dockerComposeFile": "docker-compose.yml",
"service": "app",
// Matches the "..:/workspace" bind mount of the app service in the Compose file.
"workspaceFolder": "/workspace",

// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container-specific settings.json values on container create.
// NOTE(review): newer releases of the VS Code Python extension moved linting
// and formatting into separate extensions, so the python.linting.* and
// python.formatting.* keys below may be ignored - confirm.
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
"python.testing.pytestPath": "/usr/local/py-utils/bin/pytest"
},

// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// This can be used to network with other containers or the host.
// Currently disabled; uncomment to forward the listed ports.
// "forwardPorts": [5000, 5432],

// Use 'postCreateCommand' to run commands after the container is created.
// Currently disabled; uncomment to install requirements.txt on create.
// "postCreateCommand": "pip install --user -r requirements.txt",

// Connect as the non-root "vscode" user.
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode"
}
47 changes: 47 additions & 0 deletions .devcontainer/docker-compose.withdockerfile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Compose stack for the dev container. This variant builds the "app" image
# locally from .devcontainer/Dockerfile and runs it next to the database.
version: "3.8"

services:
  app:
    build:
      # Build context is the repository root, one level above .devcontainer.
      context: ..
      dockerfile: .devcontainer/Dockerfile
      args:
        # Python version tag of the base image: 3, 3.10, 3.9, 3.8, 3.7, 3.6.
        # Append a Debian release suffix (e.g. -bullseye or -buster) to pin
        # the OS version; use -bullseye variants on local arm64/Apple Silicon.
        # https://hub.docker.com/_/microsoft-devcontainers-python
        # Pinned to 3.10 because of a conflict with numba.
        VARIANT: 3.10-bookworm
        # Optional Node.js version to install.
        NODE_VERSION: "lts/*"

    # Share the db container's network namespace so the app reaches PostgreSQL
    # on localhost and "forwardPorts" in devcontainer.json can function.
    network_mode: "service:db"

    # Keep the container alive after the default process would have exited.
    command: sleep infinity

    # Mount the repository root at /workspace.
    volumes:
      - "..:/workspace:cached"

    # Uncomment the next line to use a non-root user for all processes.
    # user: vscode

    # Use "forwardPorts" in **devcontainer.json** to forward an app port locally.
    # (Adding the "ports" property to this file will not forward from a Codespace.)

  db:
    image: ghcr.io/thesteve0/pg16-full-workshop:latest
    restart: unless-stopped
    environment:
      POSTGRES_DB: postgres
      POSTGRES_USER: postgres
      # NOTE(review): arxiv-import.py in this commit connects with the password
      # 'letmein' - confirm which credential the image actually uses.
      POSTGRES_PASSWORD: postgres
    # Named volume so database files survive container re-creation.
    volumes:
      - postgres-data:/var/lib/postgresql/data

    # Add "forwardPorts": ["5432"] to **devcontainer.json** to forward PostgreSQL locally.
    # (Adding the "ports" property to this file will not forward from a Codespace.)

volumes:
  postgres-data: {}
36 changes: 36 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
# Compose stack for the dev container. This variant pulls a prebuilt "app"
# image instead of building one, and runs it next to the database.
version: "3.8"

services:
  app:
    image: ghcr.io/thesteve0/pg_devcontainer:latest
    restart: unless-stopped

    # Share the db container's network namespace so the app reaches PostgreSQL
    # on localhost and "forwardPorts" in devcontainer.json can function.
    network_mode: "service:db"

    # Keep the container alive after the default process would have exited.
    command: sleep infinity

    # Mount the repository root (one level above .devcontainer) at /workspace.
    volumes:
      - "..:/workspace:cached"

    # Uncomment the next line to use a non-root user for all processes.
    # user: vscode

    # Use "forwardPorts" in **devcontainer.json** to forward an app port locally.
    # (Adding the "ports" property to this file will not forward from a Codespace.)

  db:
    image: ghcr.io/thesteve0/pg16-full-workshop:latest
    restart: unless-stopped
    environment:
      POSTGRES_DB: postgres
      POSTGRES_USER: postgres
      # NOTE(review): arxiv-import.py in this commit connects with the password
      # 'letmein' - confirm which credential the image actually uses.
      POSTGRES_PASSWORD: postgres
    # Named volume so database files survive container re-creation.
    volumes:
      - postgres-data:/var/lib/postgresql/data

    # Add "forwardPorts": ["5432"] to **devcontainer.json** to forward PostgreSQL locally.
    # (Adding the "ports" property to this file will not forward from a Codespace.)

volumes:
  postgres-data: {}
64 changes: 0 additions & 64 deletions arvix-example-copy.py

This file was deleted.

9 changes: 5 additions & 4 deletions arxiv-import.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# The proper word is arxiv, but I messed up when creating the folder and project

PARQUET_PATH= Path('./')
DB_NAME= 'lala'
DB_NAME= 'arxiv_abstracts'

conn = psycopg.connect("host=localhost user=postgres password='letmein'", autocommit=True)
cursor = conn.cursor()
Expand All @@ -17,7 +17,7 @@

list_database = cursor.fetchall()

if ('lala',) in list_database:
if (DB_NAME,) in list_database:
cursor.execute(("DROP database "+ DB_NAME +" with (FORCE);"))
cursor.execute("create database " + DB_NAME + ";")
else:
Expand All @@ -26,12 +26,13 @@
#Now close the connection and switch DB
conn.close()

connect_string = f"host=localhost user=postgres password='letmein' dbname='{DB_NAME}'"

conn = psycopg.connect("host=localhost user=postgres password='letmein' dbname='lala'", autocommit=True)
conn = psycopg.connect(connect_string, autocommit=True)
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
conn.close()

conn = psycopg.connect("host=localhost user=postgres password='letmein' dbname='lala'", autocommit=True)
conn = psycopg.connect(connect_string, autocommit=True)
register_vector(conn)

conn.execute('DROP TABLE IF EXISTS documents')
Expand Down
69 changes: 0 additions & 69 deletions pgvector_example.py

This file was deleted.

Loading

0 comments on commit 3f6deaf

Please sign in to comment.