Commit
Merge pull request #347 from gen-mind/feature/vision-1
Feature/vision 1
gsantopaolo authored Jul 14, 2024
2 parents e145a19 + 19a40be commit b18b651
Showing 23 changed files with 544 additions and 12 deletions.
Binary file modified .DS_Store
Binary file not shown.
27 changes: 27 additions & 0 deletions deployment/docker-compose-cognix.yaml
@@ -74,6 +74,33 @@ services:
    networks:
      - cognix-network

  vision:
    image: gen-mind/cognix-vision:latest
    build:
      context: ../src/backend
      dockerfile: vision/Dockerfile
    volumes:
      - ${DATA_PATH}/temp:/temp
      - ${DATA_PATH}/models:/models
    env_file:
      - ${CONFIG_PATH}/vision-srv.env
      - ${CONFIG_PATH}/nats-cli.env
      - ${CONFIG_PATH}/cockroach-cli.env
      - ${CONFIG_PATH}/minio-cli.env
    restart: always
    # healthcheck:
    #   test: "curl --silent --fail http://localhost:8080/healthz > /dev/null || exit 1"
    #   interval: 60s
    #   start_period: 10s
    #   timeout: 3s
    #   retries: 3
    depends_on:
      - nats
      - cockroach
      - minio
    networks:
      - cognix-network

  transformer:
    container_name: transformer
    image: gen-mind/cognix-transformer:latest
Binary file modified src/.DS_Store
Binary file not shown.
Binary file modified src/backend/.DS_Store
Binary file not shown.
28 changes: 28 additions & 0 deletions src/backend/lib_py/cognix_lib/gen_types/vision_data_pb2.py

Some generated files are not rendered by default.

24 changes: 24 additions & 0 deletions src/backend/lib_py/cognix_lib/gen_types/vision_data_pb2.pyi
@@ -0,0 +1,24 @@
import file_type_pb2 as _file_type_pb2
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

class VisionData(_message.Message):
__slots__ = ("url", "document_id", "connector_id", "file_type", "collection_name", "model_name", "model_dimension")
URL_FIELD_NUMBER: _ClassVar[int]
DOCUMENT_ID_FIELD_NUMBER: _ClassVar[int]
CONNECTOR_ID_FIELD_NUMBER: _ClassVar[int]
FILE_TYPE_FIELD_NUMBER: _ClassVar[int]
COLLECTION_NAME_FIELD_NUMBER: _ClassVar[int]
MODEL_NAME_FIELD_NUMBER: _ClassVar[int]
MODEL_DIMENSION_FIELD_NUMBER: _ClassVar[int]
url: str
document_id: int
connector_id: int
file_type: _file_type_pb2.FileType
collection_name: str
model_name: str
model_dimension: int
def __init__(self, url: _Optional[str] = ..., document_id: _Optional[int] = ..., connector_id: _Optional[int] = ..., file_type: _Optional[_Union[_file_type_pb2.FileType, str]] = ..., collection_name: _Optional[str] = ..., model_name: _Optional[str] = ..., model_dimension: _Optional[int] = ...) -> None: ...
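
For orientation, a minimal sketch of constructing and serializing the VisionData message declared above. The import path assumes the generated cognix_lib.gen_types package is on PYTHONPATH (the Dockerfile below sets this up), every field value is illustrative, and file_type is omitted here to avoid assuming enum values from file_type_pb2.

from cognix_lib.gen_types.vision_data_pb2 import VisionData

msg = VisionData(
    url="minio://documents/sample-5.jpg",      # illustrative object location
    document_id=42,
    connector_id=7,
    collection_name="document_embeddings",     # hypothetical Milvus collection
    model_name="Salesforce/blip-image-captioning-base",
    model_dimension=768,
)
payload = msg.SerializeToString()   # bytes, e.g. for publishing on the vision NATS subject
restored = VisionData.FromString(payload)
assert restored.document_id == 42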
29 changes: 29 additions & 0 deletions src/backend/lib_py/cognix_lib/gen_types/vision_data_pb2_grpc.py
@@ -0,0 +1,29 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings


GRPC_GENERATED_VERSION = '1.63.0'
GRPC_VERSION = grpc.__version__
EXPECTED_ERROR_RELEASE = '1.65.0'
SCHEDULED_RELEASE_DATE = 'June 25, 2024'
_version_not_supported = False

try:
from grpc._utilities import first_version_is_lower
_version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
_version_not_supported = True

if _version_not_supported:
warnings.warn(
f'The grpc package installed is at version {GRPC_VERSION},'
+ f' but the generated code in vision_data_pb2_grpc.py depends on'
+ f' grpcio>={GRPC_GENERATED_VERSION}.'
+ f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+ f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
+ f' This warning will become an error in {EXPECTED_ERROR_RELEASE},'
+ f' scheduled for release on {SCHEDULED_RELEASE_DATE}.',
RuntimeWarning
)
45 changes: 45 additions & 0 deletions src/backend/vision/.env.example
@@ -0,0 +1,45 @@
VISION_LOG_LEVEL=INFO
VISION_LOG_FORMAT="%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s"

VISION_MODEL=paraphrase-multilingual-mpnet-base-v2
VISION_MODEL_CACHE_LIMIT=1
VISION_LOCAL_MODEL_PATH=../../../data/models
VISION_LOCAL_TEMP_PATH=../../../data/temp

NATS_CLIENT_URL=nats://127.0.0.1:4222
NATS_CLIENT_CONNECT_TIMEOUT=3 # in seconds
NATS_CLIENT_RECONNECT_TIME_WAIT=3 # in seconds
NATS_CLIENT_MAX_RECONNECT_ATTEMPTS=3 # number of attempts

NATS_CLIENT_VISION_STREAM_NAME=vision
NATS_CLIENT_VISION_STREAM_SUBJECT=vision_activity
# ACK_WAIT must be long enough for the vision service to finish its work;
# if a message is not acked within this window it will be redelivered.
# Value is in seconds.
NATS_CLIENT_VISION_ACK_WAIT=10800
NATS_CLIENT_VISION_MAX_DELIVER=3



MILVUS_ALIAS=default
# when running inside the cluster: use the name of the milvus instance inside the cluster
# when running locally: use 127.0.0.1
MILVUS_HOST=127.0.0.1
MILVUS_PORT=19530
MILVUS_INDEX_TYPE=DISKANN
MILVUS_METRIC_TYPE=COSINE

COCKROACH_CLIENT_DATABASE_URL='cockroachdb://root:[email protected]:26257/defaultdb?sslmode=disable'
COCKROACH_CLIENT_DB_DEBUG=false



MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_ACCESS_KEY=minioadmin
MINIO_USE_SSL=false
MINIO_BUCKET_NAME=documents
MINIO_REGION=local
MINIO_ENDPOINT=127.0.0.1:9000
MINIO_MOCKED=false

READINESS_TIME_OUT=500
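
A minimal sketch (not part of this commit) of how these settings could be consumed, using python-dotenv and nats-py, both pinned in requirements.txt below. The variable names mirror the file above; everything else is illustrative.

import asyncio
import os

import nats
from dotenv import load_dotenv

async def main():
    load_dotenv()  # pick up the values defined above from a local .env file
    nc = await nats.connect(
        os.getenv("NATS_CLIENT_URL", "nats://127.0.0.1:4222"),
        connect_timeout=int(os.getenv("NATS_CLIENT_CONNECT_TIMEOUT", "3")),
        reconnect_time_wait=int(os.getenv("NATS_CLIENT_RECONNECT_TIME_WAIT", "3")),
        max_reconnect_attempts=int(os.getenv("NATS_CLIENT_MAX_RECONNECT_ATTEMPTS", "3")),
    )
    print("connected to NATS at", nc.connected_url.netloc)
    await nc.drain()

asyncio.run(main())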
29 changes: 29 additions & 0 deletions src/backend/vision/Dockerfile
@@ -0,0 +1,29 @@
# Use Python 3.11 (slim Bookworm), the newest minor version supported by PyTorch
FROM python:3.11.7-slim-bookworm

# Set the working directory inside the container
WORKDIR /app
ADD ./ ./

# Copy the requirements.txt first to leverage Docker cache
COPY vision/requirements.txt /app/

# Install dependencies and clean up
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
&& rm -rf /var/lib/apt/lists/*

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt \
&& rm -rf /root/.cache

# Copy the rest of the application code
COPY vision /app/vision
COPY lib_py/cognix_lib /app/cognix_lib

# Set the PYTHONPATH environment variable
ENV PYTHONPATH="/app"

# Command to run your application
CMD ["python", "vision/vision_service.py"]
20 changes: 20 additions & 0 deletions src/backend/vision/requirements.txt
@@ -0,0 +1,20 @@
nats-py==2.7.2
python-dotenv==1.0.1
protobuf==5.27.0
grpcio==1.63.0
grpcio-tools==1.63.0
sqlalchemy==2.0.30
psycopg2-binary==2.9.9
mistune==3.0.2
minio==7.2.7
transformers==4.42.3
chardet==5.2.0
pytesseract==0.3.10
torch==2.0.1
torchaudio==2.0.2
torchvision==0.15.2





Binary file added src/backend/vision/sample-1.png
Binary file added src/backend/vision/sample-2.png
Binary file added src/backend/vision/sample-3.png
Binary file added src/backend/vision/sample-4.jpg
Binary file added src/backend/vision/sample-5.jpg
102 changes: 102 additions & 0 deletions src/backend/vision/vision_analysis.py
@@ -0,0 +1,102 @@
import os
import logging
import threading
from typing import Dict, Tuple
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import pytesseract

class Vision:
def __init__(self, model_cache_limit: int = 1, local_model_path: str = 'models'):
self.logger = logging.getLogger(self.__class__.__name__)
self._cache_limit = model_cache_limit
self._local_model_dir = os.path.abspath(local_model_path)

if self._cache_limit <= 0:
raise ValueError("MODEL_CACHE_LIMIT must be an integer greater than 0")

if not os.path.isdir(self._local_model_dir):
raise ValueError(f"'{self._local_model_dir}' is not a valid directory")

self._lock: threading.Lock = threading.Lock()
self._model_cache: Dict[str, Tuple[BlipProcessor, BlipForConditionalGeneration]] = {}

def _load_model(self, model_name: str) -> Tuple[BlipProcessor, BlipForConditionalGeneration]:
model_path: str = os.path.join(self._local_model_dir, model_name)
if model_name not in self._model_cache:
self.logger.info(f"{model_name} model not found in cache, loading...")
if not os.path.exists(model_path) or not os.listdir(model_path):
self.logger.info(f"{model_name} model not found locally, downloading from Hugging Face...")
                processor = BlipProcessor.from_pretrained(model_name)
                model = BlipForConditionalGeneration.from_pretrained(model_name)
                # Persist the download so later runs load from the local directory
                processor.save_pretrained(model_path)
                model.save_pretrained(model_path)
                self.logger.info(f"{model_name} model downloaded and loaded")
else:
self.logger.info(f"Loading {model_name} from local directory...")
processor = BlipProcessor.from_pretrained(model_path)
model = BlipForConditionalGeneration.from_pretrained(model_path)

with self._lock:
if len(self._model_cache) >= self._cache_limit:
self.logger.info("Model cache limit reached, removing oldest model...")
oldest_model_name = next(iter(self._model_cache))
del self._model_cache[oldest_model_name]

self._model_cache[model_name] = (processor, model)

return self._model_cache[model_name]

def generate_caption(self, image_path: str, model_name: str) -> str:
processor, model = self._load_model(model_name)
image = Image.open(image_path).convert('RGB')
inputs = processor(image, return_tensors="pt")
outputs = model.generate(**inputs)
caption = processor.decode(outputs[0], skip_special_tokens=True)
return caption

def extract_text(self, image_path: str) -> str:
image = Image.open(image_path).convert('RGB')
text = pytesseract.image_to_string(image)
return text

def analyze_image(self, image_path: str, model_name: str) -> str:
caption = self.generate_caption(image_path, model_name)
text = self.extract_text(image_path)
return f"### Caption\n\n{caption}\n\n### Extracted Text\n\n{text}"


from dotenv import load_dotenv  # os and logging are already imported at the top of this module

if __name__ == "__main__":
load_dotenv()

# Configure logging
log_level_str = os.getenv('VISION_LOG_LEVEL', 'ERROR').upper()
log_level = getattr(logging, log_level_str, logging.INFO)
log_format = os.getenv('VISION_LOG_FORMAT', '%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s')
logging.basicConfig(level=log_level, format=log_format)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured with level {log_level_str} and format {log_format}")

# Loading from env
model_path = os.getenv('VISION_LOCAL_MODEL_PATH', 'models')
# image_file = 'sample-1.png'
# image_file = 'sample-2.png'
# image_file = 'sample-3.png'
# image_file = 'sample-4.jpg'
image_file = 'sample-5.jpg'

# Example usage
model_name = "Salesforce/blip-image-captioning-base"
vision = Vision(model_cache_limit=1, local_model_path=model_path)
result = vision.analyze_image(image_file, model_name)


print(result)


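Taken together, the generated VisionData stub, the Vision class, and the NATS settings above suggest a consumer loop along the lines of the sketch below. vision_service.py itself is not shown in this diff, so this is only a hypothetical outline: the subject and stream names come from .env.example, while the handler wiring and the assumption that VisionData.url already points at a local file (a real service would likely download the object from MinIO first) are illustrative.

import asyncio
import os

import nats
from cognix_lib.gen_types.vision_data_pb2 import VisionData
from vision_analysis import Vision  # the class added in this PR

async def handle(msg):
    data = VisionData()
    data.ParseFromString(msg.data)  # payload published on the vision subject
    vision = Vision(model_cache_limit=1,
                    local_model_path=os.getenv("VISION_LOCAL_MODEL_PATH", "models"))
    # Assumes data.url is already a local path; in practice the file would be
    # fetched from MinIO into VISION_LOCAL_TEMP_PATH before analysis.
    print(vision.analyze_image(data.url, data.model_name))
    await msg.ack()

async def main():
    nc = await nats.connect(os.getenv("NATS_CLIENT_URL", "nats://127.0.0.1:4222"))
    js = nc.jetstream()
    await js.subscribe(
        os.getenv("NATS_CLIENT_VISION_STREAM_SUBJECT", "vision_activity"),
        stream=os.getenv("NATS_CLIENT_VISION_STREAM_NAME", "vision"),
        cb=handle,
        manual_ack=True,
    )
    await asyncio.Event().wait()  # keep the subscriber alive

asyncio.run(main())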

0 comments on commit b18b651
