-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #347 from gen-mind/feature/vision-1
Feature/vision 1
- Loading branch information
Showing
23 changed files
with
544 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
28 changes: 28 additions & 0 deletions
28
src/backend/lib_py/cognix_lib/gen_types/vision_data_pb2.py
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
24 changes: 24 additions & 0 deletions
24
src/backend/lib_py/cognix_lib/gen_types/vision_data_pb2.pyi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import file_type_pb2 as _file_type_pb2 | ||
from google.protobuf import descriptor as _descriptor | ||
from google.protobuf import message as _message | ||
from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union | ||
|
||
DESCRIPTOR: _descriptor.FileDescriptor | ||
|
||
# Type stub for the VisionData protobuf message (subclass of protobuf's Message).
# It declares seven fields: url, document_id, connector_id, file_type,
# collection_name, model_name and model_dimension.
class VisionData(_message.Message):
    __slots__ = ("url", "document_id", "connector_id", "file_type", "collection_name", "model_name", "model_dimension")
    # One class-level integer constant per field; the stub declares only their
    # type, not their values.
    URL_FIELD_NUMBER: _ClassVar[int]
    DOCUMENT_ID_FIELD_NUMBER: _ClassVar[int]
    CONNECTOR_ID_FIELD_NUMBER: _ClassVar[int]
    FILE_TYPE_FIELD_NUMBER: _ClassVar[int]
    COLLECTION_NAME_FIELD_NUMBER: _ClassVar[int]
    MODEL_NAME_FIELD_NUMBER: _ClassVar[int]
    MODEL_DIMENSION_FIELD_NUMBER: _ClassVar[int]
    # Field attributes and their Python types.
    url: str
    document_id: int
    connector_id: int
    # Typed with FileType from the sibling file_type_pb2 module.
    file_type: _file_type_pb2.FileType
    collection_name: str
    model_name: str
    model_dimension: int
    # Every constructor argument is optional; file_type accepts either a
    # FileType value or a str.
    def __init__(self, url: _Optional[str] = ..., document_id: _Optional[int] = ..., connector_id: _Optional[int] = ..., file_type: _Optional[_Union[_file_type_pb2.FileType, str]] = ..., collection_name: _Optional[str] = ..., model_name: _Optional[str] = ..., model_dimension: _Optional[int] = ...) -> None: ...
29 changes: 29 additions & 0 deletions
29
src/backend/lib_py/cognix_lib/gen_types/vision_data_pb2_grpc.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings


# grpcio version this file was generated for; it is compared below against
# the version of the grpc package that is actually installed.
GRPC_GENERATED_VERSION = '1.63.0'
GRPC_VERSION = grpc.__version__
# Release and date quoted in the warning text as the point at which the
# warning is announced to become an error.
EXPECTED_ERROR_RELEASE = '1.65.0'
SCHEDULED_RELEASE_DATE = 'June 25, 2024'
_version_not_supported = False


try:
    from grpc._utilities import first_version_is_lower
    # NOTE(review): presumably True when the installed grpc version is lower
    # than the generated one -- confirm against grpc._utilities.
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    # The comparison helper could not be imported: treat the installed grpc
    # as unsupported.
    _version_not_supported = True


if _version_not_supported:
    # Emits a RuntimeWarning only; importing this module still succeeds.
    warnings.warn(
        f'The grpc package installed is at version {GRPC_VERSION},'
        + f' but the generated code in vision_data_pb2_grpc.py depends on'
        + f' grpcio>={GRPC_GENERATED_VERSION}.'
        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
        + f' This warning will become an error in {EXPECTED_ERROR_RELEASE},'
        + f' scheduled for release on {SCHEDULED_RELEASE_DATE}.',
        RuntimeWarning
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
VISION_LOG_LEVEL=INFO | ||
VISION_LOG_FORMAT="%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s" | ||
|
||
VISION_MODEL=paraphrase-multilingual-mpnet-base-v2 | ||
VISION_MODEL_CACHE_LIMIT=1 | ||
VISION_LOCAL_MODEL_PATH=../../../data/models | ||
VISION_LOCAL_TEMP_PATH=../../../data/temp | ||
|
||
NATS_CLIENT_URL=nats://127.0.0.1:4222 | ||
NATS_CLIENT_CONNECT_TIMEOUT=3 # in seconds | ||
NATS_CLIENT_RECONNECT_TIME_WAIT=3 # in seconds | ||
NATS_CLIENT_MAX_RECONNECT_ATTEMPTS=3 # number of attempts | ||
|
||
NATS_CLIENT_VISION_STREAM_NAME=vision | ||
NATS_CLIENT_VISION_STREAM_SUBJECT=vision_activity | ||
# ACK_WAIT must be long enough for the vision service to finish its work; | ||
# a message that is not acknowledged within ACK_WAIT is redelivered. | ||
# Value is in seconds. | ||
NATS_CLIENT_VISION_ACK_WAIT=10800 | ||
NATS_CLIENT_VISION_MAX_DELIVER=3 | ||
|
||
|
||
|
||
MILVUS_ALIAS=default | ||
# when running inside the cluster: use the name of the milvus instance inside the cluster | ||
# when running locally: use 127.0.0.1 | ||
MILVUS_HOST=127.0.0.1 | ||
MILVUS_PORT=19530 | ||
MILVUS_INDEX_TYPE=DISKANN | ||
MILVUS_METRIC_TYPE=COSINE | ||
|
||
COCKROACH_CLIENT_DATABASE_URL='cockroachdb://root:[email protected]:26257/defaultdb?sslmode=disable' | ||
COCKROACH_CLIENT_DB_DEBUG=false | ||
|
||
|
||
|
||
MINIO_ACCESS_KEY=minioadmin | ||
MINIO_SECRET_ACCESS_KEY=minioadmin | ||
MINIO_USE_SSL=false | ||
MINIO_BUCKET_NAME=documents | ||
MINIO_REGION=local | ||
MINIO_ENDPOINT=127.0.0.1:9000 | ||
MINIO_MOCKED=false | ||
|
||
READINESS_TIME_OUT=500 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Base image: Python 3.11 (slim Debian bookworm), the interpreter line used for the PyTorch stack
FROM python:3.11.7-slim-bookworm

# Set the working directory inside the container
WORKDIR /app

# Install system packages first so this layer is reused across source changes.
# tesseract-ocr provides the `tesseract` binary that pytesseract invokes;
# without it OCR calls fail at runtime.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements.txt before installing, so the dependency layer is
# rebuilt only when the requirements change
COPY vision/requirements.txt /app/

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt \
    && rm -rf /root/.cache

# Copy the build context AFTER the dependency layers; copying it earlier
# invalidated the cache on every source change. COPY replaces ADD because no
# archive extraction or URL fetch is needed.
COPY ./ ./

# Copy the application code to the locations the service imports from
COPY vision /app/vision
COPY lib_py/cognix_lib /app/cognix_lib

# Set the PYTHONPATH environment variable
ENV PYTHONPATH="/app"

# Command to run your application
CMD ["python", "vision/vision_service.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
nats-py==2.7.2 | ||
python-dotenv==1.0.1 | ||
protobuf==5.27.0 | ||
grpcio==1.63.0 | ||
grpcio-tools==1.63.0 | ||
sqlalchemy==2.0.30 | ||
psycopg2-binary==2.9.9 | ||
mistune==3.0.2 | ||
minio==7.2.7 | ||
transformers==4.42.3 | ||
chardet==5.2.0 | ||
pytesseract==0.3.10 | ||
torch==2.0.1 | ||
torchaudio==2.0.2 | ||
torchvision==0.15.2 | ||
|
||
|
||
|
||
|
||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import os | ||
import logging | ||
import threading | ||
from typing import Dict, Tuple | ||
from transformers import BlipProcessor, BlipForConditionalGeneration | ||
from PIL import Image | ||
import pytesseract | ||
|
||
class Vision:
    """Image captioning (BLIP) and OCR (pytesseract) with a bounded model cache.

    Loaded (processor, model) pairs are kept in an insertion-ordered dict
    capped at ``model_cache_limit`` entries; when the cap is reached the
    oldest inserted entry is dropped. All cache access is serialized by a
    single lock.
    """

    def __init__(self, model_cache_limit: int = 1, local_model_path: str = 'models'):
        """Validate the configuration and create an empty model cache.

        Args:
            model_cache_limit: Maximum number of models kept in memory.
            local_model_path: Directory searched for local model copies.

        Raises:
            ValueError: If ``model_cache_limit`` is not greater than 0 or
                ``local_model_path`` is not an existing directory.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self._cache_limit = model_cache_limit
        self._local_model_dir = os.path.abspath(local_model_path)

        if self._cache_limit <= 0:
            raise ValueError("MODEL_CACHE_LIMIT must be an integer greater than 0")

        if not os.path.isdir(self._local_model_dir):
            raise ValueError(f"'{self._local_model_dir}' is not a valid directory")

        self._lock: threading.Lock = threading.Lock()
        self._model_cache: Dict[str, Tuple[BlipProcessor, BlipForConditionalGeneration]] = {}

    def _load_model(self, model_name: str) -> "Tuple[BlipProcessor, BlipForConditionalGeneration]":
        """Return the (processor, model) pair for ``model_name``, loading it if needed.

        The lookup, load, eviction and insert all happen under the lock, so
        concurrent callers cannot load the same model twice and the returned
        pair cannot be evicted between insertion and return. The trade-off is
        that other callers wait while a model is being loaded.
        """
        with self._lock:
            cached = self._model_cache.get(model_name)
            if cached is not None:
                return cached

            self.logger.info(f"{model_name} model not found in cache, loading...")
            model_path: str = os.path.join(self._local_model_dir, model_name)
            # isdir (rather than exists) keeps listdir from raising when the
            # path is a regular file; that case falls through to the download.
            if os.path.isdir(model_path) and os.listdir(model_path):
                self.logger.info(f"Loading {model_name} from local directory...")
                processor = BlipProcessor.from_pretrained(model_path)
                model = BlipForConditionalGeneration.from_pretrained(model_path)
            else:
                self.logger.info(f"{model_name} model not found locally, downloading from Hugging Face...")
                processor = BlipProcessor.from_pretrained(model_name)
                model = BlipForConditionalGeneration.from_pretrained(model_name)
                self.logger.info(f"{model_name} model downloaded and loaded")

            # Dicts preserve insertion order, so the first key is the oldest.
            while len(self._model_cache) >= self._cache_limit:
                self.logger.info("Model cache limit reached, removing oldest model...")
                oldest_model_name = next(iter(self._model_cache))
                del self._model_cache[oldest_model_name]

            loaded = (processor, model)
            self._model_cache[model_name] = loaded
            # Return the local pair instead of re-reading the cache.
            return loaded

    @staticmethod
    def _open_rgb(image_path: str):
        """Open ``image_path``, return an RGB copy and close the file handle."""
        with Image.open(image_path) as source:
            # convert() loads the pixel data into a new image, so the result
            # stays usable after the source file is closed.
            return source.convert('RGB')

    def generate_caption(self, image_path: str, model_name: str) -> str:
        """Generate a caption for the image using the named BLIP model."""
        processor, model = self._load_model(model_name)
        image = self._open_rgb(image_path)
        inputs = processor(image, return_tensors="pt")
        outputs = model.generate(**inputs)
        return processor.decode(outputs[0], skip_special_tokens=True)

    def extract_text(self, image_path: str) -> str:
        """Run OCR on the image and return the text pytesseract produces."""
        image = self._open_rgb(image_path)
        return pytesseract.image_to_string(image)

    def analyze_image(self, image_path: str, model_name: str) -> str:
        """Return a Markdown report with the image's caption and its OCR text."""
        caption = self.generate_caption(image_path, model_name)
        text = self.extract_text(image_path)
        return f"### Caption\n\n{caption}\n\n### Extracted Text\n\n{text}"
|
||
|
||
import os
import logging
from dotenv import load_dotenv


def main() -> None:
    """Run the caption + OCR demo on one image and print the result.

    Settings are read from the environment after loading a .env file:
    VISION_LOG_LEVEL, VISION_LOG_FORMAT and VISION_LOCAL_MODEL_PATH. The image
    path may be passed as the first command-line argument and defaults to
    'sample-5.jpg'.
    """
    import sys  # only needed by the script entry point

    load_dotenv()

    # Configure logging
    log_level_str = os.getenv('VISION_LOG_LEVEL', 'ERROR').upper()
    log_level = getattr(logging, log_level_str, None)
    if not isinstance(log_level, int):
        # Unknown names fall back to INFO; so do attributes that are not
        # levels (e.g. BASIC_FORMAT), which basicConfig would reject.
        log_level = logging.INFO
    log_format = os.getenv('VISION_LOG_FORMAT', '%(asctime)s - %(levelname)s - %(name)s - %(funcName)s - %(message)s')
    logging.basicConfig(level=log_level, format=log_format)
    logger = logging.getLogger(__name__)
    # Report the level actually in effect, not just the requested name.
    logger.info("Logging configured with level %s and format %s", logging.getLevelName(log_level), log_format)

    # Loading from env
    model_path = os.getenv('VISION_LOCAL_MODEL_PATH', 'models')
    image_file = sys.argv[1] if len(sys.argv) > 1 else 'sample-5.jpg'

    # Example usage
    model_name = "Salesforce/blip-image-captioning-base"
    vision = Vision(model_cache_limit=1, local_model_path=model_path)
    result = vision.analyze_image(image_file, model_name)

    print(result)


if __name__ == "__main__":
    main()
|
||
|
Oops, something went wrong.