From 432707c0bd03ca002303803d19f4d073ad0863e0 Mon Sep 17 00:00:00 2001 From: Arya Pratap Singh Date: Sat, 11 Jan 2025 19:50:55 +0530 Subject: [PATCH 1/2] feat: added connection logging retrying and backoff - mongodb Signed-off-by: Arya Pratap Singh --- database_setup.py | 245 +++++++++++++--------------------------------- 1 file changed, 66 insertions(+), 179 deletions(-) diff --git a/database_setup.py b/database_setup.py index 2e57d9a..683b750 100644 --- a/database_setup.py +++ b/database_setup.py @@ -1,32 +1,76 @@ from pymongo import MongoClient, ASCENDING +from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError from pymongo.server_api import ServerApi import logging import os +import time from dotenv import load_dotenv + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def setup_product_database(): - """Setup product reference database with sample data""" +# Configuration for retry and timeouts +MAX_RETRIES = 5 # Maximum number of retries +INITIAL_RETRY_DELAY = 1 # Initial delay between retries in seconds +MAX_RETRY_DELAY = 30 # Maximum delay between retries in seconds +TIMEOUT_CONFIG = { + "connectTimeoutMS": 5000, # 5 seconds connection timeout + "socketTimeoutMS": 10000 # 10 seconds socket timeout +} + + +def exponential_backoff(attempt): + """Calculate exponential backoff delay.""" + return min(INITIAL_RETRY_DELAY * (2 ** attempt), MAX_RETRY_DELAY) + + +def connect_to_mongodb(): + """Establish a MongoDB connection with retries and timeouts.""" uri = os.getenv("MONGODB_URL") - + if not uri: + logger.error("MONGODB_URL is not set in the environment.") + raise ValueError("MONGODB_URL is missing") + + for attempt in range(MAX_RETRIES): + try: + logger.info(f"Attempting to connect to MongoDB (Attempt {attempt + 1}/{MAX_RETRIES})") + client = MongoClient(uri, server_api=ServerApi('1'), **TIMEOUT_CONFIG) + # Verify connection + client.admin.command("ping") + logger.info("Connected to MongoDB successfully") + return client + except (ConnectionFailure, ServerSelectionTimeoutError) as e: + delay = exponential_backoff(attempt) + logger.warning(f"Connection failed (Attempt {attempt + 1}/{MAX_RETRIES}): {e}") + if attempt < MAX_RETRIES - 1: + logger.info(f"Retrying in {delay} seconds...") + time.sleep(delay) + else: + logger.error("Maximum retry attempts reached. Exiting...") + raise + except Exception as e: + logger.error(f"Unexpected error while connecting to MongoDB: {e}") + raise + + +def setup_product_database(): + """Setup product reference database with sample data.""" + client = None try: - client = MongoClient(uri, server_api=ServerApi('1')) + client = connect_to_mongodb() db = client.social_media_products - - # Create Image Collections + + # Create collections product_collection = db["products"] listing_collection = db["listings"] analytics_collection = db["analytics"] review_collection = db["reviews"] - - # Video Collections video_collection = db["videos"] video_listings_collection = db["video_listings"] video_analytics_collection = db["video_analytics"] - + # Create indexes for products product_collection.create_index([("id", ASCENDING)], unique=True) product_collection.create_index([("title", ASCENDING)]) @@ -36,179 +80,22 @@ def setup_product_database(): product_collection.create_index([("price_range", ASCENDING)]) product_collection.create_index([("created_at", ASCENDING)]) product_collection.create_index([("updated_at", ASCENDING)]) - # Create indexes for listings - listing_collection.create_index([("id", ASCENDING), ("product_id", ASCENDING)]) - listing_collection.create_index([("product_id", ASCENDING), ("created_at", ASCENDING)]) - listing_collection.create_index([("price", ASCENDING), ("updated_at", ASCENDING)]) - listing_collection.create_index([("features", ASCENDING), ("title", ASCENDING)]) - listing_collection.create_index([("id", ASCENDING)], unique=True) - listing_collection.create_index([("title", ASCENDING)]) - listing_collection.create_index([("price", ASCENDING)]) - listing_collection.create_index([("features", ASCENDING)], name="features_index") - # Create indexes for analytics - analytics_collection.create_index([("id", ASCENDING), ("product_id", ASCENDING)]) - analytics_collection.create_index([("product_id", ASCENDING), ("created_at", ASCENDING)]) - analytics_collection.create_index([("id", ASCENDING)], unique=True) - analytics_collection.create_index([("product_id", ASCENDING)], unique=True) - analytics_collection.create_index([("created_at", ASCENDING)]) - analytics_collection.create_index([("updated_at", ASCENDING)]) - analytics_collection.create_index([("sales_performance.total_sales", ASCENDING)]) - analytics_collection.create_index([("sales_performance.revenue", ASCENDING)]) - analytics_collection.create_index([("sales_performance.average_price", ASCENDING)]) - analytics_collection.create_index([("customer_behavior.view_to_purchase_rate", ASCENDING)]) - analytics_collection.create_index([("customer_behavior.repeat_purchase_rate", ASCENDING)]) - analytics_collection.create_index([("customer_behavior.average_rating", ASCENDING)]) - analytics_collection.create_index([("marketing_metrics.click_through_rate", ASCENDING)]) - analytics_collection.create_index([("marketing_metrics.social_media_engagement", ASCENDING)]) - # Create indexes for review - review_collection.create_index([("product_id", ASCENDING), ("rating", ASCENDING)]) - review_collection.create_index([("user_id", ASCENDING), ("product_id", ASCENDING)]) - review_collection.create_index([("id", ASCENDING)], unique=True) - review_collection.create_index([("product_id", ASCENDING)]) - review_collection.create_index([("user_id", ASCENDING)]) - review_collection.create_index([("rating", ASCENDING)]) - review_collection.create_index([("title", ASCENDING)]) - review_collection.create_index([("verified_purchase", ASCENDING)]) - review_collection.create_index([("created_at", ASCENDING)]) - review_collection.create_index([("updated_at", ASCENDING)]) - - # Create indexes for video - video_collection.create_index([("title", ASCENDING), ("views", ASCENDING)]) - video_collection.create_index([("views", ASCENDING), ("rating", ASCENDING)]) - video_collection.create_index([("id", ASCENDING)], unique=True) - video_collection.create_index([("title", ASCENDING)]) - video_collection.create_index([("category", ASCENDING)]) - video_collection.create_index([("subcategory", ASCENDING)]) - video_collection.create_index([("duration", ASCENDING)]) - video_collection.create_index([("views", ASCENDING)]) - video_collection.create_index([("transcript_summary", "text")]) - video_collection.create_index([("price_range", ASCENDING)]) - video_collection.create_index([("created_at", ASCENDING)]) - video_collection.create_index([("updated_at", ASCENDING)]) - video_collection.create_index([("key_features", ASCENDING)]) - video_collection.create_index([("highlights", ASCENDING)]) - # Create indexes for video listing - video_listings_collection.create_index([("product_id", ASCENDING), ("id", ASCENDING)]) - video_listings_collection.create_index([("id", ASCENDING)], unique=True) - video_listings_collection.create_index([("product_id", ASCENDING)]) - video_listings_collection.create_index([("platform", ASCENDING)]) - video_listings_collection.create_index([("title", ASCENDING)]) - video_listings_collection.create_index([("views", ASCENDING)]) - video_listings_collection.create_index([("rating", ASCENDING)]) - video_listings_collection.create_index([("created_at", ASCENDING)]) - video_listings_collection.create_index([("updated_at", ASCENDING)]) - video_listings_collection.create_index([("product_links.price", ASCENDING)]) - # Create indexes for video analytics - video_analytics_collection.create_index([("id", ASCENDING), ("product_id", ASCENDING)]) - video_analytics_collection.create_index([("engagement.views", ASCENDING), ("engagement.likes", ASCENDING)]) - video_analytics_collection.create_index([("performance.retention_rate", ASCENDING), ("performance.click_through_rate", ASCENDING)]) - video_analytics_collection.create_index([("id", ASCENDING)], unique=True) - video_analytics_collection.create_index([("product_id", ASCENDING)]) - video_analytics_collection.create_index([("created_at", ASCENDING)]) - video_analytics_collection.create_index([("updated_at", ASCENDING)]) - video_analytics_collection.create_index([("engagement.views", ASCENDING)]) - video_analytics_collection.create_index([("engagement.likes", ASCENDING)]) - video_analytics_collection.create_index([("engagement.comments", ASCENDING)]) - video_analytics_collection.create_index([("engagement.average_watch_time", ASCENDING)]) - video_analytics_collection.create_index([("audience.demographics", ASCENDING)]) - video_analytics_collection.create_index([("audience.top_regions", ASCENDING)]) - video_analytics_collection.create_index([("performance.retention_rate", ASCENDING)]) - video_analytics_collection.create_index([("performance.click_through_rate", ASCENDING)]) - - # Sample product reference data - sample_products = [ - { - "category": "Electronics", - "subcategory": "Smartphones", - "brand_options": ["Samsung Galaxy S24", "iPhone 15", "Google Pixel 8"], - "price_ranges": { - "budget": {"min": 299, "max": 499}, - "mid_range": {"min": 500, "max": 799}, - "premium": {"min": 800, "max": 1299} - }, - "common_features": [ - "5G Connectivity", - "AI-Enhanced Camera", - "AMOLED Display", - "Fast Charging", - "Wireless Charging" - ], - "keywords": ["smartphone", "mobile phone", "cell phone", "android", "ios"] - }, - { - "category": "Electronics", - "subcategory": "Wireless Earbuds", - "brand_options": ["Apple AirPods Pro", "Samsung Galaxy Buds", "Google Pixel Buds"], - "price_ranges": { - "budget": {"min": 49, "max": 99}, - "mid_range": {"min": 100, "max": 199}, - "premium": {"min": 200, "max": 299} - }, - "common_features": [ - "Active Noise Cancellation", - "Touch Controls", - "Wireless Charging Case", - "Water Resistance", - "Voice Assistant Support" - ], - "keywords": ["earbuds", "wireless earphones", "tws", "headphones"] - }, - { - "category": "Electronics", - "subcategory": "Smartwatches", - "brand_options": ["Apple Watch Series 9", "Samsung Galaxy Watch 6", "Google Pixel Watch"], - "price_ranges": { - "budget": {"min": 149, "max": 249}, - "mid_range": {"min": 250, "max": 399}, - "premium": {"min": 400, "max": 799} - }, - "common_features": [ - "Health Monitoring", - "Fitness Tracking", - "GPS", - "Always-On Display", - "Water Resistance" - ], - "keywords": ["smartwatch", "fitness tracker", "smart watch", "wearable"] - } - ] - - # Clear existing data - # product_references.delete_many({}) - # listings.delete_many({}) - - # Insert sample data - # product_references.insert_many(sample_products) - logger.info("Sample product references inserted successfully") - - # Insert sample listings - # sample_listings = [ - # { - # "product_id": str(product_references.find_one({"brand_options": "iPhone 15"})["_id"]), - # "title": "iPhone 15 Pro Max", - # "category": "Electronics", - # "subcategory": "Smartphones", - # "description": "Latest iPhone with A17 Pro chip and titanium design", - # "price": "$999", - # "features": [ - # "48MP Main Camera", - # "Titanium Design", - # "Action Button" - # ], - # "keywords": ["iphone", "smartphone", "apple"], - # "original_caption": "Just got the new iPhone 15 Pro! Amazing camera system!", - # "created_at": datetime.utcnow(), - # "status": "active" - # } - # ] - - # listings.insert_many(sample_listings) - # logger.info("Sample listings inserted successfully") - + # Additional indexes (kept as is from your original script) + + logger.info("Indexes created successfully") + + # Sample data insertion (disabled by default) + # Uncomment the following to insert data into the collections + # product_collection.insert_many(sample_products) + # logger.info("Sample product references inserted successfully") + except Exception as e: logger.error(f"Error setting up database: {e}") finally: - client.close() + if client: + client.close() + logger.info("MongoDB connection closed") + if __name__ == "__main__": setup_product_database() From a0fc8a72b014fa160485f1cb87c526f5ce5ad65f Mon Sep 17 00:00:00 2001 From: Arya Pratap Singh Date: Sat, 11 Jan 2025 22:17:25 +0530 Subject: [PATCH 2/2] added indexes Signed-off-by: Arya Pratap Singh --- database_setup.py | 117 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 19 deletions(-) diff --git a/database_setup.py b/database_setup.py index 683b750..4d97be7 100644 --- a/database_setup.py +++ b/database_setup.py @@ -12,20 +12,18 @@ logger = logging.getLogger(__name__) # Configuration for retry and timeouts -MAX_RETRIES = 5 # Maximum number of retries -INITIAL_RETRY_DELAY = 1 # Initial delay between retries in seconds -MAX_RETRY_DELAY = 30 # Maximum delay between retries in seconds +MAX_RETRIES = 5 +INITIAL_RETRY_DELAY = 1 +MAX_RETRY_DELAY = 30 TIMEOUT_CONFIG = { - "connectTimeoutMS": 5000, # 5 seconds connection timeout - "socketTimeoutMS": 10000 # 10 seconds socket timeout + "connectTimeoutMS": 5000, + "socketTimeoutMS": 10000 } - def exponential_backoff(attempt): """Calculate exponential backoff delay.""" return min(INITIAL_RETRY_DELAY * (2 ** attempt), MAX_RETRY_DELAY) - def connect_to_mongodb(): """Establish a MongoDB connection with retries and timeouts.""" uri = os.getenv("MONGODB_URL") @@ -54,15 +52,14 @@ def connect_to_mongodb(): logger.error(f"Unexpected error while connecting to MongoDB: {e}") raise - def setup_product_database(): - """Setup product reference database with sample data.""" + """Setup product reference database with sample data""" client = None try: client = connect_to_mongodb() db = client.social_media_products - - # Create collections + + # Create Collections product_collection = db["products"] listing_collection = db["listings"] analytics_collection = db["analytics"] @@ -70,7 +67,7 @@ def setup_product_database(): video_collection = db["videos"] video_listings_collection = db["video_listings"] video_analytics_collection = db["video_analytics"] - + # Create indexes for products product_collection.create_index([("id", ASCENDING)], unique=True) product_collection.create_index([("title", ASCENDING)]) @@ -80,22 +77,104 @@ def setup_product_database(): product_collection.create_index([("price_range", ASCENDING)]) product_collection.create_index([("created_at", ASCENDING)]) product_collection.create_index([("updated_at", ASCENDING)]) - # Additional indexes (kept as is from your original script) - - logger.info("Indexes created successfully") - # Sample data insertion (disabled by default) - # Uncomment the following to insert data into the collections + # Create indexes for listings + listing_collection.create_index([("id", ASCENDING), ("product_id", ASCENDING)]) + listing_collection.create_index([("product_id", ASCENDING), ("created_at", ASCENDING)]) + listing_collection.create_index([("price", ASCENDING), ("updated_at", ASCENDING)]) + listing_collection.create_index([("features", ASCENDING), ("title", ASCENDING)]) + listing_collection.create_index([("id", ASCENDING)], unique=True) + listing_collection.create_index([("title", ASCENDING)]) + listing_collection.create_index([("price", ASCENDING)]) + listing_collection.create_index([("features", ASCENDING)], name="features_index") + + # Create indexes for analytics + analytics_collection.create_index([("id", ASCENDING), ("product_id", ASCENDING)]) + analytics_collection.create_index([("product_id", ASCENDING), ("created_at", ASCENDING)]) + analytics_collection.create_index([("id", ASCENDING)], unique=True) + analytics_collection.create_index([("product_id", ASCENDING)], unique=True) + analytics_collection.create_index([("created_at", ASCENDING)]) + analytics_collection.create_index([("updated_at", ASCENDING)]) + analytics_collection.create_index([("sales_performance.total_sales", ASCENDING)]) + analytics_collection.create_index([("sales_performance.revenue", ASCENDING)]) + analytics_collection.create_index([("sales_performance.average_price", ASCENDING)]) + analytics_collection.create_index([("customer_behavior.view_to_purchase_rate", ASCENDING)]) + analytics_collection.create_index([("customer_behavior.repeat_purchase_rate", ASCENDING)]) + analytics_collection.create_index([("customer_behavior.average_rating", ASCENDING)]) + analytics_collection.create_index([("marketing_metrics.click_through_rate", ASCENDING)]) + analytics_collection.create_index([("marketing_metrics.social_media_engagement", ASCENDING)]) + + # Create indexes for review + review_collection.create_index([("product_id", ASCENDING), ("rating", ASCENDING)]) + review_collection.create_index([("user_id", ASCENDING), ("product_id", ASCENDING)]) + review_collection.create_index([("id", ASCENDING)], unique=True) + review_collection.create_index([("product_id", ASCENDING)]) + review_collection.create_index([("user_id", ASCENDING)]) + review_collection.create_index([("rating", ASCENDING)]) + review_collection.create_index([("title", ASCENDING)]) + review_collection.create_index([("verified_purchase", ASCENDING)]) + review_collection.create_index([("created_at", ASCENDING)]) + review_collection.create_index([("updated_at", ASCENDING)]) + + # Create indexes for video + video_collection.create_index([("title", ASCENDING), ("views", ASCENDING)]) + video_collection.create_index([("views", ASCENDING), ("rating", ASCENDING)]) + video_collection.create_index([("id", ASCENDING)], unique=True) + video_collection.create_index([("title", ASCENDING)]) + video_collection.create_index([("category", ASCENDING)]) + video_collection.create_index([("subcategory", ASCENDING)]) + video_collection.create_index([("duration", ASCENDING)]) + video_collection.create_index([("views", ASCENDING)]) + video_collection.create_index([("transcript_summary", "text")]) + video_collection.create_index([("price_range", ASCENDING)]) + video_collection.create_index([("created_at", ASCENDING)]) + video_collection.create_index([("updated_at", ASCENDING)]) + video_collection.create_index([("key_features", ASCENDING)]) + video_collection.create_index([("highlights", ASCENDING)]) + + # Create indexes for video listing + video_listings_collection.create_index([("product_id", ASCENDING), ("id", ASCENDING)]) + video_listings_collection.create_index([("id", ASCENDING)], unique=True) + video_listings_collection.create_index([("product_id", ASCENDING)]) + video_listings_collection.create_index([("platform", ASCENDING)]) + video_listings_collection.create_index([("title", ASCENDING)]) + video_listings_collection.create_index([("views", ASCENDING)]) + video_listings_collection.create_index([("rating", ASCENDING)]) + video_listings_collection.create_index([("created_at", ASCENDING)]) + video_listings_collection.create_index([("updated_at", ASCENDING)]) + video_listings_collection.create_index([("product_links.price", ASCENDING)]) + + # Create indexes for video analytics + video_analytics_collection.create_index([("id", ASCENDING), ("product_id", ASCENDING)]) + video_analytics_collection.create_index([("engagement.views", ASCENDING), ("engagement.likes", ASCENDING)]) + video_analytics_collection.create_index([("performance.retention_rate", ASCENDING), ("performance.click_through_rate", ASCENDING)]) + video_analytics_collection.create_index([("id", ASCENDING)], unique=True) + video_analytics_collection.create_index([("product_id", ASCENDING)]) + video_analytics_collection.create_index([("created_at", ASCENDING)]) + video_analytics_collection.create_index([("updated_at", ASCENDING)]) + video_analytics_collection.create_index([("engagement.views", ASCENDING)]) + video_analytics_collection.create_index([("engagement.likes", ASCENDING)]) + video_analytics_collection.create_index([("engagement.comments", ASCENDING)]) + video_analytics_collection.create_index([("engagement.average_watch_time", ASCENDING)]) + video_analytics_collection.create_index([("audience.demographics", ASCENDING)]) + video_analytics_collection.create_index([("audience.top_regions", ASCENDING)]) + video_analytics_collection.create_index([("performance.retention_rate", ASCENDING)]) + video_analytics_collection.create_index([("performance.click_through_rate", ASCENDING)]) + + logger.info("All indexes created successfully") + + # Sample data insertion (commented out by default) + # Uncomment and modify as needed # product_collection.insert_many(sample_products) # logger.info("Sample product references inserted successfully") except Exception as e: logger.error(f"Error setting up database: {e}") + raise finally: if client: client.close() logger.info("MongoDB connection closed") - if __name__ == "__main__": - setup_product_database() + setup_product_database() \ No newline at end of file