diff --git a/cmd/addCategory/main.go b/cmd/addCategory/main.go index 9cbc5be..89aeb13 100644 --- a/cmd/addCategory/main.go +++ b/cmd/addCategory/main.go @@ -154,7 +154,7 @@ func validateSchema(content []byte, schemaPath string) error { func insertCategory(db *sqlx.DB, category Category, parentID *int64) { var categoryID int64 - query := `INSERT INTO Category (name, description, parent_id, created_at) + query := `INSERT INTO Categories (name, description, parent_id, created_at) VALUES ($1, $2, $3, $4) RETURNING category_id` err := db.QueryRowx(query, category.Name, category.Description, parentID, time.Now()).Scan(&categoryID) if err != nil { diff --git a/doc/database_architecture.md b/doc/database_architecture.md index 10cade4..d870845 100644 --- a/doc/database_architecture.md +++ b/doc/database_architecture.md @@ -12,7 +12,9 @@ erDiagram BIGSERIAL information_seed_id PK TIMESTAMP created_at TIMESTAMP last_updated_at - VARCHAR(256) information_seed + BIGINT category_id + BIGINT usr_id + VARCHAR information_seed JSONB config } @@ -20,8 +22,11 @@ erDiagram BIGSERIAL source_id PK TIMESTAMP created_at TIMESTAMP last_updated_at + BIGINT usr_id + BIGINT category_id TEXT url - VARCHAR(50) status + VARCHAR status + VARCHAR engine TIMESTAMP last_crawled_at TEXT last_error TIMESTAMP last_error_at @@ -35,15 +40,37 @@ erDiagram BIGSERIAL owner_id PK TIMESTAMP created_at TIMESTAMP last_updated_at - VARCHAR(64) details_hash + BIGINT usr_id + VARCHAR details_hash JSONB details } + SearchIndex { + BIGSERIAL index_id PK + TIMESTAMP created_at + TIMESTAMP last_updated_at + TEXT page_url + VARCHAR title + TEXT summary + VARCHAR detected_type + VARCHAR detected_lang + TSVECTOR tsv + } + + Categories { + BIGSERIAL category_id PK + TIMESTAMP created_at + TIMESTAMP last_updated_at + VARCHAR name + TEXT description + BIGINT parent_id FK "REFERENCES Categories(category_id)" + } + NetInfo { BIGSERIAL netinfo_id PK TIMESTAMP created_at TIMESTAMP last_updated_at - VARCHAR(64) details_hash + 
VARCHAR details_hash JSONB details } @@ -51,27 +78,16 @@ erDiagram BIGSERIAL httpinfo_id PK TIMESTAMP created_at TIMESTAMP last_updated_at - VARCHAR(64) details_hash + VARCHAR details_hash JSONB details } - SearchIndex { - BIGSERIAL index_id PK - TIMESTAMP created_at - TIMESTAMP last_updated_at - TEXT page_url - VARCHAR(255) title - TEXT summary - VARCHAR(8) detected_type - VARCHAR(8) detected_lang - } - Screenshots { BIGSERIAL screenshot_id PK - BIGINT index_id FK + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" TIMESTAMP created_at TIMESTAMP last_updated_at - VARCHAR(10) type + VARCHAR type TEXT screenshot_link INTEGER height INTEGER width @@ -79,7 +95,7 @@ erDiagram INTEGER thumbnail_height INTEGER thumbnail_width TEXT thumbnail_link - VARCHAR(10) format + VARCHAR format } WebObjects { @@ -87,8 +103,8 @@ erDiagram TIMESTAMP created_at TIMESTAMP last_updated_at TEXT object_link - VARCHAR(255) object_type - VARCHAR(64) object_hash + VARCHAR object_type + VARCHAR object_hash TEXT object_content TEXT object_html JSONB details @@ -96,7 +112,7 @@ erDiagram MetaTags { BIGSERIAL metatag_id PK - VARCHAR(255) name + VARCHAR name TEXT content } @@ -104,52 +120,71 @@ erDiagram BIGSERIAL keyword_id PK TIMESTAMP created_at TIMESTAMP last_updated_at - VARCHAR(100) keyword + VARCHAR keyword + } + + Events { + CHAR event_sha256 PK + TIMESTAMP created_at + TIMESTAMP last_updated_at + BIGINT source_id FK "REFERENCES Sources(source_id)" + VARCHAR event_type + VARCHAR event_severity + TIMESTAMP event_timestamp + JSONB details + } + + SourceInformationSeedIndex { + BIGSERIAL source_information_seed_id PK + BIGINT source_id FK "REFERENCES Sources(source_id)" + BIGINT information_seed_id FK "REFERENCES InformationSeed(information_seed_id)" + TIMESTAMP created_at + TIMESTAMP last_updated_at } SourceOwnerIndex { BIGSERIAL source_owner_id PK - BIGINT source_id FK - BIGINT owner_id FK + BIGINT source_id FK "REFERENCES Sources(source_id)" + BIGINT owner_id FK "REFERENCES 
Owners(owner_id)" TIMESTAMP created_at TIMESTAMP last_updated_at } SourceSearchIndex { BIGSERIAL ss_index_id PK - BIGINT source_id FK - BIGINT index_id FK + BIGINT source_id FK "REFERENCES Sources(source_id)" + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" TIMESTAMP created_at TIMESTAMP last_updated_at } - SourceInformationSeed { - BIGSERIAL source_information_seed_id PK - BIGINT source_id FK - BIGINT information_seed_id FK + SourceCategoryIndex { + BIGSERIAL source_category_id PK + BIGINT source_id FK "REFERENCES Sources(source_id)" + BIGINT category_id FK "REFERENCES Categories(category_id)" TIMESTAMP created_at TIMESTAMP last_updated_at } WebObjectsIndex { BIGSERIAL page_object_id PK - BIGINT index_id FK - BIGINT object_id FK + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" + BIGINT object_id FK "REFERENCES WebObjects(object_id)" TIMESTAMP created_at TIMESTAMP last_updated_at } MetaTagsIndex { BIGSERIAL sim_id PK - BIGINT index_id FK - BIGINT metatag_id FK + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" + BIGINT metatag_id FK "REFERENCES MetaTags(metatag_id)" TIMESTAMP created_at } KeywordIndex { BIGSERIAL keyword_index_id PK - BIGINT keyword_id FK - BIGINT index_id FK + BIGINT keyword_id FK "REFERENCES Keywords(keyword_id)" + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" TIMESTAMP created_at TIMESTAMP last_updated_at INTEGER occurrences @@ -157,35 +192,44 @@ erDiagram NetInfoIndex { BIGSERIAL netinfo_index_id PK - BIGINT netinfo_id FK - BIGINT index_id FK + BIGINT netinfo_id FK "REFERENCES NetInfo(netinfo_id)" + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" TIMESTAMP created_at TIMESTAMP last_updated_at } HTTPInfoIndex { BIGSERIAL httpinfo_index_id PK - BIGINT httpinfo_id FK - BIGINT index_id FK - TIMESTAMP created_at - TIMESTAMP last_updated_at - } - - InformationSeed ||--o{ SourceInformationSeed: "linked to" - Sources ||--o{ SourceInformationSeed: "has" - Sources ||--o{ SourceOwnerIndex: "has" - Owners ||--o{ 
SourceOwnerIndex: "owned by" - Sources ||--o{ SourceSearchIndex: "has" - SearchIndex ||--o{ SourceSearchIndex: "indexed by" - SearchIndex ||--o{ Screenshots: "has" - SearchIndex ||--o{ WebObjectsIndex: "contains" - WebObjects ||--o{ WebObjectsIndex: "found in" - SearchIndex ||--o{ MetaTagsIndex: "has" - MetaTags ||--o{ MetaTagsIndex: "tagged by" - SearchIndex ||--o{ KeywordIndex: "has" - Keywords ||--o{ KeywordIndex: "used in" - SearchIndex ||--o{ NetInfoIndex: "has" - NetInfo ||--o{ NetInfoIndex: "linked to" - SearchIndex ||--o{ HTTPInfoIndex: "has" - HTTPInfo ||--o{ HTTPInfoIndex: "linked to" + BIGINT httpinfo_id FK "REFERENCES HTTPInfo(httpinfo_id)" + BIGINT index_id FK "REFERENCES SearchIndex(index_id)" + TIMESTAMP created_at + TIMESTAMP last_updated_at + } + + Categories ||--|{ Categories : "parent_id" + InformationSeed ||--o{ Categories : "category_id" + InformationSeed ||--o{ Sources : "usr_id" + Sources ||--o{ Categories : "category_id" + Sources ||--o{ Owners : "usr_id" + Owners ||--o{ Sources : "usr_id" + Owners ||--o{ Owners : "usr_id" + SourceInformationSeedIndex ||--|{ InformationSeed : "information_seed_id" + SourceInformationSeedIndex ||--|{ Sources : "source_id" + SourceOwnerIndex ||--|{ Sources : "source_id" + SourceOwnerIndex ||--|{ Owners : "owner_id" + SourceSearchIndex ||--|{ Sources : "source_id" + SourceSearchIndex ||--|{ SearchIndex : "index_id" + SourceCategoryIndex ||--|{ Sources : "source_id" + SourceCategoryIndex ||--|{ Categories : "category_id" + WebObjectsIndex ||--|{ WebObjects : "object_id" + WebObjectsIndex ||--|{ SearchIndex : "index_id" + MetaTagsIndex ||--|{ MetaTags : "metatag_id" + MetaTagsIndex ||--|{ SearchIndex : "index_id" + KeywordIndex ||--|{ Keywords : "keyword_id" + KeywordIndex ||--|{ SearchIndex : "index_id" + NetInfoIndex ||--|{ NetInfo : "netinfo_id" + NetInfoIndex ||--|{ SearchIndex : "index_id" + HTTPInfoIndex ||--|{ HTTPInfo : "httpinfo_id" + HTTPInfoIndex ||--|{ SearchIndex : "index_id" + Screenshots ||--|{ 
SearchIndex : "index_id" ``` diff --git a/pkg/database/postgresql-setup-v1.4.pgsql b/pkg/database/postgresql-setup-v1.4.pgsql index 4dfd00c..fc260ce 100644 --- a/pkg/database/postgresql-setup-v1.4.pgsql +++ b/pkg/database/postgresql-setup-v1.4.pgsql @@ -77,8 +77,8 @@ CREATE TABLE IF NOT EXISTS SearchIndex ( detected_lang VARCHAR(8) -- (URI language) denormalized for fast searches ); --- Category table stores the categories (and subcategories) for the sources -CREATE TABLE IF NOT EXISTS Category ( +-- Categories table stores the categories (and subcategories) for the sources +CREATE TABLE IF NOT EXISTS Categories ( category_id BIGSERIAL PRIMARY KEY, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, last_updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, @@ -87,7 +87,7 @@ CREATE TABLE IF NOT EXISTS Category ( parent_id BIGINT, CONSTRAINT fk_parent FOREIGN KEY(parent_id) - REFERENCES Category(category_id) + REFERENCES Categories(category_id) ON DELETE SET NULL ); @@ -142,7 +142,7 @@ CREATE TABLE IF NOT EXISTS WebObjects ( -- and uniqueness. object_content TEXT, -- The actual content of the object, nullable if -- stored externally. - object_html TEXT, -- The HTML content of the object, nullable if + object_html TEXT, -- The HTML content of the object, nullable if -- stored externally. details JSONB NOT NULL -- Stores JSON document with all details about -- the object. 
@@ -167,11 +167,13 @@ CREATE TABLE IF NOT EXISTS Keywords ( -- Events table stores the events generated by the system CREATE TABLE IF NOT EXISTS Events ( event_sha256 CHAR(64) PRIMARY KEY, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, + last_updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, source_id BIGINT REFERENCES Sources(source_id) ON DELETE CASCADE, event_type VARCHAR(255) NOT NULL, event_severity VARCHAR(50) NOT NULL, event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, - event_details JSONB NOT NULL + details JSONB NOT NULL ); ---------------------------------------- @@ -242,7 +244,7 @@ CREATE TABLE IF NOT EXISTS SourceCategoryIndex ( ON DELETE CASCADE, CONSTRAINT fk_category FOREIGN KEY(category_id) - REFERENCES Category(category_id) + REFERENCES Categories(category_id) ON DELETE CASCADE, UNIQUE(source_id, category_id) ); @@ -314,114 +316,91 @@ CREATE TABLE IF NOT EXISTS HTTPInfoIndex ( -------------------------------------------------------------------------------- -- Indexes and triggers setup --- Creates an index for the WebObjects table on the object_id column +-- Indexes for the InformationSeed table --------------------------------------- DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_object_id') THEN - CREATE INDEX idx_webobjects_object_id ON WebObjects (object_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_informationseed_category_id') THEN + CREATE INDEX idx_informationseed_category_id ON InformationSeed(category_id); END IF; END $$; --- Creates an index for the WebObjectsIndex table on the object_id column DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjectsindex_object_id') THEN - CREATE INDEX idx_webobjectsindex_object_id ON WebObjectsIndex (object_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_informationseed_usr_id') THEN + CREATE INDEX idx_informationseed_usr_id ON InformationSeed(usr_id); END IF; END 
$$; --- Creates an index for the WebObjectsIndex table on the index_id column DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjectsindex_index_id') THEN - CREATE INDEX idx_webobjectsindex_index_id ON WebObjectsIndex (index_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_informationseed_information_seed') THEN + CREATE INDEX idx_informationseed_information_seed ON InformationSeed(information_seed); END IF; END $$; --- Creates an index for the SearchIndex table on the index_id column DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_index_id') THEN - CREATE INDEX idx_searchindex_index_id ON SearchIndex (index_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_informationseed_created_at') THEN + CREATE INDEX idx_informationseed_created_at ON InformationSeed(created_at); END IF; END $$; --- Creates an index for the SearchIndex table on the page_url column (for lower-cased searches) DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_page_url_lower') THEN - CREATE INDEX idx_searchindex_page_url_lower ON SearchIndex (LOWER(page_url)); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_informationseed_last_updated_at') THEN + CREATE INDEX idx_informationseed_last_updated_at ON InformationSeed(last_updated_at); END IF; END $$; --- Creates an index for the SearchIndex table on the title column (for lower-cased searches) -DO $$ -BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_title_lower') THEN - CREATE INDEX idx_searchindex_title_lower ON SearchIndex (LOWER(title)); - END IF; -END -$$; --- Creates an index for the SearchIndex table on the summary column (for lower-cased searches) -DO $$ -BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_summary_lower') THEN - CREATE INDEX idx_searchindex_summary_lower ON SearchIndex (LOWER(summary)); - END IF; 
-END -$$; +-- Indexes for the Sources table ----------------------------------------------- --- Creates an index for the Keywords table on the keyword column (for lower-cased searches) +-- Creates an index for the Sources url column DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywords_keyword_lower') THEN - CREATE INDEX idx_keywords_keyword_lower ON Keywords (LOWER(keyword)); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_url') THEN + CREATE INDEX idx_sources_url ON Sources(url text_pattern_ops); END IF; END $$; --- Creates an index for the KeywordIndex table on the keyword_id column +-- Creates an index for the Sources category_id column DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywordindex_keyword_id') THEN - CREATE INDEX idx_keywordindex_keyword_id ON KeywordIndex (keyword_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_category_id') THEN + CREATE INDEX idx_sources_category_id ON Sources(category_id); END IF; END $$; --- Creates an index for the KeywordIndex table on the index_id column +-- Creates an index for the Sources usr_id column DO $$ BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywordindex_index_id') THEN - CREATE INDEX idx_keywordindex_index_id ON KeywordIndex (index_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_usr_id') THEN + CREATE INDEX idx_sources_usr_id ON Sources(usr_id); END IF; END $$; --- Creates an index for the Sources url column +-- Creates an index for the Sources status column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_url') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_sources_url ON Sources(url text_pattern_ops); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_status') THEN + CREATE INDEX idx_sources_status ON Sources(status); END 
IF; END $$; --- Creates an index for the Sources status column +-- Creates an index for the Sources engine column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_status') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_sources_status ON Sources(status); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_engine') THEN + CREATE INDEX idx_sources_engine ON Sources(engine); END IF; END $$; @@ -429,21 +408,17 @@ $$; -- Creates an index for the Sources last_crawled_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_last_crawled_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_sources_last_crawled_at ON Sources(last_crawled_at); END IF; END $$; --- Creates an index for the Sources source_id column +-- Creates an index for the Sources last_error_at column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_source_id') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_sources_source_id ON Sources(source_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_last_error_at') THEN + CREATE INDEX idx_sources_last_error_at ON Sources(last_error_at); END IF; END $$; @@ -451,21 +426,20 @@ $$; -- Creates a gin index for the Source config column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sources_config') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_sources_config ON Sources USING gin(config jsonb_path_ops); END IF; END $$; --- Creates an index for the Owners details column + +-- Indexes for the Owners table ------------------------------------------------ + +-- Creates an index for the Owners usr_id column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS 
(SELECT 1 FROM pg_indexes WHERE indexname = 'idx_owners_details') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_owners_details ON Owners USING gin(details jsonb_path_ops); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_owners_usr_id') THEN + CREATE INDEX idx_owners_usr_id ON Owners(usr_id); END IF; END $$; @@ -473,20 +447,25 @@ $$; -- Creates an index for the Owners details_hash column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_owners_details_hash') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_owners_details_hash ON Owners(details_hash); END IF; END $$; +-- Creates an index for the Owners details column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_owners_details') THEN + CREATE INDEX idx_owners_details ON Owners USING gin(details jsonb_path_ops); + END IF; +END +$$; + -- Creates an index for the Owners last_updated_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_owners_last_updated_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_owners_last_updated_at ON Owners(last_updated_at); END IF; END @@ -495,146 +474,116 @@ $$; -- Creates an index for the Owners created_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_owners_created_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_owners_created_at ON Owners(created_at); END IF; END $$; --- Creates an index for the NetInfo last_updated_at column -DO $$ -BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_netinfo_last_updated_at') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_netinfo_last_updated_at ON NetInfo(last_updated_at); - END IF; -END -$$; --- Creates an index for the NetInfo created_at column 
+-- Indexes for the SearchIndex table ------------------------------------------- + +-- Creates an index for the SearchIndex page_url column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_netinfo_created_at') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_netinfo_created_at ON NetInfo(created_at); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_page_url') THEN + CREATE INDEX idx_searchindex_page_url ON SearchIndex(page_url); END IF; END $$; --- Creates an index for the NetInfo details_hash column +-- Creates an index for the SearchIndex title column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_netinfo_details_hash') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_netinfo_details_hash ON NetInfo(details_hash); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_title') THEN + CREATE INDEX idx_searchindex_title ON SearchIndex(title); END IF; END $$; --- Creates an index for the details column in the NetInfo table --- This index is used to search for specific keys in the JSONB column --- The jsonb_path_ops operator class is used to index the JSONB column --- for queries that use the @> operator to search for keys in the JSONB column --- The jsonb_path_ops operator class is optimized for queries that use the @> operator --- to search for keys in the JSONB column +-- Creates a prefix index (first 1000 characters) for the SearchIndex summary column; a plain b-tree on the full TEXT value can exceed the b-tree index row size limit DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_json_netinfo_details') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_json_netinfo_details ON NetInfo USING gin (details jsonb_path_ops); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_summary') THEN + CREATE INDEX idx_searchindex_summary ON SearchIndex(left(summary, 1000) text_pattern_ops) WHERE summary IS NOT NULL; END 
IF; END $$; --- Creates an index for the HTTPInfo last_updated_at column +-- Creates an index for the SearchIndex detected_type column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_httpinfo_last_updated_at') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_httpinfo_last_updated_at ON HTTPInfo(last_updated_at); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_detected_type') THEN + CREATE INDEX idx_searchindex_detected_type ON SearchIndex(detected_type); END IF; END $$; --- Creates an index for the HTTPInfo created_at column +-- Creates an index for the SearchIndex detected_lang column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_httpinfo_created_at') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_httpinfo_created_at ON HTTPInfo(created_at); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_detected_lang') THEN + CREATE INDEX idx_searchindex_detected_lang ON SearchIndex(detected_lang); END IF; END $$; --- Creates an index for the HTTPInfo details_hash column + +-- Indexes for the Categories table -------------------------------------------- DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_httpinfo_details_hash') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_httpinfo_details_hash ON HTTPInfo(details_hash); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_categories_parent_id') THEN + CREATE INDEX idx_categories_parent_id ON Categories(parent_id); END IF; END $$; --- Creates an index for the HTTPInfo details column + +-- Indexes for the NetInfo table ----------------------------------------------- + +-- Creates an index for the NetInfo details_hash column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM 
pg_indexes WHERE indexname = 'idx_json_httpinfo_details') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_json_httpinfo_details ON HTTPInfo USING gin (details jsonb_path_ops); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_netinfo_details_hash') THEN + CREATE INDEX idx_netinfo_details_hash ON NetInfo(details_hash); END IF; END $$; --- Creates an index for the SearchIndex title column +-- Creates an index for the NetInfo last_updated_at column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_title') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_searchindex_title ON SearchIndex(title text_pattern_ops) WHERE title IS NOT NULL; + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_netinfo_last_updated_at') THEN + CREATE INDEX idx_netinfo_last_updated_at ON NetInfo(last_updated_at); END IF; END $$; --- Creates an index for the SearchIndex summary column +-- Creates an index for the NetInfo created_at column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_summary') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_searchindex_summary ON SearchIndex(left(summary, 1000) text_pattern_ops) WHERE summary IS NOT NULL; + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_netinfo_created_at') THEN + CREATE INDEX idx_netinfo_created_at ON NetInfo(created_at); END IF; END $$; --- Creates an index for the SearchIndex last_updated_at column +-- Creates an index for the details column in the NetInfo table DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_last_updated_at') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_searchindex_last_updated_at ON SearchIndex(last_updated_at); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 
'idx_json_netinfo_details') THEN + CREATE INDEX idx_json_netinfo_details ON NetInfo USING gin (details jsonb_path_ops); END IF; END $$; + +-- Indexes for the Screenshots table ------------------------------------------- + -- Creates an index for the Screenshots index_id column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_screenshots_index_id') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_screenshots_index_id ON Screenshots(index_id); END IF; END @@ -643,9 +592,7 @@ $$; -- Creates an index for the Screenshots screenshot_link column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_screenshots_screenshot_link') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_screenshots_screenshot_link ON Screenshots(screenshot_link); END IF; END @@ -654,9 +601,7 @@ $$; -- Creates an index for the Screenshots last_updated_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_screenshots_last_updated_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_screenshots_last_updated_at ON Screenshots(last_updated_at); END IF; END @@ -665,20 +610,19 @@ $$; -- Creates an index for the Screenshots created_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_screenshots_created_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_screenshots_created_at ON Screenshots(created_at); END IF; END $$; + +-- Indexes for the WebObjects table -------------------------------------------- + -- Creates an index for the WebObjects object_link column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_object_link') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_webobjects_object_link 
ON WebObjects(object_link text_pattern_ops); END IF; END @@ -687,20 +631,16 @@ $$; -- Creates an index for the WebObjects object_type column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_object_type') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_webobjects_object_type ON WebObjects(object_type); END IF; END $$; --- Create an index for the WebObjects object_hash column +-- Creates an index for the WebObjects object_hash column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_object_hash') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_webobjects_object_hash ON WebObjects(object_hash); END IF; END @@ -709,9 +649,7 @@ $$; -- Creates an index for the WebObjects object_content column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_object_content') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_webobjects_object_content ON WebObjects(left(object_content, 1024) text_pattern_ops) WHERE object_content IS NOT NULL AND object_link = 'db'; END IF; END @@ -720,9 +658,7 @@ $$; -- Creates an index for the WebObjects created_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_created_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_webobjects_created_at ON WebObjects(created_at); END IF; END @@ -731,92 +667,238 @@ $$; -- Creates an index for the WebObjects last_updated_at column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjects_last_updated_at') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_webobjects_last_updated_at ON WebObjects(last_updated_at); END IF; END $$; -- Creates an index for the details column in 
the WebObjects table --- This index is used to search for specific keys in the JSONB column --- The jsonb_path_ops operator class is used to index the JSONB column --- for queries that use the @> operator to search for keys in the JSONB column --- The jsonb_path_ops operator class is optimized for queries that use the @> operator --- to search for keys in the JSONB column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_json_webobjects_details') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_json_webobjects_details ON WebObjects USING gin (details jsonb_path_ops); END IF; END $$; --- Creates an index for the MetaTags name column + +-- Indexes for WebObjectsIndex Table ------------------------------------------- + +-- Creates an index for the WebObjectsIndex table on the object_id column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjectsindex_object_id') THEN + CREATE INDEX idx_webobjectsindex_object_id ON WebObjectsIndex (object_id); + END IF; +END +$$; + +-- Creates an index for the WebObjectsIndex table on the index_id column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_webobjectsindex_index_id') THEN + CREATE INDEX idx_webobjectsindex_index_id ON WebObjectsIndex (index_id); + END IF; +END +$$; + + +-- Indexes for the Keywords table ---------------------------------------------- + +-- Creates an index for the Keywords table on the keyword column (for lower-cased searches) +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywords_keyword_lower') THEN + CREATE INDEX idx_keywords_keyword_lower ON Keywords (LOWER(keyword)); + END IF; +END +$$; + + +-- Indexes for the KeywordIndex table ------------------------------------------ + +-- Creates an index for the KeywordIndex table on the keyword_id column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM 
pg_indexes WHERE indexname = 'idx_metatags_name') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_metatags_name ON MetaTags(name text_pattern_ops) WHERE name IS NOT NULL; + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywordindex_keyword_id') THEN + CREATE INDEX idx_keywordindex_keyword_id ON KeywordIndex (keyword_id); END IF; END $$; --- Creates an index for the MetaTags content column +-- Creates an index for the KeywordIndex table on the index_id column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_metatags_content') THEN - -- Create the index if it doesn't exist - CREATE INDEX idx_metatags_content ON MetaTags(left(content, 1024) text_pattern_ops) WHERE content IS NOT NULL; + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywordindex_index_id') THEN + CREATE INDEX idx_keywordindex_index_id ON KeywordIndex (index_id); END IF; END $$; --- Creates and index for the Keywords ocurences column to help --- with keyowrds analysis +-- Creates an index for the KeywordIndex table on the occurrences column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_keywordindex_occurrences') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_keywordindex_occurrences ON KeywordIndex(occurrences); END IF; END $$; --- Creates an index for SourceOwnerIndex owner_id column + +-- Indexes for the HTTPInfo table ---------------------------------------------- + +-- Creates an index for the HTTPInfo details_hash column DO $$ BEGIN - -- Check if the index already exists - IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sourceownerindex_owner_id') THEN - -- Create the index if it doesn't exist - CREATE INDEX IF NOT EXISTS idx_sourceownerindex_owner_id ON SourceOwnerIndex(owner_id); + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_httpinfo_details_hash') THEN + CREATE 
INDEX idx_httpinfo_details_hash ON HTTPInfo(details_hash); END IF; END $$; +-- Creates an index for the HTTPInfo details column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_json_httpinfo_details') THEN + CREATE INDEX idx_json_httpinfo_details ON HTTPInfo USING gin (details jsonb_path_ops); + END IF; +END +$$; + +-- Creates an index for the HTTPInfo last_updated_at column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_httpinfo_last_updated_at') THEN + CREATE INDEX idx_httpinfo_last_updated_at ON HTTPInfo(last_updated_at); + END IF; +END +$$; + +-- Creates an index for the HTTPInfo created_at column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_httpinfo_created_at') THEN + CREATE INDEX idx_httpinfo_created_at ON HTTPInfo(created_at); + END IF; +END +$$; + + +-- Indexes for the Events table ------------------------------------------------ + +-- Creates an index for the Events source_id column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_source_id') THEN + CREATE INDEX idx_events_source_id ON Events(source_id); + END IF; +END +$$; + +-- Creates an index for the Events event_type column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_event_type') THEN + CREATE INDEX idx_events_event_type ON Events(event_type); + END IF; +END +$$; + +-- Creates an index for the Events event_severity column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_event_severity') THEN + CREATE INDEX idx_events_event_severity ON Events(event_severity); + END IF; +END +$$; + +-- Creates an index for the Events event_timestamp column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_event_timestamp') THEN + CREATE INDEX idx_events_event_timestamp ON Events(event_timestamp); + END IF; +END +$$; + +-- Creates an index for the Events details column +DO $$ 
+BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_details') THEN + CREATE INDEX idx_events_details ON Events USING gin(details jsonb_path_ops); + END IF; +END +$$; + +-- Creates an index for the Events last_updated_at column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_last_updated_at') THEN + CREATE INDEX idx_events_last_updated_at ON Events(last_updated_at); + END IF; +END +$$; + +-- Creates an index for the Events created_at column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_events_created_at') THEN + CREATE INDEX idx_events_created_at ON Events(created_at); + END IF; +END +$$; + + +-- Indexes for the SourceInformationSeedIndex table ---------------------------- + +-- Creates an index for SourceInformationSeedIndex source_id column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sourceinformationseedindex_source_id') THEN + CREATE INDEX idx_sourceinformationseedindex_source_id ON SourceInformationSeedIndex(source_id); + END IF; +END +$$; + +-- Creates an index for SourceInformationSeedIndex information_seed_id column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sourceinformationseedindex_information_seed_id') THEN + CREATE INDEX idx_sourceinformationseedindex_information_seed_id ON SourceInformationSeedIndex(information_seed_id); + END IF; +END +$$; + + +-- Indexes for the SourceOwnerIndex table --------------------------------------- + -- Creates an index for SourceOwnerIndex source_id column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sourceownerindex_source_id') THEN - -- Create the index if it doesn't exist - CREATE INDEX IF NOT EXISTS idx_sourceownerindex_source_id ON SourceOwnerIndex(source_id); + CREATE INDEX idx_sourceownerindex_source_id ON SourceOwnerIndex(source_id); END IF; END $$; +-- Creates an index for 
SourceOwnerIndex owner_id column +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_sourceownerindex_owner_id') THEN + CREATE INDEX idx_sourceownerindex_owner_id ON SourceOwnerIndex(owner_id); + END IF; +END +$$; + + +-- Indexes for the SourceSearchIndex table --------------------------------------- + -- Creates an index for the SourceSearchIndex source_id column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_ssi_source_id') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_ssi_source_id ON SourceSearchIndex(source_id); END IF; END @@ -825,20 +907,19 @@ $$; -- Creates an index for the SourceSearchIndex index_id column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_ssi_index_id') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_ssi_index_id ON SourceSearchIndex(index_id); END IF; END $$; + +-- Indexes for WebObjectsIndex table ----------------------------------------- + -- Creates an index for the WebObjectsIndex index_id column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_woi_index_id') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_woi_index_id ON WebObjectsIndex(index_id); END IF; END @@ -847,9 +928,7 @@ $$; -- Creates an index for the WebObjectsIndex object_id column DO $$ BEGIN - -- Check if the index already exists IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_woi_object_id') THEN - -- Create the index if it doesn't exist CREATE INDEX idx_woi_object_id ON WebObjectsIndex(object_id); END IF; END @@ -858,6 +937,7 @@ $$; -------------------------------------------------------------------------------- -- Full Text Search setup +-- SearchIndex Full Text Search (FTS) DO $$ BEGIN IF NOT EXISTS ( @@ -882,7 +962,7 @@ BEGIN END $$ LANGUAGE plpgsql; --- Creates an index for the 
SearchIndex table on the page_url column (for lower-cased searches) +-- Creates an index for the SearchIndex tsv column DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_searchindex_tsv') THEN @@ -891,10 +971,9 @@ BEGIN END $$; --- Adds a tsvector column for full-text search +-- WebObjects Full Text Search (FTS) DO $$ BEGIN - -- Check and add the content_fts column if it does not exist IF NOT EXISTS ( SELECT 1 FROM information_schema.columns @@ -906,10 +985,7 @@ BEGIN END $$; --------------------------------------------------------------------------------- --- Functions and Triggers setup - --- Creates a function to update the tsvector column (FTS = Full Text Search) +-- Creates the function and trigger that update the tsvector column for WebObjects CREATE OR REPLACE FUNCTION webobjects_content_trigger() RETURNS trigger AS $$ BEGIN NEW.object_content_fts := to_tsvector('english', coalesce(NEW.object_content, '')); @@ -917,18 +993,18 @@ BEGIN END $$ LANGUAGE plpgsql; --- Creates a trigger to update the tsvector column DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_webobjects_content') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_webobjects_content BEFORE INSERT OR UPDATE ON WebObjects FOR EACH ROW EXECUTE FUNCTION webobjects_content_trigger(); END IF; END $$; +-------------------------------------------------------------------------------- +-- Functions and Triggers setup + -- Creates a function to update the last_updated_at column CREATE OR REPLACE FUNCTION update_last_updated_at_column() RETURNS TRIGGER AS $$ @@ -941,7 +1017,6 @@ $$ LANGUAGE plpgsql; -- Creates a trigger to update the last_updated_at column on Sources table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_sources_last_updated_before_update') THEN CREATE TRIGGER trg_update_sources_last_updated_before_update BEFORE UPDATE ON Sources @@
-954,7 +1029,6 @@ $$; -- Create a trigger to update the last_updated_at column for InformationSeed table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_information_seed_last_updated_before_update') THEN CREATE TRIGGER trg_update_information_seed_last_updated_before_update BEFORE UPDATE ON InformationSeed @@ -967,7 +1041,6 @@ $$; -- Create a trigger to update the last_updated_at column for SourceInformationSeedIndex table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_sourceinformationseedidx_last_updated_before_update') THEN CREATE TRIGGER trg_update_sourceinformationseedidx_last_updated_before_update BEFORE UPDATE ON SourceInformationSeedIndex @@ -980,7 +1053,6 @@ $$; -- Creates a trigger to update the last_updated_at column on SearchIndex table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_searchindex_last_updated_before_update') THEN CREATE TRIGGER trg_update_searchindex_last_updated_before_update BEFORE UPDATE ON SearchIndex @@ -993,7 +1065,6 @@ $$; -- Creates a trigger to update the last_updated_at column on Owners table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_owners_last_updated_before_update') THEN CREATE TRIGGER trg_update_owners_last_updated_before_update BEFORE UPDATE ON Owners @@ -1006,7 +1077,6 @@ $$; -- Creates a trigger to update the last_updated_at column on NetInfo table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_netinfo_last_updated_before_update') THEN CREATE TRIGGER trg_update_netinfo_last_updated_before_update BEFORE UPDATE ON NetInfo @@ -1019,7 +1089,6 @@ $$; -- Creates a trigger to update the last_updated_at column on HTTPInfo table DO $$ BEGIN - -- Check if the trigger 
already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_httpinfo_last_updated_before_update') THEN CREATE TRIGGER trg_update_httpinfo_last_updated_before_update BEFORE UPDATE ON HTTPInfo @@ -1032,7 +1101,6 @@ $$; -- Creates a trigger to update the last_updated_at column on WebObjects table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_webobjects_last_updated_before_update') THEN CREATE TRIGGER trg_update_webobjects_last_updated_before_update BEFORE UPDATE ON WebObjects @@ -1045,7 +1113,6 @@ $$; -- Creates a trigger to update the last_updated_at column on MetaTags table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_metatags_last_updated_before_update') THEN CREATE TRIGGER trg_update_metatags_last_updated_before_update BEFORE UPDATE ON MetaTags @@ -1058,7 +1125,6 @@ $$; -- Creates a trigger to update the last_updated_at column on Keywords table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_keywords_last_updated_before_update') THEN CREATE TRIGGER trg_update_keywords_last_updated_before_update BEFORE UPDATE ON Keywords @@ -1071,7 +1137,6 @@ $$; -- Creates a trigger to update the last_updated_at column on SourceOwnerIndex table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_sourceowner_last_updated_before_update') THEN CREATE TRIGGER trg_update_sourceowner_last_updated_before_update BEFORE UPDATE ON SourceOwnerIndex @@ -1084,7 +1149,6 @@ $$; -- Creates a trigger to update the last_updated_at column on SourceSearchIndex table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_ssi_last_updated_before_update') THEN CREATE TRIGGER trg_update_ssi_last_updated_before_update BEFORE UPDATE ON 
SourceSearchIndex @@ -1097,7 +1161,6 @@ $$; -- Creates a trigger to update the last_updated_at column on KeywordIndex table DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_update_keywordindex_last_updated_before_update') THEN CREATE TRIGGER trg_update_keywordindex_last_updated_before_update BEFORE UPDATE ON KeywordIndex @@ -1164,9 +1227,7 @@ $$ LANGUAGE plpgsql; -- Creates a trigger to handle the deletion of shared entities when no longer linked to any Source. DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_after_delete_metatagsindex') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_after_delete_metatagsindex AFTER DELETE ON MetaTagsIndex FOR EACH ROW @@ -1178,9 +1239,7 @@ $$; -- Creates a trigger to handle the deletion of shared entities when no longer linked to any Source. DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_after_delete_webobjectsindex') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_after_delete_webobjectsindex AFTER DELETE ON WebObjectsIndex FOR EACH ROW @@ -1192,9 +1251,7 @@ $$; -- Creates a trigger to handle the deletion of shared entities when no longer linked to any Source. DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_after_delete_keywordindex') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_after_delete_keywordindex AFTER DELETE ON KeywordIndex FOR EACH ROW @@ -1206,9 +1263,7 @@ $$; -- Creates a trigger to handle the deletion of shared entities when no longer linked to any Source. 
DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_after_delete_netinfoindex') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_after_delete_netinfoindex AFTER DELETE ON NetInfoIndex FOR EACH ROW @@ -1220,9 +1275,7 @@ $$; -- Creates a trigger to handle the deletion of shared entities when no longer linked to any Source. DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_after_delete_httpinfoindex') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_after_delete_httpinfoindex AFTER DELETE ON HTTPInfoIndex FOR EACH ROW @@ -1244,13 +1297,10 @@ BEGIN END; $$ LANGUAGE plpgsql; - -- Creates a trigger to handle the deletion of SearchIndex entries when no longer linked to any Source. DO $$ BEGIN - -- Check if the trigger already exists IF NOT EXISTS (SELECT 1 FROM pg_trigger WHERE tgname = 'trg_after_delete_source_searchindex') THEN - -- Create the trigger if it doesn't exist CREATE TRIGGER trg_after_delete_source_searchindex AFTER DELETE ON SourceSearchIndex FOR EACH ROW EXECUTE FUNCTION handle_searchindex_deletion(); @@ -1318,8 +1368,6 @@ RETURNS TABLE ( url TEXT ) AS $$ BEGIN - -- RAISE NOTICE 'Starting search for domain: %', domain; - RETURN QUERY WITH PartnerSourcesFromNetInfo AS ( SELECT DISTINCT ssi.source_id @@ -1353,12 +1401,9 @@ BEGIN SELECT DISTINCT s.source_id, s.url FROM Sources s JOIN AllPartnerSources aps ON s.source_id = aps.source_id; - - -- RAISE NOTICE 'Finished search for domain: %', domain; END; $$ LANGUAGE plpgsql; - -------------------------------------------------------------------------------- -- User and permissions setup @@ -1378,7 +1423,6 @@ END; $$ LANGUAGE plpgsql; - -- Creates a new user CREATE USER :CROWLER_DB_USER WITH ENCRYPTED PASSWORD :'CROWLER_DB_PASSWORD'; @@ -1407,8 +1451,7 @@ ALTER TABLE owners OWNER TO :CROWLER_DB_USER; ALTER TABLE screenshots OWNER TO :CROWLER_DB_USER; ALTER 
TABLE keywords OWNER TO :CROWLER_DB_USER; ALTER TABLE events OWNER TO :CROWLER_DB_USER; -ALTER TABLE category OWNER TO :CROWLER_DB_USER; - +ALTER TABLE categories OWNER TO :CROWLER_DB_USER; -- Grants permissions to the user on the :"POSTGRES_DB" database SELECT grant_sequence_permissions('public', :'CROWLER_DB_USER');