Skip to content

Commit

Permalink
Merge pull request #346 from pzaino/develop
Browse files (browse the repository at this point in the history)
Updated DB schema to latest release
  • Loading branch information
pzaino authored Jun 21, 2024
2 parents 18b326c + 30f6574 commit 5d2424f
Show file tree
Hide file tree
Showing 3 changed files with 393 additions and 306 deletions.
2 changes: 1 addition & 1 deletion cmd/addCategory/main.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -154,7 +154,7 @@ func validateSchema(content []byte, schemaPath string) error {

func insertCategory(db *sqlx.DB, category Category, parentID *int64) {
var categoryID int64
query := `INSERT INTO Category (name, description, parent_id, created_at)
query := `INSERT INTO Categories (name, description, parent_id, created_at)
VALUES ($1, $2, $3, $4) RETURNING category_id`
err := db.QueryRowx(query, category.Name, category.Description, parentID, time.Now()).Scan(&categoryID)
if err != nil {
Expand Down
168 changes: 106 additions & 62 deletions doc/database_architecture.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,16 +12,21 @@ erDiagram
BIGSERIAL information_seed_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR(256) information_seed
BIGINT category_id
BIGINT usr_id
VARCHAR information_seed
JSONB config
}
Sources {
BIGSERIAL source_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
BIGINT usr_id
BIGINT category_id
TEXT url
VARCHAR(50) status
VARCHAR status
VARCHAR engine
TIMESTAMP last_crawled_at
TEXT last_error
TIMESTAMP last_error_at
Expand All @@ -35,157 +40,196 @@ erDiagram
BIGSERIAL owner_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR(64) details_hash
BIGINT usr_id
VARCHAR details_hash
JSONB details
}
SearchIndex {
BIGSERIAL index_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
TEXT page_url
VARCHAR title
TEXT summary
VARCHAR detected_type
VARCHAR detected_lang
TSVECTOR tsv
}
Categories {
BIGSERIAL category_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR name
TEXT description
BIGINT parent_id FK "REFERENCES Categories(category_id)"
}
NetInfo {
BIGSERIAL netinfo_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR(64) details_hash
VARCHAR details_hash
JSONB details
}
HTTPInfo {
BIGSERIAL httpinfo_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR(64) details_hash
VARCHAR details_hash
JSONB details
}
SearchIndex {
BIGSERIAL index_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
TEXT page_url
VARCHAR(255) title
TEXT summary
VARCHAR(8) detected_type
VARCHAR(8) detected_lang
}
Screenshots {
BIGSERIAL screenshot_id PK
BIGINT index_id FK
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR(10) type
VARCHAR type
TEXT screenshot_link
INTEGER height
INTEGER width
INTEGER byte_size
INTEGER thumbnail_height
INTEGER thumbnail_width
TEXT thumbnail_link
VARCHAR(10) format
VARCHAR format
}
WebObjects {
BIGSERIAL object_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
TEXT object_link
VARCHAR(255) object_type
VARCHAR(64) object_hash
VARCHAR object_type
VARCHAR object_hash
TEXT object_content
TEXT object_html
JSONB details
}
MetaTags {
BIGSERIAL metatag_id PK
VARCHAR(255) name
VARCHAR name
TEXT content
}
Keywords {
BIGSERIAL keyword_id PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
VARCHAR(100) keyword
VARCHAR keyword
}
Events {
CHAR event_sha256 PK
TIMESTAMP created_at
TIMESTAMP last_updated_at
BIGINT source_id FK "REFERENCES Sources(source_id)"
VARCHAR event_type
VARCHAR event_severity
TIMESTAMP event_timestamp
JSONB details
}
SourceInformationSeedIndex {
BIGSERIAL source_information_seed_id PK
BIGINT source_id FK "REFERENCES Sources(source_id)"
BIGINT information_seed_id FK "REFERENCES InformationSeed(information_seed_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
SourceOwnerIndex {
BIGSERIAL source_owner_id PK
BIGINT source_id FK
BIGINT owner_id FK
BIGINT source_id FK "REFERENCES Sources(source_id)"
BIGINT owner_id FK "REFERENCES Owners(owner_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
SourceSearchIndex {
BIGSERIAL ss_index_id PK
BIGINT source_id FK
BIGINT index_id FK
BIGINT source_id FK "REFERENCES Sources(source_id)"
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
SourceInformationSeed {
BIGSERIAL source_information_seed_id PK
BIGINT source_id FK
BIGINT information_seed_id FK
SourceCategoryIndex {
BIGSERIAL source_category_id PK
BIGINT source_id FK "REFERENCES Sources(source_id)"
BIGINT category_id FK "REFERENCES Categories(category_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
WebObjectsIndex {
BIGSERIAL page_object_id PK
BIGINT index_id FK
BIGINT object_id FK
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
BIGINT object_id FK "REFERENCES WebObjects(object_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
MetaTagsIndex {
BIGSERIAL sim_id PK
BIGINT index_id FK
BIGINT metatag_id FK
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
BIGINT metatag_id FK "REFERENCES MetaTags(metatag_id)"
TIMESTAMP created_at
}
KeywordIndex {
BIGSERIAL keyword_index_id PK
BIGINT keyword_id FK
BIGINT index_id FK
BIGINT keyword_id FK "REFERENCES Keywords(keyword_id)"
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
INTEGER occurrences
}
NetInfoIndex {
BIGSERIAL netinfo_index_id PK
BIGINT netinfo_id FK
BIGINT index_id FK
BIGINT netinfo_id FK "REFERENCES NetInfo(netinfo_id)"
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
HTTPInfoIndex {
BIGSERIAL httpinfo_index_id PK
BIGINT httpinfo_id FK
BIGINT index_id FK
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
InformationSeed ||--o{ SourceInformationSeed: "linked to"
Sources ||--o{ SourceInformationSeed: "has"
Sources ||--o{ SourceOwnerIndex: "has"
Owners ||--o{ SourceOwnerIndex: "owned by"
Sources ||--o{ SourceSearchIndex: "has"
SearchIndex ||--o{ SourceSearchIndex: "indexed by"
SearchIndex ||--o{ Screenshots: "has"
SearchIndex ||--o{ WebObjectsIndex: "contains"
WebObjects ||--o{ WebObjectsIndex: "found in"
SearchIndex ||--o{ MetaTagsIndex: "has"
MetaTags ||--o{ MetaTagsIndex: "tagged by"
SearchIndex ||--o{ KeywordIndex: "has"
Keywords ||--o{ KeywordIndex: "used in"
SearchIndex ||--o{ NetInfoIndex: "has"
NetInfo ||--o{ NetInfoIndex: "linked to"
SearchIndex ||--o{ HTTPInfoIndex: "has"
HTTPInfo ||--o{ HTTPInfoIndex: "linked to"
BIGINT httpinfo_id FK "REFERENCES HTTPInfo(httpinfo_id)"
BIGINT index_id FK "REFERENCES SearchIndex(index_id)"
TIMESTAMP created_at
TIMESTAMP last_updated_at
}
Categories ||--|{ Categories : "parent_id"
InformationSeed ||--o{ Categories : "category_id"
InformationSeed ||--o{ Sources : "usr_id"
Sources ||--o{ Categories : "category_id"
Sources ||--o{ Owners : "usr_id"
Owners ||--o{ Sources : "usr_id"
Owners ||--o{ Owners : "usr_id"
SourceInformationSeedIndex ||--|{ InformationSeed : "information_seed_id"
SourceInformationSeedIndex ||--|{ Sources : "source_id"
SourceOwnerIndex ||--|{ Sources : "source_id"
SourceOwnerIndex ||--|{ Owners : "owner_id"
SourceSearchIndex ||--|{ Sources : "source_id"
SourceSearchIndex ||--|{ SearchIndex : "index_id"
SourceCategoryIndex ||--|{ Sources : "source_id"
SourceCategoryIndex ||--|{ Categories : "category_id"
WebObjectsIndex ||--|{ WebObjects : "object_id"
WebObjectsIndex ||--|{ SearchIndex : "index_id"
MetaTagsIndex ||--|{ MetaTags : "metatag_id"
MetaTagsIndex ||--|{ SearchIndex : "index_id"
KeywordIndex ||--|{ Keywords : "keyword_id"
KeywordIndex ||--|{ SearchIndex : "index_id"
NetInfoIndex ||--|{ NetInfo : "netinfo_id"
NetInfoIndex ||--|{ SearchIndex : "index_id"
HTTPInfoIndex ||--|{ HTTPInfo : "httpinfo_id"
HTTPInfoIndex ||--|{ SearchIndex : "index_id"
Screenshots ||--|{ SearchIndex : "index_id"
```
Loading

0 comments on commit 5d2424f

Please sign in to comment.