Skip to content

Commit

Permalink
feat: HourBucketizer fragmenting members per hour as a double linked …
Browse files Browse the repository at this point in the history
…list
  • Loading branch information
smessie committed Oct 24, 2024
1 parent 05f6950 commit 3c5f220
Show file tree
Hide file tree
Showing 5 changed files with 410 additions and 4 deletions.
22 changes: 22 additions & 0 deletions configs/bucketizer_configs.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,25 @@
sh:minCount 1;
].

# Shape describing the configuration of a tree:HourFragmentation.
[ ] a sh:NodeShape;
sh:targetClass tree:HourFragmentation;
sh:property [
# The timestamp path, exposed as a path lens under the name "path".
sh:name "path";
sh:path tree:timestampPath;
sh:class rdfl:PathLens;
sh:maxCount 1;
sh:minCount 1;
], [
# The same tree:timestampPath predicate, exposed a second time under the
# name "pathQuads" with class <RdfThing>.
sh:name "pathQuads";
sh:path tree:timestampPath;
sh:class <RdfThing>;
sh:maxCount 1;
sh:minCount 1;
], [
# Optional boolean flag (minCount 0, maxCount 1).
sh:name "unorderedRelations";
sh:path tree:unorderedRelations;
sh:datatype xsd:boolean;
sh:maxCount 1;
sh:minCount 0;
].

142 changes: 142 additions & 0 deletions src/bucketizers/hourBucketizer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import { AddRelation, Bucketizer, HourFragmentation } from "./index";
import { getLoggerFor } from "../utils/logUtil";
import { Bucket, RdfThing, Record } from "../utils";
import { BasicLensM, Cont } from "rdf-lens";
import { Term } from "@rdfjs/types";
import { TREE, XSD } from "@treecg/types";
import { DataFactory } from "n3";

const { literal, namedNode } = DataFactory;

/**
 * Bucketizer that groups records into one bucket per hour, based on the
 * timestamp found at the configured path.
 *
 * Every time a record falls in a different hour than the current bucket, a new
 * bucket is created, relations are added in both directions between the
 * previous bucket and the new one, and the previous bucket is marked immutable.
 *
 * The hour is derived with the local-time `Date` getters, and the bucket key is
 * the ISO string of that hour's start.
 */
export default class HourBucketizer implements Bucketizer {
    protected readonly logger = getLoggerFor(this);

    private readonly path: BasicLensM<Cont, { value: string; literal?: Term }>;
    private readonly pathQuads: RdfThing;
    private readonly unorderedRelations: boolean;

    /** Start of the hour of the current bucket; undefined until the first record. */
    private hour?: Date;
    /** Whether the current bucket is still the root bucket. */
    private root: boolean = true;

    /**
     * @param config Fragmentation configuration (timestamp lens, path quads,
     *               and whether relations should be unordered).
     * @param save   Optional state previously produced by {@link save}.
     */
    constructor(config: HourFragmentation, save?: string) {
        this.path = config.path.mapAll((x) => ({
            value: x.id.value,
            literal: x.id,
        }));
        this.pathQuads = config.pathQuads;
        this.unorderedRelations = config.unorderedRelations ?? false;

        if (save) {
            const parsed = JSON.parse(save);

            // `save()` omits `hour` when no record was processed yet. Blindly
            // calling `new Date(undefined)` would yield an Invalid Date, which
            // is truthy and would later make `toISOString()` throw.
            if (parsed.hour != null) {
                const restored = new Date(parsed.hour);
                if (!Number.isNaN(restored.getTime())) {
                    this.hour = restored;
                }
            }
            if (typeof parsed.root === "boolean") {
                this.root = parsed.root;
            }
        }
    }

    /**
     * Assigns the record to the bucket(s) of the hour(s) of its timestamp(s).
     *
     * @param record      The record to bucketize.
     * @param getBucket   Callback that returns the bucket for a key.
     * @param addRelation Callback that registers a relation between buckets.
     * @returns The buckets the record was assigned to (empty when the record
     *          has no usable timestamp).
     */
    bucketize(
        record: Record,
        getBucket: (key: string, root?: boolean) => Bucket,
        addRelation: AddRelation,
    ): Bucket[] {
        // Keep only the first occurrence of each distinct timestamp value.
        const values = this.path
            .execute(record.data)
            .filter(
                (x, i, arr) => arr.findIndex((y) => x.value === y.value) === i,
            );

        const out: Bucket[] = [];

        for (const value of values) {
            if (!value.literal) {
                // The record does not have a timestamp value.
                this.logger.warn(
                    `Received records without timestamp values. Ignoring record '${record.data.id.value}'.`,
                );
                continue;
            }

            const recordDate = new Date(value.literal.value);
            if (Number.isNaN(recordDate.getTime())) {
                // An unparsable timestamp would make `toISOString()` throw.
                this.logger.warn(
                    `Received record with invalid timestamp value '${value.literal.value}'. Ignoring record '${record.data.id.value}'.`,
                );
                continue;
            }

            // Start of the hour the record belongs to.
            const recordHour = new Date(
                recordDate.getFullYear(),
                recordDate.getMonth(),
                recordDate.getDate(),
                recordDate.getHours(),
            );
            const currentHour = this.hour;

            if (!currentHour) {
                // Create the first (root) bucket.
                this.root = true;
                this.hour = recordHour;
                out.push(getBucket(recordHour.toISOString(), this.root));

                this.logger.debug(
                    `Created root hour bucket ${recordHour.toISOString()}`,
                );
            } else if (recordHour.getTime() !== currentHour.getTime()) {
                // Compare the full truncated instant, not just the hour of the
                // day: a record exactly 24h later has the same `getHours()`
                // but belongs in a different bucket.
                const newBucket = getBucket(recordHour.toISOString(), false);

                // Add a relation from and to the previous bucket.
                const oldBucket = getBucket(
                    currentHour.toISOString(),
                    this.root,
                );
                this.root = false;
                if (this.unorderedRelations) {
                    addRelation(oldBucket, newBucket, TREE.terms.Relation);
                    addRelation(newBucket, oldBucket, TREE.terms.Relation);
                } else {
                    // The start of the new hour is the boundary between both
                    // buckets: members of the new bucket are >= it, members of
                    // the previous bucket are < it (for in-order input).
                    addRelation(
                        oldBucket,
                        newBucket,
                        TREE.terms.GreaterThanOrEqualToRelation,
                        literal(
                            recordHour.toISOString(),
                            namedNode(XSD.dateTime),
                        ),
                        this.pathQuads,
                    );
                    addRelation(
                        newBucket,
                        oldBucket,
                        TREE.terms.LessThanRelation,
                        literal(
                            recordHour.toISOString(),
                            namedNode(XSD.dateTime),
                        ),
                        this.pathQuads,
                    );
                }

                // Mark the old bucket as immutable.
                oldBucket.immutable = true;

                out.push(newBucket);
                this.hour = recordHour;

                this.logger.debug(
                    `Created new hour bucket ${recordHour.toISOString()}`,
                );
            } else {
                // The record belongs to the current bucket.
                out.push(getBucket(currentHour.toISOString(), this.root));
            }
        }

        return out;
    }

    /**
     * Serializes the bucketizer state (current hour and root flag) as JSON.
     * `hour` is left out of the output while no record has been processed.
     */
    save(): string {
        return JSON.stringify({
            hour: this.hour,
            root: this.root,
        });
    }
}
17 changes: 14 additions & 3 deletions src/bucketizers/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { readFileSync } from "fs";
import * as path from "path";
import { Term } from "@rdfjs/types";
import { BasicLensM, Cont } from "rdf-lens";
import {
Expand All @@ -16,13 +14,18 @@ import SubjectBucketizer from "./subjectBucketizer";
import TimebasedBucketizer from "./timebasedBucketizer";

import { $INLINE_FILE } from "@ajuvercr/ts-transformer-inline-file";
import HourBucketizer from "./hourBucketizer";

// Data factory instance; its uses fall outside the visible part of this file.
const df = new DataFactory();
// Contents of the bucketizer shapes file, obtained through $INLINE_FILE
// (presumably inlined at compile time by the ts-transformer — confirm).
export const SHAPES_TEXT = $INLINE_FILE("../../configs/bucketizer_configs.ttl");

/**
 * Configuration of a single bucketizer: the fragmentation type term and the
 * matching fragmentation-specific configuration object.
 */
export type BucketizerConfig = {
    type: Term;
    config:
        | SubjectFragmentation
        | PageFragmentation
        | TimebasedFragmentation
        | HourFragmentation;
};

export type SubjectFragmentation = {
Expand All @@ -44,6 +47,12 @@ export type TimebasedFragmentation = {
minBucketSpan: number;
};

/**
 * Configuration for the hour-based bucketizer (`HourBucketizer`).
 */
export type HourFragmentation = {
// Lens executed on each record's data to find its timestamp term(s).
path: BasicLensM<Cont, Cont>;
// Passed along as the path argument when ordered relations are added.
pathQuads: Cont;
// When true, plain tree:Relation links are added in both directions instead of
// GreaterThanOrEqualTo / LessThan relations. HourBucketizer treats a missing
// value as false.
unorderedRelations?: boolean;
};

export type AddRelation = (
origin: Bucket,
target: Bucket,
Expand Down Expand Up @@ -78,6 +87,8 @@ function createBucketizer(config: BucketizerConfig, save?: string): Bucketizer {
<TimebasedFragmentation>config.config,
save,
);
case TREE.custom("HourFragmentation"):
return new HourBucketizer(<HourFragmentation>config.config, save);
}
throw "Unknown bucketizer " + config.type.value;
}
Expand Down
Loading

0 comments on commit 3c5f220

Please sign in to comment.