Skip to content

Commit

Permalink
Change approach: retrieve metadata separately
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminaaron committed May 4, 2024
1 parent 8773f82 commit 7873229
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 73 deletions.
23 changes: 14 additions & 9 deletions dev/dev.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@ import path from "path"
import { fileURLToPath } from "url"
import fs, { promises as fsPromise } from "fs"
import { validateAll, validateOne, validateUserProfile } from "../src/index.js"
import { extractRequirementProfilesMetadata, runSparqlConstructQueryOnRdfString, runSparqlSelectQueryOnRdfString } from "../src/utils.js"
import {
extractDatafieldsMetadata,
extractRequirementProfilesMetadata,
runSparqlConstructQueryOnRdfString,
runSparqlSelectQueryOnRdfString
} from "../src/utils.js"

// Locations of the RDF fixtures used by the dev helpers in this script.
// Everything is resolved relative to the directory that contains this file.
const DB_DIR = path.join(path.dirname(fileURLToPath(import.meta.url)), "requirement-profiles")
const SHACL_DIR = `${DB_DIR}/shacl`
// Only one USER_PROFILE declaration may exist; a second `const` with the same
// name is a SyntaxError. The empty example profile is the one currently in use.
const USER_PROFILE = `${DB_DIR}/user-profile-examples/empty-user-profile.ttl`
const DATAFIELDS = `${DB_DIR}/datafields.ttl`
const MATERIALIZATION = `${DB_DIR}/materialization.ttl`

Expand Down Expand Up @@ -100,13 +105,13 @@ async function devValidateUserProfile() {
console.log(conforms)
}

/**
 * Dev helper: logs the metadata extracted from every file in SHACL_DIR
 * (treated as requirement profiles) and from the DATAFIELDS file.
 *
 * @returns {Promise<void>} resolves once both metadata objects were logged
 */
async function devExtractMedatada() {
    const shaclFiles = await fsPromise.readdir(SHACL_DIR)
    // The reads are independent of each other; Promise.all runs them in
    // parallel and still yields the contents in readdir order.
    const rpStrings = await Promise.all(
        shaclFiles.map(file => fsPromise.readFile(`${SHACL_DIR}/${file}`, "utf8"))
    )
    console.log("Requirement profiles metadata:", await extractRequirementProfilesMetadata(rpStrings))

    const datafieldsStr = await fsPromise.readFile(DATAFIELDS, "utf8")
    console.log("Datafields metadata:", await extractDatafieldsMetadata(datafieldsStr))
}

// devRunSparqlSelectQueryOnRdfString()
Expand All @@ -115,4 +120,4 @@ devValidateAll()
// devValidateOne()
// devValidateOneStrings()
// devValidateUserProfile()
// devExtractRequirementProfileMedatada()
// devExtractMedatada()
6 changes: 4 additions & 2 deletions global.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import {
import {
rdfStringsToStore,
runSparqlSelectQueryOnStore,
extractRequirementProfilesMetadata
extractRequirementProfilesMetadata,
extractDatafieldsMetadata
} from "./src/utils.js";

window.MatchingEngine = {
Expand All @@ -18,5 +19,6 @@ window.MatchingEngine = {
validateAll,
rdfStringsToStore,
runSparqlSelectQueryOnStore,
extractRequirementProfilesMetadata
extractRequirementProfilesMetadata,
extractDatafieldsMetadata
}
58 changes: 14 additions & 44 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import {
addRdfStringToStore,
extractRequirementProfilesMetadataFromStore,
addRdfStringToStore, extractRpUriFromRpString,
printDatasetAsTurtle,
printStoreAsTurtle,
runSparqlAskQueryOnStore,
Expand Down Expand Up @@ -40,31 +39,24 @@ export async function validateUserProfile(userProfile, datafieldsStr, debug = fa
export async function validateAll(userProfileStr, requirementProfiles, datafieldsStr, materializationStr, debug = false) {
let map = {
reports: [],
missingUserInputsAggregated: {},
metadata: {}
missingUserInputsAggregated: {}
}
for (let [filename, reqProfileStr] of Object.entries(requirementProfiles)) {
let rpUri = await extractRpUriFromRpString(reqProfileStr)
let report = await validateOne(userProfileStr, reqProfileStr, datafieldsStr, materializationStr, debug)
report.rpUri = rpUri
map.reports.push(report)
map.metadata = { ...map.metadata, ...report.metadata }
let rpId = Object.keys(report.metadata)[0]
report.metadata = {
...report.metadata[rpId],
id: rpId,
filename: filename
}
for (let userInput of report.missingUserInput) {
let key = userInput.subject + "_" + userInput.predicate
let key = userInput.subject + "_" + userInput.dfUri
if (!map.missingUserInputsAggregated[key]) {
map.missingUserInputsAggregated[key] = {
subject: userInput.subject,
predicate: userInput.predicate,
label: userInput.label,
dfUri: userInput.dfUri,
usedIn: []
}
}
map.missingUserInputsAggregated[key].usedIn.push({
id: rpId,
rpUri: rpUri,
optional: userInput.optional,
isLastMissingUserInput: report.missingUserInput.length === 1
})
Expand All @@ -82,9 +74,6 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
await addRdfStringToStore(materializationStr, store)
await addRdfStringToStore(datafieldsStr, store)

// ----- extract metadata from the requirement profile -----`
let rpMetadata = await extractRequirementProfilesMetadataFromStore(store)

// ----- first validation to identify missing data points -----
let firstReport = await runValidationOnStore(store)
if (debug) {
Expand All @@ -100,8 +89,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
result: ValidationResult.INELIGIBLE,
violations: violations,
missingUserInput: [],
inMemoryMaterializedTriples: [],
metadata: rpMetadata
inMemoryMaterializedTriples: []
}
}

Expand All @@ -114,7 +102,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
let message = result.message[0].value // can the arrays be bigger than 1?
missingList.push({
subject: fromSubject,
predicate: missingPredicate,
dfUri: missingPredicate, // predicate
optional: message.toLowerCase().includes("[optional]") // a better way to check for this?
})
}
Expand All @@ -130,7 +118,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
PREFIX ff: <https://foerderfunke.org/default#>
SELECT * WHERE {
?rule ff:output ?output .
FILTER(?output = <${missing.predicate}>) .
FILTER(?output = <${missing.dfUri}>) .
?rule ff:sparqlConstructQuery ?query .
OPTIONAL { ?rule ff:input ?input . }
}
Expand All @@ -155,7 +143,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
let existingMainPersonPredicates = result.map(n => n.predicate)

for (let missing of missingList) {
let matchingRule = materializableDataPoints.find(n => n.output === missing.predicate)
let matchingRule = materializableDataPoints.find(n => n.output === missing.dfUri)
let otherRuleWithThatInputAsOutput = undefined
if (matchingRule && matchingRule.input) {
otherRuleWithThatInputAsOutput = materializableDataPoints.find(n => n.output === matchingRule.input)
Expand All @@ -166,22 +154,6 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
askUserForDataPoints.push(missing)
}

// ----- enrich the ones we'll ask for with labels -----
// should we send more along than then the label?
for (let dataPoint of askUserForDataPoints) {
let query = `
PREFIX ff: <https://foerderfunke.org/default#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT * WHERE {
?predicate a ff:DataField .
FILTER(?predicate = <${dataPoint.predicate}>) .
?predicate rdfs:label ?label .
}
`
let resultLine = (await runSparqlSelectQueryOnStore(query, store))[0]
if (resultLine) dataPoint.label = resultLine.label
}

let optionals = askUserForDataPoints.filter(missing => missing.optional)
let blockers = askUserForDataPoints.filter(missing => !missing.optional)

Expand All @@ -196,8 +168,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
result: ValidationResult.UNDETERMINABLE,
violations: [],
missingUserInput: askUserForDataPoints,
inMemoryMaterializedTriples: [],
metadata: rpMetadata
inMemoryMaterializedTriples: []
}
}

Expand Down Expand Up @@ -241,7 +212,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
?shape a sh:NodeShape .
FILTER(?shape = ff:MainPersonShape) .
?shape sh:property ?propertyShape .
?propertyShape sh:path <${optional.predicate}> .
?propertyShape sh:path <${optional.dfUri}> .
?propertyShape ?pred ?obj .
}
` // can this query be simplified?
Expand Down Expand Up @@ -275,8 +246,7 @@ export async function validateOne(userProfile, requirementProfile, datafieldsStr
result: secondReport.conforms ? ValidationResult.ELIGIBLE : ValidationResult.INELIGIBLE,
violations: collectViolations(secondReport, false),
missingUserInput: askUserForDataPoints,
inMemoryMaterializedTriples: materializedTriples,
metadata: rpMetadata
inMemoryMaterializedTriples: materializedTriples
}
}

Expand Down
66 changes: 48 additions & 18 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import { QueryEngine } from "@comunica/query-sparql-rdfjs"

/**
 * Creates a new Store and adds every given RDF string to it.
 *
 * @param {Iterable<string>} rdfStrings - RDF documents to add, processed in order
 * @returns {Promise<Store>} the store after all strings were added
 */
export async function rdfStringsToStore(rdfStrings) {
    const store = new Store()
    // Added one after the other on purpose: all strings go into the same store.
    for (const str of rdfStrings) {
        await addRdfStringToStore(str, store)
    }
    return store
}
Expand Down Expand Up @@ -114,29 +114,59 @@ export async function runSparqlDeleteQueryOnStore(query, store) {
return await queryEngine.queryVoid(query, { sources: [ store ] })
}

/**
 * Finds the URI of the requirement profile declared in an RDF string.
 *
 * The string is loaded into a fresh store and queried for subjects typed as
 * ff:RequirementProfile; the ?rpUri binding of the first result row is returned.
 *
 * @param {string} requirementProfileStr - RDF serialization of one requirement profile
 * @returns {Promise<*>} the ?rpUri value as delivered by runSparqlSelectQueryOnStore
 * @throws {Error} if the string declares no ff:RequirementProfile
 */
export async function extractRpUriFromRpString(requirementProfileStr) {
    const store = await rdfStringToStore(requirementProfileStr)
    const query = `
        PREFIX ff: <https://foerderfunke.org/default#>
        SELECT * WHERE {
            ?rpUri a ff:RequirementProfile .
        }`
    const rows = await runSparqlSelectQueryOnStore(query, store)
    // Fail with a clear message instead of "cannot read properties of undefined".
    if (rows.length === 0) {
        throw new Error("No ff:RequirementProfile found in the given requirement profile string")
    }
    return rows[0].rpUri
}

/**
 * Collects title and categories of every ff:RequirementProfile found in the
 * given RDF strings.
 *
 * @param {Iterable<string>} requirementProfileStrings - RDF serializations of requirement profiles
 * @returns {Promise<Object>} map from requirement profile URI to
 *   `{ uri, title, categories }`; `title` is "" when the profile has none
 */
export async function extractRequirementProfilesMetadata(requirementProfileStrings) {
    const store = await rdfStringsToStore(requirementProfileStrings)
    const query = `
        PREFIX ff: <https://foerderfunke.org/default#>
        SELECT * WHERE {
            ?rpUri a ff:RequirementProfile .
            OPTIONAL { ?rpUri ff:title ?title } .
            OPTIONAL { ?rpUri ff:category ?category } .
        }`
    const metadata = {}
    const rows = await runSparqlSelectQueryOnStore(query, store)
    for (const row of rows) {
        // One profile yields one row per category, so only the first row creates the entry.
        if (!metadata[row.rpUri]) {
            metadata[row.rpUri] = {
                uri: row.rpUri,
                title: row.title ?? "",
                categories: []
            }
        }
        const categories = metadata[row.rpUri].categories
        // The same category can show up in several rows (e.g. when other
        // OPTIONAL bindings multiply the result), so list each one only once.
        if (row.category && !categories.includes(row.category)) {
            categories.push(row.category)
        }
    }
    return metadata
}

/**
 * Collects label and comment of every ff:DataField declared in an RDF string.
 *
 * @param {string} datafieldsStr - RDF serialization of the datafields definitions
 * @returns {Promise<Object>} map from datafield URI to `{ uri, label, comment }`;
 *   `label` and `comment` are "" when the datafield has none
 */
export async function extractDatafieldsMetadata(datafieldsStr) {
    const store = await rdfStringToStore(datafieldsStr)
    const query = `
        PREFIX ff: <https://foerderfunke.org/default#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT * WHERE {
            ?dfUri a ff:DataField .
            OPTIONAL { ?dfUri rdfs:label ?label } .
            OPTIONAL { ?dfUri rdfs:comment ?comment } .
        }`
    const metadata = {}
    const rows = await runSparqlSelectQueryOnStore(query, store)
    for (const row of rows) {
        // If a datafield appears in several rows, the last row overwrites earlier ones.
        metadata[row.dfUri] = {
            uri: row.dfUri,
            label: row.label ?? "",
            comment: row.comment ?? ""
        }
    }
    return metadata
}

0 comments on commit 7873229

Please sign in to comment.