Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize URIs #75

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions brickschema/brickify/src/handlers/Handler/RACHandler/RACHandler.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from pathlib import Path
from typing import Optional, List

import typer
from pathlib import Path
from tabulate import tabulate
from typing import Optional, List

from brickschema.brickify.src.handlers.Handler.TableHandler import TableHandler
from brickschema.brickify.util import (
Expand Down Expand Up @@ -72,6 +71,12 @@ def ingest_data(self):
)
)

replace_dict = {"headers": {}, "values": {}, "ignore_columns": {}}
if "replace_dict" in self.config:
for key in replace_dict.keys():
if key in self.config["replace_dict"]:
replace_dict[key] = self.config["replace_dict"][key]

for index, sheet in enumerate(sheets):
if index not in sheet_ids:
continue
Expand All @@ -81,9 +86,7 @@ def ingest_data(self):
if not header_row:
continue
header = sheet.row_values(header_row)
header = cleaned_value(
value=header, replace_dict=self.config["replace_dict"]["headers"]
)
header = cleaned_value(value=header, replace_dict=replace_dict["headers"])
header_dict = {idx: value for idx, value in enumerate(header)}
for row_number in range(header_row + 1, sheet.nrows):
row = sheet.row_values(row_number)
Expand All @@ -108,8 +111,9 @@ def ingest_data(self):
row_object = {
key: cleaned_value(
value,
replace_dict=self.config["replace_dict"]["values"],
replace_dict=replace_dict["values"],
)
for key, value in row_object.items()
if key not in replace_dict["ignore_columns"]
}
self.data.append(row_object)
20 changes: 10 additions & 10 deletions brickschema/brickify/src/handlers/Handler/TableHandler.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import csv
import re
import traceback
from pathlib import Path
from typing import Optional, List

from jinja2 import Template
from pathlib import Path
from typer import progressbar
from typing import Optional, List

from brickschema.brickify.src.handlers.Handler.Handler import Handler
from brickschema.brickify.util import cleaned_value
Expand Down Expand Up @@ -43,21 +42,22 @@ def ingest_data(self):
Ingests tabular data into a key-value based data model where the key is the column header, and value is
the cleaned cell value.
"""
replace_dict = {"headers": {}, "values": {}, "ignore_columns": {}}
if "replace_dict" in self.config:
for key in replace_dict.keys():
if key in self.config["replace_dict"]:
replace_dict[key] = self.config["replace_dict"][key]

with open(self.source, newline="") as csv_file:
reader = csv.DictReader(csv_file, dialect=self.dialect)
for row in reader:
replace_dict = (
self.config["replace_dict"]["values"]
if "replace_dict" in self.config
and "values" in self.config["replace_dict"]
else {}
)
item = {
key.strip(): cleaned_value(
value,
replace_dict=replace_dict,
replace_dict=replace_dict["values"],
)
for key, value in row.items()
if key not in replace_dict["ignore_columns"]
}
self.data.append(item)

Expand Down
27 changes: 21 additions & 6 deletions brickschema/brickify/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,31 @@
The util module provides helper functions used by brickify.
"""

import click_spinner
import json
import re
from pathlib import Path
from typing import Optional, Dict

import click_spinner
import typer
import urllib.parse
import yaml
from pathlib import Path
from rdflib import Namespace, OWL, RDF, RDFS, Graph
from typing import Optional, Dict, Union
from unicodedata import normalize
from xlrd import open_workbook


def decode(value: Union[str, bytes]) -> str:
    """
    Returns a UTF-8 string produced for the given value.

    Bytes input (including instances of ``bytes`` subclasses) is decoded as
    UTF-8; any other value is returned unchanged.

    :param value: string|bytes
    :returns: UTF-8 decoded string
    :raises UnicodeDecodeError: if the bytes are not valid UTF-8
    """
    # isinstance (rather than an exact type comparison) so that subclasses of
    # bytes are decoded too instead of being passed through untouched.
    if isinstance(value, bytes):
        return value.decode("UTF-8")
    return value


def cleaned_value(value, replace_dict: Optional[Dict] = {}):
"""
Returns a cleaned value produced by doing regex replacements and elimination
Expand All @@ -39,9 +52,11 @@ def cleaned_value(value, replace_dict: Optional[Dict] = {}):
return True
if value in ["FALSE", "false", "False", "off", "OFF"]:
return False
clean_value = clean_value.strip()
for replacement in replace_dict.items():
clean_value = re.sub(*replacement, clean_value)
return clean_value.strip()
clean_value = re.sub(*replacement, clean_value).strip()
clean_value = urllib.parse.quote_plus(clean_value.encode("UTF-8"))
return clean_value
return clean_value


Expand Down
7 changes: 6 additions & 1 deletion brickschema/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ def home(self):

def bindings(self):
return jsonify(
{prefix: namespace for prefix, namespace in self.graph.namespaces() if prefix not in self.ignore_prefixes})
{
prefix: namespace
for prefix, namespace in self.graph.namespaces()
if prefix not in self.ignore_prefixes
}
)

def apply_reasoning(self, profile):
self.graph.expand(profile)
Expand Down