Skip to content

Commit

Permalink
feat: sort longturtle blank nodes (#2997)
Browse files: browse the repository at this point in the history
* feat: sort longturtle blank nodes in the object position by the longturtle serialization of their CBD (Concise Bounded Description)

* fix: #2767
  • Loading branch information
edmondchuc authored Nov 29, 2024
1 parent 08dd4b7 commit 28a6190
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 77 deletions.
29 changes: 29 additions & 0 deletions rdflib/plugins/serializers/longturtle.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,34 @@ def predicateList(self, subject, newline=False):
def verb(self, node, newline=False):
    """
    Emit ``node`` in the verb position.

    Forwards ``node`` and ``newline`` unchanged to ``self.path``, passing
    ``VERB`` as the second argument.
    """
    self.path(node, VERB, newline)

def sortObjects(
    self, values: list[URIRef | BNode | Literal]
) -> list[URIRef | BNode | Literal]:
    """
    Return ``values`` reordered so that blank nodes come last, in a stable order.

    Every value that is not a ``BNode`` keeps its incoming relative order and
    is placed at the front of the result. The blank nodes follow, ordered by
    the longturtle serialization of the graph returned by ``self.store.cbd``
    for each of them.
    """
    identified = []
    # Keyed by blank node so a node that occurs more than once in ``values``
    # is emitted only once; dict insertion order keeps first-seen order, which
    # the stable sort below uses to break ties between equal serializations.
    cbd_by_bnode: dict[BNode, str] = {}
    for term in values:
        if not isinstance(term, BNode):
            identified.append(term)
        else:
            cbd_by_bnode.setdefault(term, "")

    # Serialize only after the partition pass is complete.
    for node in cbd_by_bnode:
        cbd_by_bnode[node] = self.store.cbd(node).serialize(format="longturtle")

    ordered_bnodes = sorted(cbd_by_bnode, key=cbd_by_bnode.__getitem__)
    return [*identified, *ordered_bnodes]

def objectList(self, objects):
objects = self.sortObjects(objects)
count = len(objects)
if count == 0:
return
Expand All @@ -303,6 +330,8 @@ def objectList(self, objects):
if count > 1:
if not isinstance(objects[0], BNode):
self.write("\n" + self.indent(1))
else:
self.write(" ")
first_nl = True
self.path(objects[0], OBJECT, newline=first_nl)
for obj in objects[1:]:
Expand Down
74 changes: 74 additions & 0 deletions test/data/longturtle/longturtle-target.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
PREFIX cn: <https://linked.data.gov.au/def/cn/>
PREFIX ex: <http://example.com/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sdo: <https://schema.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

ex:nicholas
a sdo:Person ;
sdo:age 41 ;
sdo:alternateName
"N.J. Car" ,
"Nick Car" ,
[
sdo:name "Dr N.J. Car" ;
] ;
sdo:name
[
a cn:CompoundName ;
sdo:hasPart
[
a cn:CompoundName ;
rdf:value "John" ;
] ,
[
a cn:CompoundName ;
rdf:value "Nicholas" ;
] ,
[
a cn:CompoundName ;
sdo:hasPart
[
a cn:CompoundName ;
rdf:value "Car" ;
] ,
[
a cn:CompoundName ;
rdf:value "Maxov" ;
] ;
] ;
] ;
sdo:worksFor <https://kurrawong.ai> ;
.

<https://kurrawong.ai>
a sdo:Organization ;
sdo:location <https://kurrawong.ai/hq> ;
.

<https://kurrawong.ai/hq>
a sdo:Place ;
sdo:address
[
a sdo:PostalAddress ;
sdo:addressCountry
[
sdo:identifier "au" ;
sdo:name "Australia" ;
] ;
sdo:addressLocality "Shorncliffe" ;
sdo:addressRegion "QLD" ;
sdo:postalCode 4017 ;
sdo:streetAddress (
72
"Yundah"
"Street"
) ;
] ;
sdo:geo
[
sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
] ;
sdo:name "KurrawongAI HQ" ;
.
82 changes: 5 additions & 77 deletions test/test_serializers/test_serializer_longturtle.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import difflib
from textwrap import dedent
from pathlib import Path

from rdflib import Graph, Namespace
from rdflib.namespace import GEO, SDO
Expand Down Expand Up @@ -170,83 +170,11 @@ def test_longturtle():
output = g.serialize(format="longturtle")

# fix the target
target = dedent(
""" PREFIX cn: <https://linked.data.gov.au/def/cn/>
PREFIX ex: <http://example.com/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX sdo: <https://schema.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
current_dir = Path.cwd() # Get the current directory
target_file_path = current_dir / "test/data/longturtle" / "longturtle-target.ttl"

ex:nicholas
a sdo:Person ;
sdo:age 41 ;
sdo:alternateName
[
sdo:name "Dr N.J. Car" ;
] ,
"N.J. Car" ,
"Nick Car" ;
sdo:name
[
a cn:CompoundName ;
sdo:hasPart
[
a cn:CompoundName ;
rdf:value "Nicholas" ;
] ,
[
a cn:CompoundName ;
rdf:value "John" ;
] ,
[
a cn:CompoundName ;
sdo:hasPart
[
a cn:CompoundName ;
rdf:value "Car" ;
] ,
[
a cn:CompoundName ;
rdf:value "Maxov" ;
] ;
] ;
] ;
sdo:worksFor <https://kurrawong.ai> ;
.
<https://kurrawong.ai>
a sdo:Organization ;
sdo:location <https://kurrawong.ai/hq> ;
.
<https://kurrawong.ai/hq>
a sdo:Place ;
sdo:address
[
a sdo:PostalAddress ;
sdo:addressCountry
[
sdo:identifier "au" ;
sdo:name "Australia" ;
] ;
sdo:addressLocality "Shorncliffe" ;
sdo:addressRegion "QLD" ;
sdo:postalCode 4017 ;
sdo:streetAddress (
72
"Yundah"
"Street"
) ;
] ;
sdo:geo
[
sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
] ;
sdo:name "KurrawongAI HQ" ;
.
"""
)
with open(target_file_path, encoding="utf-8") as file:
target = file.read()

# compare output to target
# - any differences will produce output
Expand Down

0 comments on commit 28a6190

Please sign in to comment.