Trying rdflib-sqlalchemy fork (#110)
* moving around

* adding in rdflib as subpackage of brickschema

* remove symlink

* no modules

* fixing deps

* fixing import

* fix import

* fix import

* trying new version

* try new package version

* rdflib-sqlalchemy fork should work now

* bump deps

* Refactored graph name handling to use URIRef.

* aider: Override the `triples` method in `PersistentGraph` to query across all contexts if no specific context is provided.

# Aider chat conversation:

USER: add `brickschema/persistent.py`

ASSISTANT: It looks like you've added `brickschema/persistent.py` to our workspace for potential modifications. How can I assist you with this file?

USER: Please help me debug my code. I can insert triples into the `PersistentGraph`, but when I try to loop through the triples like `self.triples((None, None, None))`, nothing is returned.

ASSISTANT: The issue is likely in how the triples are inserted versus how they are queried. Given the description, the triples are probably being inserted into a specific graph context within the `PersistentGraph`, but `self.triples((None, None, None))` queries the default graph context, which may not contain any triples.

To address this, we need to ensure that triple queries span all graph contexts when that is the intended behavior. This can be achieved by overriding the `triples` method (and, if needed, `__len__`) in the `PersistentGraph` class so that it correctly handles querying across all contexts.

Here's a proposed change to ensure that the `triples` method queries across all contexts:

```diff
--- brickschema/persistent.py
+++ brickschema/persistent.py
@@ ... @@
 class PersistentGraph(Graph):
+    def triples(self, triple_pattern, context=None):
+        """
+        Override the triples method to query across all contexts if no specific context is provided.
+        """
+        if context is None:
+            # Query across all contexts
+            return super().triples(triple_pattern, context=self)
+        else:
+            # Query within the specified context
+            return super().triples(triple_pattern, context=context)
```

This change adds an override for the `triples` method in the `PersistentGraph` class. When no specific context is provided, it queries across all contexts by passing `self` as the context to the parent method. This ensures that when you call `self.triples((None, None, None))`, it will return triples from all contexts within the `PersistentGraph`.

Please note that this is a general approach and may need adjustments based on the specific structure and requirements of your application. If your application relies on querying specific contexts, make sure the context is specified explicitly when calling the `triples` method.
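For readers following along, here is a minimal, self-contained sketch of the behavior being discussed, written against plain rdflib rather than `PersistentGraph` itself; the namespace and context names are illustrative only:

```python
from rdflib import ConjunctiveGraph, Namespace, URIRef

EX = Namespace("http://example.org/")

# A ConjunctiveGraph spans every context held by its store.
g = ConjunctiveGraph()

# Insert a triple into one named context, as a SQL-backed store would.
ctx = g.get_context(URIRef("http://example.org/building-1"))
ctx.add((EX["ahu1"], EX["feeds"], EX["vav1"]))

# Iterating the ConjunctiveGraph itself returns triples from all contexts,
# which is the behavior the override above is meant to give PersistentGraph.
print(list(g.triples((None, None, None))))

# A Graph bound to a different context of the same store sees nothing.
other = g.get_context(URIRef("http://example.org/building-2"))
print(list(other.triples((None, None, None))))  # []
```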

* try new version of dependency

* bump rdflib dep, version
gtfierro authored Feb 21, 2024
1 parent 5ae5318 commit 713e49c
Showing 7 changed files with 273 additions and 261 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/builds.yml
@@ -17,7 +17,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install Poetry
@@ -33,7 +33,7 @@ jobs:
         java-version: '17'
     - name: Load cached venv
       id: cached-poetry-dependencies
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: .venv
         key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}
3 changes: 0 additions & 3 deletions .gitmodules

This file was deleted.

9 changes: 9 additions & 0 deletions brickschema/__init__.py
@@ -13,6 +13,15 @@
     level=logging.WARNING,
 )
 
+has_sqlalchemy = False
+try:
+    import rdflib_sqlalchemy
+    has_sqlalchemy = True
+except ImportError as e:
+    print(e)
+    logging.warning(
+        "sqlalchemy not installed. SQL-backed graph support will not be available. Try 'pip install brickschema[persistence]' to install it."
+    )
 
 __version__ = "0.2.0"
 __all__ = ["graph", "inference", "namespaces"]
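The hunk above introduces an optional-dependency guard. A hedged sketch of how downstream code might consult the resulting `has_sqlalchemy` flag follows; the helper function and the way the graph is constructed are illustrative, not part of brickschema:

```python
# Only `brickschema.has_sqlalchemy` comes from the diff above; the helper below
# is a hypothetical caller-side guard.
import brickschema


def open_sql_backed_graph(uri: str):
    if not brickschema.has_sqlalchemy:
        raise RuntimeError(
            "SQL-backed graph support is unavailable; install the extra with "
            "'pip install brickschema[persistence]'"
        )
    from brickschema.persistent import PersistentGraph
    return PersistentGraph(uri)
```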
54 changes: 41 additions & 13 deletions brickschema/persistent.py
@@ -6,7 +6,7 @@
 from contextlib import contextmanager
 from rdflib import ConjunctiveGraph
 from rdflib.graph import BatchAddGraph
-from rdflib import plugin
+from rdflib import plugin, URIRef
 from rdflib.store import Store
 from rdflib_sqlalchemy import registerplugins
 from sqlalchemy import text, Row
@@ -55,7 +55,7 @@ def __init__(self, uri: str, *args, **kwargs):
 class Changeset(Graph):
     def __init__(self, graph_name):
         super().__init__()
-        self.name = graph_name
+        self.name = URIRef(graph_name)
         self.uid = uuid.uuid4()
         self.additions = []
         self.deletions = []
@@ -87,7 +87,7 @@ def __init__(self, uri: str, *args, **kwargs):
         """
         To create an in-memory store, use uri="sqlite://"
         """
-        store = plugin.get("SQLAlchemy", Store)(identifier="my_store")
+        store = plugin.get("SQLAlchemy", Store)(identifier=URIRef("my_store"))
         super().__init__(store, *args, **kwargs)
         self.open(uri, create=True)
         self._precommit_hooks = OrderedDict()
@@ -204,7 +204,11 @@ def conn(self):
 
     @contextmanager
     def new_changeset(self, graph_name, ts=None):
+        if not isinstance(graph_name, URIRef):
+            graph_name = URIRef(graph_name)
         namespaces = []
+        buffered_adds = []
+        buffered_removes = []
         with self.conn() as conn:
             transaction_start = time.time()
             cs = Changeset(graph_name)
@@ -220,30 +224,34 @@ def new_changeset(self, graph_name, ts=None):
                         text("INSERT INTO changesets VALUES (:uid, :ts, :graph, :deletion, :triple)").bindparams(
                             uid=str(cs.uid),
                             ts=ts,
-                            graph=graph_name,
+                            graph=str(graph_name),
                             deletion=True,
                             triple=pickle.dumps(triple),
                         )
                     )
-                graph = self.get_context(graph_name)
                 for triple in cs.deletions:
-                    graph.remove(triple)
+                    buffered_removes.append(triple)
+                #graph = self.get_context(graph_name)
+                #for triple in cs.deletions:
+                #    graph.remove(triple)
             if cs.additions:
                 for triple in cs.additions:
                     conn.execute(
                         text("INSERT INTO changesets VALUES (:uid, :ts, :graph, :deletion, :triple)").bindparams(
                             uid=str(cs.uid),
                             ts=ts,
-                            graph=graph_name,
+                            graph=str(graph_name),
                             deletion=False,
                             triple=pickle.dumps(triple),
                         )
                     )
-                with BatchAddGraph(
-                    self.get_context(graph_name), batch_size=10000
-                ) as graph:
-                    for triple in cs.additions:
-                        graph.add(triple)
+                for triple in cs.additions:
+                    buffered_adds.append(triple)
+                # with BatchAddGraph(
+                #     self.get_context(graph_name), batch_size=10000
+                # ) as graph:
+                #     for triple in cs.additions:
+                #         graph.add(triple)
 
             # take care of precommit hooks
             transaction_end = time.time()
Expand All @@ -259,12 +267,26 @@ def new_changeset(self, graph_name, ts=None):
logging.info(
f"Committing after {transaction_end - transaction_start} seconds"
)
# add the buffered changes to the graph
print([(type(c.identifier), c.identifier) for c in self.contexts()])
graph = self.get_context(graph_name)
for triple in buffered_removes:
print(f"Removing {triple}")
graph.remove(triple)
with BatchAddGraph(graph, batch_size=10000) as graph:
for triple in buffered_adds:
print(f"Adding {triple}")
graph.add(triple)
print(f"Self graph has {len(self)} triples")
# loop through all of the contexts and print length
# update namespaces
for pfx, ns in namespaces:
self.bind(pfx, ns)
for hook in self._postcommit_hooks.values():
hook(self)
self._latest_version = ts
for c in self.contexts():
print(f"{c.identifier} has {len(c)} triples")

def latest(self, graph):
return self.get_context(graph)
@@ -280,6 +302,7 @@ def graph_at(self, timestamp=None, graph=None):
             for t in self.get_context(graph).triples((None, None, None)):
                 g.add(t)
         else:
+            # TODO: this doesn't work for some reason
             for t in self.triples((None, None, None)):
                 g.add(t)
         with self.conn() as conn:
@@ -295,7 +318,9 @@ def _graph_at(self, alter_graph, conn, timestamp=None, graph=None):
         if isinstance(timestamp, (dict, Row)):
             timestamp = timestamp["timestamp"]
 
-        print(f"Getting graph at {timestamp}", type(timestamp))
+        print(f"Getting graph {graph} ({type(graph)}) at {timestamp}", type(timestamp))
+        # print # of rows in changesets
+        print(f"Changesets has {len(list(conn.execute(text('SELECT * FROM changesets'))))} rows")
         if graph is not None:
             rows = conn.execute(
                 text("SELECT * FROM changesets WHERE graph = :g AND timestamp > :ts ORDER BY timestamp DESC").bindparams(
@@ -309,9 +334,12 @@
                 )
             )
         for row in rows.mappings():
+            print(f"Row: {row}")
             triple = pickle.loads(row["triple"])
             if row["is_insertion"]:
+                print(f"Adding {triple}")
                 alter_graph.add((triple[0], triple[1], triple[2]))
             else:
+                print(f"Removing {triple}")
                 alter_graph.remove((triple[0], triple[1], triple[2]))
         return alter_graph