diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py
index 35351de2f9f..8a7889bbb19 100644
--- a/backend/onyx/background/indexing/run_indexing.py
+++ b/backend/onyx/background/indexing/run_indexing.py
@@ -14,6 +14,7 @@
 from onyx.configs.constants import MilestoneRecordType
 from onyx.connectors.connector_runner import ConnectorRunner
 from onyx.connectors.factory import instantiate_connector
+from onyx.connectors.models import Document
 from onyx.connectors.models import IndexAttemptMetadata
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_last_successful_attempt_time
@@ -90,6 +91,35 @@ def _get_connector_runner(
     )


+def strip_null_characters(doc_batch: list[Document]) -> list[Document]:
+    cleaned_batch = []
+    for doc in doc_batch:
+        cleaned_doc = doc.model_copy()
+
+        if "\x00" in cleaned_doc.id:
+            logger.warning(f"NUL characters found in document ID: {cleaned_doc.id}")
+            cleaned_doc.id = cleaned_doc.id.replace("\x00", "")
+
+        if "\x00" in cleaned_doc.semantic_identifier:
+            logger.warning(
+                f"NUL characters found in document semantic identifier: {cleaned_doc.semantic_identifier}"
+            )
+            cleaned_doc.semantic_identifier = cleaned_doc.semantic_identifier.replace(
+                "\x00", ""
+            )
+
+        for section in cleaned_doc.sections:
+            if section.link and "\x00" in section.link:
+                logger.warning(
+                    f"NUL characters found in document link for document: {cleaned_doc.id}"
+                )
+                section.link = section.link.replace("\x00", "")
+
+        cleaned_batch.append(cleaned_doc)
+
+    return cleaned_batch
+
+
 class ConnectorStopSignal(Exception):
     """A custom exception used to signal a stop in processing."""

@@ -238,7 +268,9 @@ def _run_indexing(
             )

             batch_description = []
-            for doc in doc_batch:
+
+            doc_batch_cleaned = strip_null_characters(doc_batch)
+            for doc in doc_batch_cleaned:
                 batch_description.append(doc.to_short_descriptor())

                 doc_size = 0
@@ -258,15 +290,15 @@ def _run_indexing(

             # real work happens here!
             new_docs, total_batch_chunks = indexing_pipeline(
-                document_batch=doc_batch,
+                document_batch=doc_batch_cleaned,
                 index_attempt_metadata=index_attempt_md,
             )

             batch_num += 1
             net_doc_change += new_docs
             chunk_count += total_batch_chunks
-            document_count += len(doc_batch)
-            all_connector_doc_ids.update(doc.id for doc in doc_batch)
+            document_count += len(doc_batch_cleaned)
+            all_connector_doc_ids.update(doc.id for doc in doc_batch_cleaned)

             # commit transaction so that the `update` below begins
             # with a brand new transaction. Postgres uses the start
@@ -276,7 +308,7 @@ def _run_indexing(
             db_session.commit()

             if callback:
-                callback.progress("_run_indexing", len(doc_batch))
+                callback.progress("_run_indexing", len(doc_batch_cleaned))

             # This new value is updated every batch, so UI can refresh per batch update
             update_docs_indexed(
diff --git a/backend/onyx/db/models.py b/backend/onyx/db/models.py
index a356e397a94..47170f93b22 100644
--- a/backend/onyx/db/models.py
+++ b/backend/onyx/db/models.py
@@ -54,6 +54,7 @@
 from onyx.db.enums import IndexModelStatus
 from onyx.db.enums import TaskStatus
 from onyx.db.pydantic_type import PydanticType
+from onyx.utils.logger import setup_logger
 from onyx.utils.special_types import JSON_ro
 from onyx.file_store.models import FileDescriptor
 from onyx.llm.override_models import LLMOverride
@@ -65,6 +66,8 @@
 from shared_configs.enums import EmbeddingProvider
 from shared_configs.enums import RerankerProvider

+logger = setup_logger()
+

 class Base(DeclarativeBase):
     __abstract__ = True
@@ -72,6 +75,8 @@ class Base(DeclarativeBase):

 class EncryptedString(TypeDecorator):
     impl = LargeBinary
+    # This type's behavior is fully deterministic and doesn't depend on any external factors.
+    cache_ok = True

     def process_bind_param(self, value: str | None, dialect: Dialect) -> bytes | None:
         if value is not None:
@@ -86,6 +91,8 @@ def process_result_value(self, value: bytes | None, dialect: Dialect) -> str | N

 class EncryptedJson(TypeDecorator):
     impl = LargeBinary
+    # This type's behavior is fully deterministic and doesn't depend on any external factors.
+    cache_ok = True

     def process_bind_param(self, value: dict | None, dialect: Dialect) -> bytes | None:
         if value is not None:
@@ -102,6 +109,21 @@ def process_result_value(
         return value


+class NullFilteredString(TypeDecorator):
+    impl = String
+    # This type's behavior is fully deterministic and doesn't depend on any external factors.
+    cache_ok = True
+
+    def process_bind_param(self, value: str | None, dialect: Dialect) -> str | None:
+        if value is not None and "\x00" in value:
+            logger.warning(f"NUL characters found in value: {value}")
+            return value.replace("\x00", "")
+        return value
+
+    def process_result_value(self, value: str | None, dialect: Dialect) -> str | None:
+        return value
+
+
 """
 Auth/Authz (users, permissions, access) Tables
 """
@@ -451,16 +473,16 @@ class Document(Base):

     # this should correspond to the ID of the document
     # (as is passed around in Onyx)
-    id: Mapped[str] = mapped_column(String, primary_key=True)
+    id: Mapped[str] = mapped_column(NullFilteredString, primary_key=True)
     from_ingestion_api: Mapped[bool] = mapped_column(
         Boolean, default=False, nullable=True
     )
     # 0 for neutral, positive for mostly endorse, negative for mostly reject
     boost: Mapped[int] = mapped_column(Integer, default=DEFAULT_BOOST)
     hidden: Mapped[bool] = mapped_column(Boolean, default=False)
-    semantic_id: Mapped[str] = mapped_column(String)
+    semantic_id: Mapped[str] = mapped_column(NullFilteredString)
     # First Section's link
-    link: Mapped[str | None] = mapped_column(String, nullable=True)
+    link: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)
     # The updated time is also used as a measure of the last successful state of the doc
     # pulled from the source (to help skip reindexing already updated docs in case of
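
The cleaning pass in `run_indexing.py` can be exercised in isolation. Below is a minimal, self-contained sketch of the same logic: `Document` and `Section` here are simplified pydantic stand-ins with only the fields the helper touches (the real classes live in `onyx.connectors.models` and carry more fields), and the `logger.warning` calls are omitted for brevity.

```python
# Self-contained sketch of the strip_null_characters() helper from this PR.
# NOTE: Document/Section are hypothetical stand-ins, not the real
# onyx.connectors.models classes; only the fields the helper touches exist.
from pydantic import BaseModel


class Section(BaseModel):
    link: str | None = None


class Document(BaseModel):
    id: str
    semantic_identifier: str
    sections: list[Section] = []


def strip_null_characters(doc_batch: list[Document]) -> list[Document]:
    cleaned_batch = []
    for doc in doc_batch:
        # model_copy() is shallow in pydantic v2: `sections` is shared with
        # the original document, so the in-place link fix below also mutates
        # the original Section objects.
        cleaned_doc = doc.model_copy()
        cleaned_doc.id = cleaned_doc.id.replace("\x00", "")
        cleaned_doc.semantic_identifier = cleaned_doc.semantic_identifier.replace(
            "\x00", ""
        )
        for section in cleaned_doc.sections:
            if section.link:
                section.link = section.link.replace("\x00", "")
        cleaned_batch.append(cleaned_doc)
    return cleaned_batch


batch = [
    Document(
        id="doc-1\x00",
        semantic_identifier="Quarterly\x00Report",
        sections=[Section(link="https://example.com/q1\x00")],
    )
]
cleaned = strip_null_characters(batch)
assert cleaned[0].id == "doc-1"
assert cleaned[0].semantic_identifier == "QuarterlyReport"
assert cleaned[0].sections[0].link == "https://example.com/q1"
```

One note on the shallow `model_copy()`: since the cleaned batch fully replaces `doc_batch` downstream, the shared-`sections` aliasing is harmless here, but `model_copy(deep=True)` would make the helper side-effect free.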
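On the database side, `NullFilteredString` performs the same stripping at bind time, so values routed through the converted columns are cleaned even if a caller bypasses `strip_null_characters`. The sketch below exercises the decorator (logging omitted) against in-memory SQLite purely so it runs anywhere; the motivating case is Postgres, whose text types reject `\x00` outright. The `Doc` model is a hypothetical single-column stand-in, not the real `Document` table.

```python
# Standalone sketch of the NullFilteredString TypeDecorator added in this PR,
# exercised against in-memory SQLite so the example runs anywhere.
from sqlalchemy import String, create_engine, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
from sqlalchemy.types import TypeDecorator


class NullFilteredString(TypeDecorator):
    impl = String
    # Deterministic with no per-instance state, so it is safe for SQLAlchemy's
    # statement cache (without cache_ok, 1.4+ warns and disables caching).
    cache_ok = True

    def process_bind_param(self, value: str | None, dialect) -> str | None:
        # Strip NUL bytes on the way into the database.
        if value is not None and "\x00" in value:
            return value.replace("\x00", "")
        return value

    def process_result_value(self, value: str | None, dialect) -> str | None:
        return value


class Base(DeclarativeBase):
    pass


class Doc(Base):
    __tablename__ = "doc"
    id: Mapped[str] = mapped_column(NullFilteredString, primary_key=True)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Doc(id="doc\x00-1"))
    session.commit()
    # The NUL byte was removed at bind time, before reaching the database.
    assert session.scalar(select(Doc.id)) == "doc-1"
```

Because `process_result_value` is a pass-through, the filtering only applies on write; rows already stored are returned unchanged, which matches the behavior in the diff.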