[NEAT-666] 😈Mapping include connections (#876)
* docs: enums

* build: freeze mistune

* linting

* feat: support reading url

* refactor: catch_issues return issues

* refactor: removed unused parameters

* refactor: remove unused

* refactor: moved logic out of NeatSession

* tests: updated test

* fix: DMS import bug

* build: changelog

* refactor: simplification

* refactor: proper error

* refactor: do not require that you can see the resource

* fix: introduced bug

* tests: stipulate test

* Linting and static code checks

* refactor: renaming

* tests: fix test

* refactor: reset

* build: changelog entry

* style: fix typo

* fix: writing model to zip

* refactor: update cognite-core

* fix: bug in DMSImporter

* refactor: removed unused

* refactor: introduced subclasses

* refactor: split out enterprise

* refactor: split out solution

* refactor: split out data-product

* fix: adjustment

* refactor: clean up ToExtension

* refactor: combine init param

* refactor: move out

* fix: introduced bug

* refactor: reduce lines

* refactor: clean up ToSolutionModel

* refactor: clean up data product

* build: changelog

* refactor: drop neat id from columns

* fix: drop neatId

* refactor: increase flexibility of data product

* tests: extending to mock client

* refactor: inherit filter

* tests: extending test and fixing bug

* tests: regen

* tests: refactor

* tests: extend test to include data product

* build: changelog

* fix: ensure valid model

* build: changelog

* tests: regen

* tests: updated

* feat: automatically include path and root when mapping to asset parent

* refactor: regen

* refactor: added read-only properties

* fix: skip read-only properties

* fix: proper implementation

* build: changelog

* test: regen

* refactor: review feedback

* build: upgrade toolkit to 0.3.23

* build: upgrade modules

* build: upgrade to toolkit 0.3

* build: added missing ACL

* docs: document the ToSolutionModel parameters

* refactor: switch to container filter

* fix: deterministic

* tests: regen

* tests: regen

* tests: regen

* build: changelog

* feat: first draft of connection to literal

* build: changelog

* tests: include in test

* tests: regen

* tests: added missing step

* test: regen

* fix: typo

* tests: extending wind farm with sequence rows

* refactor: update test data

* feat: introduced neat sequence

* feat: factory method

* feat: first pass of updating extractor

* tests: updated test data

* refactor: update example

* feat: implemented unpacking

* refactor: column as list

* tests: updated test

* fix: keep order

* docs: document new

* build: changelog

* refactor: move out special handling to sequence extractor

* refactor: introduce extra parameter

* tests: extend test

* tests: as write instead

* feat: implemented unpacking of sequences

* refactor: finish implementation

* tests: include sequence in classic to dms test

* refactor: renaming

* refactor: added connections to classic core mapping

* refactor: update Excel-to-YAML script

* refactor: regen YAML file

* refactor: update rules mapper to include new properties

* fix: skip suffixing in transformation

* fix: updating enums

* refactor: regen

* refactor: added dummy property

* fix: include GUID container

* tests: regen

* tests: updated test case

* build: changelog

---------

Co-authored-by: doctrino <[email protected]>
doctrino and doctrino authored Jan 6, 2025
1 parent daa6d96 commit 47738f4
Showing 9 changed files with 1,196 additions and 428 deletions.
277 changes: 239 additions & 38 deletions cognite/neat/_rules/models/mapping/_classic2core.yaml

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion cognite/neat/_rules/transformers/_converters.py
@@ -801,7 +801,6 @@ def transform(self, rules: InformationRules) -> InformationRules:
for class_ in output.classes:
if class_.class_.suffix.endswith(self.suffix):
class_.implements = [ClassEntity(prefix=class_.class_.prefix, suffix=self.implements)]
output.metadata.version = f"{output.metadata.version}.implements_{self.implements}"
return output

@property
152 changes: 74 additions & 78 deletions cognite/neat/_rules/transformers/_mapping.py
@@ -1,19 +1,17 @@
import warnings
from abc import ABC
from collections import defaultdict
from functools import cached_property
from typing import Any, ClassVar, Literal

from cognite.client import data_modeling as dm

from cognite.neat._client import NeatClient
from cognite.neat._constants import get_asset_read_only_properties_with_connection
from cognite.neat._issues.errors import CDFMissingClientError, NeatValueError, ResourceNotFoundError
from cognite.neat._issues.warnings import NeatValueWarning, PropertyOverwritingWarning
from cognite.neat._issues.warnings import PropertyOverwritingWarning
from cognite.neat._rules.models import DMSRules, SheetList
from cognite.neat._rules.models.data_types import Enum
from cognite.neat._rules.models.dms import DMSEnum, DMSProperty, DMSView
from cognite.neat._rules.models.entities import ContainerEntity, ViewEntity
from cognite.neat._rules.models.dms import DMSContainer, DMSEnum, DMSProperty
from cognite.neat._rules.models.entities import ClassEntity, ContainerEntity, ViewEntity

from ._base import RulesTransformer

@@ -105,11 +103,12 @@ def transform(self, rules: DMSRules) -> DMSRules:
class RuleMapper(RulesTransformer[DMSRules, DMSRules]):
"""Maps properties and classes using the given mapping.
**Note**: This transformer mutates the input rules.
Args:
mapping: The mapping to use.
mapping: The mapping to use represented as a DMSRules object.
data_type_conflict: How to handle data type conflicts. The default is "overwrite".
A data type conflict occurs when the data type of a property in the mapping is different from the
data type of the property in the input rules. If "overwrite", the data type in the input rules is overwritten
with the data type in the mapping.
"""

_mapping_fields: ClassVar[frozenset[str]] = frozenset(
@@ -120,86 +119,83 @@ def __init__(self, mapping: DMSRules, data_type_conflict: Literal["overwrite"] =
self.mapping = mapping
self.data_type_conflict = data_type_conflict

@cached_property
def _view_by_entity_id(self) -> dict[str, DMSView]:
return {view.view.external_id: view for view in self.mapping.views}

@cached_property
def _property_by_view_property(self) -> dict[tuple[str, str], DMSProperty]:
return {(prop.view.external_id, prop.view_property): prop for prop in self.mapping.properties}

def transform(self, rules: DMSRules) -> DMSRules:
if self.data_type_conflict != "overwrite":
raise NeatValueError(f"Invalid data_type_conflict: {self.data_type_conflict}")
input_rules = rules
new_rules = input_rules.model_copy(deep=True)

for view in new_rules.views:
if mapping_view := self._view_by_entity_id.get(view.view.external_id):
view.implements = mapping_view.implements
views_by_external_id = {view.view.external_id: view for view in new_rules.views}
new_views: set[ViewEntity] = set()
for mapping_view in self.mapping.views:
if existing_view := views_by_external_id.get(mapping_view.view.external_id):
existing_view.implements = mapping_view.implements
else:
# We need to add all the views in the mapping that are not in the input rules.
# This is to ensure that all ValueTypes are present in the resulting rules.
# For example, if a property is a direct relation to an Equipment view, we need to add
# the Equipment view to the rules.
new_rules.views.append(mapping_view)
new_views.add(mapping_view.view)

# This is a special case: if this property is in the mapping, we want to automatically add the path and parent
# properties to the view.
asset_parent_property = ContainerEntity(space="cdf_cdm", externalId="CogniteAsset"), "assetHierarchy_parent"
read_only_properties: list[DMSProperty] = []
for prop in new_rules.properties:
key = (prop.view.external_id, prop.view_property)
if key not in self._property_by_view_property:
continue
mapping_prop = self._property_by_view_property[key]
to_overwrite, conflicts = self._find_overwrites(prop, mapping_prop)
if conflicts and self.data_type_conflict == "overwrite":
warnings.warn(
PropertyOverwritingWarning(prop.view.as_id(), "view", prop.view_property, tuple(conflicts)),
stacklevel=2,
)
elif conflicts:
raise NeatValueError(f"Conflicting properties for {prop.view}.{prop.view_property}: {conflicts}")

for field_name, value in to_overwrite.items():
setattr(prop, field_name, value)
prop.container = mapping_prop.container
prop.container_property = mapping_prop.container_property

if (prop.container, prop.container_property) == asset_parent_property:
# Add the read-only properties to the view.
# Note we have to do this after the current loop as we are iterating over the properties and
# thus we cannot modify the list.
for read_only_prop in get_asset_read_only_properties_with_connection():
# The value type of path and root will always be the same as the parent property.
new_read_only_prop = read_only_prop.model_copy(
update={"view": prop.view, "value_type": prop.value_type}
properties_by_view_property = {
(prop.view.external_id, prop.view_property): prop for prop in new_rules.properties
}
existing_enum_collections = {item.collection for item in new_rules.enum or []}
mapping_enums_by_collection: dict[ClassEntity, list[DMSEnum]] = defaultdict(list)
for item in self.mapping.enum or []:
mapping_enums_by_collection[item.collection].append(item)
existing_containers = {container.container for container in new_rules.containers or []}
mapping_containers_by_id = {container.container: container for container in self.mapping.containers or []}
for mapping_prop in self.mapping.properties:
if existing_prop := properties_by_view_property.get(
(mapping_prop.view.external_id, mapping_prop.view_property)
):
to_overwrite, conflicts = self._find_overwrites(existing_prop, mapping_prop)
if conflicts and self.data_type_conflict == "overwrite":
warnings.warn(
PropertyOverwritingWarning(
existing_prop.view.as_id(), "view", existing_prop.view_property, tuple(conflicts)
),
stacklevel=2,
)
elif conflicts:
raise NeatValueError(
f"Conflicting properties for {existing_prop.view}.{existing_prop.view_property}: {conflicts}"
)
read_only_properties.append(new_read_only_prop)

if read_only_properties:
new_rules.properties.extend(read_only_properties)

# Add missing views used as value types
existing_views = {view.view for view in new_rules.views}
new_value_types = {
prop.value_type
for prop in new_rules.properties
if isinstance(prop.value_type, ViewEntity) and prop.value_type not in existing_views
}
for new_value_type in new_value_types:
if mapping_view := self._view_by_entity_id.get(new_value_type.external_id):
new_rules.views.append(mapping_view)
for field_name, value in to_overwrite.items():
setattr(existing_prop, field_name, value)
existing_prop.container = mapping_prop.container
existing_prop.container_property = mapping_prop.container_property
elif isinstance(mapping_prop.value_type, ViewEntity):
# All connections must be included in the rules. This is to update the
# ValueTypes of the implemented views.
new_rules.properties.append(mapping_prop)
elif mapping_prop.view in new_views:
# All properties of new views are included. Main motivation is GUID properties
new_rules.properties.append(mapping_prop)
else:
warnings.warn(NeatValueWarning(f"View {new_value_type} not found in mapping"), stacklevel=2)
# Skipping mapped properties that are not in the input rules.
continue

# Add missing enums
existing_enum_collections = {item.collection for item in new_rules.enum or []}
new_enums = {
prop.value_type.collection
for prop in new_rules.properties
if isinstance(prop.value_type, Enum) and prop.value_type.collection not in existing_enum_collections
}
if new_enums:
new_rules.enum = new_rules.enum or SheetList[DMSEnum]([])
for item in self.mapping.enum or []:
if item.collection in new_enums:
new_rules.enum.append(item)
if (
isinstance(mapping_prop.value_type, Enum)
and mapping_prop.value_type.collection not in existing_enum_collections
):
if not new_rules.enum:
new_rules.enum = SheetList[DMSEnum]([])
new_rules.enum.extend(mapping_enums_by_collection[mapping_prop.value_type.collection])

if (
mapping_prop.container
and mapping_prop.container not in existing_containers
and (new_container := mapping_containers_by_id.get(mapping_prop.container))
):
# Mapping can include new containers for GUID properties
if not new_rules.containers:
new_rules.containers = SheetList[DMSContainer]([])
new_rules.containers.append(new_container)

return new_rules

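A minimal sketch (not part of this diff) of how the reworked RuleMapper might be driven. The import path simply mirrors the file location above, and mapping_rules / input_rules are hypothetical placeholders for two DMSRules objects loaded elsewhere (for example via the ExcelImporter + VerifyDMSRules combination used in the regeneration script further down this page).

from cognite.neat._rules.transformers._mapping import RuleMapper

# mapping_rules: DMSRules built from the classic-to-core mapping (placeholder, loaded elsewhere)
# input_rules: DMSRules describing the model being mapped (placeholder, loaded elsewhere)
mapped = RuleMapper(mapping_rules, data_type_conflict="overwrite").transform(input_rules)

# With this change, `mapped` also carries over from the mapping: connection
# properties (properties whose value type is a view), the views those
# connections point to, every property of such newly added views (e.g. GUID
# properties), and any containers or enum collections those properties need.
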
6 changes: 4 additions & 2 deletions docs/CHANGELOG.md
@@ -35,11 +35,13 @@ Changes are grouped as follows:
value types that are not part of the data model. This ensures that the data model is self-contained. This is solved
by dropping all properties that have a value type that is not part of the data model.
- The `neat.to.cdf.instances(...)` now correctly ignores read-only properties when creating instances in CDF.
- The `neat.mapping.data_model.classic_to_core(...)` now automatically includes the `path` and `root` properties
when mapping to `CogniteAsset`.
- Reading sequences from CDF using `read.cdf.classic.graph(...)` now includes `rows`. In addition, the `columns` are
now created as a list and not a blob.

### Changed
- The `neat.mapping.data_model.classic_to_core(...)` now includes all connection properties from the view that
it is implementing.

## [0.104.0] - 20-12-**2024**
### Improved
- When using a `NeatSession` object in a notebook, the returned issues now have context for what actions they
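A hedged sketch of the session-level flow the classic-to-core changelog entries above describe; only the method names come from this page, the argument lists are deliberately left elided exactly as in the changelog, and the client setup follows the pattern of the old regeneration script shown further down.

from cognite.neat import NeatSession, get_cognite_client

neat = NeatSession(get_cognite_client(".env"))
neat.read.cdf.classic.graph(...)  # sequences are now read with their rows, and columns come back as a list
# ... infer/verify the data model as usual (steps elided) ...
neat.mapping.data_model.classic_to_core(...)  # now also pulls in connection properties and, for CogniteAsset, path/root
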
Binary file modified scripts/core_classic_mapping.xlsx
44 changes: 8 additions & 36 deletions scripts/create_classic_core_mapping_yaml.py
@@ -1,53 +1,25 @@
from datetime import datetime, timezone

from cognite.neat import NeatSession, get_cognite_client
from pathlib import Path
from rich import print

from cognite.neat._rules.exporters import YAMLExporter
from cognite.neat._rules.importers import ExcelImporter
from cognite.neat._rules.transformers import VerifyDMSRules
from cognite.neat._store._provenance import Change

THIS_FOLDER = Path(__file__).resolve().parent

XLSX_FILE = THIS_FOLDER / "core_classic_mapping.xlsx"

TARGET_FILE = THIS_FOLDER.parent / "cognite" / "neat" / "_rules" / "models" / "mapping" / "_classic2core.yaml"

def main() -> None:
client = get_cognite_client(".env")
neat = NeatSession(client)

issues = neat.read.excel(XLSX_FILE)
if issues.has_errors:
neat.inspect.issues()
return
def main() -> None:
read_rules = ExcelImporter(XLSX_FILE).to_rules()
print(f"[bold green]Read {XLSX_FILE.name}[/bold green]")
# Redoing the .verify to skip the validation step.
start = datetime.now(timezone.utc)
transformer = VerifyDMSRules("continue", validate=False)
source_id, last_unverified_rule = neat._state.data_model.last_unverified_rule
result = transformer.transform(last_unverified_rule)
end = datetime.now(timezone.utc)
issues = result.issues
if issues.has_errors:
neat.inspect.issues()
return

dms_rules = VerifyDMSRules(validate=False).transform(read_rules)
print("[bold green]Verified[/bold green]")
# change = Change.from_rules_activity(
# result.rules,
# transformer.agent,
# start,
# end,
# f"Verified data model {source_id} as {result.rules.metadata.identifier}",
# neat._state.data_model.provenance.source_entity(source_id)
# or neat._state.data_model.provenance.target_entity(source_id),
# )
#
# neat._state.data_model.write(result.rules, change)

neat.to.yaml(TARGET_FILE, format="neat")

YAMLExporter().export_to_file(dms_rules, TARGET_FILE)
print(f"[bold green]Wrote {TARGET_FILE.name}[/bold green]")


if __name__ == "__main__":
main()
2 changes: 0 additions & 2 deletions tests/tests_integration/test_session/test_graph_flow.py
@@ -45,7 +45,6 @@ def test_classic_to_dms(self, cognite_client: CogniteClient, data_regression: Da
neat._state.instances.store.write(extractor)

# Sequences is not yet supported
neat.drop.instances("Sequence")
neat.prepare.instances.relationships_as_edges()

neat.prepare.instances.convert_data_type(
@@ -60,7 +59,6 @@ def test_classic_to_dms(self, cognite_client: CogniteClient, data_regression: Da
rules = neat._state.rule_store.last_unverified_rule
rules.metadata.created = "2024-09-19T00:00:00Z"
rules.metadata.updated = "2024-09-19T00:00:00Z"

# Sorting the properties to ensure deterministic output
rules.properties = sorted(rules.properties, key=lambda x: (x.class_, x.property_))
