---
# Template for the config file.
# Defaults to config.yml in ./config.
# The config tool defaults to config/config.config-tool.yml, and uses the same config
# options, though only EndpointURL and if needed username/password/secure are required.
# The values here are generally the default values, except for the contents of lists etc.
# Your config file can contain only a subset of these config options. Any extra options will
# cause the extractor to fail.
# Version of the config schema
version: 1
# Set this to true to run the extractor in dry-run mode.
# In this mode, the extractor will read data from OPC-UA, but not push it to any destination.
# The extractor will still read data from destinations, if configured.
dry-run: false
source:
# The URL of the OPC-UA server to connect to.
# This is used as discovery URL.
endpoint-url: "opc.tcp://localhost:4840"
# A list of URLs to connect to if the endpoint-url fails.
# These are connected to in order, and the one with the highest ServiceLevel is selected.
# See the OPC-UA standard part 4 6.6.2, this is for non-transparent redundancy.
# When using this, setting force-restart: true is recommended, as otherwise the extractor will only try to
# reconnect to the same server on failure.
alt-endpoint-urls:
# - opc.tcp://some-other-server:1234
# - opc.tcp://some-third-server:4321
# Details used to override default endpoint behavior.
endpoint-details:
# Endpoint URL to override URLs returned from discovery.
# This can be used if the server is behind NAT, or similar URL rewrites.
override-endpoint-url:
# Extra configuration options related to redundancy and ServiceLevel monitoring
redundancy:
# Servers above this threshold are considered "good"
service-level-threshold: 200
# If using redundancy, the extractor will attempt to find a better server
# with this interval if service level is below threshold.
reconnect-interval: 10m
# If true, the extractor will subscribe to changes in ServiceLevel
# and trigger reconnect once it drops below service-level-threshold,
# and history restart once it goes back above the threshold.
# This also makes the extractor not update extraction state if it is connected to a server
# with service level below the threshold.
# If a server does not want the extractor to trust the available data ranges
# it should set the ServiceLevel below the threshold.
monitor-service-level: false
# Auto accept connections from unknown servers.
# There is not yet a feature to accept new certificates, but you can manually move rejected certificates to accepted.
# Paths are defined in opc.ua.net.extractor.Config.xml
auto-accept: false
# How often the client requests updates to subscribed variables.
# This decides the maximum rate points are pushed to CDF. Higher reduces network and server load.
publishing-interval: 500
# OPC-UA username, leave blank for anonymous user. (no authentication)
username:
# OPC-UA password
password:
# Local URL used for reverse-connect. This is the URL the server should connect to. An endpoint-url should also be specified,
# but the server is responsible for initiating connections, meaning it can be placed entirely behind a firewall.
reverse-connect-url:
# OPC-UA servers may accept x509 certificates for authentication. This can be used to specify a certificate
# in a file or local certificate store.
# Certificate based authentication is highly server dependent.
x509-certificate:
# Path to x509 certificate
file-name:
# Password to use for x509 certificate file
password:
# Local store to use, one of None (to use file), Local (for LocalMachine) or User
store: None
# Name of certificate in store, e.g. CN=my-certificate
cert-name:
# Use secured connection
secure: false
# Ignore other server certificate issues, like missing flags, too short key length, or bad issuer chain.
# Note that enabling this makes the connection less secure, and that a better solution is to resolve these issues on
# the server.
ignore-certificate-issues: false
# If true, exit the session and reconnect on restart, this means that subscriptions will need to be recreated,
# which has some performance cost.
# Required for failover if alt-endpoint-urls is set.
force-restart: false
# Completely exit the extractor on failure, instead of doing an internal restart. Internal reset should re-initialize everything relevant,
# but this can be set to do restart with some external tool instead.
exit-on-failure: false
# Restart the extractor on reconnect. If the server handles reconnecting and is not expected to have important structural changes
# on restart, this can be left as false for smoother reconnects (in this case, OPC-UA has systems in place to handle reconnecting).
restart-on-reconnect: false
# Interval in ms between each keep-alive request to the server. If the extractor times out during startup, increasing this may help.
# Timeout is 2 * interval + 100ms, this can happen if the server is poorly parallelized and hangs during heavy load.
keep-alive-interval: 5000
# Max-sizes of various requests to OPC-UA. Defaults are usually okay, but some servers may fail with too large request sizes
# Max number of results per browse or browseNext operation
browse-chunk: 1000
# Max number of nodes to browse at the same time. Higher is much faster, but may be restricted by the server.
browse-nodes-chunk: 1000
# Maximum number of attributes to request per call to the server. Higher is faster.
attributes-chunk: 10000
# Maximum number of monitoredItems to create on the server per call. Higher is faster.
subscription-chunk: 1000
# Read from NodeSet2 files instead of browsing the OPC-UA node hierarchy.
node-set-source:
# List of nodesets to read. Specified by URL, file name, or both. If no name is specified, the last segment of
# the URL is used as file name.
# File name is path both of downloaded files, and where the extractor looks for existing files.
node-sets:
# - url: "https://files.opcfoundation.org/schemas/UA/1.04/Opc.Ua.NodeSet2.xml"
# - file-name: "Server.NodeSet2.xml"
# Map the instance hierarchy. This entirely skips normal browse, and just "browses" the nodeset file.
# instance: false
# Map types. This skips mapping of data-types, events types, reference types and object types.
# types: false
# Default true, use the Server_ServerCapabilities object to limit chunk sizes.
# Turn this off only if you want to set the limits higher and are certain that the server is reporting the wrong limits.
# If these limits are exceeded the extractor will typically crash.
limit-to-server-config: true
# If an alternative node source like CDF Raw or a NodeSet file is configured and used,
# start browsing for updates immediately in the background.
# This setup does not reduce load on the server, but can speed up startup.
alt-source-background-browse: false
# Configuration for throttling browse operations.
browse-throttling:
# Maximum number of browse requests per type per minute.
max-per-minute: 0
# Maximum number of parallel browse requests.
max-parallelism: 0
# Maximum number of nodes to read in parallel. This should be set equal to or lower than the maximum number of continuation points set by the server.
max-node-parallelism: 0
# Default certificate expiration in months. You may also replace the certificate with your own, by modifying
# the .xml config file
certificate-expiry: 60
# Configuration for retrying operations against the OPC-UA server.
# Note that when running the config tool, this option is ignored.
retries:
# Total timeout, after this much time has elapsed no more retries will be started.
# Syntax is N[timeunit] where timeunit is w, d, h, m, s or ms.
# Set to 0 to disable timeout.
timeout: 0s
# Maximum number of retries per operation
max-tries: 5
# Maximum delay between retries, 0 limits it to roughly 10 minutes.
# Syntax is N[timeunit] where timeunit is w, d, h, m, s or ms
max-delay: 0s
# Initial delay between retries, basis for exponential backoff.
# Syntax is N[timeunit] where timeunit is w, d, h, m, s or ms
initial-delay: 500ms
# Additional list of status codes to retry. In addition to a long list of default codes.
# Can be either numeric status codes, or symbolic names.
# For example "BadNotConnected" or 0x808A0000
retry-status-codes:
# Config for reading of history from the server
history:
# Enable/disable history synchronization from the OPC-UA server to CDF.
# This is a master switch covering both events and data
enabled: false
# Enable or disable data history on nodes with history. "Enabled" must be true.
# By default nodes with AccessLevel ReadHistory are read.
data: true
# Enable/disable backfill behavior. If this is false, data is read using frontfill only. (Pre 1.1 behavior)
# This applied to both datapoints and events.
backfill: false
# True to require Historizing to be set on timeseries to read history
require-historizing: false
# Time in seconds to wait between each restart of history. Setting this too low may impact performance.
# Leave at 0 to disable periodic restarts.
# Alternatively, use N[timeunit] where timeunit is w, d, h, m, s or ms.
# You may also use a cron expression on the form "[minute] [hour] [day of month] [month] [day of week]"
# See https://crontab.cronhub.io/
restart-period: 0
# Max number of datapoints per history read request, 0 for server specified
data-chunk: 1000
# Max number of simultaneous nodes per historyRead request for data
data-nodes-chunk: 100
# Max number of events per history read request, 0 for server specified
event-chunk: 1000
# Max number of simultaneous nodes per historyRead request for events
event-nodes-chunk: 100
# The earliest timestamp to be read from history on the OPC-UA server, in milliseconds since 1/1/1970.
# Alternatively, use syntax N[timeunit](-ago) where timeunit is w, d, h, m, s or ms. In past if -ago is added,
# future if not.
start-time: null
# Timestamp to be considered the end of forward history. Only relevant if max-read-length is set.
# In milliseconds since 1/1/1970. Default is current time, if this is null.
# Alternatively, use syntax N[timeunit](-ago) where timeunit is w, d, h, m, s or ms. In past if -ago is added,
# future if not.
end-time: null
# Maximum length of each read of history, in seconds.
# If this is set greater than zero, history will be read in chunks of maximum this size, until the end.
# This can potentially take a very long time if end-time is much larger than start-time.
# Alternatively, use N[timeunit] where timeunit is w, d, h, m, s or ms.
max-read-length: 0
# Granularity to use when doing historyRead, in seconds. Nodes with last known timestamp within this range of each other will
# be read together. Should not be smaller than usual average update rate
# Leave at 0 to always read a single node each time.
# Alternatively, use N[timeunit] where timeunit is w, d, h, m, s or ms.
granularity: 600
# Set to true to attempt to read history without using continuationPoints, instead using the Time of events, and
# SourceTimestamp of datapoints to incrementally change the start time of the request until no points are returned.
ignore-continuation-points: false
# Configuration for throttling history reads.
throttling:
# Maximum number of history requests per type per minute.
max-per-minute: 0
# Maximum number of parallel history requests.
max-parallelism: 0
# Maximum number of nodes to read in parallel. This should be set equal to or lower than the maximum number of continuation points set by the server.
max-node-parallelism: 0
# Log bad history datapoints, count per read at debug and each datapoint at verbose
log-bad-values: true
# Threshold in percent for a history run to be considered failed.
# Example: 10.0 -> History read operation would be considered failed if more than 10% of nodes fail to read at some point.
# Retries still apply, this only applies to nodes that fail even after retries.
error-threshold: 10.0
# Configuration for the pusher to CDF.
cognite:
# The project to connect to in the API
project:
# Cognite service url
host: "https://api.cognitedata.com"
# Replace all instances of NaN with this floating point number. If left empty, ignore instead.
nan-replacement:
# Whether to read start/end-points on startup, where possible. At least one pusher should be able to do this,
# otherwise back/frontfill will run for the entire history every restart.
# The CDF pusher is not able to read start/end points for events, so if reading historical events is enabled, one other pusher
# able to do this should be enabled.
# The state-store can do all this, if the state-store is enabled this can still be enabled if timeseries have been deleted from CDF
# and need to be re-read from history.
read-extracted-ranges: true
# Data set to use for new objects. Existing objects will not be updated
data-set:
# Data set internal id.
id:
# Data set external id. Requires the capability datasets:read for the given data set.
external-id:
# CDF target configurations. Regardless of the configuration here, a form of timeseries is always created (either full or minimal) for now.
# In the future however, users are expected to create target specific destinations.
metadata-targets:
# Configuration to enable some form of clean i.e. assets, timeseries or relationships.
clean:
# Toggle CDF assets destination as a target
assets: false
# Toggle CDF timeseries destination as a target
timeseries: false
# Toggle CDF relationships destination as a target
relationships: false
# Configuration for some form of CDF raw destination.
# When this is used, it is required that 'database' name and any of the other keys are provided
raw:
# Database name
database:
# Assets table name
assets-table:
# Timeseries table name
timeseries-table:
# Relationships table name
relationships-table:
# Read from CDF instead of OPC-UA when starting, to speed up start on slow servers.
# Requires extraction.data-types.expand-node-ids and append-internal-values to be set to true.
# This should generally be enabled along with metadata-targets.raw
# If browse-on-empty is set to true, and metadata-targets.raw is configured with the same
# database and tables, the extractor will read into raw on first run, then use raw later,
# and the raw database can be deleted to reset on next read.
raw-node-buffer:
# Enable the raw node buffer
enable: false
# Raw database to read from
database:
# Table to read assets from, for events.
assets-table:
# Table to read timeseries from
timeseries-table:
# Run normal browse if nothing is found when reading from CDF, either because the tables are empty, or they do not exist.
# No valid nodes must be found to run this at all, meaning it may run if there are nodes, but none of them are
# potentially valid extraction targets.
browse-on-empty: false
# Map metadata to asset/timeseries attributes. Each of "assets" and "timeseries" is a map from property DisplayName to
# CDF attribute. Legal attributes are "name, description, parentId" and "unit" for timeseries. "parentId" must somehow refer to
# an existing asset. For timeseries it must be a mapped asset, for assets it can be any asset.
# Example usage:
# timeseries:
# "EngineeringUnits": "unit"
# "EURange": "description"
# assets:
# "Name": "name"
metadata-mapping:
assets:
timeseries:
# Config for authentication if a bearer access token has to be used for authentication.
# Leave empty to disable.
idp-authentication:
# URL to fetch tokens from. Either this or tenant must be present.
token-url:
# Identity provider authority endpoint (optional, only used in combination with tenant)
authority: "https://login.microsoftonline.com/"
# Directory tenant
tenant:
# Application Id
client-id:
# Client secret
secret:
# List of resource scopes, ex:
# scopes:
# - scopeA
# - scopeB
# Certificate/key pair to authenticate to AAD with (alternative to secret)
certificate:
# Path to certificate file
path:
# Authority url, if tenant is not configured
authority-url:
# Password for certificate, if set
password:
scopes:
# Audience parameter
audience:
# Which implementation to use in the authenticator. One of
# MSAL (recommended) - Microsoft Authentication Library, works only with authority/tenant
# Basic - Post to authentication endpoint and parse JSON response, works with both authority/tenant and token-url
# Default is MSAL
implementation: MSAL
# Minimum time-to-live in seconds for the token (optional)
min-ttl: 30
# BETA:
# Write events to data modeling stream records.
# Containers corresponding to OPC-UA event types will be created as needed in the configured
# model-space. Note that if your server produces a lot of different event types, this can create
# a large number of containers.
stream-records:
# Data modeling space to write the records to. The space will be created if it
# does not already exist. Required.
log-space:
# Data modeling space to write containers to. The space will be created if it
# does not already exist. Required.
model-space:
# Stream to write the records to. The stream will be created if it
# does not already exist. Required.
stream:
# Use a raw string representation of node IDs, on the form ns=NS;[i/s/o/g]=IDENTIFIER.
# If this is not enabled, node IDs will be created as direct relations.
use-raw-node-id: false
# Use OPC-UA reversible encoding for OPC-UA structures. This will make it possible
# to convert them back into OPC-UA types later, at expense of human readability.
use-reversible-encoding: false
# Configure automatic retries on requests to CDF. Can be left out to keep default values.
cdf-retries:
# Timeout in ms for each individual try
timeout: 80000
# Maximum number of retries, less than 0 retries forever
max-retries: 5
# Max delay in ms between each try. Base delay is calculated according to 125*2^retry ms.
# If less than 0, there is no maximum. (0 means that there is never any delay)
max-delay: 5000
# Configure chunking of data on requests to CDF. Note that increasing these may cause requests to fail due to limits in CDF.
cdf-chunking:
# Maximum number of timeseries per get/create timeseries request
time-series: 1000
# Maximum number of assets per get/create asset request
assets: 1000
# Maximum number of timeseries per datapoint create request
data-point-time-series: 100
# Maximum number of datapoints per datapoint create request
data-points: 100000
# Maximum number of timeseries per datapoint read request, used when getting the first point in a timeseries
data-point-list: 100
# Maximum number of timeseries per datapoint read latest request, used when getting last point in a timeseries
data-point-latest: 100
# Maximum number of rows per request to cdf raw. Used with raw state-store.
raw-rows: 10000
# Maximum number of events per get/create events request
events: 1000
# Maximum number of data modeling nodes/edges per ingestion request
instances: 1000
# Maximum number of data modeling records per ingest request
stream-records: 1000
# Configure how requests to CDF should be throttled
cdf-throttling:
# Maximum number of parallel requests per timeseries operation
time-series: 20
# Maximum number of parallel requests per assets operation
assets: 20
# Maximum number of parallel requests per datapoints operation
data-points: 10
# Maximum number of parallel requests per raw operation
raw: 10
# Maximum number of parallel requests per get first/last datapoint operation
ranges: 20
# Maximum number of parallel requests per events operation
events: 20
# Maximum number of parallel requests per data modeling instances operation
instances: 4
# Maximum number of parallel requests per data modeling records operation
stream-records: 4
# Configure if the SDK should do logging of requests
sdk-logging:
# True to disable logging from the SDK
disable: false
# Level of logging, one of trace, debug, information, warning, error, critical, none
level: debug
# Format of the log message
format: "CDF ({Message}): {HttpMethod} {Url} - {Elapsed} ms"
# Configure an extraction pipeline manager
extraction-pipeline:
# ExternalId of extraction pipeline
# external-id:
# Push to an influx-database. Data-variables are mapped to series with the given id.
# Events are mapped to series with ids on the form [id].[eventId] where id is given by the source node.
influx:
# Host URI, ex localhost:8086
host:
# Influx username
username:
# Influx password
password:
# Database to connect to, will not be created automatically
database:
# Replace all instances of NaN or Infinity with this floating point number. If left empty, ignore instead.
non-finite-replacement:
# Whether to read start/end-points on startup, where possible. At least one pusher should be able to do this,
# otherwise back/frontfill will run for the entire history every restart.
read-extracted-ranges: true
# Whether to read start/end-points for events on startup, where possible.
read-extracted-event-ranges: true
# Max number of points to send in each request to influx
point-chunk-size: 100000
# Push to an MQTT broker. Requires a separate application to be running with access to CDF
# that translates MQTT messages to requests to CDF and handles missing ids etc.
# This setup allows the extractor to run in secure environments like zone 3.
mqtt:
# TCP Broker URL
host:
# TCP Broker port
port:
# MQTT broker username
username:
# MQTT broker password
password:
# True to enable TLS
use-tls:
# Allow untrusted server SSL certificates. This is fundamentally unsafe
allow-untrusted-certificates: false
# Optional path to a custom certificate authority file for SSL
custom-certificate-authority:
# Mqtt client id. Should be unique for a given broker.
client-id: cognite-opcua-extractor
# Data set to use for new objects. Existing objects will not be updated
data-set-id:
# Assets topic
asset-topic: cognite/opcua/assets
# Timeseries topic
ts-topic: cognite/opcua/timeseries
# Events topic
event-topic: cognite/opcua/events
# Datapoints topic
datapoint-topic: cognite/opcua/datapoints
# Raw topic
raw-topic: cognite/opcua/raw
# Set to enable storing a list of created assets/timeseries to local litedb.
# Requires the StateStorage.Location property to be set.
# If this is left empty, metadata will have to be read each time the extractor restarts.
# Default is empty
local-state:
# Timestamp in ms since epoch to invalidate stored mqtt states.
# On extractor restart, assets/timeseries created before this will be attempted re-created in CDF.
# They will not be deleted or updated.
invalidate-before: 0
# Replace all instances of NaN, Infinity or values greater than 1E100 with this floating point number. If left empty, ignore instead.
non-finite-replacement:
# Do not push any metadata at all. If this is true, plain timeseries without metadata will be created,
# similarly to raw-metadata, and datapoints will be pushed. Nothing will be written to raw, and no assets will be created.
# Events will be created, but without asset context.
skip-metadata: false
# Default empty. Store assets and/or timeseries data in raw. Assets will not be created at all,
# timeseries will be created with just externalId, isStep and isString.
# Both timeseries and assets will be persisted in their entirety to raw.
# Datapoints are not affected, events will be created, but without asset context. The externalId
# of the source node is added to metadata if applicable.
# Use different table names for assets and timeseries.
raw-metadata:
# Database to store data in, required.
database:
# Table to store assets in.
assets-table:
# Table to store timeseries in.
timeseries-table:
# Table to store relationships in
relationships-table:
# Map metadata to asset/timeseries attributes. Each of "assets" and "timeseries" is a map from property DisplayName to
# CDF attribute. Legal attributes are "name, description, parentId" and "unit" for timeseries. "parentId" must somehow refer to
# an existing asset. For timeseries it must be a mapped asset, for assets it can be any asset.
# Example usage:
# timeseries:
# "EngineeringUnits": "unit"
# "EURange": "description"
# assets:
# "Name": "name"
metadata-mapping:
assets:
timeseries:
# If relationships are enabled, and written to clean, and deletes are enabled. This needs to be set to
# true in order to hard delete the relationships.
delete-relationships: false
# If a pusher fails to push data for some reason, the failure buffer will automatically store the data,
# then add it back into the queue once a push succeeds.
# If all points/events are historized, this does nothing.
failure-buffer:
# If false, buffering is disabled
enabled: false
# Use an influxdb pusher as buffer. Requires an influxdb pusher.
# This is intended to be used if there is a local influxdb instance running.
# If points are received on non-historical points while the connection to CDF is down,
# they are read from influxdb once the connection is restored.
influx: false
# If state-storage is configured, this can be used to store the ranges of points buffered in influxdb, so that
# they can be recovered even if the extractor goes down.
influx-state-store: false
# Store points to a binary file for datapoints. There is no safety, and a bad write can corrupt the file,
# but it is very fast.
# Path to a local binary buffer file for datapoints.
datapoint-path:
# Path to a local binary buffer file for events.
# The two buffer file paths must be different.
event-path:
# Cap in bytes for each buffer file, default is uncapped. If the size exceeds this, no new datapoints will be written,
# and any further data is lost. Note that if both datapoint and event buffers are enabled, the potential disk usage
# is twice this number.
max-buffer-size: 0
# Periodically store state in a local database to speed up starting, by not having to read state from destinations
# This allows you to set the read-extracted-ranges option to false on the pushers without having to read all of history on startup.
# If the OPC-UA server does not support history this does nothing.
state-storage:
# Path to .db file used by the state storage, or database in CDF raw.
location: # ex: "buffer.db"
# Which type of database to use. One of "None", "Raw", "LiteDb"
database: "None"
# Names of the stores to use for each type of range. Raw tables or collections in litedb.
variable-store: variable_states
event-store: event_states
influx-variable-store: influx_variable_states
influx-event-store: influx_event_states
# Interval between each write to the buffer file, in seconds. 0 or less disables the state storage.
# Alternatively, use N[timeunit] where timeunit is w, d, h, m, s or ms.
interval: 10
logger:
# Writes log events at this level to the Console. One of verbose, debug, information, warning, error, fatal.
# If not present, or if the level is invalid, Console is not used.
console:
level:
# Writes log events at this level to a file. Logs will roll over to new files daily.
# If not present, or if the level is invalid, logging to file is disabled.
file:
level:
# Path for logging output. If not present, logging to file is disabled.
path: # "logs/log.txt"
# Maximum number of logs files that are kept in the log folder.
retention-limit: 31
# Rolling interval for log files. Either "day" or "hour".
rolling-interval: "day"
  # Optionally capture OPC-UA trace. One of verbose, debug, information, warning, error, fatal.
ua-trace-level:
# Trace data sent to and from the OPC-UA server.
# WARNING: This produces an enormous amount of logs. Only use this when running against a small
# number of nodes, producing a limited number of datapoints, and make sure it is not turned on in production.
ua-session-tracing: false
metrics:
# Start a metrics server in the extractor for Prometheus scrape
server:
host:
port: 0
# Multiple Prometheus PushGateway destinations:
push-gateways:
- host:
job:
username:
password:
# Configuration to treat OPC-UA nodes as metrics.
# Values will be mapped to opcua_nodes_NODE-DISPLAY-NAME in prometheus.
nodes:
# Map relevant static diagnostics contained in ServerDiagnosticsSummary.
server-metrics: false
# Map other nodes, given by a list of ProtoNodeIds.
other-metrics:
# - namespace-uri:
# node-id:
extraction:
# Global prefix for externalId towards pushers. Should be unique to prevent name conflicts in the push destinations.
# The externalId is: IdPrefix + NamespaceMap[nodeId.NamespaceUri] + nodeId.Identifier
id-prefix: # ex: "gp:"
# Delay in ms between each push of data points to targets
# Alternatively, use N[timeunit] where timeunit is w, d, h, m, s or ms.
data-push-delay: 1000
# Source node in the OPC-UA server. Leave empty to use the top level Objects node.
# If root-nodes is set, this is added to the list of root nodes.
root-node:
# Full name of the namespace of the root node.
namespace-uri:
# Id of the root node, on the form "i=123" or "s=stringid" etc.
node-id:
# List of proto-node-ids similar to root-node.
# The extractor will start exploring from these.
# Specifying nodes connected with hierarchical references can result in some strange behavior:
# generally, the node deeper in the hierarchy will be detached from its parent and excluded from the hierarchy of the other node.
root-nodes:
# - namespace-uri:
# node-id:
# Override mappings between OPC UA node id and externalId, allowing e.g. the RootNode to be mapped to
# a particular asset in CDF. Applies to both assets and time series.
# node-map:
# "externalId": { namespace-uri: "uri", node-id: "i=123" }
node-map:
# Map OPC-UA namespaces to prefixes in CDF. If not mapped, the full namespace URI is used.
# Saves space compared to using the full URL. Using the ns index is not safe as the order can change on the server.
# For example:
# namespace-map:
# "urn:cognite:net:server": cns
# "urn:freeopcua:python:server": fps
# "http://examples.freeopcua.github.io": efg
namespace-map:
# Config for how OPC-UA data-types are mapped to destinations
data-types:
# Add custom numeric types using their nodeId. is-step indicates whether the datatype is discrete,
# enum indicates that it is an enumeration, which may be mapped to a string if enums-as-strings is true.
# This also overwrites default behavior, so it is possible to make Integer discrete, etc.
# Note that the type in question needs to have a sensible numerical conversion in C#, unless it is an array type or similar,
# in which case each element needs a conversion
custom-numeric-types:
# - node-id:
# namespace-uri:
# node-id:
# is-step: false
# enum: false
# List of NodeIds corresponding to DataTypes that should be ignored. Timeseries with these datatypes will not be mapped to destinations.
ignore-data-types:
# - NamespaceUri:
# NodeId:
# Assume unknown ValueRanks without ArrayDimensions are all scalar, and create timeseries in CDF accordingly.
# If such a variable produces an array, only the first element will be mapped to CDF
unknown-as-scalar: false
# Maximum size of array variables. Only arrays with the ArrayDimensions property in opc-ua specified will be used,
# leave at 0 to only allow scalar values.
# Note that some server implementations have issues with the ArrayDimensions property, so it is not fetched at all if MaxArraySize is 0
# -1 indicates that there is no limit to array length, though only 1-dimensional structures will be read either way.
max-array-size: 0
# Set to true to allow fetching string variables. This means that all variables with a non-numeric type are converted to string in some way.
allow-string-variables: false
# Map out the dataType hierarchy before starting, useful if there are custom or enum types.
# Necessary for enum metadata and for enums-as-strings to work. If this is false, any
# custom numeric types have to be added manually.
auto-identify-types: false
# If this is false and auto-identify-types is true, or there are manually added enums in custom-numeric-types,
# enums will be mapped to numeric timeseries, and labels are added as metadata fields.
# If this is true, labels are not mapped to metadata, and enums will be mapped to string timeseries with values
# equal to mapped label values.
enums-as-strings: false
# Add a metadata property dataType which contains the id of the OPC-UA datatype.
data-type-metadata: false
# True to treat null nodeIds as numeric instead of string
null-as-numeric: false
# Add full JSON node-ids to data pushed to Raw. TypeDefinitionId, ParentNodeId, NodeId and DataTypeId.
expand-node-ids: false
# Add attributes generally used internally like AccessLevel, Historizing, ArrayDimensions, ValueRank etc.
# to data pushed to Raw.
append-internal-values: false
# If max-array-size is set, this looks for the MaxArraySize property on each node with one-dimension ValueRank,
# if it is not found, it tries to read the value as well, and look at the current size.
# ArrayDimensions is still the preferred way to identify array sizes, this is not guaranteed to generate
# reasonable or useful values.
estimate-array-sizes: false
# Time in minutes between each call to browse the OPC-UA directory, then push new nodes to destinations.
# Note that this is a heavy operation, so this number should not be set too low.
# Alternatively, use N[timeunit] where timeunit is w, d, h, m, s or ms.
# You may also use a cron expression on the form "[minute] [hour] [day of month] [month] [day of week]"
# See https://crontab.cronhub.io/
auto-rebrowse-period: 0
# Enable using audit events to discover new nodes. If this is set to true, the client will expect AuditAddNodes/AuditAddReferences
# events on the server node. These will be used to add new nodes automatically, by recursively browsing from each given ParentId.
enable-audit-discovery:
# Update data in destinations on rebrowse or restart.
# Set auto-rebrowse-period to some value to do this periodically.
# Context refers to the structure of the node graph in OPC-UA. (assetId and parentId in CDF)
# Metadata refers to any information obtained from OPC-UA properties. (metadata in CDF)
# Enabling anything here will increase the startup- and rebrowse-time of the extractor. Enabling metadata will increase it more.
update:
objects:
name: false
description: false
context: false
metadata: false
variables:
name: false
description: false
context: false
metadata: false
# Map OPC-UA non-hierarchical references to relationships in CDF.
# The generated relationships will have external-id
# [prefix][reference type name (or inverse-name)];[namespace source][id source];[namespace target][id target]
# Only relationships between mapped nodes will be added.
# This may be relevant if the server contains functional relationships, like connected components,
# a non-hierarchical reference based system for location, etc.
relationships:
enabled: false
# True to enable also mapping the hierarchical references over. These are the ones that are normally
# mapped to assetId/parentId relations in CDF. In that case the type is lost.
# Requires relationships.enabled to be true.
hierarchical: false
# True to create inverse relationships for each of the hierarchical references.
# For efficiency these are not read, they will almost always be there in practice.
# Does nothing if hierarchical is false.
inverse-hierarchical: false
# Config related to mapping object- and variable types to destinations.
node-types:
# Add the TypeDefinition as a metadata field to all nodes
metadata: false
# Allow reading object- and variable types as normal nodes and map them to destinations.
# They will need to be in the mapped hierarchy.
as-nodes: false
# By default children of variables are treated as properties, if this is set to true,
# they can be treated as objects or variables instead.
map-variable-children: false
# A list of transformations to be applied to the source nodes before pushing
# The possible transformations are
# "Ignore", ignore the node. This will ignore all descendants of the node.
# If the filter does not use "is-array", "description" or "parent", this is done
# while reading, and so children will not be read at all. Otherwise, the filtering happens later.
# "Property", turn the node into a property, which is treated as metadata.
# This also applies to descendants. Nested metadata is given a name like "grandparent_parent_variable", for
# each variable in the tree.
# "DropSubscriptions", do not subscribe to this node with either events or data-points.
# "TimeSeries", do not treat this variable as a property.
# "AsEvents", if this matches a variable, treat datapoints generated by this variable as events.
# "Include", include this node. If any Include filter is present all nodes are ignored by default.
# There is some overhead associated with the filters. They are applied sequentially, so it can help performance to put
# "Ignore" filters first. This is also worth noting when it comes to TimeSeries transformations, which can undo Property
# transformations.
# It is possible to have multiple of each filter type.
#
# name, description, id, namespace, and type-definition can be one of
# - A regex string
# - A list of strings, in which case the filter is a match if any of these are equal to the value being matched.
# - An object on the form
# name:
# file: ...
# where "file" is a path to a local file containing newline-separated values to be matched exactly.
transformations:
# Type, either "Ignore", "Property", "DropSubscriptions", "TimeSeries", "AsEvents", or "Include"
# - type:
# NodeFilter. All non-null filters must match each node for the transformation to be applied.
# filter:
# Regex on node DisplayName
# name:
# Regex on node Description. If this is set, requires description to be non-null.
# description:
# Regex on node id. Ids on the form "i=123" or "s=string" are matched.
# id:
# Whether the node is an array. If this is set, the filter only matches variables.
# is-array:
# Regex on the full namespace of the node id.
# namespace:
# Regex on the id of the type definition. On the form "i=123" or "s=string"
# type-definition:
# The OPC-UA node class, exact match. Should be one of
# "Object", "ObjectType", "Variable", "VariableType". The other types will work, but do nothing, since we never read those.
# node-class:
# The "historizing" attribute on variables. If this is set, the filter only matches variables.
# historizing:
# Another instance of NodeFilter which is applied to the parent node.
# parent:
# Configure extractor to trigger a rebrowse when there are changes to specific namespace metadata nodes.
# Also supports filtering by namespace uris.
rebrowse-triggers:
# A dictionary of nodes to which we would like to listen for changes.
targets:
namespace-publication-date: false
# A list of namespace uris from which the targets above will be selected and listen for
namespaces:
- http://opcfoundation.org/UA/
# Configure soft deletes. When this is enabled, all read nodes are written to a state store after browse,
# and nodes that are missing on subsequent browses are marked as deleted from CDF, with a configurable
# marker.
# A notable exception is relationships in CDF, which has no metadata, so these are hard-deleted if
# cognite.delete-relationships is enabled
deletes:
# True to enable deletes. This requires a state store to be configured.
enabled: false
# Name of marker indicating that a node is deleted.
# Added to metadata, or as a column in Raw.
delete-marker: "deleted"
# Configuration for ingesting status codes to CDF timeseries.
status-codes:
# Which data points to ingest to CDF.
# `All` ingests all datapoints, including bad.
# `Uncertain` ingests good and uncertain data points.
# `GoodOnly` ingests only good datapoints.
status-codes-to-ingest: GoodOnly
ingest-status-codes: false
subscriptions:
# Enable subscriptions on data-points.
data-points: true
# Enable subscriptions on events. Requires events.enabled to be set to true.
events: true
# Modify the DataChangeFilter used for datapoint subscriptions. See OPC-UA reference part 4 7.17.2 for details.
# These are just passed to the server, they have no effect on extractor behavior.
# Filters are applied to all nodes, but deadband should only affect some, according to the standard.
data-change-filter:
# One of Status, StatusValue, or StatusValueTimestamp.
trigger: "StatusValue"
# One of None, Absolute, or Percent.
deadband-type: "None"
# Double value of the deadband.
deadband-value: 0
# Log bad subscription datapoints
log-bad-values: true
# Ignore the access level parameter for history and datapoints.
# This means using the "Historizing" parameter for history, and subscribing to all timeseries, independent of AccessLevel.
ignore-access-level: false
# How often the server requests updates from the source system (the source system is often the server itself)
# 0 uses maximum rate set by server
# Lower increases server load. This should be set to below the maximum update frequency of the source system.
sampling-interval: 100
# Length of internal server queue for each subscribed item. < 2 means that any updates occurring between publish requests are lost.
queue-length: 10
# The number of publish requests without a response before the server should send a keep alive message.
keep-alive-count: 10
# The number of publish requests without a response before the server should close the subscription.
# Must be at least 3 * keep-alive-count.
lifetime-count: 1000
# Recreate subscriptions that have stopped publishing. Enabled by default.
recreate-stopped-subscriptions: true
# Grace period for recreating stopped subscriptions.
# If this is negative, default to 8 * publishing-interval.
# Syntax is N[timeunit] where timeunit is w, d, h, m, s or ms.
recreate-subscription-grace-period: -1
# List of alternative subscription configurations.
# The first entry with a matching filter will be used for each node.
alternative-configs:
# Filter on node, if this matches or is null, the config will be applied.
#- filter:
# Regex match on node external ID.
# id:
# Regex match on node data type, if it is a variable.
# data-type:
# Match on whether this subscription is for data points or events.
# is-event-state:
# See subscriptions.data-change-filter
# data-change-filter:
# See subscriptions.sampling-interval
# sampling-interval: 100
# See subscriptions.queue-length
# queue-length: 10
events:
# Events are extracted with the following rules: By default all events are extracted.
# Set all-events to false in order to disable extracting base OPC-UA events.
# Specify events in the event-ids option to only extract a limited list of events.
# Emitters are added from the node hierarchy, or from the emitter-ids option,
# if found in both, the node hierarchy is used.
# True to enable reading events from the server
enabled: false
# Automatically treat nodes with suitable EventNotifier as emitters.
discover-emitters: true
# Default true, enable reading both custom events and base opc-ua events.
all-events: true
# True to enable reading historical events from historizing event emitters
history: false
# True to also check the server node when looking for event emitters, default true.
read-server: true
# Regex filter on event type DisplayName, matches will not be extracted.
exclude-event-filter:
# List of BrowseName for properties to be excluded from automatic mapping to destination metadata.
# All event properties are read, by default only "Time" and "Severity" are used from the base event.
# Be aware that a maximum of 16 metadata entries are allowed in CDF.
exclude-properties:
#- Property1
#- Property2
# Map source browse names to other values in the destination. For CDF, internal properties may be overwritten, by default
# "Message" is mapped to description, "SourceNode" is used for context and "EventType" is used for type. These may also be excluded or replaced by
# overrides in DestinationNameMap. If multiple properties are mapped to the same value, the first non-null is used.
# If "StartTime", "EndTime" or "SubType" are specified, either directly or through the map, these are used as event properties instead of metadata.
# StartTime and EndTime should be either DateTime, or a number corresponding to the number of milliseconds since January 1 1970.
# If no StartTime or EndTime are specified, both are set to the "Time" property of BaseEventType.
# "Type" may be overridden case-by-case using "NameOverrides" in Extraction configuration, or in a dynamic way here. If no "Type" is specified,
# it is generated from Event NodeId in the same way ExternalIds are generated for normal nodes.
destination-name-map:
#Property1: SubType
# Event ids to map, with full namespace-uri, and node identifier on the form "i=123" or "s=somestring"
# Custom events must be subtypes of the BaseEventType.
# This is used to specify which specific events should be extracted, instead of just extracting all events.
event-ids:
#- namespace-uri:
# node-id:
# Id of nodes to be observed as event emitters. Empty Namespace/NodeId defaults to the server node.
# This is used to add extra emitters that are not in the extracted node hierarchy, or that do not
# correctly specify the EventNotifier property.
emitter-ids:
#- namespace-uri:
# node-id:
# Subset of the emitter-ids property. Used to make certain emitters historical.
# Requires the events.history property to be true
historizing-emitter-ids:
#- NamespaceUri:
# NodeId:
# Configure the extractor to read from MQTT using OPC-UA pubsub
# This requires the server pubsub configuration to be exposed through the Server object.
# You should consider setting subscriptions: data-points: false to avoid duplicate datapoints if this is enabled.
# This is a proof of concept, for experimentation with OPC-UA pubsub.
pub-sub:
# Enable pubsub
enabled: false
# Default true, prefer using the UADP binary format, if false, will prefer JSON
prefer-uadp: true
# Save or read configuration from a file. If the file does not exist, it will be created
# from server configuration. If this is pre-created manually, the server does not need to expose
# pubsub configuration.
file-name:
# Configure the extractor to support high availability.
high-availability:
# Index of the extractor. Each extractor running in high availability needs a unique index.
# When running with remote configs, this can be provided using an environment variable.
index:
# Use CDF Raw as high availability store
# Must be identical for each instance of the extractor.
raw:
# Raw database name
database-name:
# Raw table name
table-name:
# Use a redis store for high availability.
redis:
# Redis connection string
connection-string:
# Redis table name
table-name: