sift_py.yaml.rule

  1from __future__ import annotations
  2
  3import re
  4from pathlib import Path
  5from typing import Any, Dict, List, Literal, Union, cast
  6
  7import yaml
  8from typing_extensions import NotRequired, TypedDict
  9
 10from sift_py.ingestion.config.yaml.error import YamlConfigError
 11from sift_py.rule.config import RuleActionAnnotationKind
 12from sift_py.yaml.channel import (
 13    ChannelConfigYamlSpec,
 14    _validate_channel_reference,
 15)
 16from sift_py.yaml.utils import _handle_subdir, _type_fqn
 17
 18_SUB_EXPRESSION_REGEX = re.compile(r"^\$[a-zA-Z_]+$")
 19
 20
 21def load_named_expression_modules(paths: List[Path]) -> Dict[str, str]:
 22    """
 23    Takes in a list of paths to YAML files which contains named expressions and processes them into a `dict`.
 24    The key is the name of the expression and the value is the expression itself. For more information on
 25    named expression modules see `sift_py/yaml/rule.py`.
 26    """
 27
 28    named_expressions = {}
 29
 30    for path in paths:
 31        named_expr_module = _read_named_expression_module_yaml(path)
 32
 33        for name, expr in named_expr_module.items():
 34            if name in named_expressions:
 35                raise YamlConfigError(
 36                    f"Encountered expressions with identical names being loaded, '{name}'."
 37                )
 38            named_expressions[name] = expr
 39
 40    return named_expressions
 41
 42
 43def load_rule_modules(paths: List[Path]) -> List[RuleYamlSpec]:
 44    """
 45    Takes in a list of paths which may either be directories or files containing rule module YAML files,
 46    and processes them into a `list`. For more information on rule modules see
 47    RulemoduleYamlSpec in `sift_py/yaml/rule.py`.
 48    """
 49
 50    rule_modules: List[RuleYamlSpec] = []
 51
 52    def update_rule_modules(rule_module_path: Path):
 53        rule_module = _read_rule_module_yaml(rule_module_path)
 54        rule_modules.extend(rule_module)
 55
 56    for path in paths:
 57        if path.is_dir():
 58            _handle_subdir(path, update_rule_modules)
 59        elif path.is_file():
 60            update_rule_modules(path)
 61
 62    return rule_modules
 63
 64
 65def _read_named_expression_module_yaml(path: Path) -> Dict[str, str]:
 66    with open(path, "r") as f:
 67        named_expressions = cast(Dict[Any, Any], yaml.safe_load(f.read()))
 68
 69        for key, value in named_expressions.items():
 70            if not isinstance(key, str):
 71                raise YamlConfigError(
 72                    f"Expected '{key}' to be a string in named expression module '{path}'."
 73                )
 74            if not isinstance(value, str):
 75                raise YamlConfigError(
 76                    f"Expected expression of '{key}' to be a string in named expression module '{path}'."
 77                )
 78
 79        return cast(Dict[str, str], named_expressions)
 80
 81
 82def _read_rule_module_yaml(path: Path) -> List[RuleYamlSpec]:
 83    with open(path, "r") as f:
 84        module_rules = cast(Dict[Any, Any], yaml.safe_load(f.read()))
 85        rules = module_rules.get("rules")
 86        if not isinstance(rules, list):
 87            raise YamlConfigError(
 88                f"Expected '{rules}' to be a list in rule module yaml: '{path}'"
 89                f"{_type_fqn(RuleYamlSpec)}"
 90            )
 91
 92        for rule in cast(List[Any], rules):
 93            _validate_rule(rule)
 94
 95        return cast(List[RuleYamlSpec], rules)
 96
 97
 98def _validate_rule(val: Any):
 99    rule = cast(Dict[Any, Any], val)
100
101    name = rule.get("name")
102
103    if not isinstance(name, str):
104        raise YamlConfigError._invalid_property(name, "- name", "str", ["rules"])
105
106    channel_references = rule.get("channel_references")
107
108    if channel_references is not None:
109        if not isinstance(channel_references, list):
110            raise YamlConfigError._invalid_property(
111                channel_references,
112                "- channel_references",
113                f"List[Dict[str, {_type_fqn(ChannelConfigYamlSpec)}]]",
114                ["rules"],
115            )
116
117        for channel_reference in cast(List[Any], channel_references):
118            _validate_channel_reference(channel_reference)
119
120    contextual_channels = rule.get("contextual_channels")
121    if contextual_channels is not None:
122        if not isinstance(contextual_channels, list):
123            raise YamlConfigError._invalid_property(
124                contextual_channels,
125                "- contextual_channels",
126                "List[str]",
127                ["rules"],
128            )
129
130        for channel in contextual_channels:
131            if not isinstance(channel, str):
132                raise YamlConfigError._invalid_property(
133                    channel,
134                    "- contextual_channels[]",
135                    "str",
136                    ["rules"],
137                )
138
139    rule_client_key = rule.get("rule_client_key")
140    description = rule.get("description")
141    expression = rule.get("expression")
142    rule_type = rule.get("type")
143    assignee = rule.get("assignee")
144    tags = rule.get("tags")
145    sub_expressions = rule.get("sub_expressions")
146    asset_names = rule.get("asset_names")
147    tag_names = rule.get("tag_names")
148
149    if rule_client_key is not None and not isinstance(rule_client_key, str):
150        raise YamlConfigError._invalid_property(
151            rule_client_key, "- rule_client_key", "str", ["rules"]
152        )
153
154    if description is not None and not isinstance(description, str):
155        raise YamlConfigError._invalid_property(description, "- description", "str", ["rules"])
156
157    if isinstance(expression, dict):
158        expression_name = cast(Dict[Any, Any], expression).get("name")
159
160        if not isinstance(expression_name, str):
161            raise YamlConfigError._invalid_property(
162                expression_name,
163                "name",
164                "str",
165                ["rules", "- expression"],
166            )
167
168    elif not isinstance(expression, str):
169        raise YamlConfigError._invalid_property(
170            expression,
171            "- expression",
172            "<class 'str'> | <class 'dict'>",
173            ["rules"],
174        )
175
176    valid_rule_types = [kind.value for kind in RuleActionAnnotationKind]
177
178    if rule_type not in valid_rule_types:
179        raise YamlConfigError._invalid_property(
180            rule_type,
181            "- type",
182            " | ".join(valid_rule_types),
183            ["rules"],
184        )
185
186    if assignee is not None and not isinstance(assignee, str):
187        raise YamlConfigError._invalid_property(
188            assignee,
189            "- assignee",
190            "str",
191            ["rules"],
192        )
193
194    if tags is not None and not isinstance(tags, list):
195        raise YamlConfigError._invalid_property(
196            tags,
197            "- tags",
198            "List[str]",
199            ["rules"],
200        )
201
202    if sub_expressions is not None:
203        if not isinstance(channel_references, list):
204            raise YamlConfigError._invalid_property(
205                channel_references,
206                "- sub_expressions",
207                "List[Dict[str, List[Dict[str, str]]]]",
208                ["rules"],
209            )
210
211        for sub_expression in cast(List[Any], sub_expressions):
212            _validate_sub_expression(sub_expression)
213
214    if asset_names is not None and not isinstance(asset_names, list):
215        raise YamlConfigError._invalid_property(
216            asset_names,
217            "- asset_names",
218            "List[str]",
219            ["rules"],
220        )
221
222    if tag_names is not None and not isinstance(tag_names, list):
223        raise YamlConfigError._invalid_property(
224            tag_names,
225            "- tag_names",
226            "List[str]",
227            ["rules"],
228        )
229
230
def _validate_sub_expression(val: Any):
    """
    Validates one entry of a rule's `sub_expressions` list.

    The entry must be a mapping, and every key must be a string matching `_SUB_EXPRESSION_REGEX`,
    i.e. a "$" followed by characters from the set `[a-zA-Z_]`. Values are not inspected.

    Raises a `YamlConfigError` if the entry is not a mapping or if any key is invalid.
    """
    # A scalar or list entry has no keys to iterate; report it as a config error.
    if not isinstance(val, dict):
        raise YamlConfigError._invalid_property(
            val,
            "- <str>",
            "Dict[str, Any]",
            ["rules", "- sub_expressions"],
        )

    sub_expression = cast(Dict[Any, Any], val)

    for key in sub_expression:
        if not isinstance(key, str):
            raise YamlConfigError._invalid_property(
                sub_expression,
                "- <str>",
                "Dict[str, Any]",
                ["rules", "- sub_expressions"],
            )

        if _SUB_EXPRESSION_REGEX.match(key) is None:
            raise YamlConfigError(
                f"Invalid sub-expression key, '{key}'. Characters must be in the character set [a-zA-Z_] and prefixed with a '$'."
            )
247
248
class RuleModuleYamlSpec(TypedDict):
    """
    Describes the top-level shape of a rule module YAML file.

    `rules`: The rules defined by the module, each shaped like a `RuleYamlSpec`.
    """

    rules: List[RuleYamlSpec]
257
258
class RuleYamlSpec(TypedDict):
    """
    The formal definition of what a single rule looks like in YAML.

    `name`: Name of the rule.
    `rule_client_key`: User-defined string-key that uniquely identifies this rule config.
    `description`: Description of rule.
    `expression`:
        Either an expression-string or a `sift_py.ingestion.config.yaml.spec.NamedExpressionYamlSpec` referencing a named expression.
    `type`: Determines the action to perform if a rule gets evaluated to true.
    `assignee`: If `type` is `review`, determines who to notify. Expects an email.
    `tags`: Tags to associate with the rule.
    `channel_references`: A list of channel references, each mapping to an actual channel. More below.
    `contextual_channels`: A list of strings identifying channels that provide context but aren't directly used in the expression.
    `sub_expressions`: A list of sub-expressions, each a mapping of a place-holder to a sub-expression. Only used if using named expressions.
    `asset_names`: A list of asset names that this rule should be applied to. ONLY VALID if defining rules outside of a telemetry config.
    `tag_names`: A list of tag names that this rule should be applied to. ONLY VALID if defining rules outside of a telemetry config.

    Channel references:
    A channel reference is a string containing a numerical value prefixed with "$". Examples include "$1", "$2", "$11", and so on.
    The channel reference is mapped to an actual channel config. In YAML it would look something like this:

    ```yaml
    channel_references:
      - $1: *vehicle_state_channel
      - $2: *voltage_channel
    contextual_channels:
      - log
    ```

    Sub-expressions:
    A sub-expression is made up of two components: A reference and the actual sub-expression. The sub-expression reference is
    a string with a "$" prepended to another string comprised of characters in the following character set: `[a-zA-Z_]`.
    Digits are not accepted in a sub-expression reference. This reference should be mapped to the actual sub-expression.
    For example, say you have kinematic equations in `kinematics.yml`, and the equation you're interested in using looks
    like the following:

    ```yaml
    kinetic_energy_gt:
      0.5 * $mass * $1 * $1 > $threshold
    ```

    To properly use `kinetic_energy_gt` in your rule, it would look like the following:

    ```yaml
    rules:
      - name: kinetic_energy
        description: Tracks high energy output while in motion
        type: review
        assignee: bob@example.com
        expression:
          name: kinetic_energy_gt
        channel_references:
          - $1: *velocity_channel
        sub_expressions:
          - $mass: 10
          - $threshold: 470
        tags:
            - nostromo
    ```
    """

    name: str
    rule_client_key: NotRequired[str]
    description: NotRequired[str]
    expression: Union[str, NamedExpressionYamlSpec]
    type: Union[Literal["phase"], Literal["review"]]
    assignee: NotRequired[str]
    tags: NotRequired[List[str]]
    channel_references: NotRequired[List[Dict[str, ChannelConfigYamlSpec]]]
    contextual_channels: NotRequired[List[str]]
    sub_expressions: NotRequired[List[Dict[str, str]]]
    asset_names: NotRequired[List[str]]
    tag_names: NotRequired[List[str]]
332
333
class NamedExpressionYamlSpec(TypedDict):
    """
    The formal definition of what a named expression should look like in YAML.

    The value of `name` may contain a mix of channel references and channel identifiers.
    Both terms are formally defined in `sift_py.ingestion.config.yaml.spec.RuleYamlSpec`.
    """

    name: str
def load_named_expression_modules(paths: List[pathlib.Path]) -> Dict[str, str]:
def load_named_expression_modules(paths: List[Path]) -> Dict[str, str]:
    """
    Reads every named expression module YAML file in `paths` and merges them into a single `dict`
    that maps each expression's name to the expression itself.

    Raises a `YamlConfigError` if the same expression name is encountered more than once across the
    loaded modules.
    """

    merged: Dict[str, str] = {}

    for module_path in paths:
        for expr_name, expr_body in _read_named_expression_module_yaml(module_path).items():
            if expr_name not in merged:
                merged[expr_name] = expr_body
                continue

            # Names must be unique across all modules being loaded together.
            raise YamlConfigError(
                f"Encountered expressions with identical names being loaded, '{expr_name}'."
            )

    return merged

Takes in a list of paths to YAML files that contain named expressions and processes them into a dict. The key is the name of the expression and the value is the expression itself. For more information on named expression modules see sift_py/yaml/rule.py.

def load_rule_modules(paths: List[pathlib.Path]) -> List[RuleYamlSpec]:
def load_rule_modules(paths: List[Path]) -> List[RuleYamlSpec]:
    """
    Collects the rules of every rule module YAML file reachable from `paths` into one flat `list`.

    Each path may be a file or a directory. Directories are handed to `_handle_subdir` together with
    a callback that loads a single rule module file. Paths that are neither a file nor a directory
    are skipped. For the shape of a rule module see `RuleModuleYamlSpec` in `sift_py/yaml/rule.py`.
    """

    collected: List[RuleYamlSpec] = []

    def _collect(module_file: Path):
        collected.extend(_read_rule_module_yaml(module_file))

    for candidate in paths:
        if candidate.is_dir():
            _handle_subdir(candidate, _collect)
            continue

        if candidate.is_file():
            _collect(candidate)

    return collected

Takes in a list of paths, each of which may be either a directory or a file containing rule module YAML, and processes them into a list. For more information on rule modules see RuleModuleYamlSpec in sift_py/yaml/rule.py.

class RuleModuleYamlSpec(typing_extensions.TypedDict):
class RuleModuleYamlSpec(TypedDict):
    """
    Describes the top-level shape of a rule module YAML file.

    `rules`: The rules defined by the module, each shaped like a `RuleYamlSpec`.
    """

    rules: List[RuleYamlSpec]

The formal definition of what a rule module looks like in YAML.

rules: A list of rules that belong to the module.

rules: List[RuleYamlSpec]
class RuleYamlSpec(typing_extensions.TypedDict):
class RuleYamlSpec(TypedDict):
    """
    The formal definition of what a single rule looks like in YAML.

    `name`: Name of the rule.
    `rule_client_key`: User-defined string-key that uniquely identifies this rule config.
    `description`: Description of rule.
    `expression`:
        Either an expression-string or a `sift_py.ingestion.config.yaml.spec.NamedExpressionYamlSpec` referencing a named expression.
    `type`: Determines the action to perform if a rule gets evaluated to true.
    `assignee`: If `type` is `review`, determines who to notify. Expects an email.
    `tags`: Tags to associate with the rule.
    `channel_references`: A list of channel references, each mapping to an actual channel. More below.
    `contextual_channels`: A list of strings identifying channels that provide context but aren't directly used in the expression.
    `sub_expressions`: A list of sub-expressions, each a mapping of a place-holder to a sub-expression. Only used if using named expressions.
    `asset_names`: A list of asset names that this rule should be applied to. ONLY VALID if defining rules outside of a telemetry config.
    `tag_names`: A list of tag names that this rule should be applied to. ONLY VALID if defining rules outside of a telemetry config.

    Channel references:
    A channel reference is a string containing a numerical value prefixed with "$". Examples include "$1", "$2", "$11", and so on.
    The channel reference is mapped to an actual channel config. In YAML it would look something like this:

    ```yaml
    channel_references:
      - $1: *vehicle_state_channel
      - $2: *voltage_channel
    contextual_channels:
      - log
    ```

    Sub-expressions:
    A sub-expression is made up of two components: A reference and the actual sub-expression. The sub-expression reference is
    a string with a "$" prepended to another string comprised of characters in the following character set: `[a-zA-Z_]`.
    Digits are not accepted in a sub-expression reference. This reference should be mapped to the actual sub-expression.
    For example, say you have kinematic equations in `kinematics.yml`, and the equation you're interested in using looks
    like the following:

    ```yaml
    kinetic_energy_gt:
      0.5 * $mass * $1 * $1 > $threshold
    ```

    To properly use `kinetic_energy_gt` in your rule, it would look like the following:

    ```yaml
    rules:
      - name: kinetic_energy
        description: Tracks high energy output while in motion
        type: review
        assignee: bob@example.com
        expression:
          name: kinetic_energy_gt
        channel_references:
          - $1: *velocity_channel
        sub_expressions:
          - $mass: 10
          - $threshold: 470
        tags:
            - nostromo
    ```
    """

    name: str
    rule_client_key: NotRequired[str]
    description: NotRequired[str]
    expression: Union[str, NamedExpressionYamlSpec]
    type: Union[Literal["phase"], Literal["review"]]
    assignee: NotRequired[str]
    tags: NotRequired[List[str]]
    channel_references: NotRequired[List[Dict[str, ChannelConfigYamlSpec]]]
    contextual_channels: NotRequired[List[str]]
    sub_expressions: NotRequired[List[Dict[str, str]]]
    asset_names: NotRequired[List[str]]
    tag_names: NotRequired[List[str]]

The formal definition of what a single rule looks like in YAML.

name: Name of the rule. rule_client_key: User-defined string-key that uniquely identifies this rule config. description: Description of rule. expression: Either an expression-string or a sift_py.ingestion.config.yaml.spec.NamedExpressionYamlSpec referencing a named expression. type: Determines the action to perform if a rule gets evaluated to true. assignee: If type is review, determines who to notify. Expects an email. tags: Tags to associate with the rule. channel_references: A list of channel references, each mapping to an actual channel. More below. contextual_channels: A list of strings identifying channels that provide context but aren't directly used in the expression. sub_expressions: A list of sub-expressions, each a mapping of a place-holder to a sub-expression. Only used if using named expressions. asset_names: A list of asset names that this rule should be applied to. ONLY VALID if defining rules outside of a telemetry config. tag_names: A list of tag names that this rule should be applied to. ONLY VALID if defining rules outside of a telemetry config.

Channel references: A channel reference is a string containing a numerical value prefixed with "$". Examples include "$1", "$2", "$11", and so on. The channel reference is mapped to an actual channel config. In YAML it would look something like this:

channel_references:
  - $1: *vehicle_state_channel
  - $2: *voltage_channel
contextual_channels:
  - log

Sub-expressions: A sub-expression is made up of two components: A reference and the actual sub-expression. The sub-expression reference is a string with a "$" prepended to another string comprised of characters in the following character set: [a-zA-Z_] (digits are not accepted). This reference should be mapped to the actual sub-expression. For example, say you have kinematic equations in kinematics.yml, and the equation you're interested in using looks like the following:

kinetic_energy_gt:
  0.5 * $mass * $1 * $1 > $threshold

To properly use kinetic_energy_gt in your rule, it would look like the following:

rules:
  - name: kinetic_energy
    description: Tracks high energy output while in motion
    type: review
    assignee: bob@example.com
    expression:
      name: kinetic_energy_gt
    channel_references:
      - $1: *velocity_channel
    sub_expressions:
      - $mass: 10
      - $threshold: 470
    tags:
        - nostromo
name: str
rule_client_key: typing_extensions.NotRequired[str]
description: typing_extensions.NotRequired[str]
expression: Union[str, NamedExpressionYamlSpec]
type: Union[Literal['phase'], Literal['review']]
assignee: typing_extensions.NotRequired[str]
tags: typing_extensions.NotRequired[typing.List[str]]
channel_references: typing_extensions.NotRequired[typing.List[typing.Dict[str, sift_py.yaml.channel.ChannelConfigYamlSpec]]]
contextual_channels: typing_extensions.NotRequired[typing.List[str]]
sub_expressions: typing_extensions.NotRequired[typing.List[typing.Dict[str, str]]]
asset_names: typing_extensions.NotRequired[typing.List[str]]
tag_names: typing_extensions.NotRequired[typing.List[str]]
class NamedExpressionYamlSpec(typing_extensions.TypedDict):
class NamedExpressionYamlSpec(TypedDict):
    """
    The formal definition of what a named expression should look like in YAML.

    The value of `name` may contain a mix of channel references and channel identifiers.
    Both terms are formally defined in `sift_py.ingestion.config.yaml.spec.RuleYamlSpec`.
    """

    name: str

A named expression. This class is the formal definition of what a named expression should look like in YAML. The value of name may contain a mix of channel references and channel identifiers.

For a formal definition of channel references and channel identifiers see the following: sift_py.ingestion.config.yaml.spec.RuleYamlSpec.

name: str