Source code for onyo.lib.items

from __future__ import annotations

from collections import UserDict
from copy import deepcopy
from pathlib import Path
from typing import TYPE_CHECKING

from ruamel.yaml import CommentedMap  # pyre-ignore[21]

from onyo.lib.consts import (
    ANCHOR_FILE_NAME,
    ASSET_DIR_FILE_NAME,
    RESERVED_KEYS,
)
from onyo.lib.pseudokeys import (
    PSEUDO_KEYS,
    PSEUDOKEY_ALIASES,
    PseudoKey,
)
from onyo.lib.utils import (
    dict_to_yaml,
    yaml_to_dict,
)


if TYPE_CHECKING:
    from typing import (
        Any,
        Generator,
        Mapping,
        TypeVar,
    )
    from onyo.lib.onyo import OnyoRepo

    _KT = TypeVar("_KT")  # Key type
    _VT = TypeVar("_VT")  # Value type


def resolve_alias(key: _KT,
                  alias_map: Mapping[str, str] | None = None) -> _KT:
    r"""Return the target key of a key alias.

    Aliases are followed transitively (an alias of an alias is resolved to
    the final target). A key that is not an alias is returned unchanged.

    Parameters
    ----------
    key
        Key name to resolve.
    alias_map
        Dictionary mapping aliases to key names.

    Raises
    ------
    ValueError
        If following the aliases loops back to an alias that was already
        visited (e.g. ``{'a': 'b', 'b': 'a'}``), so no final target exists.
    """

    alias_map = {} if alias_map is None else alias_map

    # Iterate instead of recursing: a cyclic alias map would otherwise
    # exhaust the stack with an unhelpful RecursionError.
    visited = []
    current = key
    while True:
        try:
            target = alias_map[current]  # pyre-ignore[6]
        except KeyError:
            # not an alias (anymore): this is the final key
            return current  # pyre-ignore[7]
        if target == current:
            # an alias pointing at itself is its own target
            return current  # pyre-ignore[7]
        visited.append(current)
        if target in visited:
            chain = ' -> '.join(str(k) for k in [*visited, target])
            raise ValueError(f"Circular key alias: {chain}")
        current = target
[docs] class ItemSpec(UserDict): r"""Nested dictionaries of static instructions to create an ``Item``. Compared to a dictionary, the primary features are: * load YAML (e.g. ``ItemSpec(Path('file.yaml').read_text())`` * dump YAML (e.g. ``spec.yaml()``) * equality including YAML comments (e.g. ``ItemSpec() == ItemSpec()``) * dot notation (e.g. ``spec['nested.dict.key']``) * alias resolution In contrast to ``Item``, an ``ItemSpec`` is entirely static. It is not associated with a repository, and thus has no sanity checks nor pseudokey lookup capabilities. Alias resolution is possible, but the mapping must be provided manually. Multidimensional dictionaries are traversed using a dot as the delimiter. In other words, it provides a view of the flattened dictionary:: > d = {'key': 'value', 'nested': {'key': 'another value'}} > spec = ItemSpec(d) > spec['nested.key'] 'another value' > list(spec.keys()) ['key', 'nested.key'] Iteration only considers the flattened view. Keys that contain a dictionary are not yielded when using ``.keys()``, ``.values()``, and ``.items()``. The underlying dictionary is available via the ``.data`` attribute when the standard Python behavior is needed. """
[docs] def __init__(self, spec: Mapping[_KT, _VT] | str | None = None, alias_map: Mapping[str, str] | None = None, **kwargs: _VT) -> None: r"""Initialize an ItemSpec. Parameters ---------- spec Dictionary or YAML string to load. alias_map Dictionary mapping aliases to key names. """ self._alias_map: Mapping[str, str] = {} if alias_map is None else alias_map if isinstance(spec, (ItemSpec, Item, Path)): raise ValueError(f'ItemSpec does not accept {type(spec)}') if isinstance(spec, str): # TODO: unlike other input methods, this does /not/ do alias # resolution on init spec = yaml_to_dict(spec) match spec: case CommentedMap(): # TODO: unlike other input methods, this does /not/ do alias # resolution on init super().__init__() # direct assignment to retain comments self.data = deepcopy(spec) self.update(**kwargs) case _: super().__init__(spec, **kwargs)
def __contains__(self, key: _KT) -> bool: r"""Whether ``key`` is in self. Unlike iteration over keys, keys that contain a dictionary are matchable and return ``True``. """ key = resolve_alias(key, alias_map=self._alias_map) try: self.__getitem__(key) return True except KeyError: return False def __delitem__(self, key: _KT) -> None: r"""Remove a ``key`` from self.""" key = resolve_alias(key, alias_map=self._alias_map) if isinstance(key, str): parts = key.split('.') effective_dict = self.data if len(parts) > 1: for lvl in range(len(parts) - 1): try: effective_dict = effective_dict[parts[lvl]] except KeyError as e: raise KeyError(f"'{'.'.join(parts[:lvl + 1])}'") from e del effective_dict[parts[-1]] else: super().__delitem__(key) def __eq__(self, other: Any) -> bool: r"""Whether another ItemSpec/Item and self have the same content, comments, and paths. Pseudokeys are ignored with the exception of: - `'onyo.is.asset'` - `'onyo.is.directory'` - `'onyo.path.absolute'` - `'onyo.path.file'` - `'onyo.path.name'` - `'onyo.path.relative'` """ if not isinstance(other, (ItemSpec, Item)): return False # NOTE: 'onyo.path.file' is checked first because it actually covers all # other tests. The other keys are kept to be self-documenting and # to protect against future implementation changes causing bugs. 
pseudo_keys_to_check = [ 'onyo.path.file', 'onyo.is.asset', 'onyo.is.directory', 'onyo.path.absolute', 'onyo.path.name', 'onyo.path.relative', ] for k in pseudo_keys_to_check: if self.get(k, None) != other.get(k, None): return False return self.equal_content(other) def __getitem__(self, key: _KT) -> Any: r"""Get the value of a key.""" key = resolve_alias(key, alias_map=self._alias_map) if isinstance(key, str): parts = key.split('.') effective_dict = self.data if len(parts) > 1: for lvl in range(len(parts) - 1): try: effective_dict = effective_dict[parts[lvl]] except KeyError as e: raise KeyError(f"'{'.'.join(parts[:lvl + 1])}'") from e except TypeError as e: raise KeyError(f"'{'.'.join(parts[:lvl])}' is not a dictionary.") from e try: return effective_dict[parts[-1]] except KeyError as e: raise KeyError(f"'{key}'") from e except TypeError as e: raise KeyError(f"'{'.'.join(parts[:-1])}' is not a dictionary.") from e return super().__getitem__(key) def __iter__(self) -> Generator[str, None, None]: r"""Return the iterator. A by-product of dot notation is that all keys are strings, regardless of their original type in the underlying dictionary. Keys that contain a dictionary are not yielded. """ return self._keys() def __len__(self) -> int: r"""Return the number of keys in the dot notation view. Keys that contain a dictionary are not counted. """ return len(list(self._keys())) def __setitem__(self, key: _KT, value: _VT) -> None: r"""Set the value of a key. Keys that are strings are interpreted for dot notation, and intermediate dictionaries are created as needed. 
""" key = resolve_alias(key, alias_map=self._alias_map) if isinstance(key, str): parts = key.split('.') effective_dict = self.data if len(parts) > 1: for lvl in range(len(parts) - 1): try: effective_dict = effective_dict[parts[lvl]] except KeyError: # nested dict doesn't exist yet effective_dict[parts[lvl]] = dict() effective_dict = effective_dict[parts[lvl]] effective_dict[parts[-1]] = value else: super().__setitem__(key, value) def _keys(self) -> Generator[str, None, None]: r"""Yield all keys recursively from nested dictionaries in dot notation. A by-product of dot notation is that all keys are strings, regardless of their original type in the underlying dictionary. Keys that contain a dictionary not yielded. """ def recursive_keys(d: dict): for k in d.keys(): if hasattr(d[k], "keys"): yield from (k + "." + sk for sk in recursive_keys(d[k])) else: # Cast as a string. One can't have a key 'some.1.more', # where 1 remains an integer. yield str(k) yield from recursive_keys(self.data)
[docs] def equal_content(self, other: ItemSpec | Item) -> bool: r"""Whether another ItemSpec/Item and self have the same content and comments. Pseudokeys are ignored entirely. Parameters ---------- other Item to compare with self. """ return self.yaml(exclude=RESERVED_KEYS) == other.yaml(exclude=RESERVED_KEYS)
[docs] def get(self, # pyre-ignore[14] key: _KT, default: Any = None) -> Any: r"""Return the value of ``key`` if it's in the dictionary, otherwise ``default``.""" key = resolve_alias(key, alias_map=self._alias_map) return super().get(key, default=default)
[docs] def yaml(self, exclude: list | None = None) -> str: r"""Get the stringified YAML including content and comments. Parameters ---------- exclude Keys to exclude from the output. By default, none are excluded. """ exclude = exclude or [] # deepcopy to keep comments content = deepcopy(self) for key in exclude: # TODO: resolve_alias()? if key in content: del content[key] return dict_to_yaml(content.data)
[docs] class Item(ItemSpec): r"""An item that an :py:class:`onyo.lib.inventory.Inventory` can potentially track. In contrast to an ``ItemSpec``, an ``Item`` is associated with a repository. Thus, it gains the ability to lookup/generate pseudokey values and the automatic resolution of user-defined key aliases. On initialization, sanity checks are performed on the provided pseudokey-related values (e.g. all ``'onyo.path'`` are consistent, generate the asset name from keys, etc). However, once instantiated, ``Item`` allows changes to be made and makes no assurances about the consistency of the information stored in it. For example, modifying ``onyo.is.directory`` or a key used in the asset name will not automatically update the value of ``onyo.path.file``. It is the responsibility of those modifying values to clear caches and ensure consistency. It is safest to use the operations available in :py:class:`onyo.lib.inventory.Inventory` to modify Items. """
[docs] def __init__(self, item: Mapping[_KT, _VT] | Path | None = None, repo: OnyoRepo | None = None, **kwargs: _VT) -> None: r"""Initialize an Item.""" # TODO: # - accept only Item, ItemSpec, or Path (no dict or str) # - repo is required # - sanity check of incoming Path or ItemSpec (specifically # path-related keys) super().__init__() self.repo: OnyoRepo | None = repo self._path: Path | None = None self._alias_map: Mapping[str, str] = PSEUDOKEY_ALIASES self.data = CommentedMap() self.update(PSEUDO_KEYS) match item: # TODO: BUG: Item/ItemSpec cases are incorrect. Direct assignment to self.data kills the pseudokeys # loaded above rather than only updating the ones that are specified in the incoming object. case Item(): self._path = item._path self.data = deepcopy(item.data) case ItemSpec(): self.data = deepcopy(item.data) case Path(): assert item.is_absolute() # currently no support for relative. This is how all existing code should work ATM. self._path = item self.update_from_path(item) case _ if item is not None: self.update(item) if kwargs: self.update(**kwargs)
def __getitem__(self, key: _KT) -> Any: r"""Get the value of a ``key``. The initializer methods are referenced in the mapping :py:data:`onyo.lib.pseudokeys.PSEUDO_KEYS`. They are called on-demand, when a pseudo-key is first accessed. This allows to distinguish a meaningful ``None`` (<unset>) from a not yet evaluated pseudo-key. """ value = super().__getitem__(key) if key in PSEUDO_KEYS and isinstance(value, PseudoKey): # Value still is the pseudo-key definition. # Query and set the response as the new value. new_value = value.implementation(self) self[key] = new_value return new_value return value def _fill_created(self, key: str | None = None) -> str | None: r"""Initializer for the ``'onyo.was.created'`` pseudo-keys. The entire ``'onyo.was.created'`` dict is initialized, regardless of which (if any) ``key`` is requested. Parameters ---------- key Name of pseudo-key to get the value of. """ # TODO: This is based on `git log --follow <path>`. The first appearance # of 'new_assets'/'new_directories' should be the match. However, # this is known to be problematic, both due to `git` and if the # Python interface was used. A more robust solution will involve a # more involved parsing of Inventory Operation records, and # tracing history and moves, etc. if self['onyo.is.template']: # Templates aren't tracked by inventory operations (only in git). # Thus there are no operations records to be parsed. return None if self.repo and self['onyo.path.absolute']: for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16] if 'operations' in commit: if (self['onyo.is.asset'] and commit['operations']['new_assets']) or \ (self['onyo.is.directory'] and commit['operations']['new_directories']): self['onyo.was.created'] = commit.data return commit[key] if key else None return None def _fill_modified(self, key: str | None = None) -> str | None: r"""Initializer for the ``'onyo.was.modified'`` pseudo-keys. 
The entire ``'onyo.was.modified'`` dict is initialized, regardless of which (if any) ``key`` is requested. Parameters ---------- key Name of pseudo-key to get the value of. """ # TODO: see `fill_created()` todo. if self['onyo.is.template']: # Templates aren't tracked by inventory operations (only in git). # Thus there are no operations records to be parsed. return None if self.repo and self['onyo.path.absolute']: for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16] if 'operations' in commit: if (self['onyo.is.asset'] and (commit['operations']['modify_assets'] or commit['operations']['new_assets'])) or \ (self['onyo.is.directory'] and (commit['operations']['new_directories'] or commit['operations']['move_directories'] or commit['operations']['rename_directories'])): self['onyo.was.modified'] = commit.data return commit[key] if key else None return None def _get_path_absolute(self) -> Path | None: r"""Initializer for the ``'onyo.path.absolute'`` pseudo-key.""" if self.repo and self._path and self._path.name == ASSET_DIR_FILE_NAME: return self._path.parent return self._path def _get_path_file(self) -> Path | None: r"""Initializer for the ``'onyo.path.file'`` pseudo-key.""" if self.repo and self['onyo.path.relative']: if self['onyo.is.directory']: if self['onyo.is.asset']: return self['onyo.path.relative'] / ASSET_DIR_FILE_NAME return self['onyo.path.relative'] / ANCHOR_FILE_NAME return self['onyo.path.relative'] return None def _get_path_name(self) -> str | None: r"""Initializer for the ``'onyo.path.name'`` pseudo-key.""" if self['onyo.path.absolute']: return self['onyo.path.absolute'].name return None def _get_path_parent(self) -> Path | None: r"""Initializer for the ``'onyo.path.parent'`` pseudo-key.""" if self.repo and self['onyo.path.relative']: return self['onyo.path.relative'].parent return None def _get_path_relative(self) -> Path | None: r"""Initializer for the ``'onyo.path.relative'`` pseudo-key.""" if self.repo and 
self['onyo.path.absolute']: try: return self['onyo.path.absolute'].relative_to(self.repo.git.root) # pyre-ignore[16] except ValueError: # return None (translates to '<unset>') if relative_to() fails b/c path is outside repo. pass return None def _is_asset(self) -> bool | None: r"""Initializer for the ``'onyo.is.asset'`` pseudo-key.""" if not self.repo or not self._path: return None # True, if the item is either an existing asset in the inventory or # it's representing "instructions" for creating one. # The latter implies it has non-pseudo-keys, or it is specifying "onyo.is.asset" # itself in which case this implementation here will be overruled anyway. return self.repo.is_asset_path(self._path) or \ any(k not in PSEUDO_KEYS for k in self.keys()) def _is_directory(self) -> bool | None: r"""Initializer for the ``'onyo.is.directory'`` pseudo-key.""" if not self.repo or not self._path: return None # True, if it's either an existing inventory dir or a template dir. # TODO: `is_dir()` should be looking up git-committed dirs instead. -> Property at OnyoRepo return self.repo.is_inventory_dir(self._path) or (self._path.is_dir() and self["onyo.is.template"]) # pyre-ignore[16] def _is_empty(self) -> bool | None: r"""Initializer for the ``'onyo.is.empty'`` pseudo-key.""" if self['onyo.is.directory'] and self.repo and self._path: # TODO: This likely can be faster when redoing/enhancing caching of repo paths. return not any(p.parent == self._path for p in self.repo.asset_paths) return None def _is_template(self) -> bool | None: r"""Initializer for the ``'onyo.is.template'`` pseudo-key.""" if not self.repo or not self._path: return None return self._path == self.repo.template_dir or self.repo.template_dir in self._path.parents # pyre-ignore[16]
[docs] def update_from_path(self, path: Path) -> None: r"""Update the internal dictionary with key/values from a YAML file. YAML comments are preserved on a best-effort basis. There is no straightforward way to merge YAML comments, and thus ones from ``path`` may overwrite internal ones. Parameters ---------- path Path of YAML file to update from. """ from onyo.lib.utils import get_asset_content self._path = path if self.repo and self.repo.is_asset_path(path): loader = self.repo.get_asset_content elif path.is_file(): loader = get_asset_content elif (path / ASSET_DIR_FILE_NAME).is_file(): # This is a hack. Existing functionality for asset dirs does not consider templates. # All the `is_asset_*` logic doesn't apply, since template files live outside of actual inventory paths and # even possibly outside the repo altogether. Consider rewriting this in context of `ItemSpec`. # Also needs to set "onyo.is.directory" and "onyo.path.file" accordingly if applicable. loader = get_asset_content path = path / ASSET_DIR_FILE_NAME else: return map_from_file = loader(path) self.update(map_from_file) if hasattr(map_from_file, 'copy_attributes'): # We got a (subclass of) ruamel.yaml.CommentBase. # Copy the attributes re comments, format, etc. for roundtrip. map_from_file.copy_attributes(self.data) # pyre-ignore[16]
[docs] def yaml(self, exclude: list | None = None) -> str: r"""Get the stringified YAML including content and comments. Parameters ---------- exclude Keys to exclude from the output. By default, all :py:data:`onyo.lib.consts.RESERVED_KEYS` (e.g. pseudokeys) are excluded. """ exclude = exclude or RESERVED_KEYS return super().yaml(exclude)
# TODO/Notes for next PR(s):
# - Bug/Missing feature: pseudo-keys that are supposed to be settable by commands are not yet
#   ensured to return the bool/Path objects that the codebase acts upon when their values come in
#   from the CLI or (template-)files, since everything is stringified now.
# - We need plain files/directories represented for --yaml.
# - Values in templates may need to be evaluated (matching expressions or even plugin calls), see #714.
# - Path attribute caching at git/onyo layers (is_file, etc.):
#   We actually know that everything we get from
#   git ls-tree is in fact a file or symlink. And we can derive
#   dirs from that path list (.anchor). That may be a lot faster.
#   Implement cache dict at GitRepo level.