# Source code for tangl.core.registry

# tangl/core/registry.py
# language=markdown
"""
# Registries and groups (v38)

This module defines **ownership** and **membership** primitives for core.

- A **Registry** owns a set of entities and is the canonical dereference boundary for
  ID-linked structures.
- A **Group** is itself a registry member that stores *only* member UUIDs, and resolves
  those UUIDs back to entities via its owning registry.

## Two related but distinct ideas

1) **Owning boundary** (Registry)

A registry is responsible for:

- indexing members by `uid: UUID`
- selection via `Selector` (`find_one`, `find_all`, `chain_find_all`)
- structuring/unstructuring its members for persistence
- binding registry-aware items (`bind_registry`)

2) **Views over a registry** (Groups)

Groups do **not** own members. They only:

- maintain `member_ids: list[UUID]`
- provide iterators that dereference `member_ids` through the registry

This keeps un/structuring simple: members are persisted once in the registry, and groups
persist only the UUID references.

## Hook points

Registry operations accept an optional `_ctx` which higher layers may use to trigger
behavior hooks (`do_add_item`, `do_get_item`, `do_remove_item`). Core remains usable
without a dispatch system.

See Also
--------
- `tangl.core.graph.Graph` for a topology-specialized registry.
- `tangl.core.template.TemplateRegistry` for template ownership and lookup.
- `chain_find_one` was intentionally removed; use
  `next(Registry.chain_find_all(...), None)` when needed.

"""
from __future__ import annotations
from typing import Any, TypeVar, Generic, Iterator, Iterable, Optional, Self, TypeAlias
from uuid import UUID
import itertools
import logging
from functools import cached_property

from pydantic import Field, PrivateAttr, SkipValidation

from tangl.type_hints import UnstructuredData
from .entity import Entity
from .selector import Selector

# Module-level logger. Its level is pinned to WARNING here, so the
# `logger.debug(...)` calls in this module emit nothing unless the level is
# changed elsewhere.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

# Type variable for a registry's member type; bounded by `Entity`.
ET = TypeVar('ET', bound=Entity)

class Registry(Entity, Generic[ET]):
    """Indexed owning collection with selection and chaining.

    A `Registry` is core's **owning boundary** for intra-related entities. It is
    the canonical dereference mechanism for ID-linked graphs:

    - members are indexed by `uid: UUID`
    - other references should store only UUIDs and dereference through a registry

    ### Selection

    Use `find_one` / `find_all` with a `Selector` for flexible matching. Do not
    overload `get()` with fuzzy identifier logic; `get()` is strictly
    `UUID → entity`.

    ### Layering

    `chain_find_all` is core's primitive for layered composition:

    - treat multiple registries as a search chain
    - yields matching members across all registries in order

    ### Persistence

    `members` is declared with `Field(exclude=True)` so Pydantic model dumps do
    not automatically include it. `unstructure()` and `structure()` handle member
    payloads explicitly.

    `Registry.unstructure()` includes *all* members as unstructured
    constructor-form dicts. `Registry.structure()` recreates the registry and
    re-adds structured members.

    ### Duplicate IDs

    `add()` silently overwrites existing members for duplicate `uid` keys.

    ### Dispatch hooks

    Pass `_ctx` to `add`, `get`, or `remove` to allow higher layers to intercept
    operations.

    `__setitem__` intentionally raises; callers must use `add()` so
    registry-aware binding and dispatch hooks remain consistent.

    See Also
    --------
    `tangl.core.graph.Graph`, `tangl.core.template.TemplateRegistry`

    Examples
    --------
    >>> a = Entity(label="abc"); b = Entity(label="def")
    >>> r = Registry(); r.add(a); r.add(b)
    >>> len(r.members)
    2
    >>> r.get(a.uid)  # indexed by uid
    <Entity:abc>
    >>> r.all_labels() == {"abc", "def"}
    True
    >>> s = Selector.from_identifier("abc")
    >>> r.find_one(s)
    <Entity:abc>
    >>> c = Entity(label="abc")
    >>> q = Registry(); q.add(c)
    >>> list(Registry.chain_find_all(r, q, selector=s)) == [a, c]
    True
    >>> data = r.unstructure()
    >>> rr = Registry.structure(data)
    >>> len(rr.members)
    2
    >>> r is not rr and r == rr  # compare by value
    True
    >>> rr.add(Entity())  # compare by value includes members field
    >>> rr != r
    True
    """

    members: dict[UUID, ET] = Field(default_factory=dict, exclude=True)
    # exclude=True just means that structure takes care of it manually, it is
    # still included in unstructured data used by eq_by_content

    def add(self, value: ET, _ctx=None) -> None:
        """Add an entity to the registry.

        Registry-aware values are bound via `bind_registry(self)` before any
        dispatch hook runs. Duplicate UIDs overwrite existing entries. When
        `_ctx` resolves, `do_add_item` may replace the inserted item; the value
        it returns is what gets stored.
        """
        if hasattr(value, "bind_registry"):
            value.bind_registry(self)

        from .ctx import resolve_ctx
        _ctx = resolve_ctx(_ctx)
        if _ctx is not None:
            # chance to modify before inserting
            from .dispatch import do_add_item
            value = do_add_item(registry=self, item=value, ctx=_ctx)

        self.members[value.uid] = value

    def remove(self, key: UUID, _ctx=None) -> None:
        """Remove an entity by UUID.

        Missing keys are ignored: nothing is unbound and no hook is dispatched.
        Registry-aware values are unbound through `bind_registry(None)`. When
        `_ctx` resolves, `do_remove_item` is invoked with the removed item for
        post-removal inspection.
        """
        # Use `del self.members[key]` instead if a KeyError is ever wanted here.
        item = self.members.pop(key, None)
        if item is None:
            # Nothing was removed, so there is nothing to unbind or to hand to
            # the removal hook.
            return

        if hasattr(item, "bind_registry"):
            item.bind_registry(None)

        from .ctx import resolve_ctx
        _ctx = resolve_ctx(_ctx)
        if _ctx is not None:
            # chance to review before discarding
            from .dispatch import do_remove_item
            do_remove_item(registry=self, item=item, ctx=_ctx)

    def get(self, key: UUID, _ctx=None) -> Optional[ET]:
        """Return a member by UUID or ``None`` when absent.

        When `_ctx` resolves, `do_get_item` may transform the returned value
        (it is called even when the lookup produced ``None``).
        """
        # Use `self.members[key]` instead if a KeyError is ever wanted here.
        item = self.members.get(key, None)

        from .ctx import resolve_ctx
        _ctx = resolve_ctx(_ctx)
        if _ctx is not None:
            # chance to modify before returning
            from .dispatch import do_get_item
            item = do_get_item(registry=self, item=item, ctx=_ctx)
        return item

    def all_labels(self) -> set[str]:
        """Return labels for all stored members."""
        return {value.get_label() for value in self.members.values()}

    @classmethod
    def _filter_and_sort(cls, values, selector=None, sort_key=None) -> Iterator[ET]:
        """Apply optional selector filtering and optional sort ordering.

        This is a generator: no filtering or sorting happens until the result
        is iterated. Sorting materializes all filtered values first.
        """
        if selector is not None:
            values = selector.filter(values)
        if sort_key is None:
            yield from values
        else:
            yield from sorted(values, key=sort_key)

    @staticmethod
    def _ensure_selector(selector: Selector | None) -> Selector | None:
        """Validate selector-only lookup input.

        Returns the selector unchanged when it is ``None`` or a `Selector`;
        raises `TypeError` for anything else.
        """
        if selector is None or isinstance(selector, Selector):
            return selector
        raise TypeError(
            f"Registry lookup requires Selector | None, got {type(selector)!r}"
        )

    def find_all(
        self,
        selector: Selector | None = None,
        sort_key=None,
    ) -> Iterator[ET]:
        """Yield members matching an optional selector and optional sort key."""
        selector = self._ensure_selector(selector)
        values = self.members.values()
        return self._filter_and_sort(values, selector=selector, sort_key=sort_key)

    def find_one(
        self,
        selector: Selector | None = None,
        sort_key=None,
    ) -> Optional[ET]:
        """Return first match from :meth:`find_all`, or ``None``."""
        return next(self.find_all(selector, sort_key=sort_key), None)

    @classmethod
    def chain_find_all(
        cls,
        *registries: Self,
        selector: Selector | None = None,
        sort_key=None,
    ) -> Iterator[ET]:
        """Yield matches across registries in argument order.

        When `sort_key` is given, the combined matches are sorted together, so
        argument order only applies to the unsorted case.

        Use ``next(Registry.chain_find_all(...), None)`` for one-off
        first-match behavior.
        """
        selector = cls._ensure_selector(selector)
        values = itertools.chain.from_iterable(r.members.values() for r in registries)
        return cls._filter_and_sort(values, selector=selector, sort_key=sort_key)

    def unstructure(self) -> UnstructuredData:
        """Return constructor-form data including explicitly unstructured members."""
        data = super().unstructure()
        data["members"] = [value.unstructure() for value in self.members.values()]
        return data

    @classmethod
    def structure(cls, data: UnstructuredData, _ctx=None):
        """Structure a registry and re-add structured members.

        The incoming mapping is copied before `members` is popped, so the
        caller's data is not mutated.
        """
        payload = dict(data)
        _members = payload.pop("members", [])
        obj = super().structure(payload, _ctx=_ctx)  # type: Self
        for value in _members:
            obj.add(Entity.structure(value, _ctx=_ctx))
        return obj

    # Provide mapping interface

    def values(self) -> Iterable[ET]:
        """Return registry member values."""
        return self.members.values()

    def keys(self):
        """Legacy mapping alias for member UUID keys."""
        return self.members.keys()

    def items(self):
        """Legacy mapping alias for ``(uid, member)`` pairs."""
        return self.members.items()

    def clear(self) -> None:
        """Remove all members, unbinding registry-aware ones first.

        Mirrors the unbinding done by :meth:`remove` so cleared items do not
        keep a stale pointer to this registry (which would also block adding
        them to a different registry). No dispatch hooks are invoked.
        """
        for item in self.members.values():
            if hasattr(item, "bind_registry"):
                item.bind_registry(None)
        self.members.clear()

    def __len__(self) -> int:
        return len(self.members)

    def __bool__(self) -> bool:
        # An empty registry is falsy.
        return bool(self.members)

    def __iter__(self) -> Iterator[ET]:
        # Iterates values, not keys (unlike a plain dict).
        return iter(self.members.values())

    def __contains__(self, item: Any) -> bool:
        """Support both UUID-key and member-instance containment checks."""
        if isinstance(item, UUID):
            return item in self.members
        if hasattr(item, "uid"):
            # Membership is decided by uid, not by object identity or equality.
            return getattr(item, "uid") in self.members
        return item in self.members.values()

    def __getitem__(self, key: UUID):
        # Delegates to `get`, so a missing key yields None rather than KeyError.
        return self.get(key)

    def __delitem__(self, key: UUID):
        self.remove(key)

    def __setitem__(self, key, value):
        # refer to add
        raise KeyError("May not set items directly by key. Use `registry.add(item)` instead.")

    def _validate_linkable(self, item: RegistryAware) -> bool:
        """Validate that ``item`` is registry-aware and belongs to this registry.

        Returns ``True`` on success.

        Raises:
            TypeError: ``item`` is not a `RegistryAware` instance.
            ValueError: ``item`` is bound to a different registry, or its uid
                is not among this registry's members.
        """
        if not isinstance(item, RegistryAware):
            raise TypeError(f"Expected type-bound RegistryAware, got {type(item)}")
        if item.registry is not self:
            raise ValueError("Link item must belong to the same registry")
        if item.uid not in self.members:
            raise ValueError("Link item must be added to registry first")
        return True
# Additional bases for entities that can be gathered in a registry

class RegistryAware(Entity):
    """Mixin for entities managed by a single registry.

    Registry-aware entities do not store direct pointers to peer members.
    Instead, they:

    - store UUID references (e.g., `member_ids` in groups)
    - dereference through `self.registry.get(uid)` when needed

    ### Binding contract

    A registry binds itself to an item by calling `item.bind_registry(self)`
    during `Registry.add()`.

    Registry-aware items should treat the registry reference as an
    implementation detail:

    - store it as a private attribute (`PrivateAttr`) so pydantic will not copy it
    - raise if rebound to a different registry

    ### Parent convenience

    `parent` is a convenience for hierarchical grouping:

    - it returns the first `HierarchicalGroup` in the owning registry that lists
      this item as a member
    - it is meaningful only when the registry contains hierarchical groups
    - plain `EntityGroup` membership does not define a parent; non-hierarchical
      groups are bags, not ownership paths
    - it is cached and must be invalidated when membership changes
      (`_invalidate_parent_attr`)

    Example:
        >>> a = RegistryAware(); r = Registry(); r.add(a)
        >>> a.registry is r
        True
        >>> Registry().add(a)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        ValueError: Registry is already set ...
        >>> b = RegistryAware(registry=r)
        >>> b.registry is r
        True
        >>> a.registry is b.registry
        True
    """

    _registry: SkipValidation[Registry[RegistryAware]] = PrivateAttr(None)
    # do not want _registry included in unstructuring or copied on creation

    @property
    def registry(self) -> Registry[RegistryAware] | None:
        """Return the bound registry, or ``None`` when unbound."""
        # Read straight from the instance dict, which is where `bind_registry`
        # stores the pointer.
        return self.__dict__.get("_registry", None)

    def __init__(self, registry=None, graph=None, **kwargs) -> None:
        """Construct the entity and, if a registry is given, add it there.

        `graph` is accepted as an alias for `registry` and is used only when
        `registry` is ``None``.
        """
        if registry is None and graph is not None:
            # Compatibility alias: legacy graph items are commonly built with graph=...
            registry = graph
        super().__init__(**kwargs)
        if registry is not None:
            registry.add(self)

    def bind_registry(self, registry: Registry | None) -> None:
        """Bind to a registry pointer or clear binding with ``None``.

        Rebinding to the same registry is a no-op.

        Raises:
            ValueError: already bound to a different registry.
        """
        current = self.__dict__.get("_registry", None)
        if registry is None:
            self.__dict__["_registry"] = None
            return
        if current is not None and current is not registry:
            raise ValueError(f"Registry is already set {current!r} != {registry!r}")
        self.__dict__["_registry"] = registry

    def __getattr__(self, name: str) -> Any:
        """Expose registry binding even when Pydantic private attrs are not hydrated."""
        if name == "_registry":
            return self.__dict__.get("_registry", None)
        return super().__getattr__(name)

    @cached_property
    def parent(self) -> Optional[RegistryAware]:
        """Return first owning :class:`HierarchicalGroup`, if present.

        Non-hierarchical :class:`EntityGroup` membership is intentionally ignored.
        Returns ``None`` when the item is not bound to a registry. The result
        is cached (including a ``None`` result) until
        :meth:`_invalidate_parent_attr` is called.
        """
        if self.registry is None:
            return None
        selector = Selector(has_kind=HierarchicalGroup, has_member=self)
        return self.registry.find_one(selector)

    def _invalidate_parent_attr(self):
        """Drop the cached `parent` value so the next access recomputes it."""
        # On reparent
        # `parent` is typically a cached_property; pop the cached value directly
        # so property-only compatibility overrides do not raise on `delattr`.
        self.__dict__.pop("parent", None)


RT: TypeAlias = RegistryAware


class EntityGroup(RegistryAware):
    """A registry member that provides a UUID-based view over peer members.

    An `EntityGroup` is itself stored *in* a registry and refers to other
    members of that same registry by UUID.

    - Groups do not own members.
    - Group membership is persisted as `member_ids: list[UUID]`.
    - `members()` dereferences each UUID through the registry.

    This pattern avoids deep nesting during structuring/unstructuring and keeps
    identity and persistence straightforward.

    Example:
        >>> reg = Registry()
        >>> a = RegistryAware(label="abc"); reg.add(a)
        >>> d = RegistryAware(label="def"); reg.add(d)
        >>> g = RegistryAware(label="ghi"); reg.add(g)
        >>> e = EntityGroup(); reg.add(e)
        >>> e.add_members(a, d, g)
        >>> list(e.members())
        [<RegistryAware:abc>, <RegistryAware:def>, <RegistryAware:ghi>]
        >>> a = e.member(Selector.from_identifier("abc"))
        >>> assert e.has_member(a)
        >>> e.remove_member(a)
        >>> assert not e.has_member(a)
        >>> assert list(e.members()) == [d, g]
    """

    member_ids: list[UUID] = Field(default_factory=list)

    def member(self, selector: Selector | None = None, sort_key=None) -> Optional[RT]:
        """Return first dereferenced member matching optional selector, or ``None``."""
        return next(self.members(selector, sort_key=sort_key), None)

    def members(self, selector: Selector | None = None, sort_key=None) -> Iterator[RT]:
        """Yield dereferenced members, optionally filtered and sorted.

        Member ids that no longer resolve in the registry are skipped.

        Raises:
            TypeError: `selector` is neither ``None`` nor a `Selector`.
            ValueError: the group is not bound to a registry.
        """
        selector = Registry._ensure_selector(selector) or Selector()
        if self.registry is None:
            raise ValueError("Group registry is not set")
        # Skip only ids that fail to resolve; compare against None explicitly
        # so a resolved-but-falsy member is not silently dropped.
        items = (
            item
            for uid in self.member_ids
            if (item := self.registry.get(uid)) is not None
        )
        return self.registry._filter_and_sort(items, selector=selector, sort_key=sort_key)

    def add_member(self, item: RT) -> None:
        """Append ``item`` to this group's membership.

        The item must be registry-aware and already added to the same registry
        as this group. No parent bookkeeping is done here; see
        :class:`HierarchicalGroup` for reparenting.

        Raises:
            ValueError: ``item`` is the group itself, the group is unbound, or
                the item fails the registry's linkability checks.
            TypeError: ``item`` is not `RegistryAware`.
        """
        if item is self:
            raise ValueError("Group cannot add itself to itself")
        if self.registry is None:
            # Same error `members()` raises for an unbound group, instead of an
            # AttributeError on None.
            raise ValueError("Group registry is not set")
        if self.registry._validate_linkable(item):
            self.member_ids.append(item.uid)

    def add_members(self, *items: RT) -> None:
        """Add multiple members."""
        for item in items:
            self.add_member(item)

    def remove_member(self, item: RT) -> None:
        """Remove ``item`` from membership; no-op for ``None`` or non-members."""
        if item is not None and item.uid in self.member_ids:
            self.member_ids.remove(item.uid)

    def has_member(self, item: RT) -> bool:
        """Return membership by UID (delegates to ``__contains__``)."""
        # for selection criteria, uses __contains__ compare-by-uid
        found = item in self
        logger.debug(f"{self!r}: checking has_member({item!r}) = {found}")
        return found

    def __iter__(self) -> Iterator[RegistryAware]:
        # Iterates dereferenced members (requires a bound registry).
        return iter(self.members())

    def __contains__(self, item: RegistryAware) -> bool:
        # although this is better than iter, b/c it doesn't have to deref id's
        return item.uid in self.member_ids

    def unstructure(self) -> UnstructuredData:
        """Include ``member_ids`` explicitly for stable round-tripping."""
        data = super().unstructure()
        if self.member_ids:
            # Store a copy so later membership changes do not alter the payload.
            data["member_ids"] = list(self.member_ids)
        return data


class HierarchicalGroup(EntityGroup):
    """A group that supports parent/child nesting via group membership.

    `HierarchicalGroup` is an `EntityGroup` with an additional convention:

    - a child may belong to **at most one** parent at a time
    - re-parenting is implemented as: remove from old parent → add to new parent

    ### Derived hierarchy properties

    - `parent`: cached lookup of the first `HierarchicalGroup` that lists this
      group as a member
    - `root`: ascend parents until `None`
    - `ancestors`: `[self, parent, grandparent, ...]`
    - `path`: dotted label path from root (`root.child.grandchild`)

    These are convenience properties intended for scripts and navigation. They
    rely on correct invalidation of the cached `parent` when membership changes.

    Example:
        >>> r = Registry()
        >>> g = HierarchicalGroup(label="g", registry=r)
        >>> h = HierarchicalGroup(label="h", registry=r)
        >>> g.add_child(h)
        >>> assert h.parent is g
        >>> assert h.path == "g.h"
        >>> assert h.ancestors == [h, g]
        >>> g.remove_child(h)
        >>> assert h.parent is None
    """

    # wraps member ops with parent management

    def add_member(self, item: RT) -> None:
        """Add child membership with automatic reparenting.

        The item is validated *before* it is detached from any current parent,
        so a rejected add leaves the existing hierarchy untouched.

        Raises:
            ValueError: ``item`` is the group itself, the group is unbound, or
                the item fails the registry's linkability checks.
            TypeError: ``item`` is not `RegistryAware`.
        """
        # forces re-parenting, or could throw an exception instead
        logger.debug(f"{self!r}: adding member({item!r})")

        # Run the same checks the base implementation performs, but up front:
        # detaching first and failing afterwards would orphan the item.
        if item is self:
            raise ValueError("Group cannot add itself to itself")
        if self.registry is None:
            raise ValueError("Group registry is not set")
        if not self.registry._validate_linkable(item):
            return

        if item.parent is not None:
            # Remove also invalidates item's parent
            item.parent.remove_child(item)
        else:
            # Just invalidate the None parent
            item._invalidate_parent_attr()
        return super().add_member(item)

    def remove_member(self, item: RT) -> None:
        """Remove child membership and invalidate cached parent."""
        if item is not None and item.uid in self.member_ids:
            logger.debug(f"{self!r}: removing member {item!r} from parent {item.parent!r}")
            item._invalidate_parent_attr()
        super().remove_member(item)

    # Aliases for membership ops -> children ops

    def children(self, selector: Selector | None = None, sort_key=None) -> Iterator[RT]:
        """Alias of :meth:`members` for hierarchy semantics."""
        return self.members(selector=selector, sort_key=sort_key)

    def add_child(self, item: RT):
        """Alias of :meth:`add_member`."""
        self.add_member(item)

    def remove_child(self, item: RT):
        """Alias of :meth:`remove_member`."""
        self.remove_member(item)

    @property
    def root(self) -> RT:
        """Return the topmost ancestor (``self`` when there is no parent)."""
        root = self
        while root.parent is not None:
            root = root.parent
        return root

    @property
    def ancestors(self) -> list[RT]:
        """Return ``[self, parent, grandparent, ...]`` up to the root."""
        node = self
        result = [self]
        while node.parent is not None:
            node = node.parent
            result.append(node)
        return result

    @property
    def path(self) -> str:
        """Return the dotted label path from the root down to this group."""
        # Explicit None check, matching `root`/`ancestors`, so the prefix does
        # not depend on the parent's truthiness.
        if self.parent is not None:
            return f"{self.parent.path}.{self.get_label()}"
        return self.get_label()