Source code for confluence_content_parser.document

from __future__ import annotations

from typing import Any, TypeVar, overload

from pydantic import BaseModel, Field

from .nodes import Node

# Type variables, each bound to Node. The ``find_all`` overloads on
# ConfluenceDocument use one per positional node-type argument (up to five)
# so that every returned list is typed with the class that was requested.
T1 = TypeVar("T1", bound=Node)
T2 = TypeVar("T2", bound=Node)
T3 = TypeVar("T3", bound=Node)
T4 = TypeVar("T4", bound=Node)
T5 = TypeVar("T5", bound=Node)


class ConfluenceDocument(BaseModel):
    """Container for a parsed Confluence document.

    Holds the root of the node tree (if any) together with a free-form
    metadata mapping, and offers shortcuts for text extraction and node
    lookup that tolerate a missing root.
    """

    # Root of the parsed node tree; None when the document has no content.
    root: Node | None = None
    # Arbitrary key/value metadata; every instance gets its own empty dict.
    metadata: dict[str, Any] = Field(default_factory=dict)

    @property
    def text(self) -> str:
        """Return the document text, one paragraph per top-level child.

        Each direct child of the root is rendered with ``to_text()`` and
        stripped of surrounding whitespace; children that render to an empty
        string are dropped. The remaining pieces are joined with a blank
        line. A document without a root yields an empty string.
        """
        if not self.root:
            return ""

        rendered = (child.to_text().strip() for child in self.root.get_children())
        return "\n\n".join(piece for piece in rendered if piece)

    @overload
    def find_all(self) -> list[Node]: ...

    @overload
    def find_all(self, node_type: type[T1]) -> list[T1]: ...

    @overload
    def find_all(self, t1: type[T1], t2: type[T2]) -> tuple[list[T1], list[T2]]: ...

    @overload
    def find_all(
        self, t1: type[T1], t2: type[T2], t3: type[T3]
    ) -> tuple[list[T1], list[T2], list[T3]]: ...

    @overload
    def find_all(
        self, t1: type[T1], t2: type[T2], t3: type[T3], t4: type[T4]
    ) -> tuple[list[T1], list[T2], list[T3], list[T4]]: ...

    @overload
    def find_all(
        self, t1: type[T1], t2: type[T2], t3: type[T3], t4: type[T4], t5: type[T5]
    ) -> tuple[list[T1], list[T2], list[T3], list[T4], list[T5]]: ...

    def find_all(self, *node_types) -> Any:  # type: ignore[no-untyped-def,misc]
        """Collect nodes from the document, optionally filtered by class.

        When a root exists the call is forwarded unchanged to
        ``self.root.find_all``. Without a root, an empty result of the
        matching shape is produced instead.

        Args:
            *node_types: Zero, one, or several node classes to look for.

        Returns:
            - No arguments: ``list[Node]`` containing every node.
            - One class: ``list[T]`` of nodes of that class.
            - Several classes: a tuple holding one list per requested class,
              in the order the classes were given.

        Examples:
            # Every node
            all_nodes = document.find_all()

            # One class
            headings = document.find_all(HeadingElement)

            # Several classes at once
            headings, panels = document.find_all(HeadingElement, PanelMacro)
        """
        if not self.root:
            # Zero or one requested type maps to a single (empty) list;
            # several types map to a tuple with one empty list per type.
            if len(node_types) <= 1:
                return []
            return tuple([] for _ in node_types)

        return self.root.find_all(*node_types)

    def walk(self) -> list[Node]:
        """Return every node yielded by the root's ``walk()`` as a list.

        A document without a root yields an empty list.
        """
        if not self.root:
            return []
        return list(self.root.walk())