from __future__ import annotations
import html
from abc import ABC
from collections.abc import Iterator
from enum import Enum
from typing import Any, TypeVar, overload
from pydantic import BaseModel, Field
# Type variables, each bound to Node. The Node.find_all overloads use one per
# requested node class so every returned list is typed with its own class.
T1 = TypeVar("T1", bound="Node")
T2 = TypeVar("T2", bound="Node")
T3 = TypeVar("T3", bound="Node")
T4 = TypeVar("T4", bound="Node")
T5 = TypeVar("T5", bound="Node")
[docs]
class Node(BaseModel, ABC):
    """Common root of every content node in the Confluence document tree."""

    # Subclasses set this to True when they should be treated as block-level.
    is_block_level: bool = False

    def walk(self) -> Iterator[Node]:
        """Yield this node first, then each descendant, depth-first."""
        yield self
        for subtree_root in self.get_children():
            yield from subtree_root.walk()

    def get_children(self) -> list[Node]:
        """Return the direct children; the base implementation has none."""
        return []

    def to_text(self) -> str:
        """Return the text form of this node; the base implementation is empty."""
        return ""

    @overload
    def find_all(self) -> list[Node]: ...
    @overload
    def find_all(self, node_type: type[T1]) -> list[T1]: ...
    @overload
    def find_all(self, t1: type[T1], t2: type[T2]) -> tuple[list[T1], list[T2]]: ...
    @overload
    def find_all(self, t1: type[T1], t2: type[T2], t3: type[T3]) -> tuple[list[T1], list[T2], list[T3]]: ...
    @overload
    def find_all(
        self, t1: type[T1], t2: type[T2], t3: type[T3], t4: type[T4]
    ) -> tuple[list[T1], list[T2], list[T3], list[T4]]: ...
    @overload
    def find_all(
        self, t1: type[T1], t2: type[T2], t3: type[T3], t4: type[T4], t5: type[T5]
    ) -> tuple[list[T1], list[T2], list[T3], list[T4], list[T5]]: ...

    def find_all(self, *node_types) -> Any:  # type: ignore[no-untyped-def,misc]
        """Collect nodes from this subtree, optionally filtered by class.

        The subtree is visited in ``walk()`` order and includes this node.

        Args:
            *node_types: Zero, one, or several node classes to match with
                ``isinstance``.

        Returns:
            - No arguments: a list of every node in the subtree.
            - One class: a list of the nodes that are instances of it.
            - Several classes: a tuple holding one list per class, in argument
              order. A node matching more than one class appears in each
              matching list.

        Examples:
            all_nodes = node.find_all()
            headings = node.find_all(HeadingElement)
            headings, panels = node.find_all(HeadingElement, PanelMacro)
        """
        if not node_types:
            return list(self.walk())

        if len(node_types) == 1:
            (wanted,) = node_types
            return [candidate for candidate in self.walk() if isinstance(candidate, wanted)]

        # One bucket per requested class, filled in a single traversal.
        buckets: tuple[list[Node], ...] = tuple([] for _ in node_types)
        for candidate in self.walk():
            for bucket, wanted in zip(buckets, node_types):
                if isinstance(candidate, wanted):
                    bucket.append(candidate)
        return buckets
[docs]
class ContainerElement(Node):
    """Base class for nodes that hold an ordered list of child nodes."""

    children: list[Node] = Field(default_factory=list)
    styles: dict[str, str] = Field(default_factory=dict)

    def get_children(self) -> list[Node]:
        """Return the stored child list itself (not a copy)."""
        return self.children

    def to_text(self) -> str:
        """Join the stripped, non-empty text of every child.

        Children are separated by a blank line when at least one child is
        block-level, and by a single space otherwise.
        """
        pieces = [stripped for child in self.children if (stripped := child.to_text().strip())]
        separator = "\n\n" if self._has_block_children() else " "
        return separator.join(pieces)

    def _has_block_children(self) -> bool:
        """Return True when at least one direct child is flagged block-level."""
        for child in self.children:
            if child.is_block_level:
                return True
        return False
[docs]
class Fragment(ContainerElement):
    """Neutral, non-rendering container for several top-level nodes.

    Adds nothing to ContainerElement; it only provides a distinct class.
    """
[docs]
class LayoutSectionType(Enum):
    """Kinds of layout section; each member's value is its string identifier."""

    # NOTE(review): this is the only value spelled with a hyphen rather than an
    # underscore — presumably it mirrors the upstream identifier; confirm.
    SINGLE = "single"
    FIXED_WIDTH = "fixed-width"
    TWO_EQUAL = "two_equal"
    TWO_LEFT_SIDEBAR = "two_left_sidebar"
    TWO_RIGHT_SIDEBAR = "two_right_sidebar"
    THREE_EQUAL = "three_equal"
    THREE_WITH_SIDEBARS = "three_with_sidebars"
    THREE_LEFT_SIDEBARS = "three_left_sidebars"
    THREE_RIGHT_SIDEBARS = "three_right_sidebars"
    FOUR_EQUAL = "four_equal"
    FIVE_EQUAL = "five_equal"
[docs]
class LayoutElement(ContainerElement):
    """A page layout container containing sections."""

    # Flags the node as block-level for container text joining.
    is_block_level: bool = True
[docs]
class LayoutSection(ContainerElement):
    """A layout section (row) containing cells."""

    # Required: which LayoutSectionType this row uses.
    section_type: LayoutSectionType
    # Optional breakout settings; both default to None.
    breakout_mode: str | None = None
    breakout_width: str | None = None
    # Flags the node as block-level for container text joining.
    is_block_level: bool = True
[docs]
class LayoutCell(ContainerElement):
    """A layout cell (column) containing content."""

    # Flags the node as block-level for container text joining.
    is_block_level: bool = True
[docs]
class HeadingType(Enum):
    """Kinds of heading element; values are the strings "h1" through "h6"."""

    H1 = "h1"
    H2 = "h2"
    H3 = "h3"
    H4 = "h4"
    H5 = "h5"
    H6 = "h6"
[docs]
class HeadingElement(ContainerElement):
    """A heading element."""

    # Required: which HeadingType this heading is.
    type: HeadingType
    # Flags the node as block-level for container text joining.
    is_block_level: bool = True
[docs]
class TextEffectType(Enum):
    """Kinds of text-effect element; each member's value is a short string tag."""

    STRONG = "strong"
    EMPHASIS = "em"
    UNDERLINE = "u"
    STRIKETHROUGH = "del"
    MONOSPACE = "code"
    SUBSCRIPT = "sub"
    SUPERSCRIPT = "sup"
    BLOCKQUOTE = "blockquote"
    SPAN = "span"
[docs]
class TextEffectElement(ContainerElement):
    """Base for inline formatting elements like bold, italic, etc."""

    # Required: which TextEffectType applies. is_block_level is not overridden
    # here, so the inherited default is used.
    type: TextEffectType
[docs]
class TextBreakType(Enum):
    """Kinds of text break element; values are the strings "p", "br" and "hr"."""

    PARAGRAPH = "p"
    LINE_BREAK = "br"
    HORIZONTAL_RULE = "hr"
[docs]
class TextBreakElement(ContainerElement):
    """A paragraph, line break or horizontal rule."""

    type: TextBreakType
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return the text form for this break type.

        A horizontal rule becomes ``---`` and a line break becomes a newline.
        Any other type (paragraph) uses the inherited container rendering.
        """
        if self.type == TextBreakType.HORIZONTAL_RULE:
            return "---"
        if self.type == TextBreakType.LINE_BREAK:
            return "\n"
        return super().to_text()
[docs]
class ListType(Enum):
    """Kinds of list element; each member's value is its string tag."""

    UNORDERED = "ul"
    ORDERED = "ol"
    TASK = "task-list"
[docs]
class ListElement(ContainerElement):
    """A list element (unordered, ordered or task list)."""

    type: ListType
    # First number used for ordered items; None means start at 1.
    start: int | None = None
    is_block_level: bool = True

    def to_text(self, indent_level: int = 0) -> str:
        """Render the list as one line per item.

        Args:
            indent_level: Nesting depth; each level adds one space of indent.

        Returns:
            The formatted list, or an empty string when it has no children.
        """
        if not self.children:
            return ""
        return self._format_items(self.children, indent_level)

    def _format_items(self, items: list[ListItem | ListElement | Node], indent_level: int) -> str:
        """Format ``items`` at ``indent_level``, recursing into nested lists.

        List items get a marker: a checkbox when the item has a task status,
        otherwise a bullet or a running number depending on this list's type.
        Lists nested inside an item, or appearing directly among ``items``, are
        rendered one level deeper. Any other node is emitted as an indented
        line of its stripped text.
        """
        lines: list[str] = []
        # Compare against None explicitly: `self.start or 1` would silently
        # turn an explicit start of 0 into 1.
        item_number = 1 if self.start is None else self.start
        indent = " " * indent_level

        for item in items:
            if isinstance(item, ListItem):
                content_parts: list[str] = []
                nested_lists: list[ListElement] = []
                for child in item.children:
                    if isinstance(child, ListElement):
                        nested_lists.append(child)
                    elif child_text := child.to_text().strip():
                        content_parts.append(child_text)
                content = " ".join(content_parts)

                if item.status is not None:
                    # NOTE(review): both markers had been reduced to the same
                    # garbled character, so task state was lost in the output.
                    # The ballot-box pair is a best-effort restoration; confirm
                    # against the intended glyphs.
                    marker = "☑" if item.status == TaskListItemStatus.COMPLETE else "☐"
                    lines.append(f"{indent}{marker} {content}")
                elif self.type == ListType.UNORDERED:
                    lines.append(f"{indent}• {content}")
                elif self.type == ListType.ORDERED:
                    lines.append(f"{indent}{item_number}. {content}")
                    item_number += 1
                else:
                    lines.append(f"{indent}{content}")

                # Nested lists follow their parent item, one level deeper.
                for nested in nested_lists:
                    if nested_text := nested.to_text(indent_level + 1):
                        lines.append(nested_text)
            elif isinstance(item, ListElement):
                if nested_text := item.to_text(indent_level + 1):
                    lines.append(nested_text)
            elif child_text := item.to_text().strip():
                lines.append(f"{indent}{child_text}")

        return "\n".join(lines)
[docs]
class TaskListItemStatus(Enum):
    """Completion status of a task list item."""

    COMPLETE = "complete"
    INCOMPLETE = "incomplete"
[docs]
class ListItem(ContainerElement):
    """A list item element that can be regular or task item."""

    # Optional identifiers; both default to None.
    task_id: str | None = None
    uuid: str | None = None
    # None for a regular item; ListElement renders a task marker when it is set.
    status: TaskListItemStatus | None = None
    # Flags the node as block-level for container text joining.
    is_block_level: bool = True
[docs]
class LinkType(Enum):
    """Kinds of link element; each member's value is its string tag."""

    EXTERNAL = "a"
    MAILTO = "mailto"
    SPACE = "ri:space"
    PAGE = "ri:page"
    BLOG_POST = "ri:blog-post"
    USER = "ri:user"
    ATTACHMENT = "ri:attachment"
    ANCHOR = "ac:anchor"
[docs]
class LinkElement(ContainerElement):
    """A link element."""

    type: LinkType
    # Every attribute below is optional and defaults to None.
    href: str | None = None
    anchor: str | None = None
    space_key: str | None = None
    content_title: str | None = None
    posting_day: str | None = None
    version_at_save: str | None = None
    account_id: str | None = None
    filename: str | None = None

    def to_text(self) -> str:
        """Return the link's text, falling back to ``href``.

        Text from ResourceIdentifier children comes first, followed by the
        text of all other children, space-separated. When no child yields any
        text, ``href`` is returned (or an empty string if it is unset).
        """
        resource_parts: list[str] = []
        content_parts: list[str] = []
        for child in self.children:
            child_text = child.to_text().strip()
            if not child_text:
                continue
            # Test the class itself. The previous duck test (a ``type``
            # attribute with a ``.value``) also matched any child with an
            # enum-typed ``type``, such as inline formatting, and moved that
            # text ahead of the plain text around it.
            if isinstance(child, ResourceIdentifier):
                resource_parts.append(child_text)
            else:
                content_parts.append(child_text)

        combined = " ".join(resource_parts + content_parts)
        return combined or self.href or ""
[docs]
class Image(ContainerElement):
    """An image element."""

    # Every attribute below is optional and defaults to None.
    src: str | None = None
    alt: str | None = None
    title: str | None = None
    width: str | None = None
    height: str | None = None
    alignment: str | None = None
    layout: str | None = None
    original_height: str | None = None
    original_width: str | None = None
    custom_width: bool | None = None
    filename: str | None = None
    version_at_save: str | None = None
    url_value: str | None = None

    def to_text(self) -> str:
        """Return an image label, plus a caption line when children have text.

        The label uses the first non-empty value of ``alt``, ``src``,
        ``filename`` and ``url_value``, or ``Unknown`` when all are empty.
        The caption is the concatenated text of the children.
        """
        label = self.alt or self.src or self.filename or self.url_value or "Unknown"
        # The leading emoji was stored as mojibake; restored to the framed
        # picture glyph its residue corresponds to.
        image_text = f"🖼️ Image: {label}"
        caption = "".join(child.to_text() for child in self.children).strip()
        if caption:
            return f"{image_text}\nCaption: {caption}"
        return image_text
[docs]
class Table(ContainerElement):
    """A table: optional metadata plus rows held as children."""

    width: str | None = None
    layout: str | None = None
    local_id: str | None = None
    display_mode: str | None = None

    def to_text(self) -> str:
        """Return one line per child, skipping children whose text is blank.

        Each child's text is stripped before use. A table with no children
        renders as an empty string.
        """
        stripped_rows = (row.to_text().strip() for row in self.children)
        return "\n".join(line for line in stripped_rows if line)
[docs]
class TableRow(ContainerElement):
    """A single row of a table."""

    is_block_level: bool = True

    def to_text(self) -> str:
        """Join the children's text with `` | ``; an empty row gives ``""``.

        Child text is used as returned, without stripping.
        """
        return " | ".join(cell.to_text() for cell in self.children)
[docs]
class TableCell(ContainerElement):
    """A table cell."""

    # True for a header cell; defaults to False.
    is_header: bool = False
    # Optional span counts; both default to None.
    rowspan: int | None = None
    colspan: int | None = None
[docs]
class Emoticon(Node):
    """An emoticon, identified by name with optional emoji metadata."""

    name: str
    emoji_shortname: str | None = None
    emoji_id: str | None = None
    emoji_fallback: str | None = None

    def to_text(self) -> str:
        """Return the first non-empty of fallback, shortname, then ``:name:``."""
        return self.emoji_fallback or self.emoji_shortname or f":{self.name}:"
[docs]
class Time(Node):
    """A time element with datetime."""

    # Optional date/time string; defaults to None.
    datetime: str | None = None

    def to_text(self) -> str:
        """Return a calendar glyph followed by ``datetime``, or by ``Date``."""
        # The emoji here was stored as mojibake whose final byte rendered as a
        # line break, splitting the literal across two lines. Restored to the
        # calendar glyph that those bytes and the "Date" label point to.
        if self.datetime:
            return f"📅 {self.datetime}"
        return "📅 Date"
[docs]
class ResourceIdentifierType(Enum):
    """Kinds of resource identifier; each member's value is its string tag."""

    PAGE = "page"
    BLOG_POST = "blog-post"
    ATTACHMENT = "attachment"
    URL = "url"
    SHORTCUT = "shortcut"
    USER = "user"
    SPACE = "space"
    CONTENT_ENTITY = "content-entity"
[docs]
class ResourceIdentifier(Node):
    """A resource identifier element."""

    type: ResourceIdentifierType
    # Every attribute below is optional and defaults to None; to_text() reads
    # only the ones relevant to ``type``.
    space_key: str | None = None
    content_title: str | None = None
    content_id: str | None = None
    posting_day: str | None = None
    filename: str | None = None
    value: str | None = None
    key: str | None = None
    parameter: str | None = None
    account_id: str | None = None
    local_id: str | None = None
    userkey: str | None = None
    version_at_save: str | None = None

    def to_text(self) -> str:
        """Return a short label for the resource, chosen by ``type``.

        Most types append a detail (posting day, filename, URL, user id, space
        key, shortcut key and parameter, or content id) when it is set, and
        return the bare label otherwise. For users, ``account_id`` is preferred
        over ``userkey``.
        """
        # NOTE(review): the glyphs that appear as a lone "đ" below are
        # mis-encoded emoji whose original character cannot be recovered from
        # this file; they are kept verbatim. The user glyph was recoverable
        # and has been restored.
        if self.type == ResourceIdentifierType.PAGE:
            return "đ Page"
        if self.type == ResourceIdentifierType.BLOG_POST:
            return f"đ Blog: {self.posting_day}" if self.posting_day else "đ Blog"
        if self.type == ResourceIdentifierType.ATTACHMENT:
            return f"đ Attachment: {self.filename}" if self.filename else "đ Attachment"
        if self.type == ResourceIdentifierType.URL:
            return f"đ URL: {self.value}" if self.value else "đ URL"
        if self.type == ResourceIdentifierType.USER:
            user_ref = self.account_id or self.userkey
            return f"👤 User: {user_ref}" if user_ref else "👤 User"
        if self.type == ResourceIdentifierType.SPACE:
            return f"đ Space: {self.space_key}" if self.space_key else "đ Space"
        if self.type == ResourceIdentifierType.SHORTCUT:
            return f"đ Shortcut: {self.key}@{self.parameter}" if self.key and self.parameter else "đ Shortcut"
        if self.type == ResourceIdentifierType.CONTENT_ENTITY:
            return f"đ Content: {self.content_id}" if self.content_id else "đ Content"
[docs]
class PlaceholderElement(Node):
    """A placeholder carrying hint text."""

    text: str

    def to_text(self) -> str:
        """Return the hint text behind a ``Placeholder:`` label."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        hint = self.text
        return f"đ Placeholder: {hint}"
[docs]
class PanelMacroType(Enum):
    """Kinds of panel macro; each member's value is its lowercase name."""

    PANEL = "panel"
    NOTE = "note"
    SUCCESS = "success"
    WARNING = "warning"
    ERROR = "error"
    INFO = "info"
[docs]
class PanelMacro(ContainerElement):
    """A panel macro element with background color and optional icon."""

    type: PanelMacroType
    # Optional presentation attributes; all default to None.
    bg_color: str | None = None
    panel_icon: str | None = None
    panel_icon_id: str | None = None
    # When set on a plain PANEL, replaces the default label in to_text().
    panel_icon_text: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return the panel's label followed by its content.

        The content is the inherited container rendering. A plain panel with
        ``panel_icon_text`` set renders as ``"<icon text> <content>"``. Every
        other case renders as ``"<label>: <content>"``. When the panel has no
        content, only the icon text or label is returned.
        """
        content = super().to_text()

        if self.type == PanelMacroType.PANEL and self.panel_icon_text:
            return f"{self.panel_icon_text} {content}" if content else self.panel_icon_text

        # The SUCCESS, WARNING and INFO glyphs were stored as mojibake (the
        # SUCCESS one even split its literal across two lines) and are
        # restored here.
        # NOTE(review): the NOTE, ERROR and PANEL glyphs are also mis-encoded,
        # but their original characters cannot be recovered from this file;
        # they are kept verbatim.
        labels = {
            PanelMacroType.NOTE: "đ NOTE",
            PanelMacroType.SUCCESS: "✅ SUCCESS",
            PanelMacroType.WARNING: "⚠️ WARNING",
            PanelMacroType.ERROR: "â ERROR",
            PanelMacroType.INFO: "ℹ️ INFO",
            PanelMacroType.PANEL: "đ PANEL",
        }
        label = labels[self.type]
        return f"{label}: {content}" if content else label
[docs]
class CodeMacro(Node):
    """A code block with an optional language tag."""

    language: str | None = None
    breakout_mode: str | None = None
    breakout_width: str | None = None
    code: str
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return the code wrapped in a triple-backtick fence.

        The opening fence carries the language when one is set.
        """
        opening_fence = f"```{self.language}" if self.language else "```"
        return f"{opening_fence}\n{self.code}\n```"
[docs]
class ExpandMacro(ContainerElement):
    """An expand/collapse macro element."""

    # Heading for the section; to_text() falls back to "Expand" when unset.
    title: str | None = None
    breakout_width: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a title line, followed by the content on the next line if any."""
        content = super().to_text()
        # The leading marker was stored as mojibake; restored to the
        # right-pointing triangle its residue corresponds to.
        heading = f"▶ {self.title or 'Expand'}"
        return f"{heading}\n{content}" if content else heading
[docs]
class StatusMacro(Node):
    """A status macro element with title and color."""

    # Both optional; to_text() uses "Status" when title is unset.
    title: str | None = None
    colour: str | None = None

    def to_text(self) -> str:
        """Return ``Status: <title>``, with the colour in parentheses when set."""
        # The leading emoji was stored as mojibake; restored to the label glyph
        # its residue corresponds to.
        label = f"🏷️ Status: {self.title or 'Status'}"
        return f"{label} ({self.colour})" if self.colour else label
[docs]
class TocMacro(Node):
    """A table-of-contents macro."""

    style: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a fixed label; ``style`` does not affect the text."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        label = "đ Table of Contents"
        return label
[docs]
class JiraMacro(Node):
    """A JIRA issue macro element."""

    # All optional; to_text() uses ``key`` and ``server`` only.
    key: str | None = None
    server_id: str | None = None
    server: str | None = None

    def to_text(self) -> str:
        """Return the issue key, with the server name when it is not the default.

        The server is appended in parentheses unless it is unset or equal to
        ``"System Jira"``. Without a key, a generic label is returned.
        """
        # The leading emoji was stored as mojibake; restored to the ticket
        # glyph its residue corresponds to.
        if not self.key:
            return "🎫 JIRA Issue"
        if self.server and self.server != "System Jira":
            return f"🎫 {self.key} ({self.server})"
        return f"🎫 {self.key}"
[docs]
class IncludeMacro(ContainerElement):
    """An include macro that refers to another page."""

    space_key: str | None = None
    content_title: str | None = None
    version_at_save: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a label naming the included page when its title is known.

        Children are not rendered.
        """
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        title = self.content_title
        return f"đ Include: {title}" if title else "đ Include Page"
[docs]
class TasksReportMacro(Node):
    """A tasks report macro."""

    spaces: str | None = None
    is_missing_required_parameters: bool = False
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return the report label, with ``spaces`` appended when it is set."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        scope = self.spaces
        return f"đ Tasks Report: {scope}" if scope else "đ Tasks Report"
[docs]
class ExcerptIncludeMacro(ContainerElement):
    """An excerpt-include macro."""

    space_key: str | None = None
    content_title: str | None = None
    posting_day: str | None = None
    version_at_save: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a label naming the excerpted content.

        Without a content title a generic label is returned. With one, the
        posting day is appended in parentheses when it is set. Children are
        not rendered.
        """
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        if not self.content_title:
            return "đ Excerpt Include"
        if self.posting_day:
            return f"đ Excerpt: {self.content_title} ({self.posting_day})"
        return f"đ Excerpt: {self.content_title}"
[docs]
class AttachmentsMacro(Node):
    """An attachments macro."""

    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a fixed label."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        label = "đ Attachments"
        return label
[docs]
class ViewPdfMacro(Node):
    """A PDF viewer macro."""

    filename: str | None = None
    version_at_save: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a label naming the PDF file when its filename is set."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        name = self.filename
        return f"đ PDF: {name}" if name else "đ PDF Viewer"
[docs]
class ViewFileMacro(Node):
    """A file viewer macro."""

    filename: str | None = None
    version_at_save: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a label naming the file when its filename is set."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        name = self.filename
        return f"đ File: {name}" if name else "đ File Viewer"
[docs]
class ProfileMacro(ContainerElement):
    """A profile macro element for displaying user profiles."""

    # Optional account identifier; defaults to None.
    account_id: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a profile label with the account id when it is set.

        Children are not rendered.
        """
        # The leading emoji was stored as mojibake; restored to the person
        # silhouette glyph its residue corresponds to.
        if self.account_id:
            return f"👤 Profile: {self.account_id}"
        return "👤 User Profile"
[docs]
class AnchorMacro(Node):
    """An anchor macro."""

    anchor_name: str | None = None

    def to_text(self) -> str:
        """Return the anchor label, with the anchor name when it is set."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        name = self.anchor_name
        return f"â Anchor: {name}" if name else "â Anchor"
[docs]
class ExcerptMacro(ContainerElement):
    """An excerpt macro wrapping content."""

    is_block_level: bool = True

    def to_text(self) -> str:
        """Return the excerpt label, followed by the container text if any."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        body = super().to_text()
        if body:
            return f"đ Excerpt: {body}"
        return "đ Excerpt"
[docs]
class DecisionListItemState(Enum):
    """State of a decision list item; values are the upper-case member names."""

    DECIDED = "DECIDED"
    PENDING = "PENDING"
[docs]
class DecisionList(ContainerElement):
    """A decision list holding decision items as children."""

    local_id: str | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return one line per child with non-blank text.

        Falls back to a generic label when there are no children or none of
        them produce text.
        """
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        entries = [text for child in self.children if (text := child.to_text().strip())]
        return "\n".join(entries) if entries else "đ Decision List"
[docs]
class DetailsMacro(ContainerElement):
    """A details macro wrapping content."""

    is_block_level: bool = True

    def to_text(self) -> str:
        """Return the details label, followed by the container text if any."""
        # NOTE(review): the leading glyph looks mis-encoded; kept verbatim.
        body = super().to_text()
        if body:
            return f"đ Details: {body}"
        return "đ Details"
[docs]
class DecisionListItem(ContainerElement):
    """A decision item element."""

    local_id: str | None = None
    # DECIDED selects the check mark in to_text(); any other value, including
    # None, selects the pending marker.
    state: DecisionListItemState | None = None
    is_block_level: bool = True

    def to_text(self) -> str:
        """Return a state marker followed by the item's content.

        When the item has no content, only the marker is returned.
        """
        content = super().to_text()
        # Both markers were stored as mojibake (the DECIDED one even split its
        # literal across two lines); restored to the check mark and hourglass
        # their residue corresponds to.
        marker = "✅" if self.state == DecisionListItemState.DECIDED else "⏳"
        return f"{marker} {content}" if content else marker
[docs]
class Text(Node):
    """A leaf node holding plain text."""

    text: str

    def to_text(self) -> str:
        """Return the stored text with HTML character references decoded."""
        decoded = html.unescape(self.text)
        return decoded