Examples
This section provides comprehensive examples of using the Confluence Content Parser for common tasks.
Basic Parsing and Text Extraction
This example demonstrates the fundamental parsing capabilities and text extraction:
Basic Usage Example
#!/usr/bin/env python3
"""
Basic usage example for Confluence Content Parser.
This example demonstrates core parsing capabilities including:
- Text formatting (bold, italic, code)
- Links and references
- Status macros
- Tables
- Lists and task lists
- Details macros with placeholders
"""
from confluence_content_parser import ConfluenceParser
def main():
    """Parse a sample Confluence page and print what the parser extracted.

    The walkthrough covers plain-text extraction, per-type lookups with
    ``find_all``, a single multi-type ``find_all`` call, node statistics and
    any diagnostics recorded in ``document.metadata``. Everything is written
    to stdout; nothing is returned.
    """
    # Function-scope imports keep the example copy-pasteable as one unit.
    from confluence_content_parser import (
        DetailsMacro,
        HeadingElement,
        LinkElement,
        ListElement,
        ListType,
        PlaceholderElement,
        StatusMacro,
        Table,
    )

    confluence_content = """
<h1>Project Documentation</h1>
<p>This document demonstrates <strong>basic parsing</strong> of Confluence content with <em>formatting</em> and <code>inline code</code>.</p>
<h2>Project Status</h2>
<p>Current status: <ac:structured-macro ac:name="status">
<ac:parameter ac:name="title">In Progress</ac:parameter>
<ac:parameter ac:name="colour">Yellow</ac:parameter>
</ac:structured-macro></p>
<h2>Team Information</h2>
<ac:structured-macro ac:name="details">
<ac:rich-text-body>
<table>
<tr><th>Role</th><th>Assignee</th></tr>
<tr><td>Project Lead</td><td><ac:placeholder>@ mention lead</ac:placeholder></td></tr>
<tr><td>Developer</td><td><ac:placeholder>@ mention developer</ac:placeholder></td></tr>
<tr><td>QA Engineer</td><td><ac:placeholder>@ mention qa</ac:placeholder></td></tr>
</table>
</ac:rich-text-body>
</ac:structured-macro>
<h2>Tasks</h2>
<ac:task-list>
<ac:task>
<ac:task-id>1</ac:task-id>
<ac:task-status>complete</ac:task-status>
<ac:task-body>Set up project repository</ac:task-body>
</ac:task>
<ac:task>
<ac:task-id>2</ac:task-id>
<ac:task-status>incomplete</ac:task-status>
<ac:task-body>Implement core features</ac:task-body>
</ac:task>
</ac:task-list>
<h2>External Resources</h2>
<ul>
<li>Documentation: <ac:link><ri:url ri:value="https://docs.example.com"/></ac:link></li>
<li>Repository: <ac:link><ri:url ri:value="https://github.com/example/project"/></ac:link></li>
</ul>
"""

    def print_texts(title, elements):
        """Print a section title, one text line per element, then a blank line."""
        print(title)
        for element in elements:
            print(f" {element.to_text()}")
        print()

    # Parse the content
    document = ConfluenceParser().parse(confluence_content)
    print("=== BASIC CONFLUENCE PARSING EXAMPLE ===\n")

    # Get clean text output
    print("1. DOCUMENT TEXT:")
    print(document.text)
    print("\n" + "=" * 50 + "\n")

    # Extract specific elements using find_all
    print("2. HEADINGS:")
    headings = document.find_all(HeadingElement)
    for heading in headings:
        # The last character of the type value is shown as the level
        # (assumes values shaped like "h1" - confirm against the library).
        print(f" H{heading.type.value[-1]}: {heading.to_text()}")
    print()

    status_elements = document.find_all(StatusMacro)
    print_texts("3. STATUS ELEMENTS:", status_elements)

    print("4. TABLES:")
    tables = document.find_all(Table)
    for index, table in enumerate(tables, 1):
        print(f" Table {index}: {len(table.children)} rows")
        print(f" Content: {table.to_text()}")
    print()

    links = document.find_all(LinkElement)
    print_texts("5. LINKS:", links)

    print("6. TASK LISTS:")
    for list_element in document.find_all(ListElement):
        # Task lists are ListElements distinguished only by their type.
        if list_element.type == ListType.TASK:
            print(f" Tasks: {list_element.to_text()}")
    print()

    placeholders = document.find_all(PlaceholderElement)
    print_texts("7. PLACEHOLDER ELEMENTS:", placeholders)

    details = document.find_all(DetailsMacro)
    print_texts("8. DETAILS MACROS:", details)

    print("9. MULTIPLE TYPE SEARCH:")
    # Find multiple element types at once
    headings_multi, status_multi, placeholders_multi = document.find_all(
        HeadingElement, StatusMacro, PlaceholderElement
    )
    print(f" Found in one search: {len(headings_multi)} headings, {len(status_multi)} status elements, {len(placeholders_multi)} placeholders")
    # Compare with individual searches (should match)
    assert len(headings_multi) == len(headings)
    assert len(status_multi) == len(status_elements)
    assert len(placeholders_multi) == len(placeholders)
    print(" ✓ Results match individual searches")
    print()

    # Document statistics
    # list() makes the count work whether walk() returns a list or a lazy
    # iterator, matching the other snippets on this page.
    total_nodes = len(list(document.walk()))
    print("10. DOCUMENT STATISTICS:")
    print(f" Total nodes: {total_nodes}")
    print(f" Headings: {len(headings)}")
    print(f" Tables: {len(tables)}")
    print(f" Links: {len(links)}")
    print(f" Status elements: {len(status_elements)}")
    print(f" Placeholders: {len(placeholders)}")
    print(f" Details macros: {len(details)}")

    # Check for any parsing issues
    diagnostics = document.metadata.get("diagnostics", [])
    if diagnostics:
        print(f"\n11. PARSING DIAGNOSTICS: {diagnostics}")
    else:
        print("\n11. PARSING: No issues detected ✓")


if __name__ == "__main__":
    main()
Output Analysis
When you run the basic example, you’ll see:
Document Text: Clean, formatted text with proper spacing
Element Extraction: Specific element types found and processed
Statistics: Overview of document structure
Diagnostics: Any parsing issues encountered
Advanced Content Processing
Working with Complex Layouts
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import LayoutElement, LayoutSection, LayoutCell
# Two-column layout used as parser input
layout_content = """
<ac:layout>
<ac:layout-section ac:type="two_equal">
<ac:layout-cell>
<h2>Left Column</h2>
<p>Content for the left side.</p>
</ac:layout-cell>
<ac:layout-cell>
<h2>Right Column</h2>
<p>Content for the right side.</p>
</ac:layout-cell>
</ac:layout-section>
</ac:layout>
"""

parser = ConfluenceParser()
document = parser.parse(layout_content)

# Walk layout -> section -> cell and report each section's shape
layouts = document.find_all(LayoutElement)
for layout in layouts:
    for section in layout.find_all(LayoutSection):
        print(f"Section type: {section.section_type.value}")
        cell_count = len(section.find_all(LayoutCell))
        print(f"Number of cells: {cell_count}")
Processing Macros and Special Content
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import PanelMacro, CodeMacro, ExpandMacro
macro_content = """
<ac:structured-macro ac:name="info">
<ac:rich-text-body>
<p>This is important information.</p>
</ac:rich-text-body>
</ac:structured-macro>
<ac:structured-macro ac:name="code">
<ac:parameter ac:name="language">python</ac:parameter>
<ac:plain-text-body>
def hello_world():
print("Hello, World!")
</ac:plain-text-body>
</ac:structured-macro>
<ac:structured-macro ac:name="expand">
<ac:parameter ac:name="title">Click to expand</ac:parameter>
<ac:rich-text-body>
<p>Hidden content here.</p>
</ac:rich-text-body>
</ac:structured-macro>
"""

parser = ConfluenceParser()
document = parser.parse(macro_content)

# Report each macro family in turn: panels, code blocks, expand sections
panels = document.find_all(PanelMacro)
for panel in panels:
    panel_kind = panel.type.value
    print(f"Panel type: {panel_kind}")
    print(f"Content: {panel.to_text()}")

code_blocks = document.find_all(CodeMacro)
for code in code_blocks:
    print(f"Language: {code.language}")
    print(f"Code: {code.code}")

expand_sections = document.find_all(ExpandMacro)
for expand in expand_sections:
    print(f"Title: {expand.title}")
    print(f"Content: {expand.to_text()}")
Table Processing
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import Table, TableRow, TableCell
table_content = """
<table>
<tr>
<th>Name</th>
<th>Role</th>
<th>Department</th>
</tr>
<tr>
<td>John Doe</td>
<td>Developer</td>
<td>Engineering</td>
</tr>
<tr>
<td>Jane Smith</td>
<td>Designer</td>
<td>UX</td>
</tr>
</table>
"""

parser = ConfluenceParser()
document = parser.parse(table_content)

# Print every row as pipe-separated cells; header cells are wrapped in ** **
tables = document.find_all(Table)
for table in tables:
    for row_number, row in enumerate(table.find_all(TableRow), start=1):
        cell_data = []
        for cell in row.find_all(TableCell):
            cell_text = cell.to_text()
            cell_data.append(f"**{cell_text}**" if cell.is_header else cell_text)
        print(f"Row {row_number}: {' | '.join(cell_data)}")
Task List Processing
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import ListElement, ListItem, ListType, TaskListItemStatus
task_content = """
<ac:task-list>
<ac:task>
<ac:task-id>1</ac:task-id>
<ac:task-status>complete</ac:task-status>
<ac:task-body>Complete project setup</ac:task-body>
</ac:task>
<ac:task>
<ac:task-id>2</ac:task-id>
<ac:task-status>incomplete</ac:task-status>
<ac:task-body>Write documentation</ac:task-body>
</ac:task>
</ac:task-list>
"""

parser = ConfluenceParser()
document = parser.parse(task_content)

# Only ListElements whose type is TASK are task lists
lists = document.find_all(ListElement)
task_lists = [candidate for candidate in lists if candidate.type == ListType.TASK]
for task_list in task_lists:
    print("Task List:")
    for item in task_list.find_all(ListItem):
        # Completed items get a check mark, everything else an open circle
        if item.status == TaskListItemStatus.COMPLETE:
            status_symbol = "✓"
        else:
            status_symbol = "○"
        print(f" {status_symbol} {item.to_text()}")
Error Handling and Diagnostics
Diagnostics and Error Handling
#!/usr/bin/env python3
"""
Diagnostics usage example for Confluence Content Parser.
This example demonstrates:
- How to handle parsing errors and diagnostics
- Unknown elements and macros
- Unicode and encoding issues
- Best practices for robust parsing
- Troubleshooting common issues
"""
from confluence_content_parser import ConfluenceParser
def main():
    """Parse deliberately awkward content and report diagnostics and statistics.

    The sample mixes known macros, an unknown macro, an unknown element and an
    inline comment marker. The walkthrough parses it leniently
    (``raise_on_finish=False``), prints the collected diagnostics, counts the
    recognised element types, breaks links down by type, previews the clean
    text, retries in strict mode (``raise_on_finish=True``) and finally prints
    summary statistics. Everything is written to stdout; nothing is returned.
    """
    # Function-scope imports keep the example copy-pasteable as one unit.
    from confluence_content_parser import (
        DetailsMacro,
        HeadingElement,
        LinkElement,
        ListElement,
        PanelMacro,
        PlaceholderElement,
        StatusMacro,
        Table,
    )

    problematic_content = """
<h1>Diagnostics Example</h1>
<p>This content contains various elements that will generate diagnostics:</p>
<!-- Unknown macro that doesn't exist -->
<ac:structured-macro ac:name="unknown-macro" ac:schema-version="1">
<ac:parameter ac:name="param1">value1</ac:parameter>
<ac:rich-text-body>
<p>This macro is not implemented</p>
</ac:rich-text-body>
</ac:structured-macro>
<!-- Known elements -->
<ac:structured-macro ac:name="details" ac:schema-version="1">
<ac:rich-text-body>
<table>
<tr><th>Field</th><th>Value</th></tr>
<tr><td>Status</td><td><ac:structured-macro ac:name="status">
<ac:parameter ac:name="title">Active</ac:parameter>
<ac:parameter ac:name="colour">Green</ac:parameter>
</ac:structured-macro></td></tr>
</table>
</ac:rich-text-body>
</ac:structured-macro>
<!-- Links to various resources -->
<h2>Links Example</h2>
<ul>
<li>External URL: <ac:link><ri:url ri:value="https://example.com"/></ac:link></li>
<li>Page link: <ac:link><ri:page ri:space-key="DOC" ri:content-title="User Guide"/></ac:link></li>
<li>User mention: <ac:link><ri:user ri:account-id="user123"/></ac:link></li>
<li>Attachment: <ac:link><ri:attachment ri:filename="document.pdf"/></ac:link></li>
</ul>
<!-- Task list -->
<h2>Tasks</h2>
<ac:task-list>
<ac:task>
<ac:task-id>task1</ac:task-id>
<ac:task-status>complete</ac:task-status>
<ac:task-body>Review documentation</ac:task-body>
</ac:task>
<ac:task>
<ac:task-id>task2</ac:task-id>
<ac:task-status>incomplete</ac:task-status>
<ac:task-body><ac:placeholder>Add task description here</ac:placeholder></ac:task-body>
</ac:task>
</ac:task-list>
<!-- Unknown elements (these will be skipped) -->
<unknown-element>This should be skipped</unknown-element>
<!-- Inline comment marker (should be skipped) -->
<p>Some text with <ac:inline-comment-marker ac:ref="comment-123">inline comment</ac:inline-comment-marker></p>
<!-- Panel with content -->
<ac:structured-macro ac:name="panel" ac:schema-version="1">
<ac:parameter ac:name="title">Important Note</ac:parameter>
<ac:parameter ac:name="bgColor">#FFF2CC</ac:parameter>
<ac:rich-text-body>
<p>This is a panel with <strong>formatted content</strong> and placeholders:</p>
<p><ac:placeholder>Add important information here</ac:placeholder></p>
</ac:rich-text-body>
</ac:structured-macro>
"""

    def link_kind(link):
        """Return the link's type as a string, unwrapping ``.value`` when present."""
        kind = link.type
        return kind.value if hasattr(kind, "value") else str(kind)

    print("=== DIAGNOSTICS EXAMPLE ===\n")

    # Parse with diagnostics enabled (default)
    print("1. PARSING WITH DIAGNOSTICS:")
    parser = ConfluenceParser(raise_on_finish=False)  # Don't raise errors, collect diagnostics
    doc = parser.parse(problematic_content)
    # list() makes the count work whether walk() returns a list or a lazy
    # iterator, matching the other snippets on this page.
    total_elements = len(list(doc.walk()))
    print(f" Document parsed successfully: {doc.root is not None}")
    print(f" Total elements found: {total_elements}")
    print()

    # Check diagnostics
    print("2. PARSING DIAGNOSTICS:")
    diagnostics = doc.metadata.get("diagnostics", [])
    if diagnostics:
        print(f" Found {len(diagnostics)} diagnostic messages:")
        for i, diag in enumerate(diagnostics, 1):
            print(f" {i}. {diag}")
    else:
        print(" No diagnostic messages (all elements parsed successfully)")
    print()

    # Analyze what was successfully parsed
    print("3. SUCCESSFULLY PARSED ELEMENTS:")
    # Count different types of elements
    element_counts = {
        "Headings": len(doc.find_all(HeadingElement)),
        "Status macros": len(doc.find_all(StatusMacro)),
        "Details macros": len(doc.find_all(DetailsMacro)),
        "Placeholders": len(doc.find_all(PlaceholderElement)),
        "Links": len(doc.find_all(LinkElement)),
        "Task lists": len(
            [
                list_element
                for list_element in doc.find_all(ListElement)
                if hasattr(list_element.type, "value") and list_element.type.value == "task-list"
            ]
        ),
        "Panels": len(doc.find_all(PanelMacro)),
        "Tables": len(doc.find_all(Table)),
    }
    for element_type, count in element_counts.items():
        print(f" {element_type}: {count}")
    print()

    # Link analysis with type breakdown
    print("4. LINK ANALYSIS:")
    links = doc.find_all(LinkElement)
    if links:
        link_types = {}
        for link in links:
            kind = link_kind(link)
            link_types[kind] = link_types.get(kind, 0) + 1
        for kind, count in link_types.items():
            print(f" {kind} links: {count}")
        print("\n Link details:")
        for i, link in enumerate(links, 1):
            link_text = link.to_text().strip()
            print(f" {i}. {link_kind(link)}: {link_text}")
    else:
        print(" No links found")
    print()

    # Placeholder analysis
    print("5. PLACEHOLDER ANALYSIS:")
    placeholders = doc.find_all(PlaceholderElement)
    if placeholders:
        print(f" Found {len(placeholders)} placeholders:")
        for i, placeholder in enumerate(placeholders, 1):
            print(f" {i}. {placeholder.to_text()}")
    else:
        print(" No placeholders found")
    print()

    # Document text extraction
    print("6. CLEAN TEXT OUTPUT:")
    print(" " + "=" * 47)
    clean_text = doc.text
    # Split once; the result is reused for the preview and the line count below.
    text_lines = clean_text.split("\n")
    # Show first few lines of clean text
    for line in text_lines[:10]:
        stripped = line.strip()
        if stripped:
            print(f" {stripped}")
    if len(text_lines) > 10:
        print(" ... (truncated)")
    print(" " + "=" * 47)
    print()

    # Error handling example
    print("7. ERROR HANDLING EXAMPLE:")
    try:
        # Try parsing with raise_on_finish=True
        strict_parser = ConfluenceParser(raise_on_finish=True)
        strict_parser.parse(problematic_content)
        print(" Strict parsing succeeded (no unknown elements)")
    except Exception as exc:  # demo boundary: report whatever the strict parser raises
        print(f" Strict parsing failed as expected: {type(exc).__name__}")
        print(f" Error details: {exc}")
    print()

    # Parsing statistics
    print("8. PARSING STATISTICS:")
    successful_elements = total_elements
    failed_elements = len(diagnostics)
    if total_elements > 0:
        success_rate = (successful_elements / (successful_elements + failed_elements)) * 100
        print(f" Total parsed elements: {successful_elements}")
        print(f" Failed/unknown elements: {failed_elements}")
        print(f" Success rate: {success_rate:.1f}%")
    # Counted outside the f-string: a backslash inside an f-string expression is
    # a SyntaxError before Python 3.12, and '\\n' would split on a literal
    # backslash followed by "n" rather than on newlines.
    non_empty_lines = sum(1 for line in text_lines if line.strip())
    print(f" Document length: {len(clean_text)} characters")
    print(f" Non-empty lines: {non_empty_lines}")


if __name__ == "__main__":
    main()
Custom Content Analysis
Document Statistics
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import (
HeadingElement, LinkElement, Image, Table,
ListElement, PanelMacro, CodeMacro
)
def analyze_document(content):
    """Parse ``content`` and summarise it as a dict of counts.

    Args:
        content: Confluence markup passed straight to ``ConfluenceParser.parse``.

    Returns:
        dict with the node total, one count per searched element type, the
        length of the extracted text, and the ``diagnostics`` list taken from
        the document metadata (empty list when absent).
    """
    document = ConfluenceParser().parse(content)

    # Two multi-type searches instead of seven single-type ones
    headings, links, images = document.find_all(HeadingElement, LinkElement, Image)
    tables, lists, panels, codes = document.find_all(Table, ListElement, PanelMacro, CodeMacro)

    node_total = len(list(document.walk()))
    return {
        'total_nodes': node_total,
        'headings': len(headings),
        'links': len(links),
        'images': len(images),
        'tables': len(tables),
        'lists': len(lists),
        'panels': len(panels),
        'code_blocks': len(codes),
        'text_length': len(document.text),
        'diagnostics': document.metadata.get('diagnostics', []),
    }
# Usage: analyse a tiny document and print every statistic
content = """<h1>Sample</h1><p>Text with <strong>formatting</strong></p>"""
stats = analyze_document(content)

print("Document Analysis:")
for key, value in stats.items():
    # Diagnostics are listed separately below
    if key == 'diagnostics':
        continue
    print(f" {key.replace('_', ' ').title()}: {value}")

if stats['diagnostics']:
    print(" Parsing Issues:")
    for diagnostic in stats['diagnostics']:
        print(f" - {diagnostic}")
Content Search and Filtering
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import HeadingElement, Text, LinkElement
def search_content(document, search_term):
    """Return every node whose text contains ``search_term`` (case-insensitive).

    Args:
        document: Object exposing ``walk()``, which yields nodes that each
            provide ``to_text()``.
        search_term: Substring to look for. An empty string matches every node.

    Returns:
        list of dicts, in walk order, with keys ``'type'`` (the node's class
        name), ``'content'`` (the node text, cut to 100 characters plus
        ``'...'`` when longer) and ``'node'`` (the node itself).
    """
    needle = search_term.lower()  # lowered once, not per node
    matching_nodes = []
    for node in document.walk():
        text = node.to_text()  # rendered once per node and reused below
        if needle not in text.lower():
            continue
        preview = text[:100] + '...' if len(text) > 100 else text
        matching_nodes.append({
            'type': type(node).__name__,
            'content': preview,
            'node': node,
        })
    return matching_nodes
def find_external_links(document):
    """Collect the external links of ``document``.

    A link is kept when its type equals ``LinkType.EXTERNAL`` and its ``href``
    is truthy.

    Returns:
        list of dicts with keys ``'url'`` (the href), ``'text'`` and
        ``'context'`` (both the link's ``to_text()`` output).
    """
    from confluence_content_parser import LinkType

    return [
        {
            'url': link.href,
            'text': link.to_text(),
            'context': link.to_text(),
        }
        for link in document.find_all(LinkElement)
        if link.type == LinkType.EXTERNAL and link.href
    ]
# Usage example
# Sample input so the snippet runs on its own; replace it with real page content.
confluence_content = """
<h1>API Overview</h1>
<p>The API reference is linked here: <ac:link><ri:url ri:value="https://docs.example.com/api"/></ac:link></p>
"""
parser = ConfluenceParser()
document = parser.parse(confluence_content)

# Search for specific content
api_references = search_content(document, 'API')
print(f"Found {len(api_references)} API references")

# Find external links
external_links = find_external_links(document)
for link in external_links:
    print(f"External link: {link['url']} ({link['text']})")
Content Transformation
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import HeadingElement, CodeMacro, PanelMacro
def convert_to_markdown(document):
    """Render headings, code macros and panels of ``document`` as Markdown.

    Nodes are visited in ``walk()`` order; any other node type is ignored.
    Headings become ``#``-prefixed lines, code macros become fenced blocks and
    panels become a block quote prefixed with the upper-cased panel type.

    Returns:
        str: the emitted pieces joined by blank lines.
    """
    pieces = []
    for node in document.walk():
        if isinstance(node, HeadingElement):
            # Second character of the type value is the level (h1, h2, ...)
            depth = int(node.type.value[1])
            pieces.append(f"{'#' * depth} {node.to_text()}")
        elif isinstance(node, CodeMacro):
            fence_language = node.language or ''
            pieces.extend([f"```{fence_language}", node.code, "```"])
        elif isinstance(node, PanelMacro):
            label = node.type.value.upper()
            body = ' '.join(child.to_text() for child in node.children)
            pieces.append(f"> **{label}**: {body}")
    return '\n\n'.join(pieces)
# Usage
# Sample input so the snippet runs on its own; replace it with real page content.
confluence_content = """
<h1>Release Notes</h1>
<ac:structured-macro ac:name="info">
<ac:rich-text-body>
<p>This is important information.</p>
</ac:rich-text-body>
</ac:structured-macro>
<ac:structured-macro ac:name="code">
<ac:parameter ac:name="language">python</ac:parameter>
<ac:plain-text-body>
print("Hello, World!")
</ac:plain-text-body>
</ac:structured-macro>
"""
parser = ConfluenceParser()
document = parser.parse(confluence_content)
markdown = convert_to_markdown(document)
print(markdown)
Performance Optimization
Streaming Large Documents
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import HeadingElement
def process_large_document(xml_content, chunk_size=1000):
    """Parse a document and return a heading outline plus a content summary.

    The whole document is parsed in a single call; no chunking takes place.

    Args:
        xml_content: Confluence markup passed to ``ConfluenceParser.parse``.
        chunk_size: Not used by the current implementation; kept so existing
            callers that pass it keep working.

    Returns:
        list of str: one ``"- <heading text>"`` entry per heading, prefixed by
        one space per level below 1, followed by a single
        ``"Content: ..."`` line when the document has any panels or code blocks.
    """
    # Imported here because this snippet's header only imports HeadingElement.
    from confluence_content_parser import CodeMacro, PanelMacro

    document = ConfluenceParser().parse(xml_content)

    # One multi-type search gathers everything the outline needs
    headings, panels, codes = document.find_all(HeadingElement, PanelMacro, CodeMacro)

    outline = []
    for heading in headings:
        # Second character of the type value is the level (h1, h2, ...)
        level = int(heading.type.value[1])
        outline.append(f"{' ' * (level - 1)}- {heading.to_text()}")

    # Add summary of other content
    if panels or codes:
        outline.append(f"Content: {len(panels)} panels, {len(codes)} code blocks")
    return outline
def extract_text_efficiently(document, max_chunk_size=1000):
    """Group the text of an already-parsed document into chunks.

    The document tree is fully built before this function runs; the function
    only batches node texts so callers can handle the output piece by piece.

    Args:
        document: Object exposing ``walk()``, which yields nodes that each
            provide ``to_text()``.
        max_chunk_size: Number of accumulated characters (separators not
            counted) at which the current chunk is closed. Defaults to 1000.

    Returns:
        list of str: chunks in walk order, each made of node texts joined by a
        single space. Nodes whose text is empty or whitespace-only are skipped.
    """
    text_chunks = []
    current_chunk = []
    current_size = 0
    for node in document.walk():
        text = node.to_text()
        if not text.strip():
            continue
        current_chunk.append(text)
        current_size += len(text)
        # Close the chunk as soon as the threshold is reached, so a chunk may
        # exceed max_chunk_size by up to the length of its last text.
        if current_size >= max_chunk_size:
            text_chunks.append(' '.join(current_chunk))
            current_chunk = []
            current_size = 0
    # Flush whatever is left below the threshold
    if current_chunk:
        text_chunks.append(' '.join(current_chunk))
    return text_chunks
Integration Examples
Document Validation
from confluence_content_parser import ConfluenceParser, ParsingError
def validate_confluence_document(xml_content):
    """Parse ``xml_content`` leniently and report the outcome as a dict.

    Returns:
        On success: ``{'valid': True, 'warnings': <diagnostics list>,
        'statistics': {'total_nodes': ..., 'text_length': ...}}``.
        If parsing or inspecting the document raises: ``{'valid': False,
        'error': <message>, 'warnings': [], 'statistics': {}}``.
    """
    parser = ConfluenceParser(raise_on_finish=False)
    try:
        document = parser.parse(xml_content)
        warnings = document.metadata.get('diagnostics', [])
        statistics = {
            'total_nodes': len(list(document.walk())),
            'text_length': len(document.text),
        }
        return {'valid': True, 'warnings': warnings, 'statistics': statistics}
    except Exception as exc:
        # Any failure is reported to the caller instead of propagating
        return {
            'valid': False,
            'error': str(exc),
            'warnings': [],
            'statistics': {},
        }
# Usage
# Sample input so the snippet runs on its own; replace it with real page content.
xml_content = "<h1>Sample</h1><p>Text with <strong>formatting</strong></p>"
validation = validate_confluence_document(xml_content)
if validation['valid']:
    print("Document is valid")
    if validation['warnings']:
        print(f"Warnings: {validation['warnings']}")
else:
    print(f"Validation failed: {validation['error']}")
Batch Processing
from confluence_content_parser import ConfluenceParser
import concurrent.futures
import os
def process_single_file(file_path):
    """Read one UTF-8 file, parse it leniently and summarise the result.

    Returns:
        On success: ``{'file', 'success': True, 'text_length', 'diagnostics'}``.
        If reading or parsing raises: ``{'file', 'success': False, 'error'}``
        where ``error`` is the exception message.
    """
    try:
        with open(file_path, encoding='utf-8') as handle:
            content = handle.read()
        document = ConfluenceParser(raise_on_finish=False).parse(content)
        summary = {
            'file': file_path,
            'success': True,
            'text_length': len(document.text),
            'diagnostics': document.metadata.get('diagnostics', []),
        }
        return summary
    except Exception as exc:
        # Failures are returned rather than raised so batch callers can continue
        return {'file': file_path, 'success': False, 'error': str(exc)}
def batch_process_files(file_paths, max_workers=4):
    """Run ``process_single_file`` over ``file_paths`` on a thread pool.

    Args:
        file_paths: Iterable of paths, each handed to ``process_single_file``.
        max_workers: Thread-pool size.

    Returns:
        list of the per-file results, in completion order (not input order).
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(process_single_file, path) for path in file_paths]
        return [
            finished.result()
            for finished in concurrent.futures.as_completed(pending)
        ]
# Usage: process three files and print one status line per result
xml_files = ['doc1.xml', 'doc2.xml', 'doc3.xml']
results = batch_process_files(xml_files)
for result in results:
    if not result['success']:
        print(f"Failed {result['file']}: {result['error']}")
        continue
    print(f"Processed {result['file']}: {result['text_length']} chars")
Testing Patterns
Unit Testing with Parser
import unittest
from confluence_content_parser import ConfluenceParser
from confluence_content_parser import HeadingElement, PanelMacro
class TestConfluenceParser(unittest.TestCase):
    """Example unit tests exercising ``ConfluenceParser`` through ``find_all``."""

    def setUp(self):
        # A fresh default parser for every test
        self.parser = ConfluenceParser()

    def test_heading_parsing(self):
        """Test that headings are parsed correctly."""
        document = self.parser.parse("<h1>Main Title</h1><h2>Subtitle</h2>")
        found = document.find_all(HeadingElement)
        self.assertEqual(len(found), 2)
        first, second = found[0], found[1]
        self.assertEqual(first.to_text(), "Main Title")
        self.assertEqual(second.to_text(), "Subtitle")

    def test_multiple_type_search(self):
        """Test multiple type search functionality."""
        content = '''
<h1>Title</h1>
<ac:structured-macro ac:name="info">
<ac:rich-text-body><p>Info content</p></ac:rich-text-body>
</ac:structured-macro>
'''
        document = self.parser.parse(content)
        # One call, two result lists in argument order
        heading_hits, panel_hits = document.find_all(HeadingElement, PanelMacro)
        self.assertEqual(len(heading_hits), 1)
        self.assertEqual(len(panel_hits), 1)
        self.assertEqual(heading_hits[0].to_text(), "Title")
        self.assertIn("Info content", panel_hits[0].to_text())

    def test_panel_macro(self):
        """Test panel macro parsing."""
        content = '''
<ac:structured-macro ac:name="info">
<ac:rich-text-body>
<p>Important information</p>
</ac:rich-text-body>
</ac:structured-macro>
'''
        panel_hits = self.parser.parse(content).find_all(PanelMacro)
        self.assertEqual(len(panel_hits), 1)
        self.assertIn("Important information", panel_hits[0].to_text())

    def test_error_handling(self):
        """Test error handling for malformed content."""
        malformed_content = "<h1>Unclosed heading"
        # Should not raise exception with raise_on_finish=False
        lenient_parser = ConfluenceParser(raise_on_finish=False)
        document = lenient_parser.parse(malformed_content)
        # Diagnostics, when present, are exposed as a list in the metadata
        self.assertIsInstance(document.metadata.get('diagnostics', []), list)


if __name__ == '__main__':
    unittest.main()
See Also
User Guide - Comprehensive guide to using the library
API Reference - Complete API documentation
examples/ directory - Additional examples in the repository