"""DOCX rendering and PDF conversion."""

from __future__ import annotations

import logging
import re
import subprocess
import tempfile
from pathlib import Path

from docx import Document
from docxtpl import DocxTemplate

logger = logging.getLogger(__name__)

# Matches the opening of a Jinja expression and captures the first identifier.
# An optional docxtpl tag prefix ("{{p x}}", "{{tr x}}", "{{tc x}}", "{{r x}}")
# is skipped so that the variable name, not the prefix, ends up in group 1.
PLACEHOLDER_RE = re.compile(
    r"\{\{(?:(?:p|tr|tc|r)\s+)?\s*([A-Za-z_][A-Za-z0-9_]*)"
)


def _collect_placeholders(container, found: set[str]) -> None:
    """Add every placeholder name found in *container* to *found*.

    *container* is any object exposing ``paragraphs`` and ``tables`` (the
    document itself, a table cell, a header or a footer). Table cells are
    visited recursively so nested tables are covered as well.
    """
    for para in container.paragraphs:
        found.update(m.group(1) for m in PLACEHOLDER_RE.finditer(para.text))
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                _collect_placeholders(cell, found)


def extract_placeholders(docx_path: Path) -> list[str]:
    """Read the Jinja placeholders from a DOCX file and return them sorted.

    The document body (paragraphs and tables, including nested tables) and
    the header and footer of every section are scanned.

    Args:
        docx_path: Path of the DOCX file to inspect.

    Returns:
        The distinct placeholder names in ascending order.
    """
    doc = Document(str(docx_path))
    found: set[str] = set()
    _collect_placeholders(doc, found)
    for section in doc.sections:
        for part in (section.header, section.footer):
            # A linked header/footer has no definition of its own in this
            # section, so there is nothing additional to scan.
            if not part.is_linked_to_previous:
                _collect_placeholders(part, found)
    return sorted(found)


def render_docx(template_path: Path, context: dict, out_path: Path) -> Path:
    """Fill the DOCX template with *context* and write the result.

    Args:
        template_path: Path of the docxtpl template.
        context: Values passed to the template renderer.
        out_path: Destination file; missing parent directories are created.

    Returns:
        *out_path*, unchanged.
    """
    tpl = DocxTemplate(str(template_path))
    tpl.render(context)
    # Mirror docx_to_pdf, which also creates its output directory on demand.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    tpl.save(str(out_path))
    return out_path


def docx_to_pdf(docx_path: Path, out_dir: Path) -> Path:
    """Convert a DOCX file to PDF using headless LibreOffice.

    LibreOffice needs its own profile directory, otherwise parallel workers
    collide; a throw-away profile is created for every call.

    Args:
        docx_path: The DOCX file to convert.
        out_dir: Directory receiving the PDF; created if missing.

    Returns:
        Path of the generated PDF (``<out_dir>/<docx stem>.pdf``).

    Raises:
        RuntimeError: ``soffice`` exited with a non-zero status.
        FileNotFoundError: ``soffice`` reported success but the PDF is absent.
        subprocess.TimeoutExpired: The conversion exceeded 120 seconds.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    pdf_path = out_dir / (docx_path.stem + ".pdf")

    # Remove a PDF left over from an earlier run: otherwise the existence
    # check below would accept the stale file if this conversion wrote nothing.
    if pdf_path.exists():
        pdf_path.unlink()

    with tempfile.TemporaryDirectory(prefix="lo-profile-") as profile_dir:
        # as_uri() yields a well-formed, percent-encoded file URL on every
        # platform, unlike plain "file://" + path concatenation.
        profile_uri = Path(profile_dir).resolve().as_uri()
        cmd = [
            "soffice",
            "--headless",
            "--nologo",
            "--norestore",
            "--nolockcheck",
            f"-env:UserInstallation={profile_uri}",
            "--convert-to",
            "pdf",
            "--outdir",
            str(out_dir),
            str(docx_path),
        ]
        logger.info("LibreOffice convert: %s", " ".join(cmd))
        result = subprocess.run(  # noqa: S603
            cmd, capture_output=True, text=True, timeout=120, check=False
        )

    if result.returncode != 0:
        # Fall back to stdout so the message is not empty when nothing was
        # written to stderr.
        detail = result.stderr or result.stdout
        raise RuntimeError(
            f"LibreOffice-Konvertierung fehlgeschlagen: {detail}"
        )
    if not pdf_path.exists():
        raise FileNotFoundError(f"PDF nicht gefunden: {pdf_path}")
    return pdf_path