75 lines
2.3 KiB
Python
75 lines
2.3 KiB
Python
|
|
"""DOCX rendering and PDF conversion."""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import logging
|
||
|
|
import re
|
||
|
|
import subprocess
|
||
|
|
import tempfile
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from docx import Document
|
||
|
|
from docxtpl import DocxTemplate
|
||
|
|
|
||
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
# Matches the opening of a Jinja expression (``{{`` plus optional whitespace)
# and captures the identifier that follows. Only the leading identifier is
# captured: for ``{{ user.name }}`` the match group is ``user``.
PLACEHOLDER_RE = re.compile(r"\{\{\s*([A-Za-z_][A-Za-z0-9_]*)")
|
||
|
|
|
||
|
|
|
||
|
|
def _collect_placeholders(container, found: set[str]) -> None:
    """Add every placeholder name found in ``container`` to ``found``.

    ``container`` is any python-docx object that exposes ``paragraphs`` and
    ``tables`` (document body, table cell, header or footer). Tables are
    walked recursively so placeholders inside nested tables are included.
    """
    for para in container.paragraphs:
        found.update(m.group(1) for m in PLACEHOLDER_RE.finditer(para.text))
    for table in container.tables:
        for row in table.rows:
            # Merged cells can be yielded more than once; the set makes the
            # repeated scan harmless.
            for cell in row.cells:
                _collect_placeholders(cell, found)


def extract_placeholders(docx_path: Path) -> list[str]:
    """Read the Jinja placeholders from a DOCX file and return them sorted.

    A placeholder is whatever ``PLACEHOLDER_RE`` captures, i.e. the leading
    identifier of a ``{{ ... }}`` expression. The document body, all tables
    (including nested ones) and the headers and footers of every section are
    scanned.

    Args:
        docx_path: Path of the DOCX file to inspect.

    Returns:
        The distinct placeholder names in ascending order.
    """
    doc = Document(str(docx_path))
    found: set[str] = set()
    _collect_placeholders(doc, found)
    for section in doc.sections:
        for part in (section.header, section.footer):
            # A header/footer linked to the previous section has no
            # definition of its own, so there is nothing extra to scan.
            if not part.is_linked_to_previous:
                _collect_placeholders(part, found)
    return sorted(found)
|
||
|
|
|
||
|
|
|
||
|
|
def render_docx(template_path: Path, context: dict, out_path: Path) -> Path:
    """Fill the DOCX template with ``context`` and write the result.

    Args:
        template_path: Path of the docxtpl template file.
        context: Mapping of template variable names to values.
        out_path: Destination file for the rendered document. Missing parent
            directories are created.

    Returns:
        ``out_path``, unchanged.
    """
    tpl = DocxTemplate(str(template_path))
    # NOTE(review): render() is called with docxtpl's default escaping
    # settings; confirm that callers never pass values containing raw XML
    # special characters (<, >, &), or enable autoescaping deliberately.
    tpl.render(context)
    # Create the target directory only once rendering succeeded, so a failed
    # render does not leave an empty directory behind.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    tpl.save(str(out_path))
    return out_path
|
||
|
|
|
||
|
|
|
||
|
|
def docx_to_pdf(docx_path: Path, out_dir: Path) -> Path:
    """Convert a DOCX file to PDF using headless LibreOffice.

    LibreOffice needs its own profile directory, otherwise parallel workers
    collide; a fresh temporary profile is therefore created for every call.
    The conversion writes into a private staging directory and the PDF is
    moved into ``out_dir`` afterwards, so a PDF left over from an earlier run
    can never be mistaken for the result of this one.

    Args:
        docx_path: The DOCX file to convert.
        out_dir: Directory that receives the PDF; created if missing.

    Returns:
        Path of the PDF, ``out_dir / (docx_path.stem + ".pdf")``.

    Raises:
        RuntimeError: LibreOffice exited with a non-zero return code.
        FileNotFoundError: LibreOffice exited successfully but produced no
            PDF.
        subprocess.TimeoutExpired: The conversion took longer than 120 s.
    """
    import shutil  # local import: only needed to move the staged PDF

    out_dir.mkdir(parents=True, exist_ok=True)
    pdf_name = docx_path.stem + ".pdf"
    pdf_path = out_dir / pdf_name
    with tempfile.TemporaryDirectory(prefix="lo-profile-") as profile_dir, \
            tempfile.TemporaryDirectory(prefix="lo-out-") as staging_dir:
        # as_uri() yields a well-formed, percent-encoded file URI on every
        # platform; it requires an absolute path, hence resolve().
        profile_uri = Path(profile_dir).resolve().as_uri()
        cmd = [
            "soffice",
            "--headless",
            "--nologo",
            "--norestore",
            "--nolockcheck",
            f"-env:UserInstallation={profile_uri}",
            "--convert-to", "pdf",
            "--outdir", staging_dir,
            str(docx_path),
        ]
        logger.info("LibreOffice convert: %s", " ".join(cmd))
        result = subprocess.run(  # noqa: S603
            cmd, capture_output=True, text=True, timeout=120, check=False
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"LibreOffice-Konvertierung fehlgeschlagen: {result.stderr}"
            )
        # The exit code alone is not trusted: the output file must exist in
        # the staging directory, which was empty before this run.
        staged_pdf = Path(staging_dir) / pdf_name
        if not staged_pdf.exists():
            raise FileNotFoundError(f"PDF nicht gefunden: {pdf_path}")
        # Must happen before the staging directory is cleaned up.
        shutil.move(str(staged_pdf), str(pdf_path))
    return pdf_path
|