Erste lauffähige Version

This commit is contained in:
2026-05-21 10:36:16 +02:00
commit 6a103adac4
98 changed files with 4107 additions and 0 deletions
@@ -0,0 +1,74 @@
"""
DOCX-Rendering und PDF-Konvertierung.
"""
from __future__ import annotations
import logging
import re
import subprocess
import tempfile
from pathlib import Path
from docx import Document
from docxtpl import DocxTemplate
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Matches the opening of a Jinja expression ("{{" plus optional whitespace)
# and captures the identifier that follows. Only the leading name is
# captured, so "{{ person.name }}" yields "person".
PLACEHOLDER_RE = re.compile(r"\{\{\s*([A-Za-z_][A-Za-z0-9_]*)")
def _iter_paragraphs(container):
    """Yield every paragraph of *container*, descending into nested tables.

    *container* is any python-docx object exposing ``paragraphs`` and
    ``tables`` (document body, header/footer, table cell).
    """
    yield from container.paragraphs
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                # Cells are containers themselves and may hold further tables.
                yield from _iter_paragraphs(cell)


def extract_placeholders(docx_path: Path) -> list[str]:
    """Read the Jinja placeholders from a DOCX and return them sorted.

    Scans the document body, all tables (including tables nested inside
    cells) and every header/footer that a section defines itself.

    Args:
        docx_path: Path of the DOCX template to inspect.

    Returns:
        The distinct placeholder names matched by ``PLACEHOLDER_RE``,
        sorted alphabetically.
    """
    doc = Document(str(docx_path))

    containers = [doc]
    for section in doc.sections:
        for part in (
            section.header,
            section.first_page_header,
            section.even_page_header,
            section.footer,
            section.first_page_footer,
            section.even_page_footer,
        ):
            # A linked header/footer has no content of its own; reading its
            # paragraphs would make python-docx resolve (or create) another
            # definition, so only parts with their own definition are scanned.
            if not part.is_linked_to_previous:
                containers.append(part)

    found: set[str] = set()
    for container in containers:
        for para in _iter_paragraphs(container):
            found.update(m.group(1) for m in PLACEHOLDER_RE.finditer(para.text))
    return sorted(found)
def render_docx(template_path: Path, context: dict, out_path: Path) -> Path:
    """Fill the DOCX template with *context* and write the result.

    Args:
        template_path: Path of the docxtpl template.
        context: Mapping of placeholder names to the values to render.
        out_path: Destination file; missing parent directories are created.

    Returns:
        ``out_path``, for convenient chaining.
    """
    tpl = DocxTemplate(str(template_path))
    tpl.render(context)
    # Saving fails if the target directory does not exist yet, so create it
    # first (docx_to_pdf does the same for its output directory).
    out_path.parent.mkdir(parents=True, exist_ok=True)
    tpl.save(str(out_path))
    return out_path
def docx_to_pdf(docx_path: Path, out_dir: Path) -> Path:
    """Convert a DOCX file to PDF using headless LibreOffice.

    LibreOffice needs its own profile directory per invocation, otherwise
    parallel workers collide; a temporary profile is created for each call.

    Args:
        docx_path: The DOCX file to convert.
        out_dir: Directory that receives the PDF; created if missing.

    Returns:
        Path of the generated PDF (``out_dir / <docx stem>.pdf``).

    Raises:
        RuntimeError: If LibreOffice exits with a non-zero status.
        FileNotFoundError: If LibreOffice reported success but produced no PDF.
        subprocess.TimeoutExpired: If the conversion exceeds 120 seconds.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    pdf_path = out_dir / (docx_path.stem + ".pdf")

    # Remove a leftover PDF from an earlier run so that a conversion which
    # fails while still exiting with status 0 cannot pass the existence check
    # below with a stale file. Never delete the input itself.
    if pdf_path.resolve() != docx_path.resolve():
        pdf_path.unlink(missing_ok=True)

    with tempfile.TemporaryDirectory(prefix="lo-profile-") as profile_dir:
        # as_uri() yields a well-formed file URL on every platform and
        # percent-encodes special characters; plain "file://" + path does not.
        profile_url = Path(profile_dir).resolve().as_uri()
        cmd = [
            "soffice",
            "--headless",
            "--nologo",
            "--norestore",
            "--nolockcheck",
            f"-env:UserInstallation={profile_url}",
            "--convert-to", "pdf",
            "--outdir", str(out_dir),
            str(docx_path),
        ]
        logger.info("LibreOffice convert: %s", " ".join(cmd))
        result = subprocess.run(  # noqa: S603
            cmd, capture_output=True, text=True, timeout=120, check=False
        )

    if result.returncode != 0:
        # Fall back to stdout when stderr is empty so the error is not blank.
        detail = result.stderr.strip() or result.stdout.strip()
        raise RuntimeError(
            f"LibreOffice-Konvertierung fehlgeschlagen: {detail}"
        )
    if not pdf_path.exists():
        # Exit status 0 without an output file: keep the tool output for
        # diagnosis, since the exception itself only names the path.
        logger.error(
            "LibreOffice produced no PDF (stdout=%r, stderr=%r)",
            result.stdout,
            result.stderr,
        )
        raise FileNotFoundError(f"PDF nicht gefunden: {pdf_path}")
    return pdf_path
@@ -0,0 +1,14 @@
from pathlib import Path
from pypdf import PdfWriter
def merge_pdfs(pdfs: list[Path], out_path: Path) -> Path:
    """Concatenate *pdfs*, in the given order, into one PDF at *out_path*.

    Args:
        pdfs: Paths of the PDF files to append, in output order.
        out_path: Destination file; missing parent directories are created.

    Returns:
        ``out_path``, for convenient chaining.
    """
    writer = PdfWriter()
    try:
        for pdf in pdfs:
            writer.append(str(pdf))
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with out_path.open("wb") as f:
            writer.write(f)
    finally:
        # Release the writer's resources even when appending or writing fails.
        writer.close()
    return out_path