Overview
Manual report generation is tedious and error-prone. This guide builds a system that extracts data from multiple sources, processes it through templates, and generates professional reports automatically. The pattern works for QBRs, security scorecards, compliance reports, and more.
Architecture
┌───────────────┐ ┌──────────────┐ ┌──────────────┐
│ Data Sources │ │ Processing │ │ Output │
│ │ │ │ │ │
│ CSV Files │───►│ Extract │───►│ HTML Report │
│ Excel │ │ Transform │ │ PDF Export │
│ APIs │ │ Template │ │ Email │
│ Databases │ │ │ │ │
└───────────────┘ └──────────────┘ └──────────────┘Requirements
pip install jinja2 pandas openpyxl weasyprint| Package | Purpose |
|---|---|
| jinja2 | HTML template rendering |
| pandas | Data extraction and transformation |
| openpyxl | Excel file reading |
| weasyprint | HTML to PDF conversion |
Process
Step 1: Project Structure
report-generator/
├── templates/
│ ├── base.html # Base layout
│ └── qbr.html # QBR report template
├── data/
│ └── clients/ # Per-client data files
├── output/ # Generated reports
├── extractors.py # Data extraction logic
├── generator.py # Report generation engine
└── main.py # Entry pointStep 2: Data Extraction
# extractors.py
import pandas as pd
from pathlib import Path
from typing import Any
class DataExtractor:
"""Extract and normalize data from various sources."""
def __init__(self, data_dir: Path):
self.data_dir = data_dir
def extract_client_data(self, client_id: str) -> dict[str, Any]:
"""Extract all data for a specific client."""
client_dir = self.data_dir / "clients" / client_id
return {
"client_info": self._read_client_info(client_dir),
"tickets": self._read_tickets(client_dir),
"security_score": self._calculate_security_score(client_dir),
"recommendations": self._read_recommendations(client_dir),
}
def _read_client_info(self, path: Path) -> dict:
df = pd.read_excel(path / "info.xlsx")
return df.iloc[0].to_dict()
def _read_tickets(self, path: Path) -> list[dict]:
df = pd.read_csv(path / "tickets.csv")
return df.to_dict(orient="records")
def _calculate_security_score(self, path: Path) -> dict:
df = pd.read_csv(path / "security-audit.csv")
total = len(df)
passed = len(df[df["status"] == "pass"])
score = round((passed / total) * 100) if total > 0 else 0
return {
"score": score,
"total_checks": total,
"passed": passed,
"failed": total - passed,
"grade": self._score_to_grade(score),
}
@staticmethod
def _score_to_grade(score: int) -> str:
if score >= 90: return "A"
if score >= 80: return "B"
if score >= 70: return "C"
if score >= 60: return "D"
return "F"
def _read_recommendations(self, path: Path) -> list[dict]:
rec_file = path / "recommendations.json"
if rec_file.exists():
import json
return json.loads(rec_file.read_text())
return []Step 3: Jinja2 Template
{# templates/qbr.html #}
<!DOCTYPE html>
<html>
<head>
<style>
body { font-family: 'Segoe UI', sans-serif; color: #1a1a2e; margin: 40px; }
.header { background: linear-gradient(135deg, #0f3460, #16213e); color: white;
padding: 30px; border-radius: 8px; margin-bottom: 30px; }
.metric-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 20px;
margin: 20px 0; }
.metric-card { background: #f8f9fa; border-radius: 8px; padding: 20px;
text-align: center; border-left: 4px solid #0f3460; }
.metric-value { font-size: 2em; font-weight: bold; color: #0f3460; }
.metric-label { color: #666; font-size: 0.9em; }
table { width: 100%; border-collapse: collapse; margin: 20px 0; }
th { background: #0f3460; color: white; padding: 12px; text-align: left; }
td { padding: 10px 12px; border-bottom: 1px solid #e0e0e0; }
tr:nth-child(even) { background: #f8f9fa; }
.score-badge { display: inline-block; padding: 8px 16px; border-radius: 20px;
font-weight: bold; font-size: 1.5em; }
.grade-A { background: #d4edda; color: #155724; }
.grade-B { background: #cce5ff; color: #004085; }
.grade-C { background: #fff3cd; color: #856404; }
.grade-D, .grade-F { background: #f8d7da; color: #721c24; }
</style>
</head>
<body>
<div class="header">
<h1>Quarterly Business Review</h1>
<p>{{ client.name }} — {{ report_date }}</p>
</div>
<h2>Security Scorecard</h2>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">
<span class="score-badge grade-{{ security.grade }}">
{{ security.grade }}
</span>
</div>
<div class="metric-label">Security Grade</div>
</div>
<div class="metric-card">
<div class="metric-value">{{ security.score }}%</div>
<div class="metric-label">Compliance Score</div>
</div>
<div class="metric-card">
<div class="metric-value">{{ security.passed }}/{{ security.total_checks }}</div>
<div class="metric-label">Checks Passed</div>
</div>
</div>
<h2>Support Summary</h2>
<div class="metric-grid">
<div class="metric-card">
<div class="metric-value">{{ tickets | length }}</div>
<div class="metric-label">Total Tickets</div>
</div>
<div class="metric-card">
<div class="metric-value">
{{ tickets | selectattr("priority", "equalto", "high") | list | length }}
</div>
<div class="metric-label">High Priority</div>
</div>
<div class="metric-card">
<div class="metric-value">
{{ tickets | selectattr("status", "equalto", "resolved") | list | length }}
</div>
<div class="metric-label">Resolved</div>
</div>
</div>
{% if recommendations %}
<h2>Recommendations</h2>
<table>
<tr><th>Priority</th><th>Recommendation</th><th>Impact</th></tr>
{% for rec in recommendations %}
<tr>
<td>{{ rec.priority }}</td>
<td>{{ rec.description }}</td>
<td>{{ rec.impact }}</td>
</tr>
{% endfor %}
</table>
{% endif %}
</body>
</html>Step 4: Report Generator
# generator.py
from pathlib import Path
from datetime import datetime
from jinja2 import Environment, FileSystemLoader
from weasyprint import HTML
class ReportGenerator:
def __init__(self, template_dir: Path, output_dir: Path):
self.env = Environment(loader=FileSystemLoader(template_dir))
self.output_dir = output_dir
self.output_dir.mkdir(parents=True, exist_ok=True)
def generate(self, template_name: str, data: dict, filename: str) -> Path:
"""Render template with data and export to PDF."""
template = self.env.get_template(template_name)
# Add common variables
data["report_date"] = datetime.now().strftime("%B %d, %Y")
data["generated_at"] = datetime.now().isoformat()
# Render HTML
html_content = template.render(**data)
# Save HTML (useful for debugging)
html_path = self.output_dir / f"{filename}.html"
html_path.write_text(html_content)
# Convert to PDF
pdf_path = self.output_dir / f"{filename}.pdf"
HTML(string=html_content).write_pdf(str(pdf_path))
return pdf_pathStep 5: Batch Processing
# main.py
from pathlib import Path
from extractors import DataExtractor
from generator import ReportGenerator
def generate_all_reports():
data_dir = Path("data")
extractor = DataExtractor(data_dir)
generator = ReportGenerator(Path("templates"), Path("output"))
# Process each client
clients_dir = data_dir / "clients"
for client_dir in clients_dir.iterdir():
if not client_dir.is_dir():
continue
client_id = client_dir.name
print(f"Generating report for {client_id}...")
data = extractor.extract_client_data(client_id)
pdf = generator.generate(
template_name="qbr.html",
data={
"client": data["client_info"],
"tickets": data["tickets"],
"security": data["security_score"],
"recommendations": data["recommendations"],
},
filename=f"QBR-{client_id}-{datetime.now().strftime('%Y-%m')}"
)
print(f" -> {pdf}")
if __name__ == "__main__":
from datetime import datetime
generate_all_reports()Key Takeaways
- Separate data extraction from presentation — makes templates reusable
- Jinja2 filters (
selectattr,length) handle data aggregation in templates - WeasyPrint produces print-quality PDFs from HTML/CSS
- Process clients in a loop for multi-tenant batch generation
- Store HTML alongside PDF for debugging template issues