some testing with python

This commit is contained in:
samantha42
2026-03-20 21:53:55 +01:00
parent 1d06e51db3
commit 7e7c3f6bf4
14 changed files with 3152 additions and 2 deletions

View File

@@ -0,0 +1,428 @@
"""
FP&A API Loader
Reads generated CSV files and loads them into the Go FP&A API.
Real endpoint map (from main.go):
POST /api/v1/budgets ← one budget record per request
PUT /api/v1/budgets/{id} ← update (not used in seeding)
DELETE /api/v1/budgets/{id} ← delete (not used in seeding)
POST /api/v1/actuals/ingest ← bulk actuals ingest
GET /api/v1/variance ← read-only, not loaded
GET /api/v1/variance/alerts ← read-only, not loaded
GET /api/v1/health ← health check
Load order matters:
1. Budgets first — actuals reference budget lines by category+period
2. Actuals second — ingested in bulk against existing budgets
"""
import csv
import json
import time
import os
import requests
from typing import List, Dict, Any, Optional, Callable
from dataclasses import dataclass
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data", "csv")
# ── Config ────────────────────────────────────────────────────────────────────
@dataclass
class LoaderConfig:
base_url: str = "http://localhost:8080"
batch_size: int = 50 # used for actuals ingest
delay_between_batches: float = 0.05
dry_run: bool = False
auth_token: Optional[str] = None
DEFAULT_CONFIG = LoaderConfig()
# ── CSV reading helpers ───────────────────────────────────────────────────────
INT_FIELDS = {"year", "month"}
FLOAT_FIELDS = {
"budget_amount", "actual_amount", "variance", "variance_pct",
"product_revenue", "service_revenue", "total_revenue",
"cogs_product", "cogs_service", "total_cogs",
"gross_profit", "gross_margin_pct", "total_opex",
"ebitda", "ebitda_margin_pct", "net_income",
"cash_collected_product", "cash_collected_service",
"cash_paid_opex", "cash_paid_cogs", "net_operating_cash_flow",
"capex", "net_investing_cash_flow", "loan_repayment",
"equity_raised", "net_financing_cash_flow",
"net_change_in_cash", "closing_cash_balance",
"annual_salary_budget", "actual_salary_paid_ytd", "headcount_fte",
}
def _coerce(row: Dict[str, str]) -> Dict[str, Any]:
out = {}
for k, v in row.items():
if k in INT_FIELDS:
out[k] = int(v) if v else None
elif k in FLOAT_FIELDS:
out[k] = float(v) if v else None
else:
out[k] = v
return out
def read_csv(filename: str) -> List[Dict[str, Any]]:
path = os.path.join(DATA_DIR, filename)
if not os.path.exists(path):
raise FileNotFoundError(f"CSV not found: {path} (run generators/generate_data.py first)")
with open(path, newline="") as f:
return [_coerce(row) for row in csv.DictReader(f)]
# ── Payload mappers ───────────────────────────────────────────────────────────
# Each mapper takes one CSV row and returns the JSON body your Go handler expects.
# Adjust field names here if your Go structs use different names.
def revenue_row_to_budget(row: Dict) -> Dict:
"""revenue_budget_vs_actuals.csv → POST /api/v1/budgets"""
return {
"category": row["revenue_type"], # "Product" | "Service"
"department": "Revenue",
"period": row["period"], # "2023-01"
"year": row["year"],
"month": row["month"],
"amount": row["budget_amount"],
"currency": "USD",
"notes": f"{row['revenue_type']} revenue budget",
}
def revenue_row_to_actual(row: Dict) -> Dict:
"""revenue_budget_vs_actuals.csv → POST /api/v1/actuals/ingest"""
return {
"category": row["revenue_type"],
"department": "Revenue",
"period": row["period"],
"year": row["year"],
"month": row["month"],
"amount": row["actual_amount"],
"currency": "USD",
"source": "csv_import",
}
def opex_row_to_budget(row: Dict) -> Dict:
"""opex_budget_vs_actuals.csv → POST /api/v1/budgets"""
return {
"category": row["category"], # "Salaries", "Cloud Infrastructure", …
"department": row["department"],
"period": row["period"],
"year": row["year"],
"month": row["month"],
"amount": row["budget_amount"],
"currency": "USD",
"notes": f"{row['department']} opex budget",
}
def opex_row_to_actual(row: Dict) -> Dict:
"""opex_budget_vs_actuals.csv → POST /api/v1/actuals/ingest"""
return {
"category": row["category"],
"department": row["department"],
"period": row["period"],
"year": row["year"],
"month": row["month"],
"amount": row["actual_amount"],
"currency": "USD",
"source": "csv_import",
}
def pl_row_to_actual(row: Dict) -> Dict:
"""
pl_income_statement.csv → POST /api/v1/actuals/ingest
The P&L is a derived/summary view; we ingest key line items as actuals.
Budgets for these are already covered by revenue + opex CSVs.
"""
return {
"category": "P&L Summary",
"department": "Finance",
"period": row["period"],
"year": row["year"],
"month": row["month"],
"amount": row["net_income"],
"currency": "USD",
"source": "csv_import",
"metadata": {
"total_revenue": row["total_revenue"],
"gross_profit": row["gross_profit"],
"gross_margin_pct": row["gross_margin_pct"],
"ebitda": row["ebitda"],
"ebitda_margin_pct": row["ebitda_margin_pct"],
},
}
def cashflow_row_to_actual(row: Dict) -> Dict:
"""cash_flow.csv → POST /api/v1/actuals/ingest"""
return {
"category": "Cash Flow",
"department": "Finance",
"period": row["period"],
"year": row["year"],
"month": row["month"],
"amount": row["net_change_in_cash"],
"currency": "USD",
"source": "csv_import",
"metadata": {
"net_operating_cash_flow": row["net_operating_cash_flow"],
"net_investing_cash_flow": row["net_investing_cash_flow"],
"net_financing_cash_flow": row["net_financing_cash_flow"],
"closing_cash_balance": row["closing_cash_balance"],
"equity_raised": row["equity_raised"],
},
}
def headcount_row_to_actual(row: Dict) -> Dict:
"""headcount_workforce.csv → POST /api/v1/actuals/ingest (active employees only)"""
return {
"category": "Headcount",
"department": row["department"],
"period": row["period"],
"year": row["year"],
"month": row["month"],
"amount": row["actual_salary_paid_ytd"],
"currency": "USD",
"source": "csv_import",
"metadata": {
"employee_id": row["employee_id"],
"role": row["role"],
"status": row["status"],
"fte": row["headcount_fte"],
"annual_salary": row["annual_salary_budget"],
},
}
# ── HTTP helpers ──────────────────────────────────────────────────────────────
def _headers(config: LoaderConfig) -> Dict:
h = {"Content-Type": "application/json"}
if config.auth_token:
h["Authorization"] = f"Bearer {config.auth_token}"
return h
def post_one(url: str, payload: Dict, config: LoaderConfig) -> Dict:
"""Single POST — used for /api/v1/budgets (one record per call)."""
if config.dry_run:
print(f" [DRY RUN] POST {url}")
print(f" {json.dumps(payload, indent=6)}")
return {"status": "dry_run"}
try:
resp = requests.post(url, json=payload, headers=_headers(config), timeout=10)
resp.raise_for_status()
return resp.json() if resp.content else {"status": "ok"}
except requests.exceptions.ConnectionError:
return {"error": "connection_refused"}
except requests.exceptions.HTTPError as e:
return {"error": str(e), "status_code": resp.status_code, "body": resp.text}
except Exception as e:
return {"error": str(e)}
def post_batch(url: str, records: List[Dict], config: LoaderConfig) -> Dict:
"""Batch POST — used for /api/v1/actuals/ingest."""
if config.dry_run:
print(f" [DRY RUN] POST {url} ({len(records)} records)")
print(f" Sample: {json.dumps(records[0], indent=6)}")
return {"status": "dry_run", "count": len(records)}
try:
resp = requests.post(url, json={"records": records},
headers=_headers(config), timeout=30)
resp.raise_for_status()
return resp.json() if resp.content else {"status": "ok"}
except requests.exceptions.ConnectionError:
return {"error": "connection_refused"}
except requests.exceptions.HTTPError as e:
return {"error": str(e), "status_code": resp.status_code, "body": resp.text}
except Exception as e:
return {"error": str(e)}
# ── Budget loader: POST /api/v1/budgets ───────────────────────────────────────
def load_budgets(config: LoaderConfig = DEFAULT_CONFIG) -> Dict:
"""
Loads budget rows from revenue + opex CSVs.
Each row is a separate POST (budget records are individual, not batched).
Deduplicated by (category, department, period) to avoid double-posting.
"""
url = config.base_url.rstrip("/") + "/api/v1/budgets"
budget_rows = []
for filename, mapper in [
("revenue_budget_vs_actuals.csv", revenue_row_to_budget),
("opex_budget_vs_actuals.csv", opex_row_to_budget),
]:
rows = read_csv(filename)
budget_rows.extend(mapper(r) for r in rows)
# Deduplicate: last write wins for same category+dept+period
seen = {}
for row in budget_rows:
key = (row["category"], row["department"], row["period"])
seen[key] = row
unique = list(seen.values())
print(f"\n📋 Loading budgets → {url}")
print(f" {len(unique)} unique budget lines")
results = {"total": len(unique), "ok": 0, "errors": []}
for i, payload in enumerate(unique, 1):
result = post_one(url, payload, config)
if "error" in result:
results["errors"].append({**result, "payload": payload})
if len(results["errors"]) <= 3: # don't flood the console
print(f" ⚠ [{i}/{len(unique)}] {result['error']}")
else:
results["ok"] += 1
if i % 50 == 0 or i == len(unique):
print(f"{i}/{len(unique)} budgets posted")
time.sleep(0.01) # light throttle for individual POSTs
return results
# ── Actuals loader: POST /api/v1/actuals/ingest ───────────────────────────────
def load_actuals(config: LoaderConfig = DEFAULT_CONFIG) -> Dict:
"""
Collects actuals from all CSV sources and bulk-ingests them.
Filters headcount to Active employees only to avoid salary double-counting.
"""
url = config.base_url.rstrip("/") + "/api/v1/actuals/ingest"
all_actuals = []
# Revenue actuals
for row in read_csv("revenue_budget_vs_actuals.csv"):
all_actuals.append(revenue_row_to_actual(row))
# Opex actuals
for row in read_csv("opex_budget_vs_actuals.csv"):
all_actuals.append(opex_row_to_actual(row))
# P&L summary actuals
for row in read_csv("pl_income_statement.csv"):
all_actuals.append(pl_row_to_actual(row))
# Cash flow actuals
for row in read_csv("cash_flow.csv"):
all_actuals.append(cashflow_row_to_actual(row))
# Headcount actuals — active employees only, one record per employee per month
headcount_seen = set()
for row in read_csv("headcount_workforce.csv"):
if row["status"] != "Active":
continue
key = (row["employee_id"], row["period"])
if key in headcount_seen:
continue
headcount_seen.add(key)
all_actuals.append(headcount_row_to_actual(row))
print(f"\n📥 Loading actuals → {url}")
print(f" {len(all_actuals)} records | batch size {config.batch_size}")
results = {"total": len(all_actuals), "batches": 0, "errors": []}
for i in range(0, len(all_actuals), config.batch_size):
batch = all_actuals[i : i + config.batch_size]
result = post_batch(url, batch, config)
results["batches"] += 1
if "error" in result:
results["errors"].append(result)
print(f" ⚠ batch {results['batches']}: {result['error']}")
else:
print(f" ✓ batch {results['batches']} ({len(batch)} records)")
time.sleep(config.delay_between_batches)
return results
# ── Variance check: GET /api/v1/variance ──────────────────────────────────────
def check_variance(config: LoaderConfig = DEFAULT_CONFIG, period: Optional[str] = None):
"""
Calls the variance report after loading to verify data landed correctly.
"""
url = config.base_url.rstrip("/") + "/api/v1/variance"
params = {"period": period} if period else {}
print(f"\n📊 Fetching variance report → {url}")
if config.dry_run:
print(" [DRY RUN] skipped")
return {}
try:
resp = requests.get(url, params=params, timeout=10)
resp.raise_for_status()
data = resp.json()
print(f"{len(data) if isinstance(data, list) else 1} variance entries returned")
return data
except Exception as e:
print(f"{e}")
return {"error": str(e)}
def check_alerts(config: LoaderConfig = DEFAULT_CONFIG):
"""Calls /api/v1/variance/alerts — shows any over/under budget flags."""
url = config.base_url.rstrip("/") + "/api/v1/variance/alerts"
print(f"\n🚨 Fetching variance alerts → {url}")
if config.dry_run:
print(" [DRY RUN] skipped")
return {}
try:
resp = requests.get(url, timeout=10)
resp.raise_for_status()
data = resp.json()
count = len(data) if isinstance(data, list) else "?"
print(f"{count} alerts")
return data
except Exception as e:
print(f"{e}")
return {"error": str(e)}
# ── Full seed run ─────────────────────────────────────────────────────────────
def seed_all(config: LoaderConfig = DEFAULT_CONFIG):
"""
Full seed sequence:
1. Load budgets (POST /api/v1/budgets, one at a time)
2. Load actuals (POST /api/v1/actuals/ingest, batched)
3. Spot-check variance report
"""
print(f"\n🚀 FP&A Seed → {config.base_url}")
if config.dry_run:
print(" *** DRY RUN — no requests will be sent ***\n")
budget_result = load_budgets(config)
actuals_result = load_actuals(config)
check_variance(config)
check_alerts(config)
b_errors = len(budget_result.get("errors", []))
a_errors = len(actuals_result.get("errors", []))
print("\n── Seed Summary ─────────────────────────────────")
print(f" Budgets : {budget_result['ok']}/{budget_result['total']} ok"
+ (f"{b_errors} errors" if b_errors else ""))
print(f" Actuals : {actuals_result['total']} records in "
f"{actuals_result['batches']} batches"
+ (f"{a_errors} errors" if a_errors else ""))
print()
return {"budgets": budget_result, "actuals": actuals_result}
# ── CLI ───────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Seed FP&A CSV data into Go API")
parser.add_argument("--url", default="http://localhost:8080", help="API base URL")
parser.add_argument("--batch", type=int, default=50, help="Actuals batch size")
parser.add_argument("--dry-run", action="store_true", help="Print payloads, don't send")
parser.add_argument("--token", default=None, help="Bearer auth token")
parser.add_argument("--only", choices=["budgets", "actuals", "variance", "alerts"],
help="Run only one step instead of full seed")
args = parser.parse_args()
cfg = LoaderConfig(
base_url=args.url,
batch_size=args.batch,
dry_run=args.dry_run,
auth_token=args.token,
)
if args.only == "budgets":
load_budgets(cfg)
elif args.only == "actuals":
load_actuals(cfg)
elif args.only == "variance":
print(json.dumps(check_variance(cfg), indent=2))
elif args.only == "alerts":
print(json.dumps(check_alerts(cfg), indent=2))
else:
seed_all(cfg)