some testing with python

2026-03-20 21:53:55 +01:00
parent 1d06e51db3
commit 7e7c3f6bf4
14 changed files with 3152 additions and 2 deletions
--- a/testing/loader/api_loader.py
+++ b/testing/loader/api_loader.py
@@ -0,0 +1,428 @@
+"""
+FP&A API Loader
+Reads generated CSV files and loads them into the Go FP&A API.
+
+Real endpoint map (from main.go):
+  POST /api/v1/budgets          ← one budget record per request
+  PUT  /api/v1/budgets/{id}     ← update (not used in seeding)
+  DELETE /api/v1/budgets/{id}   ← delete (not used in seeding)
+  POST /api/v1/actuals/ingest   ← bulk actuals ingest
+  GET  /api/v1/variance         ← read-only, not loaded
+  GET  /api/v1/variance/alerts  ← read-only, not loaded
+  GET  /api/v1/health           ← health check
+
+Load order matters:
+  1. Budgets first  — actuals reference budget lines by category+period
+  2. Actuals second — ingested in bulk against existing budgets
+"""
+
+import csv
+import json
+import time
+import os
+import requests
+from typing import List, Dict, Any, Optional, Callable
+from dataclasses import dataclass
+
+DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data", "csv")
+
+# ── Config ────────────────────────────────────────────────────────────────────
+@dataclass
+class LoaderConfig:
+    base_url: str = "http://localhost:8080"
+    batch_size: int = 50           # used for actuals ingest
+    delay_between_batches: float = 0.05
+    dry_run: bool = False
+    auth_token: Optional[str] = None
+
+DEFAULT_CONFIG = LoaderConfig()
+
+# ── CSV reading helpers ───────────────────────────────────────────────────────
+INT_FIELDS = {"year", "month"}
+FLOAT_FIELDS = {
+    "budget_amount", "actual_amount", "variance", "variance_pct",
+    "product_revenue", "service_revenue", "total_revenue",
+    "cogs_product", "cogs_service", "total_cogs",
+    "gross_profit", "gross_margin_pct", "total_opex",
+    "ebitda", "ebitda_margin_pct", "net_income",
+    "cash_collected_product", "cash_collected_service",
+    "cash_paid_opex", "cash_paid_cogs", "net_operating_cash_flow",
+    "capex", "net_investing_cash_flow", "loan_repayment",
+    "equity_raised", "net_financing_cash_flow",
+    "net_change_in_cash", "closing_cash_balance",
+    "annual_salary_budget", "actual_salary_paid_ytd", "headcount_fte",
+}
+
+def _coerce(row: Dict[str, str]) -> Dict[str, Any]:
+    out = {}
+    for k, v in row.items():
+        if k in INT_FIELDS:
+            out[k] = int(v) if v else None
+        elif k in FLOAT_FIELDS:
+            out[k] = float(v) if v else None
+        else:
+            out[k] = v
+    return out
+
+def read_csv(filename: str) -> List[Dict[str, Any]]:
+    path = os.path.join(DATA_DIR, filename)
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"CSV not found: {path}  (run generators/generate_data.py first)")
+    with open(path, newline="") as f:
+        return [_coerce(row) for row in csv.DictReader(f)]
+
+# ── Payload mappers ───────────────────────────────────────────────────────────
+# Each mapper takes one CSV row and returns the JSON body your Go handler expects.
+# Adjust field names here if your Go structs use different names.
+
+def revenue_row_to_budget(row: Dict) -> Dict:
+    """revenue_budget_vs_actuals.csv → POST /api/v1/budgets"""
+    return {
+        "category":   row["revenue_type"],          # "Product" | "Service"
+        "department": "Revenue",
+        "period":     row["period"],                 # "2023-01"
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["budget_amount"],
+        "currency":   "USD",
+        "notes":      f"{row['revenue_type']} revenue budget",
+    }
+
+def revenue_row_to_actual(row: Dict) -> Dict:
+    """revenue_budget_vs_actuals.csv → POST /api/v1/actuals/ingest"""
+    return {
+        "category":   row["revenue_type"],
+        "department": "Revenue",
+        "period":     row["period"],
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["actual_amount"],
+        "currency":   "USD",
+        "source":     "csv_import",
+    }
+
+def opex_row_to_budget(row: Dict) -> Dict:
+    """opex_budget_vs_actuals.csv → POST /api/v1/budgets"""
+    return {
+        "category":   row["category"],               # "Salaries", "Cloud Infrastructure", …
+        "department": row["department"],
+        "period":     row["period"],
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["budget_amount"],
+        "currency":   "USD",
+        "notes":      f"{row['department']} opex budget",
+    }
+
+def opex_row_to_actual(row: Dict) -> Dict:
+    """opex_budget_vs_actuals.csv → POST /api/v1/actuals/ingest"""
+    return {
+        "category":   row["category"],
+        "department": row["department"],
+        "period":     row["period"],
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["actual_amount"],
+        "currency":   "USD",
+        "source":     "csv_import",
+    }
+
+def pl_row_to_actual(row: Dict) -> Dict:
+    """
+    pl_income_statement.csv → POST /api/v1/actuals/ingest
+    The P&L is a derived/summary view; we ingest key line items as actuals.
+    Budgets for these are already covered by revenue + opex CSVs.
+    """
+    return {
+        "category":   "P&L Summary",
+        "department": "Finance",
+        "period":     row["period"],
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["net_income"],
+        "currency":   "USD",
+        "source":     "csv_import",
+        "metadata": {
+            "total_revenue":      row["total_revenue"],
+            "gross_profit":       row["gross_profit"],
+            "gross_margin_pct":   row["gross_margin_pct"],
+            "ebitda":             row["ebitda"],
+            "ebitda_margin_pct":  row["ebitda_margin_pct"],
+        },
+    }
+
+def cashflow_row_to_actual(row: Dict) -> Dict:
+    """cash_flow.csv → POST /api/v1/actuals/ingest"""
+    return {
+        "category":   "Cash Flow",
+        "department": "Finance",
+        "period":     row["period"],
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["net_change_in_cash"],
+        "currency":   "USD",
+        "source":     "csv_import",
+        "metadata": {
+            "net_operating_cash_flow":  row["net_operating_cash_flow"],
+            "net_investing_cash_flow":  row["net_investing_cash_flow"],
+            "net_financing_cash_flow":  row["net_financing_cash_flow"],
+            "closing_cash_balance":     row["closing_cash_balance"],
+            "equity_raised":            row["equity_raised"],
+        },
+    }
+
+def headcount_row_to_actual(row: Dict) -> Dict:
+    """headcount_workforce.csv → POST /api/v1/actuals/ingest (active employees only)"""
+    return {
+        "category":   "Headcount",
+        "department": row["department"],
+        "period":     row["period"],
+        "year":       row["year"],
+        "month":      row["month"],
+        "amount":     row["actual_salary_paid_ytd"],
+        "currency":   "USD",
+        "source":     "csv_import",
+        "metadata": {
+            "employee_id":    row["employee_id"],
+            "role":           row["role"],
+            "status":         row["status"],
+            "fte":            row["headcount_fte"],
+            "annual_salary":  row["annual_salary_budget"],
+        },
+    }
+
+# ── HTTP helpers ──────────────────────────────────────────────────────────────
+def _headers(config: LoaderConfig) -> Dict:
+    h = {"Content-Type": "application/json"}
+    if config.auth_token:
+        h["Authorization"] = f"Bearer {config.auth_token}"
+    return h
+
+def post_one(url: str, payload: Dict, config: LoaderConfig) -> Dict:
+    """Single POST — used for /api/v1/budgets (one record per call)."""
+    if config.dry_run:
+        print(f"    [DRY RUN] POST {url}")
+        print(f"    {json.dumps(payload, indent=6)}")
+        return {"status": "dry_run"}
+    try:
+        resp = requests.post(url, json=payload, headers=_headers(config), timeout=10)
+        resp.raise_for_status()
+        return resp.json() if resp.content else {"status": "ok"}
+    except requests.exceptions.ConnectionError:
+        return {"error": "connection_refused"}
+    except requests.exceptions.HTTPError as e:
+        return {"error": str(e), "status_code": resp.status_code, "body": resp.text}
+    except Exception as e:
+        return {"error": str(e)}
+
+def post_batch(url: str, records: List[Dict], config: LoaderConfig) -> Dict:
+    """Batch POST — used for /api/v1/actuals/ingest."""
+    if config.dry_run:
+        print(f"    [DRY RUN] POST {url}  ({len(records)} records)")
+        print(f"    Sample: {json.dumps(records[0], indent=6)}")
+        return {"status": "dry_run", "count": len(records)}
+    try:
+        resp = requests.post(url, json={"records": records},
+                             headers=_headers(config), timeout=30)
+        resp.raise_for_status()
+        return resp.json() if resp.content else {"status": "ok"}
+    except requests.exceptions.ConnectionError:
+        return {"error": "connection_refused"}
+    except requests.exceptions.HTTPError as e:
+        return {"error": str(e), "status_code": resp.status_code, "body": resp.text}
+    except Exception as e:
+        return {"error": str(e)}
+
+# ── Budget loader: POST /api/v1/budgets ───────────────────────────────────────
+def load_budgets(config: LoaderConfig = DEFAULT_CONFIG) -> Dict:
+    """
+    Loads budget rows from revenue + opex CSVs.
+    Each row is a separate POST (budget records are individual, not batched).
+    Deduplicated by (category, department, period) to avoid double-posting.
+    """
+    url = config.base_url.rstrip("/") + "/api/v1/budgets"
+    budget_rows = []
+
+    for filename, mapper in [
+        ("revenue_budget_vs_actuals.csv", revenue_row_to_budget),
+        ("opex_budget_vs_actuals.csv",    opex_row_to_budget),
+    ]:
+        rows = read_csv(filename)
+        budget_rows.extend(mapper(r) for r in rows)
+
+    # Deduplicate: last write wins for same category+dept+period
+    seen = {}
+    for row in budget_rows:
+        key = (row["category"], row["department"], row["period"])
+        seen[key] = row
+    unique = list(seen.values())
+
+    print(f"\n📋 Loading budgets  →  {url}")
+    print(f"   {len(unique)} unique budget lines")
+
+    results = {"total": len(unique), "ok": 0, "errors": []}
+    for i, payload in enumerate(unique, 1):
+        result = post_one(url, payload, config)
+        if "error" in result:
+            results["errors"].append({**result, "payload": payload})
+            if len(results["errors"]) <= 3:  # don't flood the console
+                print(f"   ⚠  [{i}/{len(unique)}] {result['error']}")
+        else:
+            results["ok"] += 1
+            if i % 50 == 0 or i == len(unique):
+                print(f"   ✓  {i}/{len(unique)} budgets posted")
+        time.sleep(0.01)  # light throttle for individual POSTs
+
+    return results
+
+# ── Actuals loader: POST /api/v1/actuals/ingest ───────────────────────────────
+def load_actuals(config: LoaderConfig = DEFAULT_CONFIG) -> Dict:
+    """
+    Collects actuals from all CSV sources and bulk-ingests them.
+    Filters headcount to Active employees only to avoid salary double-counting.
+    """
+    url = config.base_url.rstrip("/") + "/api/v1/actuals/ingest"
+    all_actuals = []
+
+    # Revenue actuals
+    for row in read_csv("revenue_budget_vs_actuals.csv"):
+        all_actuals.append(revenue_row_to_actual(row))
+
+    # Opex actuals
+    for row in read_csv("opex_budget_vs_actuals.csv"):
+        all_actuals.append(opex_row_to_actual(row))
+
+    # P&L summary actuals
+    for row in read_csv("pl_income_statement.csv"):
+        all_actuals.append(pl_row_to_actual(row))
+
+    # Cash flow actuals
+    for row in read_csv("cash_flow.csv"):
+        all_actuals.append(cashflow_row_to_actual(row))
+
+    # Headcount actuals — active employees only, one record per employee per month
+    headcount_seen = set()
+    for row in read_csv("headcount_workforce.csv"):
+        if row["status"] != "Active":
+            continue
+        key = (row["employee_id"], row["period"])
+        if key in headcount_seen:
+            continue
+        headcount_seen.add(key)
+        all_actuals.append(headcount_row_to_actual(row))
+
+    print(f"\n📥 Loading actuals  →  {url}")
+    print(f"   {len(all_actuals)} records  |  batch size {config.batch_size}")
+
+    results = {"total": len(all_actuals), "batches": 0, "errors": []}
+    for i in range(0, len(all_actuals), config.batch_size):
+        batch = all_actuals[i : i + config.batch_size]
+        result = post_batch(url, batch, config)
+        results["batches"] += 1
+        if "error" in result:
+            results["errors"].append(result)
+            print(f"   ⚠  batch {results['batches']}: {result['error']}")
+        else:
+            print(f"   ✓  batch {results['batches']} ({len(batch)} records)")
+        time.sleep(config.delay_between_batches)
+
+    return results
+
+# ── Variance check: GET /api/v1/variance ──────────────────────────────────────
+def check_variance(config: LoaderConfig = DEFAULT_CONFIG, period: Optional[str] = None):
+    """
+    Calls the variance report after loading to verify data landed correctly.
+    """
+    url = config.base_url.rstrip("/") + "/api/v1/variance"
+    params = {"period": period} if period else {}
+    print(f"\n📊 Fetching variance report  →  {url}")
+    if config.dry_run:
+        print("   [DRY RUN] skipped")
+        return {}
+    try:
+        resp = requests.get(url, params=params, timeout=10)
+        resp.raise_for_status()
+        data = resp.json()
+        print(f"   ✓  {len(data) if isinstance(data, list) else 1} variance entries returned")
+        return data
+    except Exception as e:
+        print(f"   ⚠  {e}")
+        return {"error": str(e)}
+
+def check_alerts(config: LoaderConfig = DEFAULT_CONFIG):
+    """Calls /api/v1/variance/alerts — shows any over/under budget flags."""
+    url = config.base_url.rstrip("/") + "/api/v1/variance/alerts"
+    print(f"\n🚨 Fetching variance alerts  →  {url}")
+    if config.dry_run:
+        print("   [DRY RUN] skipped")
+        return {}
+    try:
+        resp = requests.get(url, timeout=10)
+        resp.raise_for_status()
+        data = resp.json()
+        count = len(data) if isinstance(data, list) else "?"
+        print(f"   ✓  {count} alerts")
+        return data
+    except Exception as e:
+        print(f"   ⚠  {e}")
+        return {"error": str(e)}
+
+# ── Full seed run ─────────────────────────────────────────────────────────────
+def seed_all(config: LoaderConfig = DEFAULT_CONFIG):
+    """
+    Full seed sequence:
+      1. Load budgets  (POST /api/v1/budgets, one at a time)
+      2. Load actuals  (POST /api/v1/actuals/ingest, batched)
+      3. Spot-check variance report
+    """
+    print(f"\n🚀 FP&A Seed  →  {config.base_url}")
+    if config.dry_run:
+        print("   *** DRY RUN — no requests will be sent ***\n")
+
+    budget_result  = load_budgets(config)
+    actuals_result = load_actuals(config)
+    check_variance(config)
+    check_alerts(config)
+
+    b_errors = len(budget_result.get("errors", []))
+    a_errors = len(actuals_result.get("errors", []))
+
+    print("\n── Seed Summary ─────────────────────────────────")
+    print(f"  Budgets : {budget_result['ok']}/{budget_result['total']} ok"
+          + (f"  ⚠ {b_errors} errors" if b_errors else "  ✅"))
+    print(f"  Actuals : {actuals_result['total']} records in "
+          f"{actuals_result['batches']} batches"
+          + (f"  ⚠ {a_errors} errors" if a_errors else "  ✅"))
+    print()
+    return {"budgets": budget_result, "actuals": actuals_result}
+
+# ── CLI ───────────────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Seed FP&A CSV data into Go API")
+    parser.add_argument("--url",       default="http://localhost:8080", help="API base URL")
+    parser.add_argument("--batch",     type=int, default=50, help="Actuals batch size")
+    parser.add_argument("--dry-run",   action="store_true", help="Print payloads, don't send")
+    parser.add_argument("--token",     default=None, help="Bearer auth token")
+    parser.add_argument("--only",      choices=["budgets", "actuals", "variance", "alerts"],
+                        help="Run only one step instead of full seed")
+    args = parser.parse_args()
+
+    cfg = LoaderConfig(
+        base_url=args.url,
+        batch_size=args.batch,
+        dry_run=args.dry_run,
+        auth_token=args.token,
+    )
+
+    if args.only == "budgets":
+        load_budgets(cfg)
+    elif args.only == "actuals":
+        load_actuals(cfg)
+    elif args.only == "variance":
+        print(json.dumps(check_variance(cfg), indent=2))
+    elif args.only == "alerts":
+        print(json.dumps(check_alerts(cfg), indent=2))
+    else:
+        seed_all(cfg)