# service-finder/backend/app/scripts/smart_admin_audit.py

#!/usr/bin/env python3
"""
Smart Admin Audit Script

This script performs a targeted audit of the Service Finder admin system:
1. Finds business hardcoded values (excluding trivial 0, 1, True, False)
2. Identifies which API modules lack /admin prefixed endpoints
3. Generates a comprehensive gap analysis report in Markdown format
"""

import ast
import os
import re
import datetime
from pathlib import Path
from typing import List, Dict, Set, Tuple, Any
import sys

# Filesystem layout used by the audit. The root is pinned to the absolute
# container path /app (the script itself lives at
# /app/app/scripts/smart_admin_audit.py); it is not computed from __file__.
PROJECT_ROOT = Path("/app")
# Inside the container the backend root and the project root are the same.
BACKEND_DIR = PROJECT_ROOT
# Source trees of interest, all located under the "app" package.
ENDPOINTS_DIR = BACKEND_DIR.joinpath("app", "api", "v1", "endpoints")
SERVICES_DIR = BACKEND_DIR.joinpath("app", "services")
MODELS_DIR = BACKEND_DIR.joinpath("app", "models")
# Destination of the generated Markdown report.
OUTPUT_FILE = PROJECT_ROOT.joinpath("admin_gap_analysis.md")

# Regex patterns describing "<business variable> = <integer>" assignments.
# Each pattern is the variable name followed by the same assignment suffix,
# so the list is generated from the names to keep them in one place.
BUSINESS_PATTERNS = [
    variable_name + r"\s*=\s*(\d+)"
    for variable_name in (
        "award_points",
        "validation_level",
        "max_vehicles",
        "max_users",
        "credit_limit",
        "daily_limit",
        "monthly_limit",
        "threshold",
        "quota",
        "priority",
        "timeout",
        "retry_count",
        "batch_size",
        "page_size",
        "cache_ttl",
        "expiry_days",
        "cooldown",
        "penalty",
        "reward",
        "discount",
        "commission",
        "fee",
        "vat_rate",
        "service_fee",
        "subscription_fee",
    )
]

# String forms of values considered too trivial to report.
TRIVIAL_VALUES = {
    "0",
    "1",
    "True",
    "False",
    "None",
    "''",
    '""',
    "[]",
    "{}",
}

def _collect_constant_assignments(tree: ast.AST,
                                  content: str,
                                  display_path: str,
                                  name_regexes: List["re.Pattern"]) -> List[Dict[str, Any]]:
    """
    Collect findings for constant assignments to plain names in one parsed file.

    Args:
        tree: AST of the file, as returned by ``ast.parse``.
        content: Source text the tree was parsed from (used for the context snippet).
        display_path: Path string stored in each finding's ``file`` field.
        name_regexes: Compiled business-name patterns, matched against the
            start of the variable name with ``match``.

    Returns:
        One finding dict (file, line, variable, value, context) per flagged
        assignment target, in ``ast.walk`` order.
    """
    results = []

    for node in ast.walk(tree):
        if not isinstance(node, ast.Assign) or not isinstance(node.value, ast.Constant):
            continue

        value = node.value.value
        value_str = str(value)
        if value_str in TRIVIAL_VALUES:
            continue

        # get_source_segment returns None when location info is unavailable;
        # normalise to "" so consumers can always treat context as a string.
        context = ast.get_source_segment(content, node) or ""

        for target in node.targets:
            if not isinstance(target, ast.Name):
                continue
            var_name = target.id

            # Exactly one finding per assignment target: a business-name match
            # takes precedence, the generic heuristics are only a fallback.
            if any(regex.match(var_name) for regex in name_regexes):
                reported_value = value_str
            elif isinstance(value, (int, float)) and value > 1:
                reported_value = value_str
            elif isinstance(value, str) and len(value) > 10 and " " not in value:
                # Could be API keys, URLs, etc. Only mark as truncated when
                # the value really was cut.
                shown = value_str if len(value_str) <= 50 else value_str[:50] + "..."
                reported_value = f'"{shown}"'
            else:
                continue

            results.append({
                "file": display_path,
                "line": node.lineno,
                "variable": var_name,
                "value": reported_value,
                "context": context,
            })

    return results


def find_hardcoded_values() -> List[Dict[str, Any]]:
    """
    Scan Python files under BACKEND_DIR for business-relevant hardcoded values.

    Directories whose name contains ``__pycache__``, ``.venv``, ``tests`` or
    ``migrations`` are pruned from the walk. Files that cannot be read,
    decoded as UTF-8 or parsed are skipped. Directories and files are visited
    in sorted order so the result is reproducible between runs.

    Returns:
        List of findings, each a dict with ``file`` (relative to
        PROJECT_ROOT), ``line``, ``variable``, ``value`` and ``context``.
        Each flagged assignment target is reported once.
    """
    excluded_markers = ("__pycache__", ".venv", "tests", "migrations")
    # Strip the assignment suffix once; only the variable-name part of each
    # business pattern is matched against identifiers.
    name_regexes = [
        re.compile(pattern.replace(r"\s*=\s*(\d+)", ""))
        for pattern in BUSINESS_PATTERNS
    ]

    findings = []

    for root, dirs, files in os.walk(BACKEND_DIR):
        # Assigning to dirs[:] prunes the walk in place, so excluded trees are
        # never descended into.
        dirs[:] = sorted(
            name for name in dirs
            if not any(marker in name for marker in excluded_markers)
        )

        for file in sorted(files):
            if not file.endswith(".py"):
                continue

            filepath = Path(root) / file
            try:
                content = filepath.read_text(encoding="utf-8")
                tree = ast.parse(content, filename=str(filepath))
            except (OSError, SyntaxError, UnicodeDecodeError, ValueError):
                # Unreadable, non-UTF-8 or unparsable file: skip it and keep
                # auditing the rest (ValueError covers sources with null bytes).
                continue

            findings.extend(
                _collect_constant_assignments(
                    tree,
                    content,
                    str(filepath.relative_to(PROJECT_ROOT)),
                    name_regexes,
                )
            )

    return findings

def _router_constructor_arguments(content: str) -> List[str]:
    """
    Return the argument text of every ``router = APIRouter(...)`` call.

    The closing parenthesis is located by counting nesting depth, so nested
    calls inside the argument list are included. Parentheses inside string
    literals or comments are not special-cased. If a call is never closed,
    the rest of the text is returned for it.
    """
    argument_texts = []
    for match in re.finditer(r"router\s*=\s*APIRouter\(", content):
        depth = 1
        end = len(content)
        for index in range(match.end(), len(content)):
            char = content[index]
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    end = index
                    break
        argument_texts.append(content[match.end():end])
    return argument_texts


def analyze_admin_endpoints() -> Dict[str, Dict[str, Any]]:
    """
    Analyze which API modules have /admin prefixed endpoints.

    Every ``*.py`` file directly inside ENDPOINTS_DIR is inspected, in sorted
    order so the resulting dict (and any report built from it) is stable.
    Files that cannot be read or decoded are reported on stdout and skipped.

    Returns:
        Dict keyed by module name (file stem). Each value holds:
        ``has_admin_prefix`` (a ``router = APIRouter(...)`` call passes
        ``prefix="/admin"``), ``admin_routes_count`` (``@router.<method>``
        decorators whose path contains ``/admin``), ``admin_functions``
        (``def`` lines mentioning "admin", case-insensitive), ``file_size``
        (character count) and ``has_admin_file`` (stem equals ``admin``).
        Empty when ENDPOINTS_DIR does not exist.
    """
    modules = {}

    if not ENDPOINTS_DIR.exists():
        print(f"Warning: Endpoints directory not found: {ENDPOINTS_DIR}")
        return modules

    for endpoint_file in sorted(ENDPOINTS_DIR.glob("*.py")):
        module_name = endpoint_file.stem
        try:
            with open(endpoint_file, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as exc:
            # One bad file should not abort the whole analysis.
            print(f"Warning: Could not read {endpoint_file}: {exc}")
            continue

        # Only look for the prefix inside the APIRouter(...) argument list;
        # a prefix="/admin" appearing in a later statement must not count.
        has_admin_prefix = any(
            re.search(r"prefix\s*=\s*[\"']/admin[\"']", arguments)
            for arguments in _router_constructor_arguments(content)
        )

        # Check for admin endpoints (routes with /admin in path)
        admin_routes = re.findall(r'@router\.\w+\([\"\'][^\"\']*?/admin[^\"\']*?[\"\']', content)

        # Check for admin-specific functions
        admin_functions = re.findall(r"def\s+\w+.*admin.*:", content, re.IGNORECASE)

        modules[module_name] = {
            "has_admin_prefix": has_admin_prefix,
            "admin_routes_count": len(admin_routes),
            "admin_functions": len(admin_functions),
            "file_size": len(content),
            "has_admin_file": (endpoint_file.stem == "admin")
        }

    return modules

def identify_missing_admin_modules(modules: Dict[str, Dict[str, Any]]) -> List[str]:
    """
    Return the core modules that have no admin coverage.

    A core module counts as missing when it is absent from ``modules``, or
    when its entry has neither an admin prefix nor any admin routes. The
    result keeps the order of the core module list.
    """
    expected_modules = (
        "users", "vehicles", "services", "assets", "organizations",
        "billing", "gamification", "analytics", "security", "documents",
        "evidence", "expenses", "finance_admin", "notifications", "reports",
        "catalog", "providers", "search", "social", "system_parameters",
    )

    def lacks_admin_coverage(name: str) -> bool:
        if name not in modules:
            return True
        details = modules[name]
        covered = details["has_admin_prefix"] or details["admin_routes_count"] != 0
        return not covered

    return [name for name in expected_modules if lacks_admin_coverage(name)]

def generate_markdown_report(hardcoded_findings: List[Dict[str, Any]],
                            modules: Dict[str, Dict[str, Any]],
                            missing_admin_modules: List[str]) -> str:
    """
    Generate comprehensive Markdown report.

    Args:
        hardcoded_findings: Finding dicts with ``file``, ``line``,
            ``variable``, ``value`` and ``context`` keys. Only the first 50
            are listed in the table; the remainder is summarised as a count.
        modules: Per-module analysis dicts providing ``has_admin_prefix`` and
            ``admin_routes_count``.
        missing_admin_modules: Names of core modules without admin endpoints.

    Returns:
        The full report as a single newline-joined Markdown string.
    """
    report = []
    report.append("# Admin System Gap Analysis Report")
    report.append(f"*Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*")
    report.append("")

    # Executive Summary
    report.append("## 📊 Executive Summary")
    report.append("")
    report.append(f"- **Total hardcoded business values found:** {len(hardcoded_findings)}")
    report.append(f"- **API modules analyzed:** {len(modules)}")
    report.append(f"- **Modules missing admin endpoints:** {len(missing_admin_modules)}")
    report.append("")

    # Hardcoded Values Section
    report.append("## 🔍 Hardcoded Business Values")
    report.append("")
    report.append("These values should be moved to `system_parameters` table for dynamic configuration.")
    report.append("")

    if hardcoded_findings:
        report.append("| File | Line | Variable | Value | Context |")
        report.append("|------|------|----------|-------|---------|")
        for finding in hardcoded_findings[:50]:  # Limit to 50 for readability
            file_link = finding["file"]
            line = finding["line"]
            variable = finding["variable"]
            # An unescaped pipe in a cell would split the table row.
            value = str(finding["value"]).replace("|", "\\|")
            # The context may be None when no source segment was available.
            raw_context = finding["context"] or ""
            context = raw_context.replace("|", "\\|").replace("\n", " ").strip()[:100]
            report.append(f"| `{file_link}` | {line} | `{variable}` | `{value}` | `{context}` |")

        if len(hardcoded_findings) > 50:
            report.append(f"\n*... and {len(hardcoded_findings) - 50} more findings*")
    else:
        report.append("*No significant hardcoded business values found.*")
    report.append("")

    # Admin Endpoints Analysis
    report.append("## 🏗️ Admin Endpoints Analysis")
    report.append("")
    report.append("### Modules with Admin Prefix")
    report.append("")

    admin_modules = [m for m, info in modules.items() if info["has_admin_prefix"]]
    if admin_modules:
        report.append(", ".join(f"`{m}`" for m in admin_modules))
    else:
        report.append("*No modules have `/admin` prefix*")
    report.append("")

    report.append("### Modules with Admin Routes (but no prefix)")
    report.append("")
    mixed_modules = [m for m, info in modules.items() if not info["has_admin_prefix"] and info["admin_routes_count"] > 0]
    if mixed_modules:
        for module in mixed_modules:
            info = modules[module]
            report.append(f"- `{module}`: {info['admin_routes_count']} admin routes")
    else:
        report.append("*No mixed admin routes found*")
    report.append("")

    # Missing Admin Modules
    report.append("## ⚠️ Critical Gaps: Missing Admin Endpoints")
    report.append("")
    report.append("These core business modules lack dedicated admin endpoints:")
    report.append("")

    if missing_admin_modules:
        for module in missing_admin_modules:
            report.append(f"- **{module}** - No `/admin` prefix and no admin routes")
        report.append("")
        report.append("### Recommended Actions:")
        report.append("1. Create `/admin` prefixed routers for each missing module")
        report.append("2. Implement CRUD endpoints for administrative operations")
        report.append("3. Add audit logging and permission checks")
    else:
        report.append("*All core modules have admin endpoints!*")
    report.append("")

    # Recommendations
    report.append("## 🚀 Recommendations")
    report.append("")
    report.append("### Phase 1: Hardcode Elimination")
    report.append("1. Create `system_parameters` migration if not exists")
    report.append("2. Move identified hardcoded values to database")
    report.append("3. Implement `ConfigService` for dynamic value retrieval")
    report.append("")
    report.append("### Phase 2: Admin Endpoint Expansion")
    report.append("1. Prioritize modules with highest business impact:")
    report.append("   - `users` (user management)")
    report.append("   - `billing` (financial oversight)")
    report.append("   - `security` (access control)")
    report.append("2. Follow consistent pattern: `/admin/{module}/...`")
    report.append("3. Implement RBAC with `admin` and `superadmin` roles")
    report.append("")
    report.append("### Phase 3: Monitoring & Audit")
    report.append("1. Add admin action logging to `SecurityAuditLog`")
    report.append("2. Implement admin dashboard with real-time metrics")
    report.append("3. Create automated health checks for admin endpoints")
    report.append("")

    # Technical Details
    report.append("## 🔧 Technical Details")
    report.append("")
    report.append("### Scan Parameters")
    report.append(f"- Project root: `{PROJECT_ROOT}`")
    report.append(f"- Files scanned: Python files in `{BACKEND_DIR}`")
    report.append(f"- Business patterns: {len(BUSINESS_PATTERNS)}")
    # TRIVIAL_VALUES is a set; sort it so the line is identical on every run.
    report.append(f"- Trivial values excluded: {', '.join(sorted(TRIVIAL_VALUES))}")
    report.append("")

    return "\n".join(report)

def main():
    """
    Run the audit end to end and write the Markdown report to OUTPUT_FILE.

    Scans for hardcoded values, analyzes the endpoint modules, determines
    which core modules lack admin endpoints, writes the report and prints a
    progress log plus a short summary (at most five gaps) to stdout.

    Returns:
        0, intended for use as the process exit code.
    """
    print("🔍 Starting Smart Admin Audit...")

    # 1. Find hardcoded values
    print("Step 1: Scanning for hardcoded business values...")
    hardcoded_findings = find_hardcoded_values()
    print(f"  Found {len(hardcoded_findings)} potential hardcoded values")

    # 2. Analyze admin endpoints
    print("Step 2: Analyzing admin endpoints...")
    modules = analyze_admin_endpoints()
    print(f"  Analyzed {len(modules)} API modules")

    # 3. Identify missing admin modules
    missing_admin_modules = identify_missing_admin_modules(modules)
    print(f"  Found {len(missing_admin_modules)} modules missing admin endpoints")

    # 4. Generate report
    print("Step 3: Generating Markdown report...")
    report = generate_markdown_report(hardcoded_findings, modules, missing_admin_modules)

    # Write to file
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(report)

    print(f"✅ Report generated: {OUTPUT_FILE}")
    print(f"   - Hardcoded values: {len(hardcoded_findings)}")
    print(f"   - Modules analyzed: {len(modules)}")
    print(f"   - Missing admin: {len(missing_admin_modules)}")

    # Print summary to console
    if missing_admin_modules:
        print("\n⚠️  CRITICAL GAPS:")
        for module in missing_admin_modules[:5]:
            print(f"   - {module} lacks admin endpoints")
        if len(missing_admin_modules) > 5:
            print(f"   ... and {len(missing_admin_modules) - 5} more")

    return 0

# Script entry point: run the audit and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)