átlagos kiegészítések jó sok

2026-03-22 11:02:05 +00:00
parent f53e0b53df
commit 5d44339f21
249 changed files with 20922 additions and 2253 deletions
--- a/backend/app/scripts/check_robots_integrity.py
+++ b/backend/app/scripts/check_robots_integrity.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python3
+"""
+Robot Health & Integrity Audit Script - Recursive Deep Integrity Audit
+
+This script automatically diagnoses the operational reliability of all of our robots
+(Scout, Enricher, Validator, Auditor) using recursive discovery. It performs the following checks:
+
+1. Auto-Discovery: Recursively walks the entire directory tree under `backend/app/workers/`
+2. Identification: Treats every `.py` file that is not `__init__.py` or a helper file as a robot/worker
+3. Deep Import Test: Tries to import each of them, with particular attention to the critical modules
+4. Model Sync 2.0: Checks that every robot uses the correct models
+5. Interface standardization: Checks for the presence of a `run()` method
+6. Categorized report: Service, Vehicle General, Vehicle Special, System & OCR categories
+"""
+
+import sys
+import importlib
+import inspect
+import asyncio
+from pathlib import Path
+from typing import List, Dict, Any, Tuple
+import logging
+import re
+
+# Setup logging
+# Configures the root logger at import time (INFO level, timestamped format) and
+# creates the named logger used by the audit steps in this script.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
+logger = logging.getLogger("Robot-Integrity-Audit")
+
+# Root directory for workers (relative to backend/app)
+# Resolved from this file's own location: <directory of this file>/../workers.
+WORKERS_ROOT = Path(__file__).parent.parent / "workers"
+
+# Exclusion patterns for non-robot files
+# discover_robot_files() compares each entry against the bare file name (not the
+# full path) of every *.py file found under WORKERS_ROOT; a file that matches any
+# entry is left out of the audit.
+# NOTE(review): short entries such as "config", "rules" and "report" exclude any
+# file whose name merely contains that text — confirm this breadth is intended.
+EXCLUDE_PATTERNS = [
+    "__init__.py",
+    "__pycache__",
+    ".pyc",
+    "test_",
+    "mapping_",
+    "config",
+    "dictionary",
+    "rules",
+    "report",
+    "monitor_",
+    "py_to_database",
+    "README",
+    # Files with dots in name (not valid Python module names)
+    # This entry is written as a regular expression rather than a literal fragment.
+    # discover_robot_files() additionally rejects any file name containing more
+    # than one dot, which covers the same files.
+    r".*\..*\.py",  # Matches files like "something.1.0.py"
+]
+
+# Categorization patterns
+# Maps a report category name to the regular expressions that select it.
+# discover_robot_files() applies them with re.search (case-insensitive) to the file
+# path relative to backend/app; categories are tried in the order written here and
+# the first category with a matching pattern wins. Files matching nothing are
+# labelled "Uncategorized".
+# Patterns containing "/" assume forward-slash path separators.
+CATEGORY_PATTERNS = {
+    "Service Robots": [
+        r"service_robot_\d+",
+        r"service/.*\.py$",
+    ],
+    "Vehicle General": [
+        r"vehicle_robot_[0-4]_.*",
+        r"R[0-4]_.*\.py$",
+        # The two entries below are already covered by r"vehicle_robot_[0-4]_.*";
+        # they appear to be kept to name the specific data sources.
+        r"vehicle_robot_1_[245]_.*",  # NHTSA, Heavy EU, GB
+        r"vehicle_robot_2_.*",  # RDW, AutoData
+    ],
+    "Vehicle Special": [
+        r"bike_.*\.py$",
+        r"vehicle_ultimate_.*\.py$",
+        r"ultimatespecs/.*\.py$",
+    ],
+    "System & OCR": [
+        r"system_.*\.py$",
+        r"subscription_.*\.py$",
+        r"ocr/.*\.py$",
+    ],
+}
+
+# Characters whose presence marks an EXCLUDE_PATTERNS entry as a regular expression
+# instead of a plain substring. (The ``r`` prefix of a raw string literal is not part
+# of the string value, so it cannot be used to tell the two kinds of entry apart.)
+_REGEX_METACHARS = frozenset("*\\^$[]()|+?{}")
+
+
+def _is_excluded(file_name: str) -> bool:
+    """Return True when ``file_name`` must not be audited as a robot module."""
+    # A name with more than one dot (e.g. "something.1.0.py") is not importable.
+    if file_name.count('.') > 1:
+        return True
+    for pattern in EXCLUDE_PATTERNS:
+        if _REGEX_METACHARS.intersection(pattern):
+            # Regex entry: anchored at the start of the file name.
+            if re.match(pattern, file_name):
+                return True
+        elif pattern in file_name:
+            # Plain entry: simple substring test.
+            return True
+    return False
+
+
+def _categorize(rel_posix_path: str) -> str:
+    """Return the first category whose pattern matches, else "Uncategorized"."""
+    for cat_name, patterns in CATEGORY_PATTERNS.items():
+        if any(re.search(pattern, rel_posix_path, re.IGNORECASE) for pattern in patterns):
+            return cat_name
+    return "Uncategorized"
+
+
+def discover_robot_files() -> List[Tuple[str, Path, str]]:
+    """
+    Recursively discover all robot files in the workers directory.
+
+    Every ``*.py`` file below ``WORKERS_ROOT`` is considered; files whose name matches
+    ``EXCLUDE_PATTERNS`` (or contains more than one dot) are skipped.
+
+    Returns:
+        A list of ``(module_name, file_path, category)`` tuples sorted by category and
+        then module name. ``module_name`` is the dotted import path prefixed with
+        ``app.``; ``category`` is a key of ``CATEGORY_PATTERNS`` or "Uncategorized".
+    """
+    robot_files = []
+    app_root = Path(__file__).parent.parent
+
+    for py_file in WORKERS_ROOT.rglob("*.py"):
+        if _is_excluded(py_file.name) or not py_file.is_file():
+            continue
+
+        # Calculate the path relative to backend/app
+        try:
+            rel_path = py_file.relative_to(app_root)
+        except ValueError as e:
+            logger.warning(f"Could not determine module for {py_file}: {e}")
+            continue
+
+        # Drop the ".py" suffix and turn any remaining dots into underscores so the
+        # joined result is a syntactically valid dotted module path.
+        module_parts = [
+            (part[:-3] if part.endswith('.py') else part).replace('.', '_')
+            for part in rel_path.parts
+        ]
+        # Add 'app' prefix since we're in backend/app directory
+        module_name = "app." + ".".join(module_parts)
+
+        # Category patterns are written with "/" separators, so match against the
+        # POSIX form of the path; str(rel_path) would use "\" on Windows.
+        category = _categorize(rel_path.as_posix())
+
+        robot_files.append((module_name, py_file, category))
+
+    # Sort by category and module name
+    robot_files.sort(key=lambda x: (x[2], x[0]))
+    return robot_files
+
+async def test_import(module_name: str) -> Tuple[bool, str]:
+    """Import ``module_name`` and report the outcome.
+
+    Returns:
+        ``(True, "")`` when the import succeeds, otherwise ``(False, message)`` where
+        ``message`` describes the ImportError, SyntaxError or other exception raised.
+        The outcome is also logged.
+    """
+    try:
+        importlib.import_module(module_name)
+        logger.info(f"✅ {module_name} import successful")
+    except ImportError as exc:
+        logger.error(f"❌ {module_name} import failed: {exc}")
+        return False, f"ImportError: {exc}"
+    except SyntaxError as exc:
+        logger.error(f"❌ {module_name} syntax error: {exc}")
+        return False, f"SyntaxError at line {exc.lineno}: {exc.msg}"
+    except Exception as exc:
+        # Anything else raised while executing the module body.
+        logger.error(f"❌ {module_name} import failed: {exc}")
+        return False, f"Exception: {type(exc).__name__}: {exc}"
+    return True, ""
+
+async def check_model_sync(module_name: str) -> List[str]:
+    """Scan the classes defined in a robot module for outdated model references.
+
+    Only classes whose ``__module__`` is the robot module itself are inspected, so
+    classes that were merely imported (shared models, library types) are not blamed
+    on the robot.
+
+    Args:
+        module_name: Dotted import path of the robot module.
+
+    Returns:
+        One warning string per (class, outdated pattern) hit. Empty when nothing is
+        found or when the module cannot be imported.
+    """
+    # Outdated identifiers to look for in class source code.
+    old_patterns = (
+        r"VehicleModelDefinitions",  # Plural mistake
+        r"vehicle_model_definitions",  # Old table name
+        r"ExternalReferenceQueues",  # Plural mistake
+    )
+
+    try:
+        module = importlib.import_module(module_name)
+        members = inspect.getmembers(module, inspect.isclass)
+    except Exception:
+        # Deliberately best-effort: import failures are reported by the import test.
+        return []
+
+    errors = []
+    for name, cls in members:
+        if name.startswith('_'):
+            continue
+        if getattr(cls, '__module__', None) != module.__name__:
+            continue  # imported from elsewhere, not part of this robot
+        try:
+            source = inspect.getsource(cls)
+        except (OSError, TypeError):
+            continue  # Can't get source for built-in or C extensions
+
+        for pattern in old_patterns:
+            if re.search(pattern, source):
+                errors.append(f"⚠️ {module_name}.{cls.__name__} uses old pattern: {pattern}")
+
+    return errors
+
+async def test_robot_interface(module_name: str) -> Tuple[bool, List[str]]:
+    """Test if a robot has a proper interface (run method, etc.).
+
+    The "main" class is chosen heuristically: the first class whose name contains
+    'Robot' or whose name (lower-cased, underscores removed) occurs in the module
+    path, falling back to the first class. Classes defined in the module itself are
+    preferred over classes that were only imported into it.
+
+    Args:
+        module_name: Dotted import path of the robot module.
+
+    Returns:
+        ``(interface_ok, notes)``. ``interface_ok`` is True when the main class has a
+        ``run``, ``execute`` or ``process`` attribute. ``notes`` holds findings and
+        informational messages (including the outcome of a trial instantiation).
+        If the module cannot be imported the result is ``(False, [error note])``.
+    """
+    interface_issues = []
+
+    try:
+        module = importlib.import_module(module_name)
+
+        public_classes = [cls for name, cls in inspect.getmembers(module, inspect.isclass)
+                          if not name.startswith('_')]
+
+        if not public_classes:
+            interface_issues.append("No classes found")
+            return False, interface_issues
+
+        # Prefer classes defined here; imported ones are only a fallback so that an
+        # imported helper type is not mistaken for the robot.
+        own_classes = [cls for cls in public_classes
+                       if getattr(cls, '__module__', None) == module.__name__]
+        classes = own_classes or public_classes
+
+        module_key = module_name.lower().replace('_', '')
+        main_class = next(
+            (cls for cls in classes
+             if 'Robot' in cls.__name__
+             or cls.__name__.lower().replace('_', '') in module_key),
+            classes[0],  # Fallback to first class
+        )
+
+        # Check for run/execute/process method (can be classmethod or instance method)
+        has_run_method = hasattr(main_class, 'run')
+        has_execute_method = hasattr(main_class, 'execute')
+        has_process_method = hasattr(main_class, 'process')
+        interface_ok = has_run_method or has_execute_method or has_process_method
+
+        if not interface_ok:
+            interface_issues.append(f"No run/execute/process method in {main_class.__name__}")
+        elif has_run_method:
+            run_method = getattr(main_class, 'run')
+            # Check if it's a classmethod or instance method
+            if inspect.ismethod(run_method) and run_method.__self__ is main_class:
+                logger.debug(f"✅ {module_name}.{main_class.__name__}.run is classmethod")
+            elif inspect.iscoroutinefunction(run_method):
+                logger.debug(f"✅ {module_name}.{main_class.__name__}.run is async")
+            else:
+                logger.debug(f"ℹ️ {module_name}.{main_class.__name__}.run is sync")
+
+        # Trial instantiation, only when __init__ takes nothing but ``self`` (or its
+        # signature cannot be read at all).
+        try:
+            sig = inspect.signature(main_class.__init__)
+        except (TypeError, ValueError, AttributeError):
+            # __init__ may not be standard, try anyway
+            attempt_instantiation = True
+        else:
+            attempt_instantiation = len(sig.parameters) == 1  # only self
+
+        if attempt_instantiation:
+            # Any failure here is informational only: it must not override the
+            # interface verdict computed above, and the constructor runs at most once.
+            try:
+                main_class()
+            except Exception as e:
+                interface_issues.append(f"Instantiation failed (expected): {e}")
+            else:
+                interface_issues.append(f"Instantiation successful")
+        else:
+            interface_issues.append(f"Instantiation requires arguments, skipping")
+
+        return interface_ok, interface_issues
+
+    except Exception as e:
+        interface_issues.append(f"Interface test error: {e}")
+        return False, interface_issues
+
+async def check_syntax_errors(file_path: Path) -> List[str]:
+    """Compile the file at ``file_path`` and report why compilation failed, if it did.
+
+    Returns:
+        An empty list when the file reads and compiles cleanly, otherwise a
+        single-element list describing the syntax error or other failure.
+    """
+    try:
+        with open(file_path, 'r', encoding='utf-8') as handle:
+            compile(handle.read(), str(file_path), 'exec')
+    except SyntaxError as exc:
+        return [f"Syntax error at line {exc.lineno}: {exc.msg}"]
+    except Exception as exc:
+        # Covers unreadable files, decoding problems and other compile() failures.
+        return [f"Compilation error: {exc}"]
+    return []
+
+async def generate_categorized_report(results: Dict) -> str:
+    """Generate a categorized audit report in Markdown.
+
+    Args:
+        results: Aggregate audit data with the keys ``robots`` (list of per-robot
+            dicts), ``total_robots``, ``import_success``, ``syntax_clean`` and
+            ``interface_ok``. Each robot dict supplies ``module``, ``category``,
+            ``import_success``, ``import_error``, ``syntax_errors`` and
+            ``interface_ok``; ``model_errors`` is optional.
+
+    Returns:
+        The report text: one section per non-empty category, an overall summary, the
+        list of failed imports and, when present, the model-sync warnings.
+    """
+    from datetime import datetime
+
+    report_lines = []
+    report_lines.append("# 🤖 Robot Integrity Audit Report")
+    report_lines.append(f"Generated: {datetime.now().isoformat()}")
+    report_lines.append(f"Total robots discovered: {results['total_robots']}")
+    report_lines.append("")
+
+    for category in ["Service Robots", "Vehicle General", "Vehicle Special", "System & OCR", "Uncategorized"]:
+        cat_robots = [r for r in results['robots'] if r['category'] == category]
+        if not cat_robots:
+            continue
+
+        report_lines.append(f"## {category}")
+        report_lines.append(f"**Count:** {len(cat_robots)}")
+
+        # Statistics
+        import_success = sum(1 for r in cat_robots if r['import_success'])
+        syntax_success = sum(1 for r in cat_robots if not r['syntax_errors'])
+        interface_ok = sum(1 for r in cat_robots if r['interface_ok'])
+
+        report_lines.append(f"- Import successful: {import_success}/{len(cat_robots)}")
+        report_lines.append(f"- Syntax clean: {syntax_success}/{len(cat_robots)}")
+        report_lines.append(f"- Interface OK: {interface_ok}/{len(cat_robots)}")
+
+        # List problematic robots
+        problematic = [r for r in cat_robots if not r['import_success'] or r['syntax_errors'] or not r['interface_ok']]
+        if problematic:
+            report_lines.append("\n**Problematic robots:**")
+            for robot in problematic:
+                issues = []
+                if not robot['import_success']:
+                    issues.append("Import failed")
+                if robot['syntax_errors']:
+                    issues.append(f"Syntax errors ({len(robot['syntax_errors'])})")
+                if not robot['interface_ok']:
+                    issues.append("Interface issues")
+                report_lines.append(f"- `{robot['module']}`: {', '.join(issues)}")
+
+        report_lines.append("")
+
+    # Summary
+    report_lines.append("## 📊 Summary")
+    report_lines.append(f"- **Total robots:** {results['total_robots']}")
+    report_lines.append(f"- **Import successful:** {results['import_success']}/{results['total_robots']}")
+    report_lines.append(f"- **Syntax clean:** {results['syntax_clean']}/{results['total_robots']}")
+    report_lines.append(f"- **Interface OK:** {results['interface_ok']}/{results['total_robots']}")
+
+    # Critical issues
+    critical = [r for r in results['robots'] if not r['import_success']]
+    if critical:
+        report_lines.append("\n## 🚨 Critical Issues (Import Failed)")
+        for robot in critical:
+            report_lines.append(f"- `{robot['module']}`: {robot['import_error']}")
+
+    # Model-sync findings are collected per robot; surface them so they are not lost.
+    model_warnings = [msg for r in results['robots'] for msg in (r.get('model_errors') or [])]
+    if model_warnings:
+        report_lines.append("\n## ⚠️ Model Sync Warnings")
+        report_lines.extend(f"- {msg}" for msg in model_warnings)
+
+    return "\n".join(report_lines)
+
+async def main():
+    """Main audit function with recursive discovery.
+
+    Discovers the robot files, runs the syntax, import, interface and model-sync
+    checks on each one, logs a summary and writes the categorized Markdown report to
+    ``audit_report_robots.md`` in the grandparent of this script's directory.
+
+    Returns:
+        False when no robot files are found or at least one robot fails to import;
+        True otherwise (a warning is logged if non-critical findings exist).
+    """
+    logger.info("🤖 Starting Recursive Deep Integrity Audit")
+    logger.info("=" * 60)
+
+    # Discover all robot files
+    logger.info("\n🔍 STEP 1: Discovering robot files...")
+    robot_files = discover_robot_files()
+
+    if not robot_files:
+        logger.error("❌ No robot files found!")
+        return False
+
+    logger.info(f"📁 Found {len(robot_files)} robot files")
+
+    results = {
+        'robots': [],
+        'total_robots': len(robot_files),
+        'import_success': 0,
+        'syntax_clean': 0,
+        'interface_ok': 0,
+    }
+
+    # Process each robot
+    logger.info("\n📦 STEP 2: Import and syntax tests...")
+    logger.info("-" * 40)
+
+    for i, (module_name, file_path, category) in enumerate(robot_files, 1):
+        logger.info(f"\n[{i}/{len(robot_files)}] Testing: {module_name} ({category})")
+
+        # Check syntax first
+        syntax_errors = await check_syntax_errors(file_path)
+
+        # Test import
+        import_success, import_error = await test_import(module_name)
+
+        # Test interface
+        interface_ok, interface_issues = await test_robot_interface(module_name)
+
+        # Check model sync
+        model_errors = await check_model_sync(module_name)
+
+        robot_result = {
+            'module': module_name,
+            'file': str(file_path),
+            'category': category,
+            'import_success': import_success,
+            'import_error': import_error,
+            'syntax_errors': syntax_errors,
+            'interface_ok': interface_ok,
+            'interface_issues': interface_issues,
+            'model_errors': model_errors,
+        }
+
+        results['robots'].append(robot_result)
+
+        if import_success:
+            results['import_success'] += 1
+        if not syntax_errors:
+            results['syntax_clean'] += 1
+        if interface_ok:
+            results['interface_ok'] += 1
+
+        # Log summary for this robot
+        status_symbol = "✅" if import_success and not syntax_errors else "❌"
+        logger.info(f"{status_symbol} {module_name}: Import={import_success}, Syntax={len(syntax_errors)} errors, Interface={interface_ok}")
+
+    # Generate report
+    logger.info("\n📊 STEP 3: Generating categorized report...")
+    report = await generate_categorized_report(results)
+
+    # Print summary to console
+    logger.info("\n" + "=" * 60)
+    logger.info("📊 AUDIT SUMMARY")
+    logger.info("=" * 60)
+    logger.info(f"Total robots discovered: {results['total_robots']}")
+    logger.info(f"Import successful: {results['import_success']}/{results['total_robots']}")
+    logger.info(f"Syntax clean: {results['syntax_clean']}/{results['total_robots']}")
+    logger.info(f"Interface OK: {results['interface_ok']}/{results['total_robots']}")
+
+    # Save report to file
+    report_path = Path(__file__).parent.parent.parent / "audit_report_robots.md"
+    with open(report_path, 'w', encoding='utf-8') as f:
+        f.write(report)
+    logger.info(f"\n📄 Full report saved to: {report_path}")
+
+    # Determine overall status
+    total = results['total_robots']
+    critical_count = sum(1 for r in results['robots'] if not r['import_success'])
+    if critical_count > 0:
+        logger.error(f"🚨 ROBOT INTEGRITY CHECK FAILED - {critical_count} critical issues found!")
+        return False
+
+    # Non-critical findings: every import worked, but something else was flagged.
+    # (Comparing import_success with the total here would be unreachable, because
+    # that is exactly the critical case handled above.)
+    has_warnings = (
+        results['syntax_clean'] < total
+        or results['interface_ok'] < total
+        or any(r['model_errors'] for r in results['robots'])
+    )
+    if has_warnings:
+        logger.warning("⚠️ ROBOT INTEGRITY CHECK PASSED with warnings")
+        return True
+
+    logger.info("✅ ROBOT INTEGRITY CHECK PASSED - All systems operational!")
+    return True
+
+if __name__ == "__main__":
+    # Process exit status: 0 when the audit passes, 1 when it fails.
+    sys.exit(0 if asyncio.run(main()) else 1)