# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
"""Robot 1 - catalog hunter.

Takes make/model tasks from ``data.catalog_discovery``, pages through the
Dutch RDW open-data registry for matching vehicles and stores one
``VehicleModelDefinition`` row per distinct technical variant
(make + normalized name + engine capacity + fuel type).
"""
import asyncio
import logging
import os
import re

import httpx
from sqlalchemy import select, text, update

from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition

# MB 2.0 logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s')
logger = logging.getLogger("Robot-1")


class CatalogHunter:
    """Worker that discovers vehicle variants from the RDW open-data API."""

    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50
    # Stop scanning a make/model once this many registry rows were inspected.
    MAX_RECORDS_PER_TASK = 500
    # Pause when the task queue is empty.
    IDLE_SLEEP_SECONDS = 30
    # Pause before retrying a task that hit a transient RDW failure.
    RETRY_DELAY_SECONDS = 30

    _NON_ALNUM = re.compile(r'[^a-zA-Z0-9]')

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return *text_val* lower-cased with every non-ASCII-alphanumeric character removed."""
        if not text_val:
            return ""
        return cls._NON_ALNUM.sub('', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert *value* to ``int`` (truncating floats); return 0 when it cannot be converted."""
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) - treat like any other junk value.
            return 0

    @classmethod
    def _apply_fuel_rows(cls, results, fuel_rows):
        """Fold the fuel dataset rows into *results* (max power, fuel list, first emission class)."""
        max_p = 0
        f_types = []
        for row in fuel_rows:
            p1 = cls.parse_int(row.get("netto_maximum_vermogen") or row.get("nettomaximumvermogen"))
            p2 = cls.parse_int(row.get("nominaal_continu_maximum_vermogen") or row.get("nominaalcontinuvermogen"))
            max_p = max(max_p, p1, p2)

            f = row.get("brandstof_omschrijving")
            if f and f not in f_types:
                f_types.append(f)

            if not results["euro_klasse"]:
                results["euro_klasse"] = row.get("uitlaatemissieniveau") or row.get("euro_klasse")

        results["power_kw"] = max_p
        results["fuel_desc"] = ", ".join(f_types) if f_types else "Unknown"

    @classmethod
    def _apply_engine_rows(cls, results, eng_rows):
        """Copy the engine code of the first engine dataset row into *results*."""
        if eng_rows:
            results["engine_code"] = eng_rows[0].get("motorcode")

    @classmethod
    def _merge_extra_response(cls, plate, resp, merge, results):
        """Merge one auxiliary RDW response into *results*; log and skip it on any failure."""
        if isinstance(resp, BaseException):
            logger.error(f"❌ RDW-Extra hiba ({plate}): {resp}")
            return
        if resp.status_code != 200:
            return
        try:
            merge(results, resp.json())
        except Exception as e:
            logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")

    @classmethod
    async def fetch_extra_tech(cls, client, plate):
        """Fetch fuel and engine details for a licence plate.

        Args:
            client: an ``httpx.AsyncClient`` used for both requests.
            plate: value sent as the ``kenteken`` query parameter.

        Returns:
            dict with keys ``power_kw``, ``euro_klasse``, ``fuel_desc`` and
            ``engine_code``. Never raises for request/parse errors: whatever
            could not be fetched keeps its default value.
        """
        params = {"kenteken": plate}
        results = {"power_kw": 0, "euro_klasse": None, "fuel_desc": "Unknown", "engine_code": None}

        # return_exceptions=True: a failure of one endpoint must not discard
        # the data successfully returned by the other one.
        resp_fuel, resp_eng = await asyncio.gather(
            client.get(cls.RDW_FUEL, params=params, headers=cls.HEADERS),
            client.get(cls.RDW_ENGINE, params=params, headers=cls.HEADERS),
            return_exceptions=True,
        )
        cls._merge_extra_response(plate, resp_fuel, cls._apply_fuel_rows, results)
        cls._merge_extra_response(plate, resp_eng, cls._apply_engine_rows, results)
        return results

    @classmethod
    async def _mark_processed(cls, db, task_id, clean_make, clean_model):
        """Flag the discovery task as processed and commit."""
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()
        logger.info(f"🏁 {clean_make} {clean_model} feldolgozva.")

    @classmethod
    async def _upsert_variant(cls, db, item, plate, tech, clean_make, v_class, priority):
        """Update the matching technical variant or add a new one (no commit)."""
        raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
        model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
        norm_name = cls.normalize(model_name_clean)
        ccm = cls.parse_int(item.get("cilinderinhoud"))

        # Check whether we already have this technical variant
        stmt = select(VehicleModelDefinition).where(
            VehicleModelDefinition.make == clean_make,
            VehicleModelDefinition.normalized_name == norm_name,
            VehicleModelDefinition.engine_capacity == ccm,
            VehicleModelDefinition.fuel_type == tech["fuel_desc"]
        ).limit(1)
        existing = (await db.execute(stmt)).scalar_one_or_none()

        if existing:
            # Only overwrite when we found more precise data
            if tech["engine_code"]:
                existing.engine_code = tech["engine_code"]
            if tech["power_kw"] > 0:
                existing.power_kw = tech["power_kw"]
            existing.priority_score = priority  # refresh priority
            return

        # Create a new record
        db.add(VehicleModelDefinition(
            make=clean_make,
            marketing_name=model_name_clean,
            normalized_name=norm_name,
            marketing_name_aliases=[raw_model],
            technical_code=plate,
            fuel_type=tech["fuel_desc"],
            engine_capacity=ccm,
            engine_code=tech["engine_code"],
            power_kw=tech["power_kw"],
            cylinders=cls.parse_int(item.get("aantal_cilinders")),
            euro_classification=tech["euro_klasse"],
            vehicle_class=v_class,
            priority_score=priority,
            status="ACTIVE",  # per the original note: the "X-ray" (Röntgen) step needs this
            source="PRECISION-HUNTER-v2.1"
        ))

    @classmethod
    async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
        """Scan the RDW registry for one make/model and store its variants.

        Args:
            db: async SQLAlchemy session; committed after every page.
            task_id: id of the ``data.catalog_discovery`` row being worked on.
            make_name: make as stored in the task (trimmed and upper-cased here).
            model_name: model as stored in the task (trimmed and upper-cased here).
            v_class: vehicle class copied onto new records.
            priority: priority score copied onto new and existing records.

        Returns:
            ``True`` when the task was finished and marked ``processed``;
            ``False`` when a transient RDW failure interrupted the scan and the
            task was left untouched so it can be retried.
        """
        clean_make = (make_name or "").strip().upper()
        clean_model = (model_name or "").strip().upper()
        logger.info(f"🎯 VADÁSZAT INDUL: {clean_make} {clean_model} (Prio: {priority})")

        current_offset = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = {
                    "merk": clean_make,
                    "handelsbenaming": clean_model,
                    "$limit": cls.BATCH_SIZE,
                    "$offset": current_offset,
                    "$order": "kenteken DESC"
                }
                try:
                    r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
                    status = r.status_code
                    batch = r.json() if status == 200 else []
                except Exception as e:
                    logger.error(
                        f"❌ RDW main query failed ({clean_make} {clean_model}, offset {current_offset}): {e}"
                    )
                    return False

                if status == 429 or status >= 500:
                    # Throttling / server trouble is not "no more data":
                    # keep the task open instead of closing it half-scanned.
                    logger.warning(
                        f"⚠️ RDW main query returned HTTP {status} ({clean_make} {clean_model}); will retry later"
                    )
                    return False
                if status != 200:
                    # Retrying an unexpected client error would not help and
                    # would block the queue, so the task is closed below.
                    logger.warning(
                        f"⚠️ RDW main query returned HTTP {status} ({clean_make} {clean_model}); closing task"
                    )

                if not batch:
                    await cls._mark_processed(db, task_id, clean_make, clean_model)
                    return True

                for item in batch:
                    plate = None  # keeps the except-handler below safe if item.get() itself fails
                    try:
                        plate = item.get("kenteken")
                        if not plate:
                            continue
                        tech = await cls.fetch_extra_tech(client, plate)
                        # SAVEPOINT per row: a DB error on one row is rolled
                        # back alone and does not poison the rest of the batch.
                        async with db.begin_nested():
                            await cls._upsert_variant(db, item, plate, tech, clean_make, v_class, priority)
                    except Exception as e:
                        logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")

                await db.commit()
                current_offset += len(batch)

                # Once enough rows of this model were inspected there is no
                # need to walk through all ~100,000 registered cars. The task
                # must be closed here, otherwise run() would pick it up again
                # immediately and rescan it forever.
                if current_offset >= cls.MAX_RECORDS_PER_TASK:
                    await cls._mark_processed(db, task_id, clean_make, clean_model)
                    return True

                await asyncio.sleep(0.1)

    @classmethod
    async def run(cls):
        """Main loop: repeatedly take the highest-priority open task and process it. Never returns."""
        logger.info("🤖 Vehicle Catalog Hunter ONLINE")
        # Fetch the highest-priority open task
        query = text("""
            SELECT id, make, model, vehicle_class, priority_score
            FROM data.catalog_discovery
            WHERE status IN ('pending', 'processing')
            ORDER BY priority_score DESC
            LIMIT 1
        """)
        while True:
            finished = True
            async with AsyncSessionLocal() as db:
                task = (await db.execute(query)).fetchone()
                if task:
                    # Set status to 'processing' so other robots do not touch it.
                    # NOTE(review): the query above also selects 'processing'
                    # rows (which lets an interrupted task resume), so this is
                    # not an exclusive lock between identical workers - confirm.
                    await db.execute(
                        text("UPDATE data.catalog_discovery SET status = 'processing' WHERE id = :id"),
                        {"id": task[0]},
                    )
                    await db.commit()
                    finished = await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])

            # Sleep only after the session is closed so no connection is held while idle.
            if not task:
                await asyncio.sleep(cls.IDLE_SLEEP_SECONDS)
            elif finished is False:
                await asyncio.sleep(cls.RETRY_DELAY_SECONDS)


if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())