import asyncio
import logging
import os
import re
import sys

import httpx
from sqlalchemy import text, select

from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition

# MB 2.0 strict logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
    stream=sys.stdout
)
logger = logging.getLogger("Robot-1")


class CatalogHunter:
    """
    Vehicle Robot 1.7.3: Mega-Hunter (Industrial Master Version)

    Full-scale RDW data mining with Variant/Version level granularity.
    Tasks are read from ``data.catalog_discovery``; every RDW row found for
    a make/model is stored as (or merged into) a ``VehicleModelDefinition``.
    """

    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50
    # Safety cap: rows fetched per make/model before the task is closed.
    MAX_ROWS_PER_MODEL = 300
    # Pause (seconds) before giving a task back for retry after a failed fetch.
    FETCH_RETRY_DELAY = 10.0

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return *text_val* lower-cased with every non ``[a-zA-Z0-9]`` char removed.

        Falsy input (``None``, ``""``) yields ``""``.
        """
        if not text_val:
            return ""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert *value* to ``int`` (via ``float``, truncating), or return 0.

        ``None``, blank strings and anything that cannot be converted yield 0.
        ``OverflowError`` is handled as well, because ``int(float("inf"))``
        raises it rather than ``ValueError``.
        """
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Convert *value* to ``float``; ``None``, blank or unparseable yields 0.0."""
        try:
            if value is None or str(value).strip() == "":
                return 0.0
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    @classmethod
    def _first_record(cls, response, plate):
        """Return the first JSON object of an RDW response, or ``None``.

        *response* is one result of ``asyncio.gather(..., return_exceptions=True)``:
        either a response object or the exception raised by the request.
        Request/parse failures are logged and reported as ``None``.
        """
        if isinstance(response, BaseException):
            if not isinstance(response, Exception):
                # e.g. CancelledError: must propagate, never be swallowed.
                raise response
            logger.error(f"❌ RDW-Extra hiba ({plate}): {response}")
            return None
        if response.status_code != 200:
            return None
        try:
            payload = response.json()
        except ValueError as e:
            logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
            return None
        if isinstance(payload, list) and payload and isinstance(payload[0], dict):
            return payload[0]
        return None

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """
        Fetch the extra technical data (kW, Euro class, CO2, engine code) in parallel.

        Args:
            client: an ``httpx.AsyncClient`` (or compatible) used for the requests.
            plate: the ``kenteken`` value to look up in both datasets.

        Returns:
            dict with the keys ``power_kw``, ``engine_code``, ``euro_class``,
            ``fuel_desc``, ``co2`` and ``consumption``. Keys whose source
            dataset could not be read keep their defaults.
        """
        results = {
            "power_kw": 0,
            "engine_code": None,
            "euro_class": None,
            "fuel_desc": "Unknown",
            "co2": 0,
            "consumption": 0.0,
        }
        query = {"kenteken": plate}

        # return_exceptions=True: a failure of one dataset must not throw away
        # the data successfully returned by the other one.
        fuel_resp, engine_resp = await asyncio.gather(
            client.get(cls.RDW_FUEL, params=query, headers=cls.HEADERS),
            client.get(cls.RDW_ENGINE, params=query, headers=cls.HEADERS),
            return_exceptions=True,
        )

        # Fuel and emission (8ys7-d773)
        fuel = cls._first_record(fuel_resp, plate)
        if fuel is not None:
            p1 = cls.parse_int(fuel.get("netto_maximum_vermogen") or fuel.get("nettomaximumvermogen"))
            p2 = cls.parse_int(fuel.get("nominaal_continu_maximum_vermogen") or fuel.get("nominaalcontinuvermogen"))
            results.update({
                "power_kw": max(p1, p2),
                "fuel_desc": fuel.get("brandstof_omschrijving") or "Unknown",
                "euro_class": fuel.get("euro_klasse") or fuel.get("uitlaatemissieniveau"),
                "co2": cls.parse_int(fuel.get("co2_uitstoot_gecombineerd")),
                "consumption": cls.parse_float(fuel.get("brandstofverbruik_gecombineerd")),
            })

        # Engine code (jh96-v4pq)
        engine = cls._first_record(engine_resp, plate)
        if engine is not None:
            results["engine_code"] = engine.get("motorcode")

        return results

    @classmethod
    async def _mark_processed(cls, db, task_id, clean_make, clean_model):
        """Set the discovery task to ``processed``, commit and log completion."""
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()
        logger.info(f"🏁 {clean_make} {clean_model} minden variánsa feldolgozva.")

    @classmethod
    async def _store_variant(cls, db, client, item, plate, clean_make, v_class, priority):
        """Insert or refresh the ``VehicleModelDefinition`` described by one RDW row."""
        # Identify the base data
        variant = item.get("variant")
        version = item.get("uitvoering")  # the Execution/Version code
        ccm = cls.parse_int(item.get("cilinderinhoud"))

        raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
        # Strip the make from the trade name; fall back to the raw name when
        # nothing would be left.
        model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
        norm_name = cls.normalize(model_name_clean)

        # Extra technical deep-dive (kW, fuel, engine)
        tech = await cls.fetch_tech_details(client, plate)

        # Check: does this exact technical variant already exist?
        stmt = select(VehicleModelDefinition).where(
            VehicleModelDefinition.make == clean_make,
            VehicleModelDefinition.normalized_name == norm_name,
            VehicleModelDefinition.engine_capacity == ccm,
            VehicleModelDefinition.variant_code == variant,
            VehicleModelDefinition.version_code == version,
            VehicleModelDefinition.fuel_type == tech["fuel_desc"]
        ).limit(1)
        existing = (await db.execute(stmt)).scalar_one_or_none()

        if existing:
            # Update the existing record with the priority and missing data
            existing.priority_score = priority
            if tech["power_kw"] > 0:
                existing.power_kw = tech["power_kw"]
            if tech["engine_code"]:
                existing.engine_code = tech["engine_code"]
            if tech["co2"] > 0:
                existing.co2_emissions_combined = tech["co2"]
            return

        # CREATE NEW RECORD - WITH EVERY DATA FIELD
        db.add(VehicleModelDefinition(
            make=clean_make,
            marketing_name=model_name_clean,
            normalized_name=norm_name,
            variant_code=variant,
            version_code=version,
            type_approval_number=item.get("typegoedkeuringsnummer"),
            technical_code=plate,  # Mandatory field!
            engine_capacity=ccm,
            power_kw=tech["power_kw"],
            fuel_type=tech["fuel_desc"],
            engine_code=tech["engine_code"],
            # Physical dimensions and weights (from RDW Main)
            seats=cls.parse_int(item.get("aantal_zitplaatsen")),
            doors=cls.parse_int(item.get("aantal_deuren")),
            width=cls.parse_int(item.get("breedte")),
            wheelbase=cls.parse_int(item.get("wielbasis")),
            list_price=cls.parse_int(item.get("catalogusprijs")),
            max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
            towing_weight_unbraked=cls.parse_int(item.get("maximum_massa_trekken_ongeremd")),
            towing_weight_braked=cls.parse_int(item.get("maximum_trekken_massa_geremd")),
            curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
            max_weight=cls.parse_int(item.get("technische_max_massa_voertuig") or item.get("toegestane_maximum_massa_voertuig")),
            body_type=item.get("inrichting"),
            # Emission and environmental data (from RDW Extra)
            co2_emissions_combined=tech["co2"],
            fuel_consumption_combined=tech["consumption"],
            euro_classification=tech["euro_class"],
            cylinders=cls.parse_int(item.get("aantal_cilinders")),
            # Meta data
            vehicle_class=v_class,
            priority_score=priority,
            status="ACTIVE",
            source="MEGA-HUNTER-v1.7.3"
        ))

    @classmethod
    async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
        """Page through the RDW main dataset for one make/model and store every row.

        The task is marked ``processed`` when RDW returns no more rows or when
        ``MAX_ROWS_PER_MODEL`` rows have been fetched. If the main request
        fails (exception, HTTP 429 or 5xx) the task is left untouched so that
        ``run`` picks it up again.

        Args:
            db: async database session used for lookups, inserts and commits.
            task_id: primary key of the row in ``data.catalog_discovery``.
            make_name: make as stored in the discovery table.
            model_name: model as stored in the discovery table.
            v_class: vehicle class copied onto new records.
            priority: priority score written to new and existing records.
        """
        clean_make = make_name.strip().upper()
        clean_model = model_name.strip().upper()
        logger.info(f"🎯 MEGA-VADÁSZAT INDUL: {clean_make} {clean_model}")

        current_offset = 0

        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = {
                    "merk": clean_make,
                    "handelsbenaming": clean_model,
                    "$limit": cls.BATCH_SIZE,
                    "$offset": current_offset,
                    "$order": "kenteken DESC"
                }
                try:
                    r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
                    status = r.status_code
                    batch = r.json() if status == 200 else []
                except Exception as e:
                    # Best effort: keep the task for a later retry, but never
                    # fail silently.
                    logger.error(f"RDW main request failed ({clean_make} {clean_model}): {e}")
                    await asyncio.sleep(cls.FETCH_RETRY_DELAY)
                    return

                if status == 429 or status >= 500:
                    # Transient server-side condition: an empty result here does
                    # NOT mean "no more data", so the task must stay open.
                    logger.warning(f"RDW main returned HTTP {status} ({clean_make} {clean_model}), task left for retry")
                    await asyncio.sleep(cls.FETCH_RETRY_DELAY)
                    return
                if status != 200:
                    logger.warning(f"RDW main returned HTTP {status} ({clean_make} {clean_model}), treating as no data")
                if not isinstance(batch, list):
                    logger.warning(f"RDW main returned an unexpected payload ({clean_make} {clean_model}), treating as no data")
                    batch = []

                if not batch:
                    await cls._mark_processed(db, task_id, clean_make, clean_model)
                    return

                for item in batch:
                    # Resolve the plate before the try block so that the error
                    # handler below can always reference it.
                    plate = item.get("kenteken") if isinstance(item, dict) else None
                    if not plate:
                        continue
                    try:
                        await cls._store_variant(db, client, item, plate, clean_make, v_class, priority)
                    except Exception as e:
                        logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")

                # Batch commit after every page
                await db.commit()
                current_offset += len(batch)

                # Safety cap: do not pull more rows of one model than needed for
                # its variations. The task must be closed here; otherwise it
                # stays in 'processing' and run() selects it again forever.
                if current_offset >= cls.MAX_ROWS_PER_MODEL:
                    await cls._mark_processed(db, task_id, clean_make, clean_model)
                    return

                await asyncio.sleep(0.1)

    @classmethod
    async def run(cls):
        """Main loop: repeatedly take the highest-priority discovery task and process it.

        Runs until cancelled. Sleeps 30 seconds when no task is available and
        10 seconds after an unexpected error.
        """
        logger.info("🤖 Mega-Hunter Robot v1.7.3 ONLINE")

        # Fetch the highest-priority model from the Discovery list
        next_task_query = text("""
            SELECT id, make, model, vehicle_class, priority_score
            FROM data.catalog_discovery
            WHERE status IN ('pending', 'processing')
            ORDER BY priority_score DESC
            LIMIT 1
        """)

        while True:
            try:
                async with AsyncSessionLocal() as db:
                    task = (await db.execute(next_task_query)).fetchone()

                    if task:
                        task_id, make, model, v_class, priority = task
                        # Record the state to guard against "double work"
                        await db.execute(
                            text("UPDATE data.catalog_discovery SET status = 'processing' WHERE id = :id"),
                            {"id": task_id},
                        )
                        await db.commit()
                        await cls.process_make_model(db, task_id, make, model, v_class, priority)
                    else:
                        logger.info("😴 Nincs több feladat, pihenés 30 másodpercig...")
                        await asyncio.sleep(30)
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a futtatás során: {e}")
                await asyncio.sleep(10)


if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())