import asyncio
import datetime
import json
import logging
import os

import httpx
from sqlalchemy import text

from app.db.session import SessionLocal
from app.models.asset import AssetCatalog

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-v1.4-Powerhouse")


class CatalogMaster:
    """Master Hunter Robot v1.4 - Powerhouse Edition.

    - Parallel Dutch (RDW) and US (NHTSA batch) discovery.
    - Prepared, commented-out UK (DVLA) integration.
    - Async semaphore: parallel technical enrichment (at most 5 lookups
      in flight at once, see ``semaphore``).
    - Merging of the variants found in the different sources.
    """

    # API endpoints
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
    RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"

    # US batch API: one call returns every model of a make for a model year.
    US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"

    # UK API (disabled until a token is available)
    # UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    UK_API_KEY = os.getenv("UK_DVLA_API_KEY")  # placeholder for the future token

    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    # HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}

    # Maps the RDW "voertuigsoort" value to the internal vehicle class.
    CATEGORY_MAP = {
        "Personenauto": "car",
        "Motorfiets": "motorcycle",
        "Bedrijfsauto": "truck",
        "Vrachtwagen": "truck",
        "Opleggertrekker": "truck",
        "Bus": "bus",
        "Aanhangwagen": "trailer",
        "Oplegger": "trailer",
        "Landbouw- of bosbouwtrekker": "agricultural",
        "camper": "camper",
    }

    # Plate value stored on US variants, which have no registration number.
    US_PLACEHOLDER_PLATE = "US-DISCOVERY"

    # Production year used when RDW gives no (usable) first-admission date.
    DEFAULT_PROD_YEAR = 2024

    # Throttles the parallel enrichment so the remote API does not ban us:
    # at most 5 get_deep_tech() calls run at the same time.
    # NOTE(review): on Python < 3.10 an asyncio.Semaphore binds to the event
    # loop that is current at construction time - confirm the runtime version.
    semaphore = asyncio.Semaphore(5)

    @classmethod
    def clean_kw(cls, val):
        """Normalise a raw power value to a positive integer kW figure.

        Accepts numbers or numeric strings (decimal comma allowed).

        Returns:
            The value truncated to ``int``, or ``None`` when the input is
            missing, not numeric, not finite, or truncates to 0 or less.
        """
        if val is None:
            return None
        try:
            f_val = float(str(val).replace(',', '.'))
            # Fractions below 1 are rejected explicitly; presumably these are
            # power/mass ratios rather than kW values - TODO confirm.
            if 0 < f_val < 1.0:
                return None
            v = int(f_val)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")); ValueError also covers NaN.
            return None
        return v if v > 0 else None

    @classmethod
    def clean_int(cls, val):
        """Convert a number or numeric string (decimal comma allowed) to int.

        Returns:
            The truncated integer, or ``None`` when the input is missing,
            not numeric or not finite.
        """
        if val is None:
            return None
        try:
            return int(float(str(val).replace(',', '.')))
        except (ValueError, TypeError, OverflowError):
            return None

    @classmethod
    def _parse_year(cls, raw_date):
        """Return the leading 4-digit year of *raw_date*, or the default year."""
        if not raw_date:
            return cls.DEFAULT_PROD_YEAR
        try:
            return int(str(raw_date)[:4])
        except ValueError:
            # A single malformed date must not abort the whole discovery run.
            return cls.DEFAULT_PROD_YEAR

    @classmethod
    async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
        """Fetch JSON from *url*, making up to three attempts.

        Args:
            url: Endpoint to call.
            params: Query parameters for GET requests.
            headers: Default headers for the HTTP client.
            method: ``"POST"`` sends *json_data* as JSON body; anything else
                issues a GET.
            json_data: JSON payload for POST requests.

        Returns:
            The decoded JSON body on HTTP 200/201. An empty list on any other
            status, or when all three attempts were rate limited (429) or
            raised an exception.
        """
        async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
            for attempt in range(3):  # retry up to three times if needed
                try:
                    if method == "POST":
                        resp = await client.post(url, json=json_data, timeout=30)
                    else:
                        resp = await client.get(url, params=params, timeout=30)

                    if resp.status_code == 429:  # we are going too fast
                        wait_time = (attempt + 1) * 5  # linear back-off: 5s, 10s, 15s
                        logger.warning(f"⚠️ RDW limit elérve! Pihenő {wait_time} mp...")
                        await asyncio.sleep(wait_time)
                        continue

                    return resp.json() if resp.status_code in [200, 201] else []
                except Exception as e:
                    # Best effort: network and JSON decoding errors are logged
                    # and retried after a short pause.
                    logger.error(f"❌ API Hiba ({url}): {e}")
                    await asyncio.sleep(2)
        return []

    @classmethod
    async def get_deep_tech(cls, plate, main_kw=None, vin=None):
        """Enrich one variant with fuel and axle data from RDW.

        Args:
            plate: Registration number used as the RDW ``kenteken`` filter.
                The US placeholder plate skips the RDW lookups.
            main_kw: Power value from the discovery record; takes precedence
                over the value found in the fuel dataset.
            vin: Accepted for interface compatibility; currently unused.

        Returns:
            Dict with the keys ``kw``, ``fuel``, ``axles``, ``body`` and
            ``euro``; fields that could not be determined keep their defaults.
        """
        async with cls.semaphore:
            res = {
                "kw": cls.clean_kw(main_kw),
                "fuel": "Unknown",
                "axles": None,
                "body": "Standard",
                "euro": None,
            }

            # --- 1. DUTCH (RDW) ENRICHMENT ---
            if plate == cls.US_PLACEHOLDER_PLATE:
                # US variants carry no real plate, so an RDW lookup cannot
                # match; skip the two pointless HTTP calls.
                fuel_data, axle_data = [], []
            else:
                # Fetch both Dutch datasets concurrently.
                fuel_data, axle_data = await asyncio.gather(
                    cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW),
                    cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW),
                )

            if fuel_data:
                f0 = fuel_data[0]
                if not res["kw"]:
                    res["kw"] = cls.clean_kw(
                        f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen")
                    )
                res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
                res["euro"] = f0.get("uitlaatemissieniveau")

            if axle_data:
                res["axles"] = cls.clean_int(axle_data[0].get("aantal_assen"))

            # --- 2. UK (DVLA) CHECK (disabled until a token is available) ---
            # if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
            #     uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
            #                                   json_data={"registrationNumber": plate},
            #                                   headers=cls.HEADERS_UK)
            #     if uk_data and not isinstance(uk_data, list):
            #         res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
            #         res["euro"] = res["euro"] or uk_data.get("euroStatus")

            return res

    @classmethod
    async def discover_holland(cls, make_name, limit=1000):
        """Dutch discovery branch: page through RDW and collect unique variants.

        Args:
            make_name: Vehicle make; upper-cased for the RDW ``merk`` filter.
            limit: Page size sent as ``$limit``.

        Returns:
            Dict keyed by ``model-ccm-weight-class-kw-year``; each value keeps
            the data of the first vehicle seen for that combination.
        """
        offset, variants = 0, {}
        while True:
            # NOTE(review): no "$order" is sent, so page stability relies on
            # the server's default ordering - confirm against the SODA docs.
            params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
            data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
            if not data:
                break

            for item in data:
                plate = item.get("kenteken")
                if not plate:
                    continue

                model = str(item.get("handelsbenaming", "Unknown")).upper()
                ccm = cls.clean_int(item.get("cilinderinhoud"))
                weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
                kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
                year = cls._parse_year(item.get("datum_eerste_toelating"))
                v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")

                key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
                if key not in variants:
                    variants[key] = {
                        "model": model,
                        "ccm": ccm,
                        "weight": weight,
                        "v_class": v_class,
                        "plate": plate,
                        "main_kw": kw,
                        "prod_year": year,
                        "vin": item.get("vin"),
                    }

            # A short page means this was the last one.
            if len(data) < limit:
                break
            offset += limit
        return variants

    @classmethod
    async def discover_usa_batch(cls, make_name):
        """US NHTSA batch discovery branch.

        Queries the model list of *make_name* for the current model year and
        the five preceding ones (six requests, run concurrently).

        Returns:
            Dict keyed by ``US-<MODEL>-<year>``. US records carry no plate or
            technical data, so every entry uses the placeholder plate and the
            vehicle class ``"car"``.
        """
        variants = {}
        current_year = datetime.datetime.now().year
        years = range(current_year - 5, current_year + 1)

        async def fetch_year(year):
            url = cls.US_BATCH.format(make=make_name.upper(), year=year)
            data = await cls.fetch_api(url)
            if not isinstance(data, dict):
                return  # fetch_api returns [] on failure
            for m in data.get("Results") or []:
                # The key may be present with a null value.
                m_name = str(m.get("Model_Name") or "Unknown").upper()
                # US data has no plate; a later robot enriches it if needed.
                key = f"US-{m_name}-{year}"
                variants[key] = {
                    "model": m_name,
                    "ccm": None,
                    "weight": None,
                    "v_class": "car",
                    "plate": cls.US_PLACEHOLDER_PLATE,
                    "main_kw": None,
                    "prod_year": year,
                    "vin": None,
                }

        await asyncio.gather(*(fetch_year(y) for y in years))
        return variants

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Discover, enrich and store every variant of one make.

        Args:
            db: Async database session used for the inserts and status update.
            task_id: Id of the ``data.catalog_discovery`` row being processed.
            make_name: Vehicle make to process.

        Side effects:
            Adds one ``AssetCatalog`` row per successfully enriched variant,
            commits, then marks the discovery task as ``'processed'``.
        """
        logger.info(f"🚀 >>> {make_name} Powerhouse v1.4 INDUL...")

        # PARALLEL DISCOVERY: Dutch and US sources at the same time.
        holland_variants, usa_variants = await asyncio.gather(
            cls.discover_holland(make_name),
            cls.discover_usa_batch(make_name),
        )

        # Merge; Dutch entries win on key collisions because they have a plate.
        all_variants = {**usa_variants, **holland_variants}
        logger.info(f"📊 Összesen {len(all_variants)} egyedi variáns (NL: {len(holland_variants)}, US: {len(usa_variants)})")

        # PARALLEL ENRICHMENT
        async def enrich_and_save(v):
            try:
                deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
                return AssetCatalog(
                    make=make_name.upper(),
                    model=v["model"],
                    vehicle_class=v["v_class"],
                    fuel_type=deep["fuel"],
                    power_kw=deep["kw"],
                    engine_capacity=v["ccm"],
                    max_weight_kg=v["weight"],
                    axle_count=deep["axles"],
                    body_type=deep["body"],
                    year_from=v["prod_year"],
                    euro_class=deep["euro"],
                    factory_data={
                        "source": "Powerhouse-v1.4",
                        "discovery_nl": v["plate"] != cls.US_PLACEHOLDER_PLATE,
                        "enriched_at": str(datetime.datetime.now()),
                    },
                )
            except Exception:
                # Best effort per variant: skip the broken one, but leave a
                # trace instead of dropping it silently.
                logger.exception("❌ Dúsítási hiba (%s)", v.get("model"))
                return None

        # Start every enrichment at once; the semaphore limits concurrency.
        results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))

        # Persist
        saved_items = [item for item in results if item]
        db.add_all(saved_items)
        total_saved = len(saved_items)

        await db.commit()
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()
        logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")

    @classmethod
    async def run(cls):
        """Worker loop: claim one pending discovery task at a time, forever.

        A failure while processing a task is logged and the loop continues;
        the affected row keeps the status ``'running'``.
        """
        logger.info("🤖 Robot 1.4 (Powerhouse) ONLINE - Multi-Worker Safe")

        # 1. 'FOR UPDATE SKIP LOCKED' locks the selected row, while other
        #    workers skip rows that are already locked.
        claim_query = text("""
            SELECT id, make
            FROM data.catalog_discovery
            WHERE status = 'pending'
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        """)

        while True:
            queue_empty = False
            async with SessionLocal() as db:
                res = await db.execute(claim_query)
                task = res.fetchone()

                if task:
                    task_id, make_name = task
                    # 2. Flip the row to 'running' inside the same transaction
                    #    so that nobody else picks it up.
                    await db.execute(
                        text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
                        {"id": task_id},
                    )
                    await db.commit()  # makes the claim permanent

                    # 3. Do the actual work.
                    try:
                        await cls.process_make(db, task_id, make_name)
                    except Exception:
                        # Top-level boundary: one broken make must not take
                        # the whole worker down.
                        logger.exception("❌ %s feldolgozása sikertelen (task_id=%s)", make_name, task_id)
                        await db.rollback()
                else:
                    queue_empty = True

            if queue_empty:
                # Sleep only after the session is closed, so it is not held
                # open while the worker idles.
                logger.info("😴 Várólista üres (vagy minden sor foglalt). Alvás 60 mp...")
                await asyncio.sleep(60)

            await asyncio.sleep(1)


if __name__ == "__main__":
    asyncio.run(CatalogMaster.run())