import asyncio
import datetime
import json
import logging
import os
import sys
from typing import Any, Dict, Optional

import httpx
from sqlalchemy import text

from app.db.session import SessionLocal
from app.models.asset import AssetCatalog

# --- FORCED TIMESTAMPED LOGGING ---
# Remove any previously installed root handlers so that the timestamped
# format configured below is guaranteed to take effect.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stdout
)
logger = logging.getLogger("Robot-v1.4.1-Powerhouse")


class CatalogMaster:
    """
    Master Hunter Robot v1.4.1 - Powerhouse Edition.

    - Parallel Dutch (RDW) and US (NHTSA batch) discovery.
    - Guaranteed timestamped logging.
    - Multi-worker safe (FOR UPDATE SKIP LOCKED).
    - Rate-limit (HTTP 429) protection.
    """

    # API endpoints
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
    RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
    US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"

    # UK API (can be activated once a token is available)
    UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    UK_API_KEY = os.getenv("UK_DVLA_API_KEY")

    # Auth headers are only sent when the matching environment variable is set.
    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}

    # Maps the RDW `voertuigsoort` value to the internal vehicle class;
    # anything not listed here is stored as "other" by discover_holland().
    CATEGORY_MAP = {
        "Personenauto": "car",
        "Motorfiets": "motorcycle",
        "Bedrijfsauto": "truck",
        "Vrachtwagen": "truck",
        "Opleggertrekker": "truck",
        "Bus": "bus",
        "Aanhangwagen": "trailer",
        "Oplegger": "trailer",
        "Landbouw- of bosbouwtrekker": "agricultural",
        "camper": "camper"
    }

    # Placeholder plate given to variants discovered through the US batch
    # API; such variants have no Dutch registration to look up.
    US_PLACEHOLDER_PLATE = "US-DISCOVERY"

    # Production year used when RDW gives no (or an unparseable) first
    # registration date.
    DEFAULT_PROD_YEAR = 2024

    # Limits parallel enrichment (at most 5 concurrent lookups per robot
    # instance).
    # NOTE(review): created at class-definition time, i.e. before
    # asyncio.run() starts the loop; on Python < 3.10 asyncio primitives
    # captured the event loop at construction -- confirm the target runtime.
    semaphore = asyncio.Semaphore(5)

    @classmethod
    def clean_kw(cls, val: Any) -> Optional[int]:
        """Normalise a raw power value to a positive integer, or None.

        Accepts numbers and numeric strings (decimal comma allowed). Values
        strictly between 0 and 1 are rejected -- presumably because the
        fallback field `vermogen_massarijklaar` holds a power-to-mass ratio
        rather than kW (TODO confirm). Unparseable, non-finite, zero and
        negative values all yield None.
        """
        if val is None:
            return None
        try:
            f_val = float(str(val).replace(',', '.'))
            if 0 < f_val < 1.0:
                return None
            # int() raises ValueError for NaN and OverflowError for +/-inf.
            v = int(f_val)
        except (ValueError, TypeError, OverflowError):
            return None
        return v if v > 0 else None

    @classmethod
    def clean_int(cls, val: Any) -> Optional[int]:
        """Convert a raw numeric value (decimal comma allowed) to int.

        Returns None for None, unparseable and non-finite input. The
        fractional part is truncated.
        """
        if val is None:
            return None
        try:
            # int() raises ValueError for NaN and OverflowError for +/-inf.
            return int(float(str(val).replace(',', '.')))
        except (ValueError, TypeError, OverflowError):
            return None

    @staticmethod
    def _first_row(data: Any) -> Optional[Dict[str, Any]]:
        """Return the first row of a list-of-dicts API payload, else None."""
        if isinstance(data, list) and data and isinstance(data[0], dict):
            return data[0]
        return None

    @classmethod
    def _parse_year(cls, raw_date: Any) -> int:
        """Extract the leading 4-digit year from a raw date value.

        Falls back to DEFAULT_PROD_YEAR when the value is missing or its
        first four characters are not an integer.
        """
        if not raw_date:
            return cls.DEFAULT_PROD_YEAR
        try:
            return int(str(raw_date)[:4])
        except ValueError:
            return cls.DEFAULT_PROD_YEAR

    @classmethod
    async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None) -> Any:
        """Call a JSON API with HTTP 429 back-off and logged errors.

        Makes up to three attempts. On 429 it waits 5/10/15 seconds and
        retries; on any exception it logs, waits 2 seconds and retries.

        Returns:
            The decoded JSON body for a 200/201 response; an empty list for
            any other status or once all attempts are exhausted.
        """
        async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
            for attempt in range(3):
                try:
                    if method == "POST":
                        resp = await client.post(url, json=json_data, timeout=30)
                    else:
                        resp = await client.get(url, params=params, timeout=30)

                    if resp.status_code == 429:
                        wait_time = (attempt + 1) * 5
                        logger.warning(f"⚠️ RATE LIMIT! Várakozás {wait_time}mp: {url}")
                        await asyncio.sleep(wait_time)
                        continue

                    return resp.json() if resp.status_code in [200, 201] else []
                except Exception as e:
                    # Best-effort fetch: network and JSON decoding errors are
                    # logged and retried instead of being propagated.
                    logger.error(f"❌ API Hiba ({url}): {e}")
                    await asyncio.sleep(2)
        return []

    @classmethod
    async def get_deep_tech(cls, plate, main_kw=None, vin=None) -> Dict[str, Any]:
        """Enrich one variant with fuel/axle data fetched in parallel.

        Args:
            plate: Dutch registration used as the RDW `kenteken` filter.
            main_kw: Raw power value from discovery; cleaned with clean_kw().
            vin: Accepted for interface compatibility; currently unused.

        Returns:
            Dict with keys "kw", "fuel", "axles", "body" and "euro". Missing
            data is left at its default ("Unknown" / "Standard" / None).
        """
        res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}

        # Without a real Dutch plate there is nothing to look up: skip the
        # RDW round-trips instead of spending rate limit on them.
        if not plate or plate == cls.US_PLACEHOLDER_PLATE:
            return res

        # --- 1. DUTCH (RDW) ENRICHMENT ---
        async with cls.semaphore:
            fuel_data, axle_data = await asyncio.gather(
                cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW),
                cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW),
            )

        # Only a list-of-dicts payload is trusted; any other shape (for
        # example a dict-shaped error body) is treated as "no data".
        f0 = cls._first_row(fuel_data)
        if f0 is not None:
            if not res["kw"]:
                res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
            res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
            res["euro"] = f0.get("uitlaatemissieniveau")

        a0 = cls._first_row(axle_data)
        if a0 is not None:
            res["axles"] = cls.clean_int(a0.get("aantal_assen"))

        # --- 2. UK (DVLA) CHECK (CAN BE ENABLED ONCE AN API KEY EXISTS) ---
        # Disabled code, kept for reference.
        # NOTE(review): `engineCapacity` looks like a displacement, not a
        # power in kW -- confirm the mapping before enabling.
        #
        # if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
        #     uk_data = await cls.fetch_api(
        #         cls.UK_DVLA, method="POST",
        #         json_data={"registrationNumber": plate}, headers=cls.HEADERS_UK)
        #     if uk_data and not isinstance(uk_data, list):
        #         res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
        #         res["euro"] = res["euro"] or uk_data.get("euroStatus")

        return res

    @classmethod
    async def discover_holland(cls, make_name, limit=1000) -> Dict[str, Dict[str, Any]]:
        """Dutch discovery branch: page through RDW and collect variants.

        Rows are de-duplicated on (model, ccm, weight, class, kw, year); the
        first plate seen for each combination is kept as its representative.

        Args:
            make_name: Make to search for (upper-cased for the `merk` filter).
            limit: Page size used for `$limit` / `$offset` paging.

        Returns:
            Mapping of de-duplication key to variant dict.
        """
        offset, variants = 0, {}
        while True:
            params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
            data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
            # An empty or non-list payload ends the paging loop.
            if not data or not isinstance(data, list):
                break

            for item in data:
                if not isinstance(item, dict):
                    continue
                plate = item.get("kenteken")
                if not plate:
                    continue

                # `or` also covers a key that is present but null/empty.
                model = str(item.get("handelsbenaming") or "Unknown").upper()
                ccm = cls.clean_int(item.get("cilinderinhoud"))
                weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
                kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
                year = cls._parse_year(item.get("datum_eerste_toelating"))
                v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")

                key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
                if key not in variants:
                    variants[key] = {
                        "model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
                        "plate": plate, "main_kw": kw, "prod_year": year,
                        "vin": item.get("vin")
                    }

            # A short page means the last page has been reached.
            if len(data) < limit:
                break
            offset += limit
        return variants

    @classmethod
    async def discover_usa_batch(cls, make_name) -> Dict[str, Dict[str, Any]]:
        """US NHTSA batch discovery: collect model names per model year.

        Queries the current year and the five preceding years concurrently.

        Returns:
            Mapping of "US-<MODEL>-<year>" to variant dict; every variant
            carries the placeholder plate and no technical data.
        """
        variants = {}
        current_year = datetime.datetime.now().year
        years = range(current_year - 5, current_year + 1)

        async def fetch_year(year):
            url = cls.US_BATCH.format(make=make_name.upper(), year=year)
            logger.info(f"🇺🇸 USA Batch Discovery indítása: {make_name} ({year})")
            data = await cls.fetch_api(url)
            # fetch_api returns [] on failure; only a dict payload has rows.
            if not isinstance(data, dict):
                return
            for m in data.get("Results") or []:
                if not isinstance(m, dict):
                    continue
                # `or` also covers a key that is present but null/empty.
                m_name = str(m.get("Model_Name") or "Unknown").upper()
                key = f"US-{m_name}-{year}"
                if key not in variants:
                    variants[key] = {
                        "model": m_name, "ccm": None, "weight": None, "v_class": "car",
                        "plate": cls.US_PLACEHOLDER_PLATE, "main_kw": None, "prod_year": year,
                        "vin": None
                    }

        await asyncio.gather(*(fetch_year(y) for y in years))
        return variants

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Discover, enrich and store all variants of one make.

        Args:
            db: Async database session (run() passes the one it obtained
                from SessionLocal).
            task_id: Id of the `data.catalog_discovery` row being processed.
            make_name: Make to discover.

        The new records and the task's 'processed' status are committed in a
        single transaction so the two cannot diverge.
        """
        logger.info(f"🚀 >>> {make_name} Powerhouse v1.4.1 INDUL...")

        # Parallel discovery
        holland_variants, usa_variants = await asyncio.gather(
            cls.discover_holland(make_name),
            cls.discover_usa_batch(make_name),
        )

        # On a key collision the Dutch entry wins (it is unpacked last).
        all_variants = {**usa_variants, **holland_variants}
        logger.info(f"📊 Összefésült variánsok száma: {len(all_variants)}")

        async def enrich_and_save(v):
            # One failing variant must not abort the whole make: log it and
            # skip that record.
            try:
                deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
                return AssetCatalog(
                    make=make_name.upper(),
                    model=v["model"],
                    vehicle_class=v["v_class"],
                    fuel_type=deep["fuel"],
                    power_kw=deep["kw"],
                    engine_capacity=v["ccm"],
                    max_weight_kg=v["weight"],
                    axle_count=deep["axles"],
                    body_type=deep["body"],
                    year_from=v["prod_year"],
                    euro_class=deep["euro"],
                    factory_data={
                        "source": "Powerhouse-v1.4.1",
                        "discovery_nl": v["plate"] != cls.US_PLACEHOLDER_PLATE,
                        "enriched_at": str(datetime.datetime.now())
                    }
                )
            except Exception:
                logger.exception(
                    "Variant enrichment failed (%s / %s)", make_name, v.get("model")
                )
                return None

        # Parallel enrichment (bounded by the class-level semaphore)
        results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))

        total_saved = 0
        for item in results:
            if item is not None:
                db.add(item)
                total_saved += 1

        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id}
        )
        await db.commit()
        logger.info(f"🏁 {make_name} KÉSZ. {total_saved} egyedi rekord rögzítve.")

    @classmethod
    async def run(cls):
        """Worker main loop: claim one pending task at a time and process it.

        Runs forever. When no task can be claimed the worker sleeps for 60
        seconds; between iterations it always pauses for 1 second.
        """
        logger.info("🤖 Robot 1.4.1 (Powerhouse) ONLINE - Multi-Worker Safe Mode")
        # SKIP LOCKED protection for running several workers in parallel
        query = text("""
            SELECT id, make FROM data.catalog_discovery
            WHERE status = 'pending'
            LIMIT 1 FOR UPDATE SKIP LOCKED
        """)

        while True:
            task = None
            async with SessionLocal() as db:
                res = await db.execute(query)
                task = res.fetchone()

                if task:
                    task_id, make_name = task
                    await db.execute(
                        text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
                        {"id": task_id}
                    )
                    await db.commit()
                    try:
                        await cls.process_make(db, task_id, make_name)
                    except Exception:
                        # Keep the worker alive. The task row is not touched
                        # here, so it stays in 'running' for manual follow-up.
                        logger.exception(
                            "Task %s (%s) failed; rolling back", task_id, make_name
                        )
                        await db.rollback()

            if task is None:
                # Sleep only after the session is closed, so the idle wait
                # does not hold the session and its open transaction.
                logger.info("😴 Várólista üres vagy minden feladat foglalt. Alvás 60mp...")
                await asyncio.sleep(60)

            await asyncio.sleep(1)


if __name__ == "__main__":
    asyncio.run(CatalogMaster.run())