import asyncio
import logging
import os
import sys

import httpx
from sqlalchemy import or_, select, text

from app.db.session import SessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
)
logger = logging.getLogger("Hunter-v2.4-Paginator")


class CatalogHunter:
    """Worker that pages through the RDW open-data API for one make at a time.

    Tasks are read from ``data.catalog_discovery`` (rows with
    ``status = 'pending'``). For every task the RDW dataset is queried page by
    page and each previously unseen (make, model, engine capacity, power)
    combination is staged as an ``unverified`` ``VehicleModelDefinition``.
    """

    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    # The app token header is only sent when a token is configured.
    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    PAGE_SIZE = 1000          # rows requested per page
    IDLE_SLEEP_SECONDS = 20   # pause when no pending task is available

    @staticmethod
    def _make_filter(make_name: str) -> str:
        """Build the SoQL ``$where`` clause matching *make_name* as a substring.

        Single quotes are doubled so that a make containing an apostrophe
        cannot terminate the string literal and break the query.
        """
        escaped = make_name.upper().replace("'", "''")
        return f"upper(merk) like '%{escaped}%'"

    @staticmethod
    def _to_int(value) -> int:
        """Convert an API value to ``int``; missing or malformed values give 0."""
        try:
            return int(float(value or 0))
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    async def _mark_processed(db, task_id) -> None:
        """Set the discovery task to ``processed`` and commit."""
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()

    @staticmethod
    async def _variant_exists(db, make: str, model: str, ccm: int, kw: int) -> bool:
        """Return True when an equivalent variant row is already stored.

        Variants without a positive power value are inserted with
        ``power_kw = NULL``, so for those the lookup has to match ``NULL`` as
        well; comparing against ``0`` alone never finds them and the same
        variant would be inserted again on every run.
        """
        power_col = VehicleModelDefinition.power_kw
        if kw > 0:
            power_match = power_col == kw
        else:
            power_match = or_(power_col.is_(None), power_col == kw)

        stmt = select(VehicleModelDefinition.id).where(
            VehicleModelDefinition.make == make,
            VehicleModelDefinition.marketing_name == model,
            VehicleModelDefinition.engine_capacity == ccm,
            power_match,
        ).limit(1)
        found = (await db.execute(stmt)).scalar_one_or_none()
        return found is not None

    @classmethod
    async def get_total_count(cls, client, make_name):
        """Return how many RDW records exist for the given make.

        Args:
            client: an open ``httpx.AsyncClient``.
            make_name: make to look for (matched case-insensitively as a
                substring of ``merk``).

        Returns:
            The record count, or 0 when the request fails, the response is
            not HTTP 200, or the payload cannot be parsed.
        """
        params = {
            "$where": cls._make_filter(make_name),
            "$select": "count(*)"
        }
        try:
            resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
            if resp.status_code == 200:
                data = resp.json()
                return int(data[0]['count'])
            return 0
        except Exception as e:
            # Best effort: a failed count is reported as "no data".
            logger.error(f"⚠️ Nem sikerült a számlálás: {e}")
            return 0

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Fetch every RDW page for one make and stage the unseen variants.

        Args:
            db: database session used for the lookups, inserts and the final
                status update; it is committed after every page.
            task_id: id of the ``data.catalog_discovery`` row being handled.
            make_name: make to harvest; stripped and upper-cased before use.

        The task is marked ``processed`` at the end, also when the count is 0
        or when pagination stops early on a non-200 response.
        """
        clean_make = make_name.strip().upper()

        async with httpx.AsyncClient(timeout=60) as client:
            # Step 1: count all records for the make.
            total_available = await cls.get_total_count(client, clean_make)
            logger.info(f"🚀 >>> {clean_make} feltérképezése: {total_available} variáns található az RDW-ben.")

            if total_available == 0:
                # NOTE(review): a failed count also lands here, so the task is
                # closed without a retry - confirm this is intended.
                logger.warning(f"⚠️ {clean_make} márkához nem érkezett adat az API-tól.")
                await cls._mark_processed(db, task_id)
                return

            # Step 2: pagination.
            limit = cls.PAGE_SIZE
            offset = 0
            total_added = 0
            query_filter = cls._make_filter(clean_make)  # identical for every page

            while offset < total_available:
                logger.info(f"📑 Lapozás: {clean_make} | {offset} -> {offset + limit} (Összesen: {total_available})")
                params = {
                    "$where": query_filter,
                    "$limit": limit,
                    "$offset": offset,
                    "$order": ":id"  # recommended by Socrata for stable paging
                }
                resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
                if resp.status_code != 200:
                    # NOTE(review): the task is still marked processed below,
                    # so the remaining pages are never fetched.
                    logger.error(f"❌ Hiba a lapozásnál ({offset}): {resp.status_code}")
                    break

                batch = resp.json()
                if not batch:
                    break

                for item in batch:
                    res_make = str(item.get("merk", clean_make)).upper()
                    model = str(item.get("handelsbenaming", "Unknown")).upper()
                    ccm = cls._to_int(item.get("cilinderinhoud"))
                    kw = cls._to_int(item.get("netto_maximum_vermogen"))

                    # Deduplication check
                    if await cls._variant_exists(db, res_make, model, ccm, kw):
                        continue

                    db.add(VehicleModelDefinition(
                        make=res_make,
                        technical_code=item.get("kenteken"),
                        marketing_name=model,
                        engine_capacity=ccm,
                        power_kw=kw if kw > 0 else None,
                        status="unverified",
                        source="HUNTER-v2.4-PAGINATED"
                    ))
                    total_added += 1

                await db.commit()  # persist at the end of every page
                offset += limit

        # Step 3: finish.
        await cls._mark_processed(db, task_id)
        logger.info(f"✅ {clean_make} KÉSZ. {total_available} rekord átnézve, {total_added} új variáns stagingbe mentve.")

    @classmethod
    async def run(cls):
        """Poll for pending discovery tasks forever and process them one by one.

        A handful of makes is prioritised by the query; the rest follow in id
        order. When nothing is pending the loop sleeps before polling again.
        """
        logger.info("🤖 Robot 1 (Hunter) ONLINE - Paginator v2.4")
        query = text("""
            SELECT id, make FROM data.catalog_discovery
            WHERE status = 'pending'
            ORDER BY
                CASE WHEN make IN ('SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL') THEN 1 ELSE 2 END,
                id ASC
            LIMIT 1 FOR UPDATE SKIP LOCKED
        """)
        while True:
            async with SessionLocal() as db:
                res = await db.execute(query)
                task = res.fetchone()
                if task:
                    # NOTE(review): process_make commits after each page, which
                    # ends the transaction that took the row lock above -
                    # confirm whether parallel workers can pick the same task.
                    await cls.process_make(db, task[0], task[1])

            if not task:
                # Sleep only after the session is closed so that no connection
                # or open transaction is held while idle.
                await asyncio.sleep(cls.IDLE_SLEEP_SECONDS)


if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())