import asyncio
import datetime
import json
import logging
import os
import re

import httpx
from sqlalchemy import select, func, or_, text
from sqlalchemy.ext.asyncio import AsyncSession

from app.db.session import SessionLocal
from app.models.asset import AssetCatalog

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot1-Ghost-Commander-v1.1.9")


class CatalogScout:
    """
    Robot 1.1.9: Environment Master.

    - Environment-variable based authentication (RDW App Token).
    - Source priority: RDW (EU) -> NHTSA (US) -> CarQuery (with ban monitoring).
    - 2.5 s spacing between requests for safety (5.0 s for CarQuery).
    """

    CQ_URL = "https://www.carqueryapi.com/api/0.3/"
    NHTSA_BASE = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/"
    RDW_URL = "https://opendata.rdw.nl/resource/ed7h-m8uz.json"

    # Credentials are read from environment variables.
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")

    # Kept in its original shape for external readers of this attribute.
    # Never hand it to httpx directly: the token entry may be None and it must
    # not be sent to non-RDW hosts. Use _headers_for(url) instead.
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json",
        "X-App-Token": RDW_TOKEN,
    }

    # Ban-watch state: CarQuery is skipped until this (naive, local) timestamp.
    cq_banned_until = None

    # --- CATEGORY DEFINITIONS (strictly following the original list) ---
    MOTO_MAKES = ['ducati', 'ktm', 'triumph', 'aprilia', 'benelli', 'vespa', 'simson', 'mz', 'etz', 'jawa', 'husqvarna', 'gasgas', 'sherco']
    MARINE_IDS = ['DF', 'DT', 'OUTBOARD', 'MARINE', 'JET SKI', 'SEA-DOO', 'WAVERUNNER', 'YACHT', 'BOAT']
    AERIAL_IDS = ['CESSNA', 'PIPER', 'AIRBUS', 'BOEING', 'HELICOPTER', 'AIRCRAFT', 'BEECHCRAFT', 'EMBRAER', 'DRONE']
    ATV_IDS = ['LT-', 'LTZ', 'LTR', 'KINGQUAD', 'QUAD', 'POLARIS', 'CAN-AM', 'MULE', 'RZR', 'ARCTIC CAT', 'UTV', 'SIDE-BY-SIDE']
    RACING_IDS = ['RM-Z', 'KX', 'CRF', 'YZ', 'SX-F', 'XC-W', 'RM125', 'RM250', 'CR125', 'CR250', 'MC450']
    MOTO_KEYWORDS = ['CBR', 'GSX', 'YZF', 'NINJA', 'Z1000', 'DR-Z', 'MT-0', 'V-STROM', 'ADVENTURE', 'SCRAMBLER', 'CBF', 'VFR', 'HAYABUSA']
    BUS_KEYWORDS = ['BUS', 'COACH', 'INTERCITY', 'SHUTTLE', 'TRANSIT']
    TRUCK_KEYWORDS = ['TRUCK', 'SEMI', 'TRACTOR', 'HAULER', 'ACTROS', 'MAN', 'SCANIA', 'IVECO', 'VOLVO FH', 'DAF', 'TGX', 'RENAULT T']
    TRAILER_KEYWORDS = ['TRAILER', 'SEMITRAILER', 'PÓTKOCSI', 'UTÁNFUTÓ', 'SCHMITZ', 'KRONE', 'KÖGEL']

    FALLBACK_BRANDS = ['Audi', 'BMW', 'Mercedes-Benz', 'Volkswagen', 'Toyota', 'Ford', 'Honda', 'Hyundai', 'Kia', 'Mazda', 'Nissan', 'Volvo', 'Skoda', 'Opel', 'Tesla', 'Lexus', 'Porsche', 'Dacia', 'Suzuki']

    @classmethod
    def identify_class(cls, make: str, model: str) -> str:
        """Classify a vehicle by keyword lookup on the upper-cased "MAKE MODEL" text.

        Categories are tested in a fixed order (aerial, marine, atv, motorcycle,
        bus, truck, trailer); the first hit wins and "car" is the fallback.

        NOTE(review): matching is plain substring search, so short keywords give
        false positives (e.g. "Porsche Cayman" contains "MAN" and is reported as
        "truck"; "Ford Transit" is reported as "bus"). Left unchanged here
        because the right matching rule is a product decision.
        """
        make_text = str(make)
        label = f"{make_text} {str(model)}".upper()

        def mentions(keywords) -> bool:
            return any(keyword in label for keyword in keywords)

        if mentions(cls.AERIAL_IDS):
            return "aerial"
        if mentions(cls.MARINE_IDS):
            return "marine"
        if mentions(cls.ATV_IDS):
            return "atv"
        # A known motorcycle-only make is enough on its own; otherwise look for
        # racing / street bike model keywords.
        if make_text.lower() in cls.MOTO_MAKES or mentions(cls.RACING_IDS + cls.MOTO_KEYWORDS):
            return "motorcycle"
        if mentions(cls.BUS_KEYWORDS):
            return "bus"
        if mentions(cls.TRUCK_KEYWORDS):
            return "truck"
        if mentions(cls.TRAILER_KEYWORDS):
            return "trailer"
        return "car"

    @classmethod
    def _headers_for(cls, url: str) -> dict:
        """Return request headers that are safe to send to ``url``."""
        # httpx rejects None header values, which is what HEADERS holds when
        # RDW_APP_TOKEN is not set.
        headers = {key: value for key, value in cls.HEADERS.items() if value is not None}
        # The RDW app token is a credential: only send it to the RDW endpoint.
        if not str(url).startswith(cls.RDW_URL):
            headers.pop("X-App-Token", None)
        return headers

    @classmethod
    def _cq_ban_active(cls) -> bool:
        """Return True while the CarQuery cool-down window is still running."""
        return bool(cls.cq_banned_until and datetime.datetime.now() < cls.cq_banned_until)

    @staticmethod
    def _extract_trims(payload) -> list:
        """Return the "Trims" list of a CarQuery payload, or [] for anything else.

        Handles None, the "DENIED" / "SILENT_SKIP" sentinels and payloads
        without a usable "Trims" list.
        """
        if not isinstance(payload, dict):
            return []
        trims = payload.get("Trims")
        return trims if isinstance(trims, list) else []

    @classmethod
    async def fetch_api(cls, url, params=None, is_cq=False):
        """GET ``url`` and return the decoded JSON body.

        Args:
            url: Endpoint to call.
            params: Optional query parameters.
            is_cq: True when calling CarQuery; enables the longer delay, the
                JSONP unwrapping and the ban detection.

        Returns:
            The parsed JSON value on success; ``None`` on a non-200 status or
            any request/parse error; ``"DENIED"`` when CarQuery refuses the
            request (a 2 hour cool-down is started); ``"SILENT_SKIP"`` when a
            CarQuery call is skipped because the cool-down is still active.
        """
        if is_cq and cls._cq_ban_active():
            return "SILENT_SKIP"

        async with httpx.AsyncClient(headers=cls._headers_for(url), follow_redirects=True) as client:
            try:
                # CarQuery: 5.0 s pause (against a hard ban); others: 2.5 s (as requested by the user).
                await asyncio.sleep(5.0 if is_cq else 2.5)
                resp = await client.get(url, params=params, timeout=35)

                # Only a CarQuery refusal may start the CarQuery cool-down; a 403
                # from RDW/NHTSA is treated like any other failed request below.
                if is_cq and (resp.status_code == 403 or "denied" in resp.text.lower()):
                    logger.error("🚫 CarQuery BAN! 2 óra kényszerpihenő aktiválva.")
                    cls.cq_banned_until = datetime.datetime.now() + datetime.timedelta(hours=2)
                    return "DENIED"

                if resp.status_code != 200:
                    return None

                content = resp.text.strip()
                if is_cq:
                    # CarQuery may wrap the JSON (JSONP style); keep only the
                    # outermost object/array.
                    match = re.search(r'(\{.*\}|\[.*\])', content, re.DOTALL)
                    if match:
                        content = match.group(0)
                return json.loads(content)
            except Exception as e:
                # Best effort by design: one failing request must not stop the robot.
                logger.error("❌ API hiba: %s", e)
                return None

    @classmethod
    async def is_model_processed(cls, db: AsyncSession, make: str, model: str, year: int):
        """Return True if a catalog row already exists for this make/model/year."""
        stmt = (
            select(AssetCatalog.id)
            .where(
                AssetCatalog.make == make,
                AssetCatalog.model == model,
                AssetCatalog.year_from == year,
            )
            .limit(1)
        )
        result = await db.execute(stmt)
        return result.scalars().first() is not None

    @classmethod
    async def auto_heal(cls, db: AsyncSession, cq_active: bool):
        """Enrich up to 20 placeholder rows ('Standard' / 'Unknown') with real data.

        RDW is tried first; CarQuery is used only when ``cq_active`` is true and
        RDW returned nothing. Changes are committed once at the end.

        NOTE(review): rows that neither source can enrich keep matching the
        query, so the same 20 rows may be retried on every call.
        """
        logger.info("🛠️ Auto-Heal: Hiányos rekordok dúsítása...")
        stmt = (
            select(AssetCatalog)
            .where(AssetCatalog.engine_variant == 'Standard', AssetCatalog.fuel_type == 'Unknown')
            .limit(20)
        )
        results = await db.execute(stmt)

        for r in results.scalars().all():
            # 1. RDW repair (Dutch Open Data + token)
            rdw = await cls.fetch_api(
                cls.RDW_URL,
                {"merk": r.make.upper(), "handelsbenaming": r.model.upper(), "$limit": 1},
            )
            if isinstance(rdw, list) and rdw and isinstance(rdw[0], dict):
                item = rdw[0]
                r.fuel_type = item.get("brandstof_omschrijving") or "Unknown"
                # Re-assign instead of mutating in place: a plain JSON column does
                # not track in-place changes, and factory_data may be None.
                r.factory_data = {
                    **(r.factory_data or {}),
                    "hp": item.get("netto_maximum_vermogen"),
                    "cc": item.get("cilinderinhoud"),
                    "source": "heal_v1.9_rdw",
                }
                continue

            # 2. CarQuery repair (only if we are not banned)
            if cq_active:
                t_data = await cls.fetch_api(
                    cls.CQ_URL,
                    {"cmd": "getTrims", "make": r.make.lower(), "model": r.model, "year": r.year_from},
                    is_cq=True,
                )
                trims = cls._extract_trims(t_data)
                if trims and isinstance(trims[0], dict):
                    t = trims[0]
                    r.engine_variant = t.get("model_trim") or "Standard"
                    r.fuel_type = t.get("model_engine_fuel") or r.fuel_type
                    r.factory_data = {
                        **(r.factory_data or {}),
                        "hp": t.get("model_engine_power_ps"),
                        "cc": t.get("model_engine_cc"),
                        "source": "heal_v1.9_cq",
                    }

        await db.commit()

    @classmethod
    async def run(cls, start_year: int = 2026, end_year: int = 1990):
        """Walk model years from ``start_year`` down to ``end_year`` (inclusive).

        For every year: heal placeholder rows, then for every fallback brand
        collect model names from NHTSA and RDW and store one catalog row per
        CarQuery trim (or a single 'Standard' / 'Unknown' placeholder when
        CarQuery has nothing or is banned).
        """
        logger.info(f"🤖 Robot 1.9.2 indítása (RDW Token: {'Aktív' if cls.RDW_TOKEN else 'HIÁNYZIK!'})")

        # 1. MAKE LIST (currently the fallback brands only)
        makes_to_process = [{"id": b.lower(), "display": b} for b in cls.FALLBACK_BRANDS]

        # The RDW query does not depend on the year, so successful answers are
        # reused across years instead of being re-requested every cycle.
        # NOTE(review): RDW names are therefore attached to every year of the
        # make, exactly as before — confirm this is intended.
        rdw_models_by_make = {}

        for year in range(start_year, end_year - 1, -1):
            logger.info(f"📅 --- CIKLUS: {year} ---")
            cq_now_active = not cls._cq_ban_active()

            async with SessionLocal() as db:
                await cls.auto_heal(db, cq_now_active)

            for make in makes_to_process:
                display = make["display"]
                models_to_fetch = set()

                # A: NHTSA (US)
                n_data = await cls.fetch_api(f"{cls.NHTSA_BASE}{display}/modelyear/{year}?format=json")
                if isinstance(n_data, dict):
                    for row in n_data.get("Results") or []:
                        name = row.get("Model_Name") if isinstance(row, dict) else None
                        if name:
                            models_to_fetch.add(name)

                # B: RDW (Netherlands) - protected by the app token
                if display not in rdw_models_by_make:
                    rdw_m = await cls.fetch_api(cls.RDW_URL, {"merk": display.upper(), "$limit": 30})
                    if isinstance(rdw_m, list):
                        names = set()
                        for row in rdw_m:
                            name = row.get("handelsbenaming") if isinstance(row, dict) else None
                            if name:
                                names.add(name)
                        # Only successful answers are cached, so a failed request
                        # is retried on the next year.
                        rdw_models_by_make[display] = names
                models_to_fetch.update(rdw_models_by_make.get(display, ()))

                async with SessionLocal() as db:
                    for model_name in models_to_fetch:
                        if not model_name or await cls.is_model_processed(db, display, model_name, year):
                            continue

                        # C: CarQuery (only when there is no ban)
                        t_data = await cls.fetch_api(
                            cls.CQ_URL,
                            {"cmd": "getTrims", "make": make["id"], "model": model_name, "year": year},
                            is_cq=True,
                        )
                        found_trims = cls._extract_trims(t_data)
                        if not found_trims:
                            # Placeholder row; auto_heal() picks these up later.
                            found_trims = [{"model_trim": "Standard", "model_engine_fuel": "Unknown"}]

                        for t in found_trims:
                            db.add(AssetCatalog(
                                make=display,
                                model=model_name,
                                year_from=year,
                                engine_variant=t.get("model_trim") or "Standard",
                                fuel_type=t.get("model_engine_fuel") or "Unknown",
                                vehicle_class=cls.identify_class(display, model_name),
                                factory_data={
                                    "hp": t.get("model_engine_power_ps"),
                                    "cc": t.get("model_engine_cc"),
                                    "source": "ghost_v1.9.2",
                                    "sync_date": str(datetime.datetime.now()),
                                },
                            ))
                        await db.commit()


if __name__ == "__main__":
    asyncio.run(CatalogScout.run())