Files
service-finder/archive/2026.02.18 Archive_old_mapps/catalog_robot1.4.1.py.old
2026-02-26 08:19:25 +01:00

270 lines
11 KiB
Python

import asyncio
import httpx
import logging
import json
import os
import datetime
import sys
from sqlalchemy import text
from app.db.session import SessionLocal
from app.models.asset import AssetCatalog
# --- KÉNYSZERÍTETT IDŐBÉLYEGES LOGOLÁS ---
# Töröljük az esetleges korábbi konfigurációkat, hogy az időbélyeg garantált legyen
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
stream=sys.stdout
)
logger = logging.getLogger("Robot-v1.4.1-Powerhouse")
class CatalogMaster:
"""
Master Hunter Robot v1.4.1 - Powerhouse Edition
- Párhuzamos Holland (RDW) és Amerikai (NHTSA Batch) Discovery.
- Garantált időbélyeges naplózás.
- Multi-Worker Safe (FOR UPDATE SKIP LOCKED).
- Rate Limit (429) védelem.
"""
# API Végpontok
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"
# BRIT API (Token után aktiválható)
UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
CATEGORY_MAP = {
"Personenauto": "car",
"Motorfiets": "motorcycle",
"Bedrijfsauto": "truck",
"Vrachtwagen": "truck",
"Opleggertrekker": "truck",
"Bus": "bus",
"Aanhangwagen": "trailer",
"Oplegger": "trailer",
"Landbouw- of bosbouwtrekker": "agricultural",
"camper": "camper"
}
# Szabályozzuk a párhuzamos dúsítást (egyszerre max 5 kérés robotpéldányonként)
semaphore = asyncio.Semaphore(5)
@classmethod
def clean_kw(cls, val):
try:
if val is None: return None
f_val = float(str(val).replace(',', '.'))
if 0 < f_val < 1.0: return None
v = int(f_val)
return v if v > 0 else None
except (ValueError, TypeError):
return None
@classmethod
def clean_int(cls, val):
try:
if val is None: return None
return int(float(str(val).replace(',', '.')))
except (ValueError, TypeError):
return None
@classmethod
async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
"""Intelligens API hívó 429-es védelemmel és időzített logolással."""
async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
for attempt in range(3):
try:
if method == "POST":
resp = await client.post(url, json=json_data, timeout=30)
else:
resp = await client.get(url, params=params, timeout=30)
if resp.status_code == 429:
wait_time = (attempt + 1) * 5
logger.warning(f"⚠️ RATE LIMIT! Várakozás {wait_time}mp: {url}")
await asyncio.sleep(wait_time)
continue
return resp.json() if resp.status_code in [200, 201] else []
except Exception as e:
logger.error(f"❌ API Hiba ({url}): {e}")
await asyncio.sleep(2)
return []
@classmethod
async def get_deep_tech(cls, plate, main_kw=None, vin=None):
"""Mély dúsítás több forrásból párhuzamosan."""
async with cls.semaphore:
res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}
# --- 1. HOLLAND (RDW) DÚSÍTÁS ---
fuel_task = cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
axle_task = cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
fuel_data, axle_data = await asyncio.gather(fuel_task, axle_task)
if fuel_data:
f0 = fuel_data[0]
if not res["kw"]:
res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
res["euro"] = f0.get("uitlaatemissieniveau")
if axle_data:
res["axles"] = cls.clean_int(axle_data[0].get("aantal_assen"))
# --- 2. BRIT (DVLA) ELLENŐRZÉS (AKTIVÁLHATÓ KULCCSAL) ---
"""
if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
json_data={"registrationNumber": plate},
headers=cls.HEADERS_UK)
if uk_data and not isinstance(uk_data, list):
res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
res["euro"] = res["euro"] or uk_data.get("euroStatus")
"""
return res
@classmethod
async def discover_holland(cls, make_name, limit=1000):
"""Holland Discovery ág: rendszámok gyűjtése."""
offset, variants = 0, {}
while True:
params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
if not data: break
for item in data:
plate = item.get("kenteken")
if not plate: continue
model = str(item.get("handelsbenaming", "Unknown")).upper()
ccm = cls.clean_int(item.get("cilinderinhoud"))
weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
raw_date = item.get("datum_eerste_toelating")
year = int(str(raw_date)[:4]) if raw_date else 2024
v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
if key not in variants:
variants[key] = {
"model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
"plate": plate, "main_kw": kw, "prod_year": year, "vin": item.get("vin")
}
if len(data) < limit: break
offset += limit
return variants
@classmethod
async def discover_usa_batch(cls, make_name):
"""Amerikai NHTSA Batch Discovery: Típusok gyűjtése."""
variants = {}
years = range(datetime.datetime.now().year - 5, datetime.datetime.now().year + 1)
async def fetch_year(year):
url = cls.US_BATCH.format(make=make_name.upper(), year=year)
logger.info(f"🇺🇸 USA Batch Discovery indítása: {make_name} ({year})")
data = await cls.fetch_api(url)
if data and "Results" in data:
for m in data["Results"]:
m_name = m.get("Model_Name", "Unknown").upper()
key = f"US-{m_name}-{year}"
if key not in variants:
variants[key] = {
"model": m_name, "ccm": None, "weight": None, "v_class": "car",
"plate": "US-DISCOVERY", "main_kw": None, "prod_year": year, "vin": None
}
await asyncio.gather(*(fetch_year(y) for y in years))
return variants
@classmethod
async def process_make(cls, db, task_id, make_name):
logger.info(f"🚀 >>> {make_name} Powerhouse v1.4.1 INDUL...")
# Párhuzamos Discovery
holland_task = cls.discover_holland(make_name)
usa_task = cls.discover_usa_batch(make_name)
holland_variants, usa_variants = await asyncio.gather(holland_task, usa_task)
all_variants = {**usa_variants, **holland_variants}
logger.info(f"📊 Összefésült variánsok száma: {len(all_variants)}")
async def enrich_and_save(v):
deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
try:
db_item = AssetCatalog(
make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
year_from=v["prod_year"], euro_class=deep["euro"],
factory_data={
"source": "Powerhouse-v1.4.1",
"discovery_nl": v["plate"] != "US-DISCOVERY",
"enriched_at": str(datetime.datetime.now())
}
)
return db_item
except Exception:
return None
# Párhuzamos dúsítás (Semaphore korláttal)
results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))
total_saved = 0
for item in results:
if item:
db.add(item)
total_saved += 1
await db.commit()
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
logger.info(f"🏁 {make_name} KÉSZ. {total_saved} egyedi rekord rögzítve.")
@classmethod
async def run(cls):
logger.info("🤖 Robot 1.4.1 (Powerhouse) ONLINE - Multi-Worker Safe Mode")
while True:
async with SessionLocal() as db:
# SKIP LOCKED védelem a párhuzamos futtatáshoz
query = text("""
SELECT id, make FROM data.catalog_discovery
WHERE status = 'pending'
LIMIT 1
FOR UPDATE SKIP LOCKED
""")
res = await db.execute(query)
task = res.fetchone()
if task:
task_id, make_name = task
await db.execute(
text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
{"id": task_id}
)
await db.commit()
await cls.process_make(db, task_id, make_name)
else:
logger.info("😴 Várólista üres vagy minden feladat foglalt. Alvás 60mp...")
await asyncio.sleep(60)
await asyncio.sleep(1)
if __name__ == "__main__":
asyncio.run(CatalogMaster.run())