# Source listing metadata (code-viewer residue): 272 lines, 12 KiB, Python, executable file.
import asyncio
import datetime
import json
import logging
import os
from urllib.parse import quote

import httpx
from sqlalchemy import text

from app.db.session import SessionLocal
from app.models.asset import AssetCatalog


# Configure root logging at import time and create the robot's named logger,
# which every method of the worker class below logs through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-v1.4-Powerhouse")


class CatalogMaster:
    """Master Hunter Robot v1.4 - Powerhouse Edition.

    - Parallel discovery from the Dutch RDW open-data API and the US NHTSA
      batch API.
    - Prepared, but still disabled, UK (DVLA) integration.
    - Semaphore-limited parallel technical enrichment (5 vehicles at a time,
      two RDW requests each).
    - Merges the variants found by the individual sources into one catalog.

    All methods are classmethods; the class is used as a namespace and is
    never instantiated in this file.
    """

    # --- API endpoints ---
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
    RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"

    # US batch API: one call returns every model of a make for a model year.
    US_BATCH = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"

    # UK API (disabled until a token is available).
    # UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    UK_API_KEY = os.getenv("UK_DVLA_API_KEY")  # placeholder for the future token

    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    # HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}

    # Maps the RDW `voertuigsoort` value to the internal vehicle class;
    # values missing from this table are stored as "other".
    CATEGORY_MAP = {
        "Personenauto": "car",
        "Motorfiets": "motorcycle",
        "Bedrijfsauto": "truck",
        "Vrachtwagen": "truck",
        "Opleggertrekker": "truck",
        "Bus": "bus",
        "Aanhangwagen": "trailer",
        "Oplegger": "trailer",
        "Landbouw- of bosbouwtrekker": "agricultural",
        "camper": "camper",
    }

    # Plate value stored on US variants, which have no licence plate at all.
    US_PLACEHOLDER_PLATE = "US-DISCOVERY"

    # Production year used when RDW gives no (or an unparsable) first
    # registration date.
    DEFAULT_PROD_YEAR = 2024

    # Throttles the parallel enrichment so the APIs do not block us: at most
    # 5 enrichments run at once and each one issues two RDW requests
    # concurrently, i.e. at most 10 enrichment requests are in flight.
    semaphore = asyncio.Semaphore(5)

    @staticmethod
    def _to_int(val):
        """Parse *val* (number or string, decimal comma allowed) to an int, or None."""
        if val is None:
            return None
        try:
            return int(float(str(val).replace(",", ".")))
        except (ValueError, TypeError, OverflowError):
            # ValueError: not a number / NaN; OverflowError: +/- infinity.
            return None

    @classmethod
    def clean_kw(cls, val):
        """Normalise a raw power value to whole kilowatts.

        Returns the value truncated to an int, or None when it is missing,
        not numeric, not finite, or truncates to zero or below. Values
        between 0 and 1 are therefore rejected.
        NOTE(review): presumably those are power-to-mass ratios coming from
        `vermogen_massarijklaar` rather than kW figures - confirm.
        """
        kw = cls._to_int(val)
        return kw if kw is not None and kw > 0 else None

    @classmethod
    def clean_int(cls, val):
        """Parse *val* to an int (truncating), or None when that is impossible."""
        return cls._to_int(val)

    @classmethod
    async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
        """Call a JSON API with up to three attempts and return the decoded body.

        Args:
            url: Endpoint to call.
            params: Query parameters (GET only).
            headers: Extra HTTP headers for the request.
            method: "POST" sends *json_data* as the JSON body; anything else
                issues a GET.
            json_data: JSON body for POST requests.

        Returns:
            The decoded JSON on HTTP 200/201. An empty list on any other
            status, or when every attempt failed; errors are logged and
            never raised to the caller.
        """
        async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
            for attempt in range(3):
                try:
                    if method == "POST":
                        resp = await client.post(url, json=json_data, timeout=30)
                    else:
                        resp = await client.get(url, params=params, timeout=30)

                    if resp.status_code == 429:
                        # Rate limited: back off a little longer on every attempt.
                        wait_time = (attempt + 1) * 5
                        logger.warning(f"⚠️ RDW limit elérve! Pihenő {wait_time} mp...")
                        await asyncio.sleep(wait_time)
                        continue

                    if resp.status_code in (200, 201):
                        return resp.json()

                    # Any other status is final (no retry); log it so the
                    # empty result can be told apart from "no data".
                    logger.warning(f"⚠️ API HTTP {resp.status_code} ({url})")
                    return []
                except Exception as e:
                    # Best effort by design: network and decoding errors are
                    # logged and retried instead of aborting the discovery.
                    logger.error(f"❌ API Hiba ({url}): {e}")
                    await asyncio.sleep(2)
        return []

    @staticmethod
    def _first_row(data):
        """Return the first dict of a JSON list response, or None."""
        if isinstance(data, list) and data and isinstance(data[0], dict):
            return data[0]
        return None

    @classmethod
    async def get_deep_tech(cls, plate, main_kw=None, vin=None):
        """Enrich one variant with fuel, power, emission and axle data from RDW.

        Args:
            plate: Dutch licence plate (`kenteken`) used as the lookup key.
            main_kw: Raw power value already known from discovery, if any.
            vin: Accepted for interface compatibility; currently unused.

        Returns:
            Dict with the keys "kw", "fuel", "axles", "body" and "euro".
            Fields that could not be determined keep their defaults
            (None, "Unknown" or "Standard").
        """
        res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}

        # US variants (and rows without a plate) have nothing to look up at
        # RDW, so skip the two network calls entirely.
        if not plate or plate == cls.US_PLACEHOLDER_PLATE:
            return res

        # --- 1. DUTCH (RDW) ENRICHMENT: both datasets are fetched in parallel ---
        async with cls.semaphore:
            fuel_data, axle_data = await asyncio.gather(
                cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW),
                cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW),
            )

        fuel_row = cls._first_row(fuel_data)
        if fuel_row is not None:
            if not res["kw"]:
                res["kw"] = cls.clean_kw(
                    fuel_row.get("nettomaximumvermogen") or fuel_row.get("netto_maximum_vermogen")
                )
            res["fuel"] = fuel_row.get("brandstof_omschrijving", "Unknown")
            res["euro"] = fuel_row.get("uitlaatemissieniveau")

        axle_row = cls._first_row(axle_data)
        if axle_row is not None:
            res["axles"] = cls.clean_int(axle_row.get("aantal_assen"))

        # --- 2. UK (DVLA) CHECK (disabled until an API key is available) ---
        # TODO: enable once UK_DVLA and HEADERS_UK are defined on the class.
        # NOTE(review): `engineCapacity` looks like a displacement rather than
        # a power figure - confirm before mapping it to kW.
        # if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
        #     uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST",
        #                                   json_data={"registrationNumber": plate},
        #                                   headers=cls.HEADERS_UK)
        #     if uk_data and not isinstance(uk_data, list):
        #         res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity"))
        #         res["euro"] = res["euro"] or uk_data.get("euroStatus")

        return res

    @classmethod
    def _parse_year(cls, raw_date):
        """Return the leading four digits of *raw_date* as a year, or the default year."""
        year = cls.clean_int(str(raw_date)[:4]) if raw_date else None
        return year if year is not None else cls.DEFAULT_PROD_YEAR

    @classmethod
    async def discover_holland(cls, make_name, limit=1000):
        """Dutch discovery branch: page through RDW and collect unique variants.

        Args:
            make_name: Make to search for; sent upper-cased as `merk`.
            limit: Page size of each request.

        Returns:
            Dict keyed by "model-ccm-weight-class-kw-year". Each value
            describes the first vehicle seen for that combination, including
            its plate so that it can be enriched later.
        """
        offset, variants = 0, {}
        make = make_name.upper()

        while True:
            # Offset paging is only reliable with an explicit, stable order.
            params = {"merk": make, "$limit": limit, "$offset": offset, "$order": ":id"}
            data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
            if not data or not isinstance(data, list):
                break

            for item in data:
                plate = item.get("kenteken")
                if not plate:
                    continue

                model = str(item.get("handelsbenaming", "Unknown")).upper()
                ccm = cls.clean_int(item.get("cilinderinhoud"))
                weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
                kw = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
                year = cls._parse_year(item.get("datum_eerste_toelating"))
                v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")

                key = f"{model}-{ccm}-{weight}-{v_class}-{kw}-{year}"
                if key not in variants:
                    variants[key] = {
                        "model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
                        "plate": plate, "main_kw": kw, "prod_year": year, "vin": item.get("vin"),
                    }

            if len(data) < limit:
                # A short page means this was the last one.
                break
            offset += limit

        return variants

    @classmethod
    async def discover_usa_batch(cls, make_name):
        """US NHTSA batch discovery branch.

        Queries the current model year and the five preceding ones in
        parallel.

        Returns:
            Dict keyed by "US-<MODEL>-<year>". US records carry no plate,
            displacement, weight or power; the plate field holds
            `US_PLACEHOLDER_PLATE` and the vehicle class is always "car".
        """
        variants = {}
        current_year = datetime.datetime.now().year
        years = range(current_year - 5, current_year + 1)
        # The make is part of the URL path, so escape spaces, slashes etc.
        make = quote(make_name.upper(), safe="")

        async def fetch_year(year):
            data = await cls.fetch_api(cls.US_BATCH.format(make=make, year=year))
            if not isinstance(data, dict):
                return
            for m in data.get("Results") or []:
                # `Model_Name` may be present but null.
                m_name = str(m.get("Model_Name") or "Unknown").upper()
                key = f"US-{m_name}-{year}"
                variants[key] = {
                    "model": m_name, "ccm": None, "weight": None, "v_class": "car",
                    "plate": cls.US_PLACEHOLDER_PLATE, "main_kw": None, "prod_year": year, "vin": None,
                }

        await asyncio.gather(*(fetch_year(y) for y in years))
        return variants

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Discover, enrich and store every variant of one make.

        Args:
            db: Open database session; it is awaited for `execute` and
                `commit`.
            task_id: Id of the `data.catalog_discovery` row being processed.
            make_name: Make to process.

        The new catalog rows and the switch of the task to 'processed' are
        committed together, so a failure cannot leave stored rows behind a
        task that is not marked as processed.
        """
        logger.info(f"🚀 >>> {make_name} Powerhouse v1.4 INDUL...")

        # PARALLEL DISCOVERY: the Netherlands and the USA at the same time.
        holland_variants, usa_variants = await asyncio.gather(
            cls.discover_holland(make_name),
            cls.discover_usa_batch(make_name),
        )

        # Merge; Dutch entries would win on a key clash. US keys carry a
        # "US-" prefix and Dutch keys start with the model name.
        all_variants = {**usa_variants, **holland_variants}
        logger.info(f"📊 Összesen {len(all_variants)} egyedi variáns (NL: {len(holland_variants)}, US: {len(usa_variants)})")

        async def enrich_and_save(v):
            """Enrich one variant and build its catalog row; None on failure."""
            try:
                deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
                return AssetCatalog(
                    make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
                    fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
                    max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
                    year_from=v["prod_year"], euro_class=deep["euro"],
                    factory_data={
                        "source": "Powerhouse-v1.4",
                        "discovery_nl": v["plate"] != cls.US_PLACEHOLDER_PLATE,
                        "enriched_at": str(datetime.datetime.now()),
                    },
                )
            except Exception:
                # One bad variant must not abort the whole make; log it so
                # the skipped record is visible.
                logger.exception("❌ Enrichment failed for %s / %s", make_name, v.get("model"))
                return None

        # Start every enrichment at once; the semaphore limits how many run.
        results = await asyncio.gather(*(enrich_and_save(v) for v in all_variants.values()))

        # Save
        total_saved = 0
        for item in results:
            if item is not None:
                db.add(item)
                total_saved += 1

        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()
        logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")

    @classmethod
    async def run(cls):
        """Endless worker loop: claim one pending make at a time and process it.

        A task is claimed with `FOR UPDATE SKIP LOCKED`, so other workers
        skip the row this one holds, and it is switched to 'running' and
        committed before the work starts. A failing task is logged and the
        loop continues. The failed row stays in 'running'.
        """
        logger.info("🤖 Robot 1.4 (Powerhouse) ONLINE - Multi-Worker Safe")

        claim_query = text("""
            SELECT id, make FROM data.catalog_discovery
            WHERE status = 'pending'
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        """)

        while True:
            async with SessionLocal() as db:
                # 1. Grab and lock one pending row.
                res = await db.execute(claim_query)
                task = res.fetchone()

                if task:
                    task_id, make_name = task
                    # 2. Flip it to 'running' and commit at once, so the
                    #    reservation survives the release of the row lock.
                    await db.execute(
                        text("UPDATE data.catalog_discovery SET status = 'running' WHERE id = :id"),
                        {"id": task_id},
                    )
                    await db.commit()

                    # 3. The actual work. A failure here must not kill the worker.
                    try:
                        await cls.process_make(db, task_id, make_name)
                    except Exception:
                        logger.exception(
                            "❌ Processing failed for %s (task %s); it stays in 'running'",
                            make_name, task_id,
                        )
                        await db.rollback()

            if not task:
                # Sleep outside the session block so no session or
                # transaction stays open while idle.
                logger.info("😴 Várólista üres (vagy minden sor foglalt). Alvás 60 mp...")
                await asyncio.sleep(60)

            await asyncio.sleep(1)


if __name__ == "__main__":
    # Script entry point: run the endless worker loop until the process is stopped.
    asyncio.run(CatalogMaster.run())