173 lines
8.2 KiB
Plaintext
Executable File
173 lines
8.2 KiB
Plaintext
Executable File
# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
|
|
import asyncio
|
|
import httpx
|
|
import logging
|
|
import os
|
|
import re
|
|
from sqlalchemy import text, select, update
|
|
from app.database import AsyncSessionLocal
|
|
from app.models.vehicle_definitions import VehicleModelDefinition
|
|
|
|
# MB 2.0 logging: configure the root logger at INFO level with a line format
# that tags every record as coming from this worker, then create the
# module-level logger used throughout the file.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("Robot-1")
|
|
|
|
class CatalogHunter:
    """Worker that fills the vehicle model catalog from RDW open data.

    ``run`` polls ``data.catalog_discovery`` for the highest-priority
    make/model task, ``process_make_model`` pages through the RDW main
    endpoint for matching vehicles, and every vehicle is enriched with
    fuel/engine details and stored as a ``VehicleModelDefinition`` row
    unless an equivalent technical variant already exists.
    """

    # RDW open-data endpoints queried by this worker.
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"

    # Optional application token; the header is only sent when it is set.
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    # Page size for the main endpoint query.
    BATCH_SIZE = 50
    # Stop paging a make/model once this many vehicles have been inspected.
    MAX_ROWS_PER_MODEL = 500
    # Sleep used when the queue is empty and as back-off after a failure.
    IDLE_SLEEP_SECONDS = 30

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return *text_val* lower-cased with all non ``[a-zA-Z0-9]`` characters removed.

        Falsy input (``None`` or an empty string) yields ``""``.
        """
        if not text_val:
            return ""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert *value* to ``int`` via ``float``, returning 0 when that is not possible.

        ``None``, blank strings, non-numeric input and non-finite values
        (``"inf"``, ``"nan"``) all map to 0; fractional values are truncated.
        """
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) -- must not escape to the caller.
            return 0

    @classmethod
    async def fetch_extra_tech(cls, client, plate):
        """Fetch fuel and engine details for one licence plate.

        Args:
            client: HTTP client exposing an awaitable ``get(url, params=, headers=)``.
            plate: Value sent as the ``kenteken`` query parameter.

        Returns:
            A dict with the keys ``power_kw`` (largest power figure found in
            the fuel rows, 0 if none), ``euro_klasse`` (first non-empty
            emission value, else ``None``), ``fuel_desc`` (distinct fuel
            descriptions joined with ``", "``, or ``"Unknown"``) and
            ``engine_code`` (``motorcode`` of the first engine row, else
            ``None``). Any exception is logged and whatever was gathered up to
            that point is returned.
        """
        params = {"kenteken": plate}
        results = {"power_kw": 0, "euro_klasse": None, "fuel_desc": "Unknown", "engine_code": None}
        try:
            # Both lookups are independent, so issue them concurrently.
            resp_fuel, resp_eng = await asyncio.gather(
                client.get(cls.RDW_FUEL, params=params, headers=cls.HEADERS),
                client.get(cls.RDW_ENGINE, params=params, headers=cls.HEADERS),
            )

            if resp_fuel.status_code == 200:
                max_power = 0
                fuel_types = []
                for row in resp_fuel.json():
                    # Each power figure is looked up under two alternative key spellings.
                    net = cls.parse_int(
                        row.get("netto_maximum_vermogen") or row.get("nettomaximumvermogen")
                    )
                    nominal = cls.parse_int(
                        row.get("nominaal_continu_maximum_vermogen") or row.get("nominaalcontinuvermogen")
                    )
                    max_power = max(max_power, net, nominal)

                    fuel = row.get("brandstof_omschrijving")
                    if fuel and fuel not in fuel_types:
                        fuel_types.append(fuel)

                    # Keep the first non-empty emission class encountered.
                    if not results["euro_klasse"]:
                        results["euro_klasse"] = row.get("uitlaatemissieniveau") or row.get("euro_klasse")

                results["power_kw"] = max_power
                results["fuel_desc"] = ", ".join(fuel_types) if fuel_types else "Unknown"

            if resp_eng.status_code == 200:
                eng_rows = resp_eng.json()
                if eng_rows:
                    results["engine_code"] = eng_rows[0].get("motorcode")
        except Exception as e:
            # Best effort: enrichment failures must not abort the row.
            logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
        return results

    @classmethod
    async def _mark_processed(cls, db, task_id, clean_make, clean_model):
        """Set the discovery task's status to 'processed', commit, and log completion."""
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()
        logger.info(f"🏁 {clean_make} {clean_model} feldolgozva.")

    @classmethod
    async def _store_variant(cls, db, client, item, plate, clean_make, v_class, priority):
        """Insert or refresh the catalog definition matching one RDW vehicle row."""
        raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
        # Drop the make from the trade name; fall back to the raw name if nothing is left.
        model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
        norm_name = cls.normalize(model_name_clean)
        ccm = cls.parse_int(item.get("cilinderinhoud"))

        tech = await cls.fetch_extra_tech(client, plate)

        # A technical variant is identified by make + normalized name +
        # engine capacity + fuel type.
        stmt = select(VehicleModelDefinition).where(
            VehicleModelDefinition.make == clean_make,
            VehicleModelDefinition.normalized_name == norm_name,
            VehicleModelDefinition.engine_capacity == ccm,
            VehicleModelDefinition.fuel_type == tech["fuel_desc"],
        ).limit(1)
        existing = (await db.execute(stmt)).scalar_one_or_none()

        if existing is not None:
            # Only overwrite fields for which this row actually carries data.
            if tech["engine_code"]:
                existing.engine_code = tech["engine_code"]
            if tech["power_kw"] > 0:
                existing.power_kw = tech["power_kw"]
            existing.priority_score = priority
            return

        db.add(VehicleModelDefinition(
            make=clean_make,
            marketing_name=model_name_clean,
            normalized_name=norm_name,
            marketing_name_aliases=[raw_model],
            technical_code=plate,
            fuel_type=tech["fuel_desc"],
            engine_capacity=ccm,
            engine_code=tech["engine_code"],
            power_kw=tech["power_kw"],
            cylinders=cls.parse_int(item.get("aantal_cilinders")),
            euro_classification=tech["euro_klasse"],
            vehicle_class=v_class,
            priority_score=priority,
            status="ACTIVE",
            source="PRECISION-HUNTER-v2.1",
        ))

    @classmethod
    async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
        """Harvest catalog variants for one make/model discovery task.

        Pages through the RDW main endpoint (``BATCH_SIZE`` rows per request,
        ordered by ``kenteken DESC``) and stores each vehicle via
        ``_store_variant``. The task is marked ``'processed'`` when the
        endpoint returns no more rows or once ``MAX_ROWS_PER_MODEL`` rows
        have been inspected.

        Args:
            db: Async database session used for lookups, inserts and commits.
            task_id: ``id`` of the row in ``data.catalog_discovery``.
            make_name: Make to query; stripped and upper-cased before use.
            model_name: Model to query; stripped and upper-cased before use.
            v_class: Stored as ``vehicle_class`` on newly created definitions.
            priority: Stored as ``priority_score`` on created/updated definitions.

        Returns:
            ``True`` if the task was marked ``'processed'``; ``False`` if a
            request failed (exception, HTTP 429 or 5xx) and the task status was
            left untouched so that it can be retried later.
        """
        clean_make = make_name.strip().upper()
        clean_model = model_name.strip().upper()
        logger.info(f"🎯 VADÁSZAT INDUL: {clean_make} {clean_model} (Prio: {priority})")

        current_offset = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = {
                    "merk": clean_make,
                    "handelsbenaming": clean_model,
                    "$limit": cls.BATCH_SIZE,
                    "$offset": current_offset,
                    "$order": "kenteken DESC",
                }
                try:
                    r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
                    status = r.status_code
                    batch = r.json() if status == 200 else []
                except Exception as e:
                    # Network/decoding failure: report it and leave the task for a retry.
                    logger.error(
                        f"RDW main query failed for {clean_make} {clean_model} "
                        f"at offset {current_offset}: {e}"
                    )
                    return False

                if status == 429 or status >= 500:
                    # Throttling / server-side errors are not "end of data":
                    # do not mark the task as processed.
                    logger.error(
                        f"RDW main query for {clean_make} {clean_model} returned HTTP {status}; "
                        f"task {task_id} left for retry"
                    )
                    return False
                if status != 200:
                    logger.warning(
                        f"RDW main query for {clean_make} {clean_model} returned HTTP {status}; "
                        f"treating it as no further results"
                    )

                if not batch:
                    await cls._mark_processed(db, task_id, clean_make, clean_model)
                    return True

                for item in batch:
                    # Bound before the try so the warning below can always format it.
                    plate = None
                    try:
                        plate = item.get("kenteken")
                        if not plate:
                            continue
                        await cls._store_variant(db, client, item, plate, clean_make, v_class, priority)
                    except Exception as e:
                        logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")

                await db.commit()
                current_offset += len(batch)

                # Enough variants sampled for this model: finish the task here.
                # Without marking it processed, the queue query in run() would
                # keep returning this same task forever.
                if current_offset >= cls.MAX_ROWS_PER_MODEL:
                    await cls._mark_processed(db, task_id, clean_make, clean_model)
                    return True

                await asyncio.sleep(0.1)

    @classmethod
    async def run(cls):
        """Run the worker loop forever.

        Each iteration opens a fresh session, picks the highest-priority task
        whose status is ``'pending'`` or ``'processing'``, flags it as
        ``'processing'`` and hands it to ``process_make_model``. The loop
        sleeps ``IDLE_SLEEP_SECONDS`` when the queue is empty, when a task
        could not be completed, or after an unexpected error, so a failing
        task or database outage does not turn into a busy loop.
        """
        logger.info("🤖 Vehicle Catalog Hunter ONLINE")

        # Highest-priority open task; 'processing' is included so that tasks
        # left unfinished by an earlier attempt are picked up again.
        query = text("""
            SELECT id, make, model, vehicle_class, priority_score
            FROM data.catalog_discovery
            WHERE status IN ('pending', 'processing')
            ORDER BY priority_score DESC
            LIMIT 1
        """)
        claim = text("UPDATE data.catalog_discovery SET status = 'processing' WHERE id = :id")

        while True:
            should_wait = True
            try:
                async with AsyncSessionLocal() as db:
                    task = (await db.execute(query)).fetchone()
                    if task:
                        await db.execute(claim, {"id": task[0]})
                        await db.commit()
                        finished = await cls.process_make_model(
                            db, task[0], task[1], task[2], task[3], task[4]
                        )
                        # Move straight on to the next task only after a clean finish.
                        should_wait = not finished
            except Exception:
                # Keep the worker alive; the traceback is logged for diagnosis.
                logger.exception("Catalog hunter iteration failed; backing off before retrying")

            if should_wait:
                await asyncio.sleep(cls.IDLE_SLEEP_SECONDS)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the worker's main coroutine and drive it
    # on a new event loop until the process is stopped.
    hunter_main = CatalogHunter.run()
    asyncio.run(hunter_main)