STABLE: Final schema sync, optimized gitignore
This commit is contained in:
@@ -1,136 +1,182 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/catalog_robot.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
from sqlalchemy import text, select
|
||||
from app.db.session import SessionLocal
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# Logging setup.
# Configure the root logger exactly once: a second ``logging.basicConfig``
# call is a no-op once the root logger already has a handler, so only one
# format can ever take effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
)
# Logger used by every log call in this worker module.
logger = logging.getLogger("Robot-v1.1.0-Precision")
|
||||
|
||||
class CatalogHunter:
    """v1.1.0 Precision-Hunter (Multi-Source Edition).

    Reads vehicle records for one make at a time from the RDW open-data
    API and stores them as ``VehicleModelDefinition`` rows, combining
    three datasets:

    - engine code from the ``jh96-v4pq`` table,
    - power (kW) and Euro classification from the ``8ys7-d773`` table,
    - base data (displacement, cylinders, ...) from the ``m9d7-ebf2``
      main table.
    """

    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"    # main table
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"    # fuel / kW
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"  # engine code table

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    # The app token header is only sent when a token is configured.
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    # Older attribute name, kept so existing references keep working.
    HEADERS_RDW = HEADERS
    # Number of main-table rows requested per page.
    BATCH_SIZE = 50
    # Pause (seconds) used by run() when there is no task or a pass failed.
    RETRY_DELAY_SECONDS = 60

    @classmethod
    async def get_total_count(cls, client, make_name):
        """Return how many main-table records match the given make.

        Retained from the previous revision for backward compatibility;
        the current ``process_make`` does not call it.

        Args:
            client: HTTP client exposing an awaitable ``get(url, params=, headers=)``.
            make_name: Make to count; matched case-insensitively as a substring.

        Returns:
            The record count, or 0 when the request fails or the API does
            not answer with HTTP 200.
        """
        query_filter = f"upper(merk) like '%{make_name.upper()}%'"
        params = {
            "$where": query_filter,
            "$select": "count(*)",
        }
        try:
            resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
            if resp.status_code == 200:
                data = resp.json()
                return int(data[0]['count'])
        except Exception as e:
            logger.error(f"⚠️ Nem sikerült a számlálás: {e}")
        # Non-200 answers and failures both count as "nothing found", so
        # callers always receive an int.
        return 0

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return ``text_val`` lower-cased with every non-ASCII-alphanumeric character removed.

        Empty or ``None`` input yields an empty string.
        """
        if not text_val:
            return ""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert a loosely typed API value to ``int``, falling back to 0.

        Accepts ints, floats and numeric strings (``"85.0"`` -> 85).
        ``None``, blank strings, non-numeric text, NaN and infinities all
        yield 0.
        """
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers int(float("inf")); ValueError covers NaN
            # and non-numeric text.
            return 0

    @classmethod
    async def fetch_extra_tech(cls, client, plate):
        """Collect engine code, power and Euro classification for one plate.

        Issues two API requests concurrently (fuel table and engine-code
        table), both filtered by ``kenteken == plate``.

        Args:
            client: HTTP client exposing an awaitable ``get(url, params=, headers=)``.
            plate: Licence plate (``kenteken``) to look up.

        Returns:
            A dict with the keys ``power_kw`` (int, 0 when unknown),
            ``euro_klasse`` (value or None), ``fuel_desc`` (comma-joined
            fuel descriptions or ``"Unknown"``) and ``engine_code`` (value
            or None). Errors are logged and the defaults collected so far
            are returned; nothing is raised.
        """
        params = {"kenteken": plate}
        results = {"power_kw": 0, "euro_klasse": None, "fuel_desc": "Unknown", "engine_code": None}

        try:
            # Request 1: fuel and power (kW); request 2: engine code.
            resp_fuel, resp_eng = await asyncio.gather(
                client.get(cls.RDW_FUEL, params=params, headers=cls.HEADERS),
                client.get(cls.RDW_ENGINE, params=params, headers=cls.HEADERS),
            )

            # Fuel rows: a plate can have several rows, so keep the highest
            # power figure and every distinct fuel description.
            if resp_fuel.status_code == 200:
                max_p = 0
                f_types = []
                for row in resp_fuel.json():
                    p = max(cls.parse_int(row.get("netto_maximum_vermogen")),
                            cls.parse_int(row.get("nominaal_continu_maximum_vermogen")))
                    max_p = max(max_p, p)
                    f = row.get("brandstof_omschrijving")
                    if f and f not in f_types:
                        f_types.append(f)
                    # The first row that carries an emission level wins.
                    if not results["euro_klasse"]:
                        results["euro_klasse"] = row.get("uitlaatemissieniveau") or row.get("euro_klasse")

                results["power_kw"] = max_p
                results["fuel_desc"] = ", ".join(f_types) if f_types else "Unknown"

            # Engine code: take the first row that actually has one, not
            # blindly row 0 (which may carry an empty value).
            if resp_eng.status_code == 200:
                results["engine_code"] = next(
                    (row.get("motorcode") for row in resp_eng.json() if row.get("motorcode")),
                    None,
                )

        except Exception as e:
            logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")

        return results

    @staticmethod
    async def _mark_processed(db, task_id):
        """Set the discovery task's status to 'processed' and commit."""
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()

    @classmethod
    async def _store_variant(cls, db, client, clean_make, item, plate):
        """Insert or enrich the ``VehicleModelDefinition`` for one main-table row."""
        raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
        # Drop the make from the trade name; fall back to the raw name when
        # nothing would be left.
        model_name = raw_model.replace(clean_make, "").strip() or raw_model
        norm_name = cls.normalize(model_name)

        # Base data from the main table.
        ccm = cls.parse_int(item.get("cilinderinhoud"))
        cyl = cls.parse_int(item.get("aantal_cilinders"))
        doors = cls.parse_int(item.get("aantal_deuren"))
        v_class = item.get("voertuigsoort")
        b_type = item.get("inrichting")
        v_code = item.get("variant")
        ver_code = item.get("uitvoering")

        # Model year: the first four characters of the first-registration
        # value. parse_int keeps a malformed value from discarding the record.
        date_str = str(item.get("datum_eerste_toelating", "0000"))
        year = cls.parse_int(date_str[:4]) if len(date_str) >= 4 else 0

        # Concurrent technical enrichment (engine code + kW + Euro).
        tech = await cls.fetch_extra_tech(client, plate)

        # Insert or update.
        stmt = select(VehicleModelDefinition).where(
            VehicleModelDefinition.make == clean_make,
            VehicleModelDefinition.normalized_name == norm_name,
            VehicleModelDefinition.variant_code == v_code,
            VehicleModelDefinition.version_code == ver_code,
            VehicleModelDefinition.fuel_type == tech["fuel_desc"],
        ).limit(1)
        existing = (await db.execute(stmt)).scalar_one_or_none()

        if existing:
            # Update: only overwrite with values that were actually found.
            if tech["engine_code"]:
                existing.engine_code = tech["engine_code"]
            if tech["power_kw"] > 0:
                existing.power_kw = tech["power_kw"]
            if tech["euro_klasse"]:
                existing.euro_classification = tech["euro_klasse"]
        else:
            db.add(VehicleModelDefinition(
                make=clean_make, marketing_name=model_name, normalized_name=norm_name,
                marketing_name_aliases=[raw_model], technical_code=plate,
                variant_code=v_code, version_code=ver_code, vehicle_class=v_class,
                body_type=b_type, fuel_type=tech["fuel_desc"], engine_capacity=ccm,
                engine_code=tech["engine_code"],
                power_kw=tech["power_kw"], cylinders=cyl, doors=doors,
                euro_classification=tech["euro_klasse"],
                year_from=year if year > 0 else None, year_to=year if year > 0 else None,
                source="PRECISION-HUNTER-v1.1.0",
            ))

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Page through every main-table record of one make and store its variants.

        Args:
            db: Async database session used for lookups, inserts and commits.
            task_id: Id of the ``data.catalog_discovery`` row being worked on.
            make_name: Make to process; stripped and upper-cased before use.

        Returns:
            True when the last page was reached and the task was marked
            'processed'; False when a page could not be fetched (the task
            status is left untouched so it is picked up again).
        """
        clean_make = make_name.strip().upper()
        logger.info(f"🎯 PRECÍZIÓS KUTATÁS INDUL: {clean_make}")

        current_offset = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = {"merk": clean_make, "$limit": cls.BATCH_SIZE, "$offset": current_offset, "$order": "kenteken DESC"}
                try:
                    r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
                    if r.status_code != 200:
                        logger.error(f"❌ RDW hiba ({clean_make}, offset={current_offset}): HTTP {r.status_code}")
                        return False
                    batch = r.json()
                except Exception as e:
                    logger.error(f"❌ RDW hiba ({clean_make}, offset={current_offset}): {e}")
                    return False

                # An empty page means every record has been visited.
                if not batch:
                    await cls._mark_processed(db, task_id)
                    logger.info(f"🏁 {clean_make} TELJESEN KÉSZ.")
                    return True

                for item in batch:
                    plate = item.get("kenteken")
                    if not plate:
                        continue
                    # The try wraps the savepoint (not the other way round),
                    # so a failure - including one raised while the savepoint
                    # is released - rolls back just this record and is
                    # handled here.
                    try:
                        async with db.begin_nested():
                            await cls._store_variant(db, client, clean_make, item, plate)
                    except Exception as e:
                        logger.warning(f"⚠️ Hiba ({plate}): {e}")

                await db.commit()
                current_offset += len(batch)
                logger.info(f"📈 {clean_make}: {current_offset} rendszám feldolgozva (Engine codes + kW OK)")
                await asyncio.sleep(0.2)

    @classmethod
    async def run(cls):
        """Run the robot forever: pick the next discovery task and process it.

        Each iteration opens a fresh session and selects one task with
        status 'pending' or 'processing', highest ``priority_score`` first.
        Sleeps when there is no task, and also after an unfinished pass so
        a failing API is not hit again immediately.
        """
        logger.info("🤖 Robot v1.1.0 PRECISION-HUNTER ONLINE")
        while True:
            async with AsyncSessionLocal() as db:
                query = text("SELECT id, make FROM data.catalog_discovery WHERE status IN ('pending', 'processing') ORDER BY priority_score DESC LIMIT 1")
                task = (await db.execute(query)).fetchone()
                if task:
                    finished = await cls.process_make(db, task[0], task[1])
                    if not finished:
                        await asyncio.sleep(cls.RETRY_DELAY_SECONDS)
                else:
                    await asyncio.sleep(cls.RETRY_DELAY_SECONDS)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the robot's polling coroutine and drive it
    # on a fresh event loop until the process is stopped.
    robot_loop = CatalogHunter.run()
    asyncio.run(robot_loop)
|
||||
Reference in New Issue
Block a user