# File listing metadata: Python source, 136 lines, 5.7 KiB
import asyncio
|
|
import httpx
|
|
import logging
|
|
import os
|
|
import sys
|
|
from sqlalchemy import text, select
|
|
from app.db.session import SessionLocal
|
|
from app.models.vehicle_definitions import VehicleModelDefinition
|
|
|
|
# Logging setup: configure the root logger at import time so every line
# carries a timestamp, the level and the emitting logger's name.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Module-wide logger used by the Hunter robot.
logger = logging.getLogger("Hunter-v2.4-Paginator")
|
|
|
|
class CatalogHunter:
    """Robot 1 ("Hunter"): stage vehicle variants for pending makes from RDW.

    The robot polls ``data.catalog_discovery`` for rows with status
    ``'pending'``, pages through the RDW open-data endpoint for the make of
    that row and stores every not-yet-known (make, model, ccm, kW)
    combination as an ``unverified`` ``VehicleModelDefinition``.
    """

    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    # The app token header is only sent when the environment provides one.
    HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    # Number of records requested per page.
    PAGE_SIZE = 1000
    # Seconds to wait when there is no pending task or a task has to be retried.
    IDLE_SLEEP_SECONDS = 20

    @staticmethod
    def _soql_like_filter(make_name):
        """Build the SoQL ``$where`` clause matching *make_name* inside ``merk``."""
        # A single quote inside a SoQL string literal is escaped by doubling
        # it; without this a make containing "'" produces a broken query.
        escaped = str(make_name).upper().replace("'", "''")
        return f"upper(merk) like '%{escaped}%'"

    @staticmethod
    def _to_int(value):
        """Convert an API numeric field to ``int``; missing or malformed -> 0."""
        try:
            return int(float(value or 0))
        except (TypeError, ValueError, OverflowError):
            # One bad record must not abort the whole harvest.
            return 0

    @staticmethod
    async def _mark_processed(db, task_id):
        """Set the discovery task to ``'processed'`` and commit."""
        await db.execute(
            text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"),
            {"id": task_id},
        )
        await db.commit()

    @classmethod
    async def _fetch_total_count(cls, client, make_name):
        """Return the number of matching RDW records, or ``None`` on failure.

        ``None`` lets callers tell "the request failed" apart from "the API
        reported zero records".
        """
        params = {
            "$where": cls._soql_like_filter(make_name),
            "$select": "count(*)",
        }
        try:
            resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
            if resp.status_code != 200:
                logger.error(f"⚠️ Count request failed with HTTP {resp.status_code}")
                return None
            data = resp.json()
            return int(data[0]["count"])
        except Exception as e:
            # Deliberately broad: counting is best-effort and must never
            # raise into the caller (network, JSON and shape errors alike).
            logger.error(f"⚠️ Nem sikerült a számlálás: {e}")
            return None

    @classmethod
    async def get_total_count(cls, client, make_name):
        """Return how many RDW records exist for the given make.

        Args:
            client: An ``httpx.AsyncClient``-like object with an awaitable ``get``.
            make_name: Make to search for (matched case-insensitively as a substring).

        Returns:
            The record count, or 0 when the request fails or returns a non-200 status.
        """
        count = await cls._fetch_total_count(client, make_name)
        return 0 if count is None else count

    @classmethod
    async def process_make(cls, db, task_id, make_name):
        """Harvest all RDW variants of one make and mark the task processed.

        Args:
            db: Async SQLAlchemy session used for dedup lookups and inserts.
            task_id: Primary key of the ``data.catalog_discovery`` row.
            make_name: Make to harvest.

        Returns:
            ``True`` when the make was scanned completely (or the API reported
            zero records) and the task was marked ``'processed'``; ``False``
            when an RDW request failed, in which case the task status is left
            untouched so it can be retried.
        """
        clean_make = make_name.strip().upper()
        where_clause = cls._soql_like_filter(clean_make)

        async with httpx.AsyncClient(timeout=60) as client:
            # Step 1: count all records for the make.
            total_available = await cls._fetch_total_count(client, clean_make)
            if total_available is None:
                # A failed count is not "no data": do not mark the task processed.
                logger.warning(f"⚠️ {clean_make}: count unavailable, task left pending for retry.")
                return False

            logger.info(f"🚀 >>> {clean_make} feltérképezése: {total_available} variáns található az RDW-ben.")

            if total_available == 0:
                logger.warning(f"⚠️ {clean_make} márkához nem érkezett adat az API-tól.")
                await cls._mark_processed(db, task_id)
                return True

            # Step 2: pagination.
            limit = cls.PAGE_SIZE
            offset = 0
            total_added = 0
            # Keys already checked in this run; avoids a SELECT per duplicate
            # record and duplicate inserts within one page.
            seen = set()

            while offset < total_available:
                logger.info(f"📑 Lapozás: {clean_make} | {offset} -> {offset + limit} (Összesen: {total_available})")

                params = {
                    "$where": where_clause,
                    "$limit": limit,
                    "$offset": offset,
                    "$order": ":id",  # stable ordering recommended for Socrata paging
                }

                try:
                    resp = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS_RDW)
                except httpx.HTTPError as exc:
                    logger.error(f"❌ Request error while paging ({offset}): {exc}")
                    return False

                if resp.status_code != 200:
                    # Pages committed so far are kept; the task stays
                    # unprocessed so the remaining pages are not lost.
                    logger.error(f"❌ Hiba a lapozásnál ({offset}): {resp.status_code}")
                    return False

                batch = resp.json()
                if not batch:
                    break

                for item in batch:
                    res_make = str(item.get("merk", clean_make)).upper()
                    model = str(item.get("handelsbenaming", "Unknown")).upper()
                    ccm = cls._to_int(item.get("cilinderinhoud"))
                    kw = cls._to_int(item.get("netto_maximum_vermogen"))

                    key = (res_make, model, ccm, kw)
                    if key in seen:
                        continue
                    seen.add(key)

                    # Rows without a power figure are stored with NULL, so the
                    # dedup check must look for NULL (or a legacy 0) instead
                    # of comparing against 0, which would never match.
                    if kw > 0:
                        power_match = VehicleModelDefinition.power_kw == kw
                    else:
                        power_match = (
                            VehicleModelDefinition.power_kw.is_(None)
                            | (VehicleModelDefinition.power_kw == 0)
                        )

                    stmt = select(VehicleModelDefinition.id).where(
                        VehicleModelDefinition.make == res_make,
                        VehicleModelDefinition.marketing_name == model,
                        VehicleModelDefinition.engine_capacity == ccm,
                        power_match,
                    ).limit(1)

                    existing_id = (await db.execute(stmt)).scalar_one_or_none()
                    if existing_id is None:
                        db.add(VehicleModelDefinition(
                            make=res_make,
                            technical_code=item.get("kenteken"),
                            marketing_name=model,
                            engine_capacity=ccm,
                            power_kw=kw if kw > 0 else None,
                            status="unverified",
                            source="HUNTER-v2.4-PAGINATED",
                        ))
                        total_added += 1

                # Persist at the end of every page.
                # NOTE(review): this commit also ends the transaction in which
                # run() selected the task FOR UPDATE, so the row lock is
                # presumably released here - confirm whether several robots
                # can pick the same task.
                await db.commit()
                offset += limit

            # Step 3: finish.
            await cls._mark_processed(db, task_id)
            logger.info(f"✅ {clean_make} KÉSZ. {total_available} rekord átnézve, {total_added} új variáns stagingbe mentve.")
            return True

    @classmethod
    async def run(cls):
        """Poll for pending discovery tasks forever and process them one by one.

        Preferred makes (see the ``CASE`` in the query) are handled first.
        When no task is pending, or the last task failed and has to be
        retried, the loop waits ``IDLE_SLEEP_SECONDS`` before polling again.
        """
        logger.info("🤖 Robot 1 (Hunter) ONLINE - Paginator v2.4")

        query = text("""
            SELECT id, make FROM data.catalog_discovery
            WHERE status = 'pending'
            ORDER BY
                CASE WHEN make IN ('SUZUKI', 'TOYOTA', 'SKODA', 'VOLKSWAGEN', 'OPEL') THEN 1 ELSE 2 END,
                id ASC
            LIMIT 1 FOR UPDATE SKIP LOCKED
        """)

        while True:
            outcome = None
            async with SessionLocal() as db:
                task = (await db.execute(query)).fetchone()
                if task:
                    outcome = await cls.process_make(db, task[0], task[1])

            # Sleep outside the session so no connection or transaction is
            # held while idle. A failed task (explicit False) also backs off,
            # otherwise it would be retried in a tight loop.
            if not task or outcome is False:
                await asyncio.sleep(cls.IDLE_SLEEP_SECONDS)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: start the Hunter polling loop on a fresh event loop.
    asyncio.run(CatalogHunter.run())