átlagos kiegészítések jó sok

This commit is contained in:
Roo
2026-03-22 11:02:05 +00:00
parent f53e0b53df
commit 5d44339f21
249 changed files with 20922 additions and 2253 deletions

View File

@@ -0,0 +1,208 @@
import asyncio
import httpx
import logging
import os
import sys
from datetime import datetime, timedelta
from sqlalchemy import text, select
from app.database import AsyncSessionLocal
from app.models.asset import AssetCatalog
# MB 2.0 Szigorú naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-0-Discovery: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Vehicle-Robot-0-Discovery")
class DiscoveryEngine:
"""
THOUGHT PROCESS (INDUSTRIAL MODE 2.0):
1. Watchdog: finds and releases stuck tasks every hour.
2. Differential Sync: records only missing or new models and skips the gold_enriched ones.
3. Monthly Scheduler: downloads the full RDW database once a month, page by page.
"""
# Optional RDW application token, read from the environment when the class body is executed.
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
# The X-App-Token header is sent only when a token is configured.
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
SYNC_STATE_FILE = "/app/temp/.last_rdw_sync" # State file, so that a Docker restart does not immediately start the sync over
@staticmethod
async def run_watchdog():
""" PHASE 1: The Watchdog (dead-letter queue manager).

Resets stuck rows in vehicle.catalog_discovery and
vehicle.vehicle_model_definitions to a retryable status, logs how many rows
were touched and commits. Any exception is logged and swallowed, so the
caller never sees a failure.
"""
logger.info("🐕 Őrkutya: Beragadt feladatok keresése a rendszerben...")
try:
async with AsyncSessionLocal() as db:
# A) Hunter cleanup (reset to 'pending' in case the Hunter froze)
# NOTE(review): this UPDATE has no age filter, so every 'processing' row is reset on each run - confirm that tasks still being worked on may safely be requeued.
res1 = await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'pending' WHERE status = 'processing' RETURNING id;"))
hunter_resets = len(res1.fetchall())
if hunter_resets > 0:
logger.warning(f"🔄 {hunter_resets} db beragadt Hunter feladat (processing) visszaállítva 'pending'-re.")
# B) AI robot cleanup (2 hour timeout, based on updated_at)
query2 = text("""
UPDATE vehicle.vehicle_model_definitions
SET status = CASE
WHEN status = 'research_in_progress' THEN 'unverified'
WHEN status = 'ai_synthesis_in_progress' THEN 'awaiting_ai_synthesis'
END
WHERE status IN ('research_in_progress', 'ai_synthesis_in_progress')
AND updated_at < NOW() - INTERVAL '2 hours'
RETURNING id;
""")
res2 = await db.execute(query2)
ai_resets = len(res2.fetchall())
if ai_resets > 0:
logger.warning(f"🔄 {ai_resets} db beragadt AI feladat visszaállítva.")
await db.commit()
except Exception as e:
logger.error(f"❌ Őrkutya hiba: {e}")
@staticmethod
async def seed_manual_bootstrap():
    """PHASE 2: Seed the asset catalog with a minimal set of known models.

    Each bootstrap entry is inserted only when no ``AssetCatalog`` row with
    the same make and model exists yet.  Failures are logged as a warning
    and never propagate to the caller.
    """
    initial_data = [
        {"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)"},  # vehicle_class removed
        {"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)"},
    ]
    try:
        async with AsyncSessionLocal() as db:
            for item in initial_data:
                stmt = (
                    select(AssetCatalog)
                    .where(AssetCatalog.make == item["make"], AssetCatalog.model == item["model"])
                    .limit(1)
                )
                # The filter is make + model only, so several rows (e.g. other
                # generations) can match.  scalar_one_or_none() would raise in
                # that case and abort the remaining entries; first() only asks
                # "does at least one row exist?".
                existing = (await db.execute(stmt)).scalars().first()
                if existing is None:
                    db.add(AssetCatalog(**item))
            await db.commit()
    except Exception as e:
        logger.warning(f"Manual bootstrap hiba (Ignorálható, ha az adatbázis már tele van): {e}")
@classmethod
async def fetch_with_retry(cls, client: "httpx.AsyncClient", url: str, params: dict, retries: int = 3):
    """Fault-tolerant GET request that rides out API hiccups.

    Args:
        client: HTTP client used for the request.
        url: endpoint to call.
        params: query parameters passed to the client.
        retries: maximum number of attempts.

    Returns:
        The response when the server answered 200, otherwise ``None``
        (non-retryable status, or every attempt was rate limited / failed).

    HTTP 429 and transport errors are retried with exponential back-off
    (1s, 2s, 4s, ...).  No back-off is slept after the final attempt, since
    nothing follows it.
    """
    for attempt in range(retries):
        is_last = attempt == retries - 1
        try:
            resp = await client.get(url, params=params, headers=cls.HEADERS)
        except httpx.RequestError:
            if is_last:
                return None
            await asyncio.sleep(2 ** attempt)
            continue
        if resp.status_code == 200:
            return resp
        if resp.status_code != 429:
            # Any other status is treated as a permanent failure.
            return None
        if not is_last:
            await asyncio.sleep(2 ** attempt)
    return None
@classmethod
async def seed_from_rdw(cls):
    """PHASE 3: Remote discovery - differential sync against the RDW open-data API.

    Pages through the make / trade-name / vehicle-kind aggregate (10000 groups
    per request) and upserts every combination into
    ``vehicle.catalog_discovery`` as a ``pending`` task, unless a
    ``gold_enriched`` definition already exists for it.  The group count is
    stored as ``priority_score``.  One commit is issued per page.

    The sync-state file is written only when paging ended on an empty page,
    i.e. the whole result set was walked.  If a request fails the download is
    aborted without touching the state file, so the next scheduler cycle
    retries instead of waiting 30 days.
    """
    logger.info("📥 RDW TÖMEGES LETÖLTÉS: Új modellek keresése (Differential Sync)...")
    limit = 10000
    offset = 0
    inserted_count = 0
    updated_count = 0
    completed = False

    # THE MAGIC: differential-sync SQL with explicit type casting.
    # Built once; it does not change between rows.
    # NOTE(review): ON CONFLICT (make, model) needs a matching unique index on
    # vehicle.catalog_discovery - confirm against the schema.
    query = text("""
        INSERT INTO vehicle.catalog_discovery (make, model, vehicle_class, status, priority_score)
        SELECT
        CAST(:make AS VARCHAR),
        CAST(:model AS VARCHAR),
        CAST(:v_class AS VARCHAR),
        'pending',
        :priority
        WHERE NOT EXISTS (
        SELECT 1 FROM vehicle.vehicle_model_definitions
        WHERE make = CAST(:make AS VARCHAR)
        AND marketing_name = CAST(:model AS VARCHAR)
        AND status = 'gold_enriched'
        )
        ON CONFLICT (make, model)
        DO UPDATE SET priority_score = EXCLUDED.priority_score
        WHERE vehicle.catalog_discovery.status != 'processed'
        RETURNING xmax;
    """)

    async with httpx.AsyncClient(timeout=60.0) as client:
        while True:
            params = {
                "$select": "merk,handelsbenaming,voertuigsoort,count(*) as total",
                "$group": "merk,handelsbenaming,voertuigsoort",
                "$order": "total DESC",
                "$limit": limit,
                "$offset": offset,
            }
            resp = await cls.fetch_with_retry(client, "https://opendata.rdw.nl/resource/m9d7-ebf2.json", params)
            if resp is None:
                # Request failed: stop, but do NOT record the run as successful.
                logger.error(f"❌ RDW download aborted at offset {offset}: no usable response from the API.")
                break
            raw_data = resp.json()
            if not raw_data:
                # Empty page: every group has been seen.
                completed = True
                break
            logger.info(f"📊 Lapozás: {offset} - {offset + len(raw_data)} tételek analízise...")
            async with AsyncSessionLocal() as db:
                for entry in raw_data:
                    make = str(entry.get("merk", "")).upper().strip()
                    model = str(entry.get("handelsbenaming", "")).upper().strip()
                    # "or ''" guards against an explicit null in the payload.
                    v_kind = entry.get("voertuigsoort") or ""
                    total_count = int(entry.get("total", 0))
                    if not make or not model:
                        continue
                    if "Personenauto" in v_kind:
                        v_class = 'car'
                    elif "Motorfiets" in v_kind:
                        v_class = 'motorcycle'
                    else:
                        v_class = 'truck'
                    result = await db.execute(query, {
                        "make": make, "model": model, "v_class": v_class, "priority": total_count
                    })
                    row = result.fetchone()
                    if row:
                        if row[0] == 0:
                            inserted_count += 1  # fresh insert
                        else:
                            updated_count += 1  # existing row updated
                await db.commit()
            offset += limit
            await asyncio.sleep(1)

    if not completed:
        logger.warning(
            f"⚠️ RDW sync incomplete (new: {inserted_count} | updated: {updated_count}); "
            "state file not written, the sync will be retried."
        )
        return

    logger.info(f"✅ RDW Szinkron kész! Új modellek a listán: {inserted_count} | Frissített prioritások: {updated_count}")
    # Register the successful run on the file system.
    os.makedirs(os.path.dirname(cls.SYNC_STATE_FILE), exist_ok=True)
    with open(cls.SYNC_STATE_FILE, 'w') as f:
        f.write(datetime.now().isoformat())
@classmethod
def should_run_rdw_sync(cls) -> bool:
    """Tell whether more than 30 days have passed since the last successful RDW sync.

    Returns ``True`` when the state file is missing, unreadable or does not
    contain an ISO timestamp; otherwise compares the stored timestamp with
    the current local time.
    """
    state_path = cls.SYNC_STATE_FILE
    if os.path.exists(state_path):
        try:
            with open(state_path, 'r') as handle:
                stamp = handle.read().strip()
            elapsed = datetime.now() - datetime.fromisoformat(stamp)
            return elapsed > timedelta(days=30)
        except Exception:
            # A corrupt or unreadable state file counts as "never synced".
            return True
    return True
@classmethod
async def run(cls):
    """MAIN LOOP: monthly scheduler and hourly watchdog.

    Seeds the manual bootstrap data once, then loops forever: run the
    watchdog, run the RDW sync when it is due, sleep one hour.

    A failure inside the RDW sync is logged and does not end the loop, in
    line with the bootstrap and watchdog phases which also log and continue.
    """
    logger.info("🚀 ÉLES ÜZEM: Discovery Engine (Differential Sync) & Watchdog indítása...")
    await cls.seed_manual_bootstrap()
    while True:
        # 1. Hourly cleanup
        await cls.run_watchdog()
        # 2. Check whether the monthly sync is due
        if cls.should_run_rdw_sync():
            try:
                await cls.seed_from_rdw()
            except Exception:
                # Keep the watchdog alive; the sync is re-checked next cycle.
                logger.exception("❌ RDW sync failed; it will be retried on the next cycle.")
        else:
            logger.info("🛌 Az RDW szinkronizáció már lefutott az elmúlt 30 napban. Ugrás...")
        # 3. Sleep for one hour (heartbeat)
        logger.info("⏱️ A Discovery Engine most 1 órát pihen a következő Őrkutya futásig.")
        await asyncio.sleep(3600)
if __name__ == "__main__":
asyncio.run(DiscoveryEngine.run())

View File

@@ -0,0 +1,108 @@
# /app/app/workers/vehicle/vehicle_robot_0_strategist.py
import asyncio
import httpx
import logging
import os
from sqlalchemy import text
from app.database import AsyncSessionLocal # MB 2.0 Standard import
# Sentinel rendszerhez illesztett logolás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
logger = logging.getLogger("Vehicle-Robot-0-Strategist")
class Robot0Strategist:
"""
THOUGHT PROCESS:
1. The robot's goal is to determine the 'priority_score' from real market data (RDW).
2. As a first step the schema is checked (self-healing) to make sure the column exists.
3. The categories (car, motorcycle, truck) are separated so that each gets targeted priorities.
4. The 'ON CONFLICT' logic guarantees that already processed rows are not damaged.
5. The priority is based on the count: the more vehicles of a type exist, the higher it ranks.
"""
RDW_API = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# Mapping of Dutch vehicle kinds to internal categories.
# An entry whose first rdw_types item contains "NOT IN" is used as a raw predicate by get_popular_makes; the other entries are quoted values combined with OR.
CATEGORIES = [
{"name": "car", "rdw_types": ["'Personenauto'"]},
{"name": "motorcycle", "rdw_types": ["'Motorfiets'"]},
{"name": "truck", "rdw_types": ["'Bedrijfsauto'", "'Vrachtwagen'", "'Opleggertrekker'"]},
{"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfsauto', 'Vrachtwagen', 'Opleggertrekker')"]}
]
async def get_popular_makes(self, vehicle_class: str, rdw_types: list):
    """Fetch make statistics from RDW, most frequent make first.

    Args:
        vehicle_class: internal category name (not used by the query itself).
        rdw_types: quoted ``voertuigsoort`` values, or a single ``NOT IN (...)``
            predicate as the first item.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise an empty list
        (non-200 status or any exception, both of which are logged).
    """
    head = rdw_types[0]
    type_filter = (
        f"voertuigsoort {head}"
        if "NOT IN" in head
        else " OR ".join(f"voertuigsoort = {t}" for t in rdw_types)
    )
    params = {
        "$select": "merk, count(*) AS darabszam",
        "$where": type_filter,
        "$group": "merk",
        "$order": "darabszam DESC",
        "$limit": 500,
    }
    async with httpx.AsyncClient(timeout=45.0) as client:
        try:
            resp = await client.get(self.RDW_API, params=params, headers=self.HEADERS)
            if resp.status_code != 200:
                logger.error(f"⚠️ RDW API Hiba: {resp.status_code}")
                return []
            return resp.json()
        except Exception as e:
            logger.error(f"❌ Kapcsolati hiba az RDW felé: {e}")
            return []
async def run(self):
"""Run one full prioritisation pass over every entry in CATEGORIES.

Ensures the priority_score column exists, then for each category fetches
the make statistics and upserts one 'ALL_VARIANTS' discovery row per make,
using the make's count as its priority score.
"""
logger.info("🚀 Robot 0 (Strategist) ONLINE - Piaci elemzés indítása...")
# --- SCHEMA CHECK (bulletproof approach) ---
async with AsyncSessionLocal() as db:
try:
await db.execute(text("ALTER TABLE vehicle.catalog_discovery ADD COLUMN IF NOT EXISTS priority_score INTEGER DEFAULT 0;"))
await db.commit()
logger.info("✅ Adatbázis séma rendben (priority_score aktív).")
except Exception as e:
await db.rollback()
logger.error(f"⚠️ Séma hiba: {e}")
for category in self.CATEGORIES:
v_class = category["name"]
logger.info(f"📊 {v_class.upper()} hadosztály prioritásainak számítása...")
makes = await self.get_popular_makes(v_class, category["rdw_types"])
if not makes: continue
added_count = 0
for item in makes:
make_name = str(item.get("merk", "")).upper().strip()
if not make_name: continue
count = int(item.get("darabszam", 0))
# A separate session per make: a failing row is rolled back on its own.
async with AsyncSessionLocal() as db:
try:
# UPSERT: set the priority, but leave already finished records alone
# NOTE(review): the status list uses 'in_progress'; the Hunter worker marks claimed tasks as 'processing' - confirm which value is intended here.
query = text("""
INSERT INTO vehicle.catalog_discovery (make, model, vehicle_class, status, source, attempts, priority_score)
VALUES (:make, 'ALL_VARIANTS', :class, 'pending', 'STRATEGIST-V2', 0, :score)
ON CONFLICT (make, model, vehicle_class)
DO UPDATE SET priority_score = :score
WHERE vehicle.catalog_discovery.status NOT IN ('processed', 'in_progress');
""")
await db.execute(query, {"make": make_name, "class": v_class, "score": count})
await db.commit()
# Counts every statement that ran without error, including conflicts the WHERE clause skipped.
added_count += 1
except Exception as e:
await db.rollback()
logger.warning(f"❌ Hiba a márka rögzítésekor ({make_name}): {e}")
logger.info(f"{v_class.upper()} kategória kész: {added_count} márka rangsorolva.")
if __name__ == "__main__":
asyncio.run(Robot0Strategist().run())

View File

@@ -0,0 +1,224 @@
import asyncio
import httpx
import logging
import os
import re
import sys
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# Naplózás beállítása a standard kimenetre
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
stream=sys.stdout
)
logger = logging.getLogger("Robot-1-Hunter")
class CatalogHunter:
"""
Vehicle Robot 1.9.3: The Truly Invincible Hunter (SAVEPOINT PATCH)
Handles the ALL_VARIANTS instruction and uses row-level transaction protection.
"""
# RDW open-data endpoints: main vehicle data, fuel data and engine data.
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
# The X-App-Token header is sent only when a token is configured.
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# Page size used when paging through the main dataset.
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
    """Return *text_val* lower-cased, keeping only ASCII letters and digits.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    if text_val:
        stripped = re.sub(r'[^a-zA-Z0-9]', '', text_val)
        return stripped.lower()
    return ""
@classmethod
def parse_int(cls, value) -> int:
    """Convert *value* to ``int``, truncating any fractional part.

    Returns 0 for ``None``, blank strings and anything that cannot be
    parsed, including non-finite numbers such as ``"inf"`` or ``"1e999"``
    (which make ``int(float(...))`` raise ``OverflowError``).
    """
    if value is None:
        return 0
    try:
        if str(value).strip() == "":
            return 0
        return int(float(value))
    except (ValueError, TypeError, OverflowError):
        return 0
@classmethod
def parse_float(cls, value) -> float:
    """Convert *value* to ``float``; ``None``, blank or unparsable input gives 0.0."""
    try:
        is_blank = value is None or not str(value).strip()
        return 0.0 if is_blank else float(value)
    except (ValueError, TypeError):
        return 0.0
@classmethod
async def fetch_with_retry(cls, client: "httpx.AsyncClient", url: str, retries: int = 3):
    """Fault-tolerant GET with exponential back-off.

    Args:
        client: HTTP client used for the request.
        url: full URL including the query string.
        retries: maximum number of attempts.

    Returns:
        The response for any status other than 429 (callers check
        ``status_code`` themselves), or ``None`` when every attempt was
        rate limited.

    Raises:
        httpx.RequestError: when the final attempt fails at transport level.

    No back-off is slept after the final attempt, since nothing follows it.
    """
    for attempt in range(retries):
        last_attempt = attempt == retries - 1
        try:
            resp = await client.get(url, headers=cls.HEADERS)
        except httpx.RequestError as e:
            if last_attempt:
                logger.debug(f"Hálózati hiba: {e}")
                raise
            await asyncio.sleep(2 ** attempt)
            continue
        if resp.status_code != 429:
            return resp
        # Rate limited: back off only when another attempt follows.
        if not last_attempt:
            await asyncio.sleep(2 ** attempt)
    return None
@classmethod
async def fetch_tech_details(cls, client, plate):
    """Collect technical data (fuel, power, engine code) for one licence plate.

    Queries the RDW fuel and engine datasets.  The lookup is best effort:
    on any error the values gathered so far (or the defaults) are returned,
    and the error is logged instead of being dropped silently.

    Returns:
        dict with the keys ``power_kw``, ``engine_code``, ``euro_class``,
        ``fuel_desc``, ``co2`` and ``consumption``.
    """
    results = {
        "power_kw": 0, "engine_code": None, "euro_class": None,
        "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0
    }
    try:
        # Fuel data
        f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
        fuel_rows = f_resp.json() if f_resp is not None and f_resp.status_code == 200 else None
        if fuel_rows:
            f = fuel_rows[0]
            # Both spellings of each power field are tried; the larger value wins.
            p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
            p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
            results.update({
                "power_kw": max(p1, p2),
                "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
            })
        # Engine code data
        e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
        engine_rows = e_resp.json() if e_resp is not None and e_resp.status_code == 200 else None
        if engine_rows:
            results["engine_code"] = engine_rows[0].get("motorcode")
    except Exception as e:
        logger.warning(f"⚠️ Tech details incomplete for {plate}: {e}")
    return results
@classmethod
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
    """Process one discovery task: a make/model pair or, for ``ALL_VARIANTS``, a whole make.

    Pages through the RDW main dataset (``BATCH_SIZE`` rows per request, at
    most 500 rows per task), enriches every vehicle via
    ``fetch_tech_details`` and inserts it as an ``unverified``
    ``VehicleModelDefinition``.  Every row is written inside its own
    SAVEPOINT, so one bad record does not break the batch transaction.

    The task is marked ``processed`` only when paging finished cleanly.  If
    an RDW request fails, the task stays in ``processing`` rather than being
    closed with missing data.

    Args:
        db: open async session; committed after every batch.
        task_id: id of the ``vehicle.catalog_discovery`` row being processed.
        make_name: make to query (trimmed and upper-cased here).
        model_name: trade name, or ``ALL_VARIANTS`` to skip the model filter.
        v_class: vehicle class stored on every inserted definition.
        priority: priority score stored on every inserted definition.
    """
    # Local import: only this method builds query strings.
    from urllib.parse import quote, urlencode

    clean_make = make_name.strip().upper()
    clean_model = model_name.strip().upper()
    logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
    offset = 0
    fetch_failed = False
    async with httpx.AsyncClient(timeout=30.0) as client:
        while True:
            # Dynamic parameters: ALL_VARIANTS means no filter on the model.
            query = {"merk": clean_make}
            if clean_model != 'ALL_VARIANTS':
                query["handelsbenaming"] = clean_model
            query["$limit"] = cls.BATCH_SIZE
            query["$offset"] = offset
            query["$order"] = "kenteken DESC"
            # Percent-encode values so names containing '&', '+', '#' or
            # spaces cannot corrupt the query string.
            url = f"{cls.RDW_MAIN}?{urlencode(query, safe='$', quote_via=quote)}"
            try:
                r = await cls.fetch_with_retry(client, url)
                page_ok = r is not None and r.status_code == 200
                batch = r.json() if page_ok else []
            except Exception as e:
                logger.error(f"❌ API hiba: {e}")
                fetch_failed = True
                break
            if not page_ok:
                status = r.status_code if r is not None else "no response"
                logger.error(f"❌ API hiba: HTTP {status}")
                fetch_failed = True
                break
            if not batch:
                break
            for item in batch:
                plate = item.get("kenteken", "UNKNOWN")
                try:
                    # SAVEPOINT: a failing row does not damage the transaction block.
                    async with db.begin_nested():
                        tech = await cls.fetch_tech_details(client, plate)
                        # Real model name (important for the wildcard case).
                        actual_model = (item.get("handelsbenaming") or clean_model).upper()
                        norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
                        stmt = insert(VehicleModelDefinition).values(
                            make=clean_make,
                            marketing_name=actual_model,
                            normalized_name=norm_name,
                            variant_code=item.get("variant", "UNKNOWN"),
                            version_code=item.get("uitvoering", "UNKNOWN"),
                            type_approval_number=item.get("typegoedkeuringsnummer"),
                            technical_code=plate,
                            engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
                            power_kw=tech["power_kw"],
                            fuel_type=tech["fuel_desc"],
                            engine_code=tech["engine_code"],
                            seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                            doors=cls.parse_int(item.get("aantal_deuren")),
                            width=cls.parse_int(item.get("breedte")),
                            wheelbase=cls.parse_int(item.get("wielbasis")),
                            list_price=cls.parse_int(item.get("catalogusprijs")),
                            max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                            curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                            max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                            body_type=item.get("inrichting"),
                            co2_emissions_combined=tech["co2"],
                            fuel_consumption_combined=tech["consumption"],
                            euro_classification=tech["euro_class"],
                            cylinders=cls.parse_int(item.get("aantal_cilinders")),
                            vehicle_class=v_class,
                            priority_score=priority,
                            status="unverified",  # prepared for the R2 Researcher
                            source="MEGA-HUNTER-v1.9.3"
                        ).on_conflict_do_nothing(
                            index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
                        )
                        await db.execute(stmt)
                except Exception as e:
                    logger.warning(f"⚠️ Sor eldobva ({plate}): {e}")
            # Batch commit after the successful rows.
            await db.commit()
            offset += len(batch)
            if offset >= 500:  # safety cap per task
                break
            await asyncio.sleep(0.5)
    if fetch_failed:
        # Do not close the task: it stays 'processing' until a stuck-task
        # cleanup requeues it (presumably the discovery watchdog - confirm).
        logger.warning(f"⚠️ Task {task_id} ({clean_make} {clean_model}) left in 'processing' after an API failure.")
        return
    # Close the discovery task.
    await db.execute(
        text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"),
        {"id": task_id}
    )
    await db.commit()
@classmethod
async def run(cls):
"""Main worker loop: claim the highest-priority pending discovery task and process it, forever.

When no task is available the loop sleeps 30 seconds; after an exception
it logs the error and sleeps 10 seconds before trying again.
"""
logger.info("🤖 Mega-Hunter v1.9.3 ONLINE (SAVEPOINT ENABLED)")
while True:
try:
async with AsyncSessionLocal() as db:
# ATOMIC CLAIM: find, lock and change the status in a single statement
query = text("""
UPDATE vehicle.catalog_discovery
SET status = 'processing'
WHERE id = (
SELECT id FROM vehicle.catalog_discovery
WHERE status = 'pending'
ORDER BY priority_score DESC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
RETURNING id, make, model, vehicle_class, priority_score;
""")
result = await db.execute(query)
task = result.fetchone()
# Persist the 'processing' status before the work starts.
await db.commit()
if task:
await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
else:
# No work available: rest for 30 seconds
await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,179 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
# version: 1.9.6
import asyncio
import httpx
import logging
import os
import re
import sys
from datetime import datetime
from sqlalchemy import text, func
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# MB 2.0 Standard Naplózás
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
stream=sys.stdout
)
logger = logging.getLogger("Robot-1-Hunter")
class CatalogHunter:
"""
Vehicle Robot 1.9.6: Mega-Hunter (TIMESTAMP & INTEGRITY PATCH)
Handles ALL_VARIANTS, the savepoints and all mandatory fields.
"""
# RDW open-data endpoints: main vehicle data, fuel data and engine data.
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
# The X-App-Token header is sent only when a token is configured.
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# Page size used when paging through the main dataset.
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
    """Return *text_val* lower-cased, keeping only ASCII letters and digits.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    if text_val:
        stripped = re.sub(r'[^a-zA-Z0-9]', '', text_val)
        return stripped.lower()
    return ""
@classmethod
def parse_int(cls, value) -> int:
    """Convert *value* to ``int``, truncating any fractional part.

    Returns 0 for ``None``, blank strings and anything that cannot be
    parsed, including non-finite numbers such as ``"inf"`` or ``"1e999"``
    (which make ``int(float(...))`` raise ``OverflowError``).
    """
    if value is None:
        return 0
    try:
        if str(value).strip() == "":
            return 0
        return int(float(value))
    except (ValueError, TypeError, OverflowError):
        return 0
@classmethod
def parse_float(cls, value) -> float:
    """Convert *value* to ``float``; ``None``, blank or unparsable input gives 0.0."""
    try:
        is_blank = value is None or not str(value).strip()
        return 0.0 if is_blank else float(value)
    except (ValueError, TypeError):
        return 0.0
@classmethod
async def fetch_with_retry(cls, client: "httpx.AsyncClient", url: str, retries: int = 3):
    """Fault-tolerant GET with exponential back-off.

    Returns the response for any status other than 429 (callers check
    ``status_code`` themselves), or ``None`` when every attempt was rate
    limited.  A transport error on the final attempt is re-raised as
    ``httpx.RequestError``.

    No back-off is slept after the final attempt, since nothing follows it.
    """
    for attempt in range(retries):
        last_attempt = attempt == retries - 1
        try:
            resp = await client.get(url, headers=cls.HEADERS)
        except httpx.RequestError:
            if last_attempt:
                raise
            await asyncio.sleep(2 ** attempt)
            continue
        if resp.status_code != 429:
            return resp
        # Rate limited: back off only when another attempt follows.
        if not last_attempt:
            await asyncio.sleep(2 ** attempt)
    return None
@classmethod
async def fetch_tech_details(cls, client, plate):
    """Collect technical data (fuel, power, engine code) for one licence plate.

    Queries the RDW fuel and engine datasets.  The lookup is best effort:
    on any error the values gathered so far (or the defaults) are returned,
    and the error is logged instead of being dropped silently.

    Returns:
        dict with the keys ``power_kw``, ``engine_code``, ``euro_class``,
        ``fuel_desc``, ``co2`` and ``consumption``.
    """
    results = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
    try:
        f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
        fuel_rows = f_resp.json() if f_resp is not None and f_resp.status_code == 200 else None
        if fuel_rows:
            f = fuel_rows[0]
            # Both spellings of each power field are tried; the larger value wins.
            p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
            p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
            results.update({
                "power_kw": max(p1, p2),
                "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
            })
        e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
        engine_rows = e_resp.json() if e_resp is not None and e_resp.status_code == 200 else None
        if engine_rows:
            results["engine_code"] = engine_rows[0].get("motorcode")
    except Exception as e:
        logger.warning(f"⚠️ Tech details incomplete for {plate}: {e}")
    return results
@classmethod
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
    """Process one discovery task: a make/model pair or, for ``ALL_VARIANTS``, a whole make.

    Pages through the RDW main dataset (``BATCH_SIZE`` rows per request, at
    most 500 rows per task), enriches every vehicle via
    ``fetch_tech_details`` and inserts it as an ``unverified``
    ``VehicleModelDefinition``.  Every row is written inside its own
    SAVEPOINT, so one bad record does not break the batch transaction.

    The task is marked ``processed`` only when paging finished cleanly.  If
    an RDW request fails, the task stays in ``processing`` rather than being
    closed with missing data.

    Args:
        db: open async session; committed after every batch.
        task_id: id of the ``vehicle.catalog_discovery`` row being processed.
        make_name: make to query (trimmed and upper-cased here).
        model_name: trade name, or ``ALL_VARIANTS`` to skip the model filter.
        v_class: vehicle class stored on every inserted definition.
        priority: priority score stored on every inserted definition.
    """
    # Local import: only this method builds query strings.
    from urllib.parse import quote, urlencode

    clean_make = make_name.strip().upper()
    clean_model = model_name.strip().upper()
    logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
    offset = 0
    fetch_failed = False
    async with httpx.AsyncClient(timeout=30.0) as client:
        while True:
            query = {"merk": clean_make}
            if clean_model != 'ALL_VARIANTS':
                query["handelsbenaming"] = clean_model
            query["$limit"] = cls.BATCH_SIZE
            query["$offset"] = offset
            query["$order"] = "kenteken DESC"
            # Percent-encode values so names containing '&', '+', '#' or
            # spaces cannot corrupt the query string.
            url = f"{cls.RDW_MAIN}?{urlencode(query, safe='$', quote_via=quote)}"
            try:
                r = await cls.fetch_with_retry(client, url)
                page_ok = r is not None and r.status_code == 200
                batch = r.json() if page_ok else []
            except Exception as e:
                logger.error(f"❌ API error: {e}")
                fetch_failed = True
                break
            if not page_ok:
                status = r.status_code if r is not None else "no response"
                logger.error(f"❌ API error: HTTP {status}")
                fetch_failed = True
                break
            if not batch:
                break
            for item in batch:
                plate = item.get("kenteken", "UNKNOWN")
                try:
                    async with db.begin_nested():
                        tech = await cls.fetch_tech_details(client, plate)
                        actual_model = (item.get("handelsbenaming") or clean_model).upper()
                        norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
                        stmt = insert(VehicleModelDefinition).values(
                            make=clean_make,
                            marketing_name=actual_model,
                            normalized_name=norm_name,
                            variant_code=item.get("variant", "UNKNOWN"),
                            version_code=item.get("uitvoering", "UNKNOWN"),
                            technical_code=plate,
                            engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
                            power_kw=tech["power_kw"],
                            fuel_type=tech["fuel_desc"],
                            engine_code=tech["engine_code"],
                            seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                            doors=cls.parse_int(item.get("aantal_deuren")),
                            curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                            max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                            vehicle_class=v_class,
                            priority_score=priority,
                            market='EU',  # MANDATORY
                            status="unverified",
                            is_manual=False,
                            created_at=func.now(),  # MANDATORY TIMESTAMPS
                            updated_at=func.now(),
                            source="MEGA-HUNTER-v1.9.6"
                        ).on_conflict_do_nothing(
                            index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
                        )
                        await db.execute(stmt)
                except Exception as e:
                    logger.warning(f"⚠️ Sor eldobva ({plate}): {e}")
            await db.commit()
            offset += len(batch)
            if offset >= 500:  # safety cap per task
                break
            await asyncio.sleep(0.5)
    if fetch_failed:
        # Do not close the task: it stays 'processing' until a stuck-task
        # cleanup requeues it (presumably the discovery watchdog - confirm).
        logger.warning(f"⚠️ Task {task_id} ({clean_make} {clean_model}) left in 'processing' after an API failure.")
        return
    await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
    await db.commit()
@classmethod
async def run(cls):
"""Main worker loop: claim the highest-priority pending discovery task and process it, forever.

When no task is available the loop sleeps 30 seconds; after an exception
it logs the error and sleeps 10 seconds before trying again.
"""
logger.info("🤖 Mega-Hunter v1.9.6 ONLINE (TIMESTAMP PATCH)")
while True:
try:
async with AsyncSessionLocal() as db:
# Claim one task: select with SKIP LOCKED and flip it to 'processing' in a single statement.
query = text("""
UPDATE vehicle.catalog_discovery SET status = 'processing'
WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending'
ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1)
RETURNING id, make, model, vehicle_class, priority_score;
""")
result = await db.execute(query)
task = result.fetchone()
# Persist the 'processing' status before the work starts.
await db.commit()
if task: await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
else: await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,168 @@
import asyncio
import httpx
import logging
import os
import re
import sys
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
"""
Vehicle Robot 2.1.2: The Final Hunter
Perfect data type synchronisation. raw_search_context -> string.
"""
# RDW open-data endpoints: main vehicle data, fuel data and engine data.
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
# The X-App-Token header is sent only when a token is configured.
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# Page size used when paging through the main dataset.
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
    """Return *text_val* lower-cased, keeping only ASCII letters and digits.

    Falsy input (``None`` or an empty string) yields the literal ``"UNKNOWN"``.
    """
    if text_val:
        stripped = re.sub(r'[^a-zA-Z0-9]', '', text_val)
        return stripped.lower()
    return "UNKNOWN"
@classmethod
def parse_int(cls, value) -> int:
    """Convert *value* to ``int``, truncating any fractional part.

    Returns 0 for ``None``, blank strings and anything that cannot be
    parsed, including non-finite numbers such as ``"inf"`` or ``"1e999"``
    (which make ``int(float(...))`` raise ``OverflowError``).
    """
    if value is None:
        return 0
    try:
        if str(value).strip() == "":
            return 0
        return int(float(value))
    except (ValueError, TypeError, OverflowError):
        return 0
@classmethod
def parse_float(cls, value) -> float:
    """Convert *value* to ``float``; ``None``, blank or unparsable input gives 0.0."""
    try:
        is_blank = value is None or not str(value).strip()
        return 0.0 if is_blank else float(value)
    except (ValueError, TypeError):
        return 0.0
@classmethod
async def fetch_tech_details(cls, client, plate):
    """Collect technical data (fuel, power, engine code) for one licence plate.

    Queries the RDW fuel and engine datasets directly through *client*.  The
    lookup is best effort: on any error the values gathered so far (or the
    defaults) are returned, and the error is logged instead of being dropped
    silently.

    Returns:
        dict with the keys ``power_kw``, ``engine_code``, ``euro_class``,
        ``fuel_desc``, ``co2`` and ``consumption``.
    """
    res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
    try:
        f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
        # Decode the payload once instead of once per access.
        fuel_rows = f_resp.json() if f_resp.status_code == 200 else None
        if fuel_rows:
            f = fuel_rows[0]
            p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
            p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
            res.update({
                "power_kw": max(p1, p2),
                "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
            })
        e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
        engine_rows = e_resp.json() if e_resp.status_code == 200 else None
        if engine_rows:
            res["engine_code"] = engine_rows[0].get("motorcode")
    except Exception as e:
        logger.warning(f"⚠️ Tech details incomplete for {plate}: {e}")
    return res
@classmethod
async def process_task(cls, db, task):
    """Process one claimed discovery task.

    Pages through the RDW main dataset for ``task.make`` (and ``task.model``
    unless it is ``ALL_VARIANTS``) in steps of ``BATCH_SIZE``, at most 500
    rows per task.  Every vehicle is enriched via ``fetch_tech_details`` and
    inserted as an ``unverified`` ``VehicleModelDefinition`` inside its own
    SAVEPOINT.

    The task is marked ``processed`` only when paging finished cleanly.  If
    an RDW request fails, the task stays in ``processing`` rather than being
    closed with missing data.

    Args:
        db: open async session; committed after every batch.
        task: claimed row exposing ``id``, ``make``, ``model``,
            ``vehicle_class`` and ``priority_score``.
    """
    clean_make = task.make.strip().upper()
    clean_model = task.model.strip().upper()
    logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
    fetch_failed = False
    async with httpx.AsyncClient(timeout=30.0) as client:
        offset = 0
        while True:
            # Passed as a params mapping so the client percent-encodes names
            # containing '&', '+', '#' or spaces.
            query = {"merk": clean_make}
            if clean_model != 'ALL_VARIANTS':
                query["handelsbenaming"] = clean_model
            query["$limit"] = cls.BATCH_SIZE
            query["$offset"] = offset
            query["$order"] = "kenteken DESC"
            try:
                r = await client.get(cls.RDW_MAIN, params=query, headers=cls.HEADERS)
                page_ok = r.status_code == 200
                batch = r.json() if page_ok else []
            except Exception as e:
                logger.error(f"❌ API error: {e}")
                fetch_failed = True
                break
            if not page_ok:
                logger.error(f"❌ API error: HTTP {r.status_code}")
                fetch_failed = True
                break
            if not batch:
                break
            for item in batch:
                plate = item.get("kenteken", "UNKNOWN")
                try:
                    async with db.begin_nested():
                        tech = await cls.fetch_tech_details(client, plate)
                        actual_model = (item.get("handelsbenaming") or clean_model).upper()
                        norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
                        # First registration date: its first four characters are taken as the year.
                        datum_eerste_toelating = str(item.get("datum_eerste_toelating", ""))
                        year_from = cls.parse_int(datum_eerste_toelating[:4]) if len(datum_eerste_toelating) >= 4 else 0
                        stmt = insert(VehicleModelDefinition).values(
                            market='EU',
                            make=clean_make,
                            marketing_name=actual_model,
                            normalized_name=norm_name,
                            variant_code=item.get("variant", "UNKNOWN"),
                            version_code=item.get("uitvoering", "UNKNOWN"),
                            technical_code=plate,
                            type_approval_number=item.get("typegoedkeuringsnummer"),
                            seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                            doors=cls.parse_int(item.get("aantal_deuren")),
                            width=cls.parse_int(item.get("breedte")),
                            wheelbase=cls.parse_int(item.get("wielbasis")),
                            list_price=cls.parse_int(item.get("catalogusprijs")),
                            max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                            curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                            max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                            fuel_consumption_combined=tech["consumption"],
                            co2_emissions_combined=tech["co2"],
                            vehicle_class=task.vehicle_class,
                            body_type=item.get("inrichting"),
                            fuel_type=tech["fuel_desc"],
                            engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
                            power_kw=tech["power_kw"],
                            cylinders=cls.parse_int(item.get("aantal_cilinders")),
                            engine_code=tech["engine_code"],
                            euro_classification=tech["euro_class"],
                            year_from=year_from,
                            priority_score=task.priority_score,
                            status="unverified",
                            source="MEGA-HUNTER-v2.1.2",
                            # FIX: raw_search_context is now an empty STRING (''), as the model expects.
                            raw_search_context='',
                            research_metadata={},
                            specifications={},
                            marketing_name_aliases=[]
                        ).on_conflict_do_nothing(
                            index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
                        )
                        await db.execute(stmt)
                except Exception as e:
                    logger.warning(f"⚠️ Sor hiba ({plate}): {e}")
            await db.commit()
            offset += len(batch)
            if offset >= 500:  # safety cap per task
                break
            await asyncio.sleep(0.5)
    if fetch_failed:
        # Do not close the task: it stays 'processing' until a stuck-task
        # cleanup requeues it (presumably the discovery watchdog - confirm).
        logger.warning(f"⚠️ Task {task.id} ({clean_make} {clean_model}) left in 'processing' after an API failure.")
        return
    await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
    await db.commit()
@classmethod
async def run(cls):
"""Main worker loop: claim the highest-priority pending discovery task and process it, forever.

When no task is available the loop sleeps 30 seconds; after an exception
it logs the error and sleeps 10 seconds before trying again.
"""
logger.info("🤖 Mega-Hunter v2.1.2 (Adattípus Fix) ONLINE")
while True:
try:
async with AsyncSessionLocal() as db:
# Claim one task: select with SKIP LOCKED and flip it to 'processing' in a single statement.
query = text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;")
res = await db.execute(query)
task = res.fetchone()
# Persist the 'processing' status before the work starts.
await db.commit()
if task: await cls.process_task(db, task)
else: await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,205 @@
# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
import asyncio
import httpx
import logging
import os
import re
import sys
import json
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
"""
Vehicle Robot 2.2.0: Fast-Track to Gold Edition
If every key datum (kW, ccm, fuel) is available from RDW, the vehicle is set to 'gold_enriched' immediately
and written into the vehicle_catalog master table!
"""
# RDW open-data endpoints: main vehicle data, fuel data and engine data.
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
# The X-App-Token header is sent only when a token is configured.
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# Page size used when paging through the main dataset.
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
    """Return *text_val* lower-cased, keeping only ASCII letters and digits.

    Falsy input (``None`` or an empty string) yields the literal ``"UNKNOWN"``.
    """
    if text_val:
        stripped = re.sub(r'[^a-zA-Z0-9]', '', text_val)
        return stripped.lower()
    return "UNKNOWN"
@classmethod
def parse_int(cls, value) -> int:
    """Convert *value* to ``int``, truncating any fractional part.

    Returns 0 for ``None``, blank strings and anything that cannot be
    parsed, including non-finite numbers such as ``"inf"`` or ``"1e999"``
    (which make ``int(float(...))`` raise ``OverflowError``).
    """
    if value is None:
        return 0
    try:
        if str(value).strip() == "":
            return 0
        return int(float(value))
    except (ValueError, TypeError, OverflowError):
        return 0
@classmethod
def parse_float(cls, value) -> float:
    """Convert *value* to ``float``; ``None``, blank or unparsable input gives 0.0."""
    try:
        is_blank = value is None or not str(value).strip()
        return 0.0 if is_blank else float(value)
    except (ValueError, TypeError):
        return 0.0
@classmethod
async def fetch_tech_details(cls, client, plate):
    """Collect technical data (fuel, power, engine code) for one licence plate.

    Queries the RDW fuel and engine datasets directly through *client*.  The
    lookup is best effort: on any error the values gathered so far (or the
    defaults) are returned, and the error is logged instead of being dropped
    silently.

    Returns:
        dict with the keys ``power_kw``, ``engine_code``, ``euro_class``,
        ``fuel_desc``, ``co2`` and ``consumption``.
    """
    res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
    try:
        f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
        # Decode the payload once instead of once per access.
        fuel_rows = f_resp.json() if f_resp.status_code == 200 else None
        if fuel_rows:
            f = fuel_rows[0]
            p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
            p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
            res.update({
                "power_kw": max(p1, p2),
                "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
            })
        e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
        engine_rows = e_resp.json() if e_resp.status_code == 200 else None
        if engine_rows:
            res["engine_code"] = engine_rows[0].get("motorcode")
    except Exception as e:
        logger.warning(f"⚠️ Tech details incomplete for {plate}: {e}")
    return res
@classmethod
async def process_task(cls, db, task):
clean_make = task.make.strip().upper()
clean_model = task.model.strip().upper()
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
async with httpx.AsyncClient(timeout=30.0) as client:
offset = 0
while True:
params = f"merk={clean_make}"
if clean_model != 'ALL_VARIANTS':
params += f"&handelsbenaming={clean_model}"
params += f"&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
try:
r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
batch = r.json() if r.status_code == 200 else []
except Exception: break
if not batch: break
for item in batch:
plate = item.get("kenteken", "UNKNOWN")
try:
async with db.begin_nested():
tech = await cls.fetch_tech_details(client, plate)
actual_model = (item.get("handelsbenaming") or clean_model).upper()
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
datum_eerste_toelating = str(item.get("datum_eerste_toelating", ""))
year_from = cls.parse_int(datum_eerste_toelating[:4]) if len(datum_eerste_toelating) >= 4 else 0
engine_ccm = cls.parse_int(item.get("cilinderinhoud"))
power_kw = tech["power_kw"]
fuel_type = tech["fuel_desc"]
# FAST-TRACK LOGIKA: Ha a kötelező műszaki adatok megvannak, azonnal ARANY minősítést kap!
# Villanyautóknál a CCM lehet 0, ezt is kezeljük.
is_gold = False
if (power_kw > 0 and engine_ccm > 0) or (power_kw > 0 and "elektri" in fuel_type.lower()):
is_gold = True
final_status = "gold_enriched" if is_gold else "unverified"
# 1. Beírjuk a VMD-be (Staging tábla)
stmt = insert(VehicleModelDefinition).values(
market='EU',
make=clean_make,
marketing_name=actual_model,
normalized_name=norm_name,
variant_code=item.get("variant", "UNKNOWN"),
version_code=item.get("uitvoering", "UNKNOWN"),
technical_code=plate,
type_approval_number=item.get("typegoedkeuringsnummer"),
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
doors=cls.parse_int(item.get("aantal_deuren")),
width=cls.parse_int(item.get("breedte")),
wheelbase=cls.parse_int(item.get("wielbasis")),
list_price=cls.parse_int(item.get("catalogusprijs")),
max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
fuel_consumption_combined=tech["consumption"],
co2_emissions_combined=tech["co2"],
vehicle_class=task.vehicle_class,
body_type=item.get("inrichting"),
fuel_type=fuel_type,
engine_capacity=engine_ccm,
power_kw=power_kw,
cylinders=cls.parse_int(item.get("aantal_cilinders")),
engine_code=tech["engine_code"],
euro_classification=tech["euro_class"],
year_from=year_from,
priority_score=task.priority_score,
status=final_status, # Dinamikus státusz
source="MEGA-HUNTER-v2.2.0-FAST",
raw_search_context='',
research_metadata={},
specifications={"fast_track": True}, # Jelezzük, hogy ez RDW-ből jött közvetlenül
marketing_name_aliases=[]
).on_conflict_do_nothing(
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
).returning(VehicleModelDefinition.id)
res = await db.execute(stmt)
vmd_id = res.scalar()
# 2. HA ARANY, AZONNAL LÉPÜNK A VÉGSŐ KATALÓGUSBA (Ahogy az Alchemist is tenné)
if is_gold and vmd_id:
cat_stmt = text("""
INSERT INTO vehicle.vehicle_catalog
(master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING;
""")
await db.execute(cat_stmt, {
"m_id": vmd_id,
"make": clean_make,
"model": actual_model[:50],
"kw": power_kw,
"ccm": engine_ccm,
"fuel": fuel_type,
"factory": json.dumps({"source": "RDW API Direct", "verified": True})
})
logger.info(f"✨ FAST-TRACK ARANY: {clean_make} {actual_model} (KW: {power_kw}, CCM: {engine_ccm})")
except Exception as e:
logger.warning(f"⚠️ Sor hiba ({plate}): {e}")
await db.commit()
offset += len(batch)
if offset >= 500: break
await asyncio.sleep(0.5)
await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
await db.commit()
@classmethod
async def run(cls):
logger.info("🤖 Mega-Hunter v2.2.0 (Fast-Track Edition) ONLINE")
while True:
try:
async with AsyncSessionLocal() as db:
query = text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;")
res = await db.execute(query)
task = res.fetchone()
await db.commit()
if task: await cls.process_task(db, task)
else: await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
    # Started as a script: run the hunter's endless loop on a new event loop.
    worker_loop = CatalogHunter.run()
    asyncio.run(worker_loop)

View File

@@ -0,0 +1,140 @@
import asyncio, httpx, logging, os, re, sys, json
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# Process-wide logging configuration: INFO level, stdout, robot-tagged lines.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
    """
    Catalog hunter worker (Mega-Hunter v2.2.0, Fast-Track).

    Takes pending ``vehicle.catalog_discovery`` tasks, reads the matching
    vehicles from the RDW open-data endpoints and stores them in the
    ``VehicleModelDefinition`` staging table. Rows with power plus
    displacement (or power on an electric vehicle) are stored as
    ``gold_enriched`` and published to ``vehicle.vehicle_catalog`` at once.
    """

    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    # The token header is only attached when a token is configured.
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Lower-case *text_val* and strip non-alphanumerics; ``"UNKNOWN"`` for empty input."""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower() if text_val else "UNKNOWN"

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert *value* to ``int`` via ``float``; 0 for falsy, blank or unparsable input."""
        try:
            return int(float(value)) if value and str(value).strip() else 0
        except (ValueError, TypeError, OverflowError):
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed here.
            return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Convert *value* to ``float``; 0.0 for falsy, blank or unparsable input."""
        try:
            return float(value) if value and str(value).strip() else 0.0
        except (ValueError, TypeError, OverflowError):
            return 0.0

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Fetch fuel/power and engine-code details for one licence plate.

        Best effort: failures are logged and the data gathered so far (or the
        defaults) is returned; the method itself never raises.
        """
        res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
        try:
            # httpx encodes the plate when it is passed through `params=`.
            f_resp = await client.get(cls.RDW_FUEL, params={"kenteken": plate}, headers=cls.HEADERS)
            fuel_rows = f_resp.json() if f_resp.status_code == 200 else None
            if fuel_rows:
                f = fuel_rows[0]
                p1, p2 = cls.parse_int(f.get("netto_maximum_vermogen")), cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
                res.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd")),
                })
            e_resp = await client.get(cls.RDW_ENGINE, params={"kenteken": plate}, headers=cls.HEADERS)
            engine_rows = e_resp.json() if e_resp.status_code == 200 else None
            if engine_rows:
                res["engine_code"] = engine_rows[0].get("motorcode")
        except Exception as e:
            # Still non-fatal, but the failure is now visible in the log.
            logger.warning(f"⚠️ RDW műszaki adat hiba ({plate}): {e}")
        return res

    @classmethod
    async def process_task(cls, db, task):
        """Harvest RDW vehicles for one discovery task, then mark it processed.

        Reads pages of ``BATCH_SIZE`` rows until an empty page, a failed
        request or 500 rows. Each row is written inside its own savepoint;
        the session is committed after every page.
        """
        clean_make, clean_model = task.make.strip().upper(), task.model.strip().upper()
        logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
        async with httpx.AsyncClient(timeout=30.0) as client:
            offset = 0
            while True:
                # Built as a dict so httpx URL-encodes make/model values.
                page_params = {"merk": clean_make}
                if clean_model != 'ALL_VARIANTS':
                    page_params["handelsbenaming"] = clean_model
                page_params.update({"$limit": cls.BATCH_SIZE, "$offset": offset, "$order": "kenteken DESC"})
                try:
                    r = await client.get(cls.RDW_MAIN, params=page_params, headers=cls.HEADERS)
                    batch = r.json() if r.status_code == 200 else []
                except Exception as e:
                    logger.warning(f"⚠️ RDW lekérdezési hiba ({clean_make} {clean_model}, offset={offset}): {e}")
                    break
                if r.status_code != 200:
                    logger.warning(f"⚠️ RDW HTTP {r.status_code} ({clean_make} {clean_model}, offset={offset})")
                if not batch:
                    break
                for item in batch:
                    plate = item.get("kenteken", "UNKNOWN")
                    try:
                        async with db.begin_nested():
                            tech = await cls.fetch_tech_details(client, plate)
                            actual_model = (item.get("handelsbenaming") or clean_model).upper()
                            # Normalise the trade name without the make; keep
                            # the full name if nothing remains.
                            norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
                            datum = str(item.get("datum_eerste_toelating", ""))
                            year_from = cls.parse_int(datum[:4]) if len(datum) >= 4 else 0
                            engine_ccm, power_kw, fuel_type = cls.parse_int(item.get("cilinderinhoud")), tech["power_kw"], tech["fuel_desc"]
                            # Fast-track: power plus displacement, or power on
                            # an electric vehicle, is enough for gold status.
                            is_gold = (power_kw > 0 and engine_ccm > 0) or (power_kw > 0 and "elektri" in fuel_type.lower())
                            final_status = "gold_enriched" if is_gold else "unverified"
                            stmt = insert(VehicleModelDefinition).values(
                                market='EU', make=clean_make, marketing_name=actual_model, normalized_name=norm_name,
                                variant_code=item.get("variant", "UNKNOWN"), version_code=item.get("uitvoering", "UNKNOWN"),
                                technical_code=plate, type_approval_number=item.get("typegoedkeuringsnummer"),
                                seats=cls.parse_int(item.get("aantal_zitplaatsen")), doors=cls.parse_int(item.get("aantal_deuren")),
                                width=cls.parse_int(item.get("breedte")), wheelbase=cls.parse_int(item.get("wielbasis")),
                                list_price=cls.parse_int(item.get("catalogusprijs")), max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                                curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")), max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                                fuel_consumption_combined=tech["consumption"], co2_emissions_combined=tech["co2"],
                                vehicle_class=task.vehicle_class, body_type=item.get("inrichting"), fuel_type=fuel_type,
                                engine_capacity=engine_ccm, power_kw=power_kw, cylinders=cls.parse_int(item.get("aantal_cilinders")),
                                engine_code=tech["engine_code"], euro_classification=tech["euro_class"], year_from=year_from,
                                priority_score=task.priority_score, status=final_status, source="MEGA-HUNTER-v2.2.0-FAST",
                                raw_search_context='', research_metadata={}, specifications={"fast_track": True} if is_gold else {}, marketing_name_aliases=[]
                            ).on_conflict_do_nothing(
                                index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
                            ).returning(VehicleModelDefinition.id)
                            res = await db.execute(stmt)
                            # No id is returned when the insert was skipped on conflict.
                            vmd_id = res.scalar()
                            # Automatic publication of gold rows.
                            if is_gold and vmd_id:
                                cat_stmt = text("""
                                    INSERT INTO vehicle.vehicle_catalog (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
                                    VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
                                    ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING;
                                """)
                                await db.execute(cat_stmt, {"m_id": vmd_id, "make": clean_make, "model": actual_model[:50], "kw": power_kw, "ccm": engine_ccm, "fuel": fuel_type, "factory": '{"source": "RDW Fast-Track"}'})
                                logger.info(f"✨ FAST-TRACK ARANY: {clean_make} {actual_model}")
                    except Exception as e:
                        logger.warning(f"⚠️ Sor hiba ({plate}): {e}")
                await db.commit()
                offset += len(batch)
                if offset >= 500:
                    break
                await asyncio.sleep(0.5)
        await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
        await db.commit()

    @classmethod
    async def run(cls):
        """Endless worker loop: claim one pending task, process it, repeat.

        Sleeps 30 s on an empty queue and 10 s after an unexpected error,
        which is now logged instead of being swallowed.
        """
        logger.info("🤖 Mega-Hunter v2.2.0 (Fast-Track) ONLINE")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Claim-and-mark in one statement; SKIP LOCKED avoids
                    # two workers taking the same row.
                    res = await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;"))
                    task = res.fetchone()
                    await db.commit()
                    if task:
                        await cls.process_task(db, task)
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                logger.error(f"💀 Főciklus hiba: {e}")
                await asyncio.sleep(10)
if __name__ == "__main__":
    # Direct execution: drive the hunter's polling loop with asyncio.
    polling_loop = CatalogHunter.run()
    asyncio.run(polling_loop)

View File

@@ -0,0 +1,239 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_2_researcher.py
import asyncio
import logging
import warnings
import os
import json
from datetime import datetime
from sqlalchemy import text, update, func
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# MB 2.0 standard logging: INFO level, every line tagged with the robot name.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] Robot-2-Researcher: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("Vehicle-Robot-2-Researcher")
class QuotaManager:
    """Strict daily request counter for paid / authority APIs.

    The counter lives in a small JSON file (``{"date": "YYYY-MM-DD",
    "count": N}``) so it survives restarts. A missing or unreadable file is
    treated as a fresh counter for today instead of crashing the caller, and
    the file is rewritten atomically so an interrupted write cannot leave a
    truncated state behind.
    """

    def __init__(self, service_name: str, daily_limit: int):
        """Create the counter for *service_name*, allowing *daily_limit* requests per day."""
        self.service_name = service_name
        self.daily_limit = daily_limit
        self.state_file = f"/app/temp/.quota_{service_name}.json"
        self._ensure_file()

    @staticmethod
    def _today() -> str:
        """Return today's local date as ``YYYY-MM-DD``."""
        return datetime.now().strftime("%Y-%m-%d")

    def _write_state(self, state: dict) -> None:
        """Persist *state* by writing a temp file and renaming it over the target."""
        os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
        tmp_path = f"{self.state_file}.tmp"
        with open(tmp_path, 'w') as f:
            json.dump(state, f)
        os.replace(tmp_path, self.state_file)

    def _read_state(self):
        """Load the stored state; return ``None`` when it is missing or malformed."""
        try:
            with open(self.state_file, 'r') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            logging.getLogger("Vehicle-Robot-2-Researcher").warning(
                f"Quota state unreadable for {self.service_name}, resetting: {e}"
            )
            return None
        if not isinstance(data, dict):
            return None
        if not isinstance(data.get("date"), str) or not isinstance(data.get("count"), int):
            return None
        return data

    def _ensure_file(self):
        """Create the state directory and an initial zero counter if none exists."""
        os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
        if not os.path.exists(self.state_file):
            self._write_state({"date": self._today(), "count": 0})

    def can_make_request(self) -> bool:
        """Reserve one request from today's quota.

        Returns ``True`` and increments the stored counter when quota is
        left; returns ``False`` (without writing) once the limit is reached.
        The counter restarts from zero on the first call of a new day.
        """
        today = self._today()
        data = self._read_state()
        if data is None or data["date"] != today:
            data = {"date": today, "count": 0}  # new day (or lost state): reset the quota
        if data["count"] >= self.daily_limit:
            return False
        # Reserve the slot before the caller performs the request.
        data["count"] += 1
        self._write_state(data)
        return True
class VehicleResearcher:
    """
    Vehicle Robot 2.5: "Sniper" researcher.

    Claims vehicle definitions that still need research, runs a few targeted
    DuckDuckGo searches for each one, condenses the snippets into a bounded
    text dossier and stores it (plus regex-extracted specs) for the AI
    synthesis step.
    """

    def __init__(self):
        self.max_attempts = 5
        self.search_timeout = 15.0
        # Quota managers, configured from the environment.
        dvla_limit = int(os.getenv("DVLA_DAILY_LIMIT", "1000"))
        self.dvla_quota = QuotaManager("dvla", dvla_limit)
        self.dvla_token = os.getenv("DVLA_API_KEY")

    async def fetch_ddg_targeted(self, label: str, query: str) -> str:
        """Run one DuckDuckGo text search in a worker thread.

        Returns a text block headed by ``[SOURCE: label ...]``. A timeout or
        any search error yields a placeholder block instead of raising.
        """
        try:
            def search():
                with DDGS() as ddgs:
                    # Two results only: the most relevant hits, little noise.
                    results = ddgs.text(query, max_results=2)
                    return [f"- {r.get('body', '')}" for r in results] if results else []

            results = await asyncio.wait_for(asyncio.to_thread(search), timeout=self.search_timeout)
            if not results:
                return f"[SOURCE: {label}]\nNincs érdemi találat.\n"
            content = f"[SOURCE: {label} | KERESÉS: {query}]\n"
            content += "\n".join(results) + "\n"
            return content
        except Exception as e:
            logger.debug(f"Keresési hiba ({label}): {e}")
            return f"[SOURCE: {label}]\nKERESÉSI HIBA.\n"

    def extract_specs_from_text(self, text: str) -> dict:
        """Pull displacement, power and engine code out of raw text with regexes.

        Returns a dict with any of the keys ``ccm``, ``kw`` and
        ``engine_code``; keys with no match are omitted. Units must appear as
        whole tokens, so "2019 Lexus" is not read as horsepower, "75 kWh" is
        not read as kW and a bare number is not read as litres.
        """
        import re  # local import: this module does not import `re` at top level

        specs = {}
        if not text:
            return specs

        # Displacement, e.g. "1998 cc", "1998ccm", "2000 cm³".
        ccm_match = re.search(r'(?<!\d)(\d{3,4})\s*(?:ccm|cc|cm³|cm3)(?![a-z])', text, re.IGNORECASE)
        if ccm_match:
            specs['ccm'] = int(ccm_match.group(1))
        else:
            # Fallback, e.g. "2.0 L", "1,6 liter" -> ccm. The unit is
            # mandatory, "l/100km" consumption figures are excluded, and only
            # values plausible for an engine (up to 18 litres) are accepted.
            liter_pattern = r'(?<![\d.,])(\d{1,2}(?:[.,]\d{1,2})?)\s*(?:liters?|litres?|l)\b(?!\s*/)'
            for liter_match in re.finditer(liter_pattern, text, re.IGNORECASE):
                liters = float(liter_match.group(1).replace(',', '.'))
                if 0.1 <= liters <= 18.0:
                    # round() avoids float truncation artefacts of int().
                    specs['ccm'] = round(liters * 1000)
                    break

        # Power, e.g. "150 kW", "150kW". The boundary keeps "kWh" out.
        kw_match = re.search(r'(?<!\d)(\d{2,4})\s*kw\b', text, re.IGNORECASE)
        if kw_match:
            specs['kw'] = int(kw_match.group(1))
        else:
            # Horsepower fallback ("150 HP", "150 LE", "150 PS"), converted to kW.
            hp_match = re.search(r'(?<!\d)(\d{2,4})\s*(?:hp|le|ps)\b', text, re.IGNORECASE)
            if hp_match:
                hp = int(hp_match.group(1))
                specs['kw'] = int(hp * 0.7355)  # approximate conversion

        # Engine code, e.g. "engine code: N47".
        engine_pattern = r'(?:motor\s*kód|engine\s*code|motor\s*code)[:\s]+([A-Z0-9\.\- ]+)'
        engine_match = re.search(engine_pattern, text, re.IGNORECASE)
        if engine_match:
            specs['engine_code'] = engine_match.group(1).strip()
        return specs

    async def research_vehicle(self, db, vehicle_id: int, make: str, model: str, engine: str, year: str, current_attempts: int):
        """Research one vehicle and store the resulting dossier.

        With enough text (more than 150 characters) the row moves to
        ``awaiting_ai_synthesis``; otherwise it goes back to ``unverified`` or,
        once ``max_attempts`` is reached, to ``suspended_research``. Database
        errors are rolled back and logged, not raised.
        """
        engine_safe = engine or ""
        year_safe = str(year) if year else ""
        logger.info(f"🔎 Mesterlövész Kutatás: {make} {model} (Motor: {engine_safe})")

        # Tier 1: free, targeted searches.
        queries = [
            ("ULTIMATE_SPECS", f"{make} {model} {engine_safe} {year_safe} site:ultimatespecs.com"),
            ("AUTO_DATA", f"{make} {model} {engine_safe} {year_safe} site:auto-data.net"),
            ("COMMON_ISSUES", f"{make} {model} {engine_safe} reliability common problems")
        ]
        tasks = [self.fetch_ddg_targeted(label, q) for label, q in queries]
        search_results = await asyncio.gather(*tasks)

        # Tier 2: paid / quota-limited APIs (placeholder for DVLA).
        # If UK plates arrive in the future, DVLA would be called here:
        # if has_uk_plate and self.dvla_quota.can_make_request():
        #     uk_data = await self.fetch_dvla_data(plate)
        #     search_results.append(uk_data)

        # Tier 3: assemble the dossier, capped in length to spare the AI.
        full_context = "\n".join(search_results)
        if len(full_context) > 2500:
            full_context = full_context[:2500] + "\n...[TRUNCATED TO SAVE GPU TOKENS]"

        # Regex-based specification extraction.
        extracted_specs = self.extract_specs_from_text(full_context)

        try:
            if len(full_context.strip()) > 150:  # low bar: targeted searches are terse
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        raw_search_context=full_context,
                        research_metadata=extracted_specs,
                        status='awaiting_ai_synthesis',  # dossier ready for the Alchemist
                        last_research_at=func.now(),
                        attempts=current_attempts + 1
                    )
                )
                logger.info(f"✅ Akta rögzítve ({len(full_context)} karakter): {make} {model}")
            else:
                new_status = 'suspended_research' if current_attempts + 1 >= self.max_attempts else 'unverified'
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        status=new_status,
                        attempts=current_attempts + 1,
                        last_research_at=func.now()
                    )
                )
                if new_status == 'suspended_research':
                    logger.warning(f"🛑 Felfüggesztve (Nincs nyom a weben): {make} {model}")
                else:
                    logger.warning(f"⚠️ Kevés adat: {make} {model}, visszatéve a sorba.")
            await db.commit()
        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Adatbázis hiba az eredmény mentésénél ({vehicle_id}): {e}")

    @classmethod
    async def run(cls):
        """Endless worker loop: claim one researchable vehicle and research it.

        Sleeps 2 s after each vehicle (rate-limit protection towards DDG),
        30 s on an empty queue and 10 s after an unexpected error.
        """
        self_instance = cls()
        logger.info("🚀 Vehicle Researcher 2.5 ONLINE (Sniper & Quota Manager)")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select-and-mark in a single statement.
                    query = text("""
                        UPDATE vehicle.vehicle_model_definitions
                        SET status = 'research_in_progress'
                        WHERE id = (
                            SELECT id FROM vehicle.vehicle_model_definitions
                            WHERE status IN ('unverified', 'awaiting_research', 'ACTIVE')
                            AND attempts < :max_attempts
                            AND is_manual = FALSE
                            ORDER BY
                                CASE WHEN make = 'TOYOTA' THEN 1 ELSE 2 END,
                                attempts ASC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, marketing_name, engine_code, year_from, attempts;
                    """)
                    result = await db.execute(query, {"max_attempts": self_instance.max_attempts})
                    task = result.fetchone()
                    await db.commit()

                    if task:
                        v_id, v_make, v_model, v_engine, v_year, v_attempts = task
                        # Separate session for the slow search + save step.
                        async with AsyncSessionLocal() as process_db:
                            await self_instance.research_vehicle(process_db, v_id, v_make, v_model, v_engine, v_year, v_attempts)
                        await asyncio.sleep(2)  # rate-limit protection towards DDG
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
if __name__ == "__main__":
    # Script entry point. Ctrl+C ends the otherwise endless loop with a log
    # line instead of a traceback.
    try:
        researcher_loop = VehicleResearcher.run()
        asyncio.run(researcher_loop)
    except KeyboardInterrupt:
        logger.info("🛑 Kutató robot leállítva.")

View File

@@ -0,0 +1,225 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
import asyncio
import logging
import datetime
import random
import sys
import json
import os
from sqlalchemy import text, func, update, case
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.asset import AssetCatalog
from app.services.ai_service import AIService
# Process-wide logging: INFO and above to stdout, tagged with the robot name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s',
    stream=sys.stdout,
)
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
class TechEnricher:
    """
    Vehicle Robot 3: Alchemist Pro (atomic locking + manual moderation patch).

    Takes dossiers that are waiting for AI synthesis, asks the AI service for
    cleaned vehicle data, merges it with the stored registry figures (which
    take precedence), sanity-checks the result and publishes it to
    ``vehicle.vehicle_catalog``. No web searching happens here.
    """

    def __init__(self):
        self.max_attempts = 5
        self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()

    def check_budget(self) -> bool:
        """Return ``True`` while today's AI call budget is not used up.

        The counter is reset on the first check of a new day.
        """
        if datetime.date.today() > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = datetime.date.today()
        return self.ai_calls_today < self.daily_ai_limit

    def validate_merged_data(self, merged_kw: int, merged_ccm: int, v_class: str, fuel: str, current_attempts: int) -> tuple[bool, str]:
        """Sanity-check the merged figures; return ``(is_valid, reason)``.

        Unrealistic values always fail. Missing kW or ccm fails only during
        the first three attempts; afterwards the record is let through as
        partial data. *fuel* is expected lower-cased by the caller.
        """
        if merged_ccm > 18000:
            return False, f"Irreális CCM érték ({merged_ccm})"
        if merged_kw > 1500 and v_class != "truck":
            return False, f"Irreális KW érték ({merged_kw})"

        # Missing power.
        if merged_kw == 0:
            if current_attempts < 3:
                return False, "Hiányzó KW adat. Újrakutatás javasolt."
            else:
                logger.warning("Sane-check: Többszöri próbálkozás után sincs KW, de átengedjük részlegesként.")

        # Missing displacement on a combustion vehicle.
        if merged_ccm == 0 and "electric" not in fuel and "elektric" not in fuel and v_class != "trailer":
            if current_attempts < 3:
                return False, "Hiányzó CCM (belsőégésű motornál). Újrakutatás javasolt."
            else:
                logger.warning("Sane-check: Többszöri próbálkozás után sincs CCM, átengedjük részlegesként.")

        return True, "OK"

    async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
        """Enrich one staging record with AI data and publish it.

        On success the record becomes ``gold_enriched`` and a catalog row is
        inserted. On any failure the attempt counter is bumped and the record
        goes back to ``unverified`` or, at ``max_attempts``, to
        ``manual_review_needed`` with the AI output stored for the reviewer.
        """
        # Precise identifier for the logs (make, model, id, registry figures).
        v_ident = f"{base_info['make'].upper()} {base_info['m_name']} (ID: {record_id}, RDW: {base_info['rdw_ccm']}ccm, KW: {base_info['rdw_kw']})"
        attempt_str = f"[Próba: {current_attempts + 1}/{self.max_attempts}]"
        ai_data = {}  # stays empty if the AI call itself blows up

        try:
            logger.info(f"🧠 AI dúsítás indul: {v_ident} {attempt_str}")

            # Step 1: AI call. It is charged to the daily budget as soon as
            # it is issued, so failed or rejected answers count as well;
            # counting only successes would let failures bypass the limit.
            self.ai_calls_today += 1
            ai_data = await AIService.get_clean_vehicle_data(
                base_info['make'],
                base_info['m_name'],
                base_info
            )

            if not ai_data:
                raise ValueError("Teljesen üres AI válasz (API hiba vagy extrém hallucináció).")

            # Step 2: hybrid merge (before validation). Registry figures
            # override the AI for the official parameters.
            final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else int(ai_data.get("kw", 0) or 0)
            final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else int(ai_data.get("ccm", 0) or 0)

            # Fuel clean-up. `or "petrol"` also covers an explicit null from the AI.
            fuel_rdw = base_info.get('rdw_fuel', '')
            final_fuel = fuel_rdw if fuel_rdw and fuel_rdw != "Unknown" else (ai_data.get("fuel_type") or "petrol")
            final_engine = base_info['rdw_engine'] if base_info['rdw_engine'] else ai_data.get("engine_code", "Unknown")
            final_euro = base_info['rdw_euro'] or ai_data.get("euro_classification")
            final_cylinders = base_info['rdw_cylinders'] or ai_data.get("cylinders")

            # Step 3: validation.
            is_valid, error_msg = self.validate_merged_data(final_kw, final_ccm, base_info['v_type'], final_fuel.lower(), current_attempts)
            if not is_valid:
                raise ValueError(f"Validációs hiba: {error_msg}")

            # Step 4: write to the gold catalog.
            clean_model = str(ai_data.get("marketing_name", base_info['m_name']))[:50].upper()
            cat_stmt = text("""
                INSERT INTO vehicle.vehicle_catalog
                (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
                VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
                ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING
                RETURNING id;
            """)
            await db.execute(cat_stmt, {
                "m_id": record_id,
                "make": base_info['make'].upper(),
                "model": clean_model,
                "kw": final_kw,
                "ccm": final_ccm,
                "fuel": final_fuel,
                "factory": json.dumps(ai_data)
            })

            # Step 5: close the staging (VMD) record.
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    status="gold_enriched",
                    engine_capacity=final_ccm,
                    power_kw=final_kw,
                    fuel_type=final_fuel,
                    engine_code=final_engine,
                    euro_classification=final_euro,
                    cylinders=final_cylinders,
                    specifications=ai_data,  # keep the full AI output on the master record too
                    updated_at=func.now()
                )
            )

            await db.commit()
            logger.info(f"✨ ARANY REKORD KÉSZ: {v_ident}")

        except Exception as e:
            await db.rollback()
            logger.warning(f"⚠️ Alkimista hiba - {v_ident}: {e}")

            # At the attempt limit send to manual moderation, otherwise back to the researcher.
            new_status = 'manual_review_needed' if current_attempts + 1 >= self.max_attempts else 'unverified'

            # Keep the partial AI answer (or the error) so the admin can see what went wrong.
            review_data = ai_data if ai_data else {"error": "Nincs értékelhető JSON adat az AI-tól", "raw_context": base_info['web_context']}

            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    attempts=current_attempts + 1,
                    last_error=str(e)[:200],
                    status=new_status,
                    specifications=review_data,  # stored for manual inspection
                    updated_at=func.now()
                )
            )
            await db.commit()

            if new_status == 'unverified':
                logger.info(f"♻️ Akta visszaküldve a Robot-2-nek (Kutató). {attempt_str}")
            else:
                logger.error(f"🛑 Max próbálkozás elérve! Kézi moderációra küldve: {v_ident}")

    async def run(self):
        """Endless worker loop: claim one dossier, enrich it, pause, repeat.

        Sleeps an hour when the daily AI budget is exhausted, 15 s on an
        empty queue and 10 s after an unexpected error.
        """
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Atomi Zárolás + Moderáció Patch)")
        while True:
            if not self.check_budget():
                logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
                await asyncio.sleep(3600)
                continue

            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select-and-mark in one statement to avoid races.
                    query = text("""
                        UPDATE vehicle.vehicle_model_definitions
                        SET status = 'ai_synthesis_in_progress'
                        WHERE id = (
                            SELECT id FROM vehicle.vehicle_model_definitions
                            WHERE status IN ('awaiting_ai_synthesis', 'ACTIVE')
                            AND attempts < :max_attempts
                            AND is_manual = FALSE
                            ORDER BY
                                CASE WHEN status = 'awaiting_ai_synthesis' THEN 1 ELSE 2 END,
                                priority_score DESC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity,
                                  fuel_type, engine_code, euro_classification, cylinders, raw_search_context, attempts;
                    """)
                    result = await db.execute(query, {"max_attempts": self.max_attempts})
                    task = result.fetchone()
                    await db.commit()

                    if task:
                        # Unpack the claimed row into the base_info dict.
                        r_id = task[0]
                        base_info = {
                            "make": task[1], "m_name": task[2], "v_type": task[3] or "car",
                            "rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
                            "rdw_fuel": task[6] or "petrol", "rdw_engine": task[7] or "",
                            "rdw_euro": task[8], "rdw_cylinders": task[9],
                            "web_context": task[10] or ""
                        }
                        attempts = task[11]

                        # Separate session for processing (the AI call is slow).
                        async with AsyncSessionLocal() as process_db:
                            await self.process_single_record(process_db, r_id, base_info, attempts)

                        # GPU cool-down / AI rate limit.
                        await asyncio.sleep(random.uniform(1.5, 3.5))
                    else:
                        logger.info("😴 Nincs feldolgozandó akta, az Alkimista pihen...")
                        await asyncio.sleep(15)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
if __name__ == "__main__":
    # Script entry point: create the enricher and run its endless loop.
    alchemist = TechEnricher()
    asyncio.run(alchemist.run())

View File

@@ -0,0 +1,168 @@
import asyncio
import logging
import datetime
import random
import sys
import json
import os
from sqlalchemy import text, func, update
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.services.ai_service import AIService
# Process-wide logging: INFO and above to stdout, tagged with the robot name.
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s [%(levelname)s] R3-Alchemist: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("Robot-3-Alchemist")
class TechEnricher:
    """
    Vehicle Robot 3: Alchemist Pro (Sentinel Gateway edition).

    Sends each waiting dossier to the AI service with a strict JSON prompt,
    extracts the trim level and fills in missing kW / ccm figures, then marks
    the staging record as ``gold_enriched``.
    """

    def __init__(self):
        self.max_attempts = 5
        self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()

    def check_budget(self) -> bool:
        """Return ``True`` while today's AI call budget is not used up.

        The counter is reset on the first check of a new day.
        """
        if datetime.date.today() > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = datetime.date.today()
        return self.ai_calls_today < self.daily_ai_limit

    def validate_merged_data(self, merged_kw: int, merged_ccm: int, v_class: str, fuel: str, current_attempts: int) -> tuple[bool, str]:
        """Sanity-check the merged figures; return ``(is_valid, reason)``.

        Unrealistic values always fail. Missing kW or ccm fails only during
        the first three attempts; from the fourth on the record passes.
        """
        if merged_ccm > 18000:
            return False, f"Irreális CCM érték ({merged_ccm})"
        if merged_kw > 1500 and v_class not in ["truck", "other"]:
            return False, f"Irreális KW érték ({merged_kw})"
        if merged_kw == 0 and current_attempts < 3:
            return False, "Hiányzó KW adat. Újrakutatás javasolt."
        # Electric vehicles and trailers legitimately have no displacement.
        if merged_ccm == 0 and "elektr" not in fuel.lower() and v_class != "trailer" and current_attempts < 3:
            return False, "Hiányzó CCM (belsőégésű motornál)."
        return True, "OK"

    async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
        """Enrich one staging record through the AI gateway.

        On success the record becomes ``gold_enriched``. On any failure the
        attempt counter is bumped and the record goes back to ``unverified``
        or, at ``max_attempts``, to ``manual_review_needed``.
        """
        v_ident = f"{base_info['make'].upper()} {base_info['m_name']} (ID: {record_id})"
        attempt_str = f"[Próba: {current_attempts + 1}/{self.max_attempts}]"

        try:
            logger.info(f"🧠 AI dúsítás indul: {v_ident} {attempt_str}")

            # Strict prompt for the AI service. The text is runtime data and
            # is kept verbatim.
            prompt = f"""
Elemezd az alábbi járműadatokat és a webes kutatást! Készíts belőle egy JSON objektumot.
Jármű: {base_info['make']} {base_info['m_name']}
Hatósági adatok: {base_info['rdw_ccm']} ccm, {base_info['rdw_kw']} kW, Üzemanyag: {base_info['rdw_fuel']}
Webes szöveg: {base_info['web_context'][:2000]}
FELADATOK:
1. Keresd meg a felszereltségi szintet (trim_level) a modell nevéből vagy a szövegből (pl. AMG, Highline, Titanium, M-Sport, Elegance, ST-Line). Ha nincs, legyen üres string.
2. Ha az RDW adatokban a kW vagy a ccm 0, pótold a szövegből a helyes értéket!
KIZÁRÓLAG EGY ÉRVÉNYES JSON-T ADJ VISSZA! (A Groq/Gemini miatt kötelező a JSON szó használata).
Várt kulcsok: "kw" (int), "ccm" (int), "trim_level" (string), "transmission" (string), "drive_type" (string).
"""

            # The call is charged to the daily budget as soon as it is
            # issued, so failed or rejected answers count as well; counting
            # only successes would let failures bypass the limit.
            self.ai_calls_today += 1
            # The gateway also receives the db session (used for its settings).
            ai_data = await AIService._execute_ai_call(db, prompt, model_key="text")

            if not ai_data:
                raise ValueError("Üres AI válasz (Minden fallback elbukott).")

            # Hybrid merge: stored registry figures win over AI values.
            final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else int(ai_data.get("kw", 0) or 0)
            final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else int(ai_data.get("ccm", 0) or 0)
            trim_level = str(ai_data.get("trim_level", ""))[:100]

            # Sanity check.
            is_valid, error_msg = self.validate_merged_data(final_kw, final_ccm, base_info['v_type'], base_info['rdw_fuel'], current_attempts)
            if not is_valid:
                raise ValueError(f"Validációs hiba: {error_msg}")

            # Update the staging record (gold status).
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    status="gold_enriched",
                    engine_capacity=final_ccm,
                    power_kw=final_kw,
                    # Stringified nulls from the AI are stored as an empty trim.
                    trim_level=trim_level if trim_level.lower() not in ["null", "none"] else "",
                    specifications=ai_data,
                    updated_at=func.now()
                )
            )

            await db.commit()
            logger.info(f"✨ ARANY REKORD KÉSZ: {v_ident} | Trim: {trim_level}")

        except Exception as e:
            await db.rollback()
            logger.warning(f"⚠️ Alkimista hiba - {v_ident}: {e}")

            new_status = 'manual_review_needed' if current_attempts + 1 >= self.max_attempts else 'unverified'

            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    attempts=current_attempts + 1,
                    last_error=str(e)[:200],
                    status=new_status,
                    updated_at=func.now()
                )
            )
            await db.commit()

            if new_status == 'unverified':
                logger.info(f"♻️ Akta visszaküldve a Kutatónak (R2). {attempt_str}")

    async def run(self):
        """Endless worker loop: claim one dossier and enrich it.

        Sleeps an hour when the daily AI budget is exhausted and 10 s on an
        empty queue or after an unexpected error.
        """
        logger.info(f"🚀 R3 Alchemist Pro ONLINE (Sentinel Gateway Integráció)")
        while True:
            if not self.check_budget():
                logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
                await asyncio.sleep(3600)
                continue

            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select-and-mark in one statement.
                    query = text("""
                        UPDATE vehicle.vehicle_model_definitions
                        SET status = 'ai_synthesis_in_progress'
                        WHERE id = (
                            SELECT id FROM vehicle.vehicle_model_definitions
                            WHERE status = 'awaiting_ai_synthesis'
                            AND attempts < :max_attempts
                            AND is_manual = FALSE
                            ORDER BY priority_score DESC
                            FOR UPDATE SKIP LOCKED LIMIT 1
                        )
                        RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity, fuel_type, raw_search_context, attempts;
                    """)
                    result = await db.execute(query, {"max_attempts": self.max_attempts})
                    task = result.fetchone()
                    await db.commit()

                    if task:
                        base_info = {
                            "make": task[1], "m_name": task[2], "v_type": task[3] or "car",
                            "rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
                            "rdw_fuel": task[6] or "petrol", "web_context": task[7] or ""
                        }
                        # Separate session for the slow AI step.
                        async with AsyncSessionLocal() as process_db:
                            await self.process_single_record(process_db, task[0], base_info, task[8])
                    else:
                        await asyncio.sleep(10)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
if __name__ == "__main__":
    # Script entry point: create the enricher and drive its loop with asyncio.
    enricher = TechEnricher()
    asyncio.run(enricher.run())