Initial commit: Robot ökoszisztéma v2.0 - Stabilizált jármű és szerviz robotok

This commit is contained in:
Kincses
2026-03-04 02:03:03 +01:00
commit 250f4f4b8f
7942 changed files with 449625 additions and 0 deletions

View File

@@ -0,0 +1,201 @@
import asyncio
import httpx
import logging
import os
import sys
from datetime import datetime, timedelta
from sqlalchemy import text, select
from app.database import AsyncSessionLocal
from app.models.asset import AssetCatalog
# MB 2.0 Szigorú naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-0-Discovery: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Vehicle-Robot-0-Discovery")
class DiscoveryEngine:
    """
    Vehicle discovery daemon (industrial mode 2.0).

    1. Watchdog: finds and frees stuck tasks every hour.
    2. Differential sync: records only missing or new models, skipping
       entries already promoted to 'gold_enriched'.
    3. Monthly scheduler: pages through the full RDW dataset once a month.
    """
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    # State file so a Docker restart does not immediately re-trigger a full sync.
    SYNC_STATE_FILE = "/app/temp/.last_rdw_sync"

    @staticmethod
    async def run_watchdog():
        """Phase 1: dead-letter-queue manager — resets rows stuck in transient states."""
        logger.info("🐕 Őrkutya: Beragadt feladatok keresése a rendszerben...")
        try:
            async with AsyncSessionLocal() as db:
                # A) Hunter cleanup: rows left in 'processing' by a crashed Hunter
                # go back to 'pending'.
                res1 = await db.execute(text("UPDATE data.catalog_discovery SET status = 'pending' WHERE status = 'processing' RETURNING id;"))
                hunter_resets = len(res1.fetchall())
                if hunter_resets > 0:
                    logger.warning(f"🔄 {hunter_resets} db beragadt Hunter feladat (processing) visszaállítva 'pending'-re.")
                # B) AI robot cleanup (2-hour timeout on in-progress states).
                query2 = text("""
                    UPDATE data.vehicle_model_definitions
                    SET status = CASE
                        WHEN status = 'research_in_progress' THEN 'unverified'
                        WHEN status = 'ai_synthesis_in_progress' THEN 'awaiting_ai_synthesis'
                    END
                    WHERE status IN ('research_in_progress', 'ai_synthesis_in_progress')
                    AND updated_at < NOW() - INTERVAL '2 hours'
                    RETURNING id;
                """)
                res2 = await db.execute(query2)
                ai_resets = len(res2.fetchall())
                if ai_resets > 0:
                    logger.warning(f"🔄 {ai_resets} db beragadt AI feladat visszaállítva.")
                await db.commit()
        except Exception as e:
            logger.error(f"❌ Őrkutya hiba: {e}")

    @staticmethod
    async def seed_manual_bootstrap():
        """Phase 2: seed baseline models so the pipeline is never empty."""
        initial_data = [
            {"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)", "vehicle_class": "car"},
            {"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)", "vehicle_class": "car"}
        ]
        try:
            async with AsyncSessionLocal() as db:
                for item in initial_data:
                    stmt = select(AssetCatalog).where(AssetCatalog.make == item["make"], AssetCatalog.model == item["model"])
                    if not (await db.execute(stmt)).scalar_one_or_none():
                        db.add(AssetCatalog(**item))
                await db.commit()
        except Exception as e:
            logger.warning(f"Manual bootstrap hiba (Ignorálható, ha az adatbázis már tele van): {e}")

    @classmethod
    async def fetch_with_retry(cls, client: httpx.AsyncClient, url: str, params: dict, retries: int = 3):
        """Fault-tolerant HTTP GET.

        Returns the response on HTTP 200, retries with exponential backoff on
        429 (rate limit) and transport errors, and returns None on any other
        status or when the retry budget is exhausted.
        """
        for attempt in range(retries):
            try:
                resp = await client.get(url, params=params, headers=cls.HEADERS)
                if resp.status_code == 200:
                    return resp
                elif resp.status_code == 429:
                    await asyncio.sleep(2 ** attempt)  # 1s, 2s, 4s backoff
                else:
                    return None
            except httpx.RequestError:
                if attempt == retries - 1:
                    return None
                await asyncio.sleep(2 ** attempt)
        return None

    @classmethod
    async def seed_from_rdw(cls):
        """Phase 3: remote discovery — differential sync against the RDW dataset."""
        logger.info("📥 RDW TÖMEGES LETÖLTÉS: Új modellek keresése (Differential Sync)...")
        limit = 10000
        offset = 0
        inserted_count = 0
        updated_count = 0
        completed = False  # True only when the last RDW page was consumed
        async with httpx.AsyncClient(timeout=60.0) as client:
            while True:
                params = {
                    "$select": "merk,handelsbenaming,voertuigsoort,count(*) as total",
                    "$group": "merk,handelsbenaming,voertuigsoort",
                    "$order": "total DESC",
                    "$limit": limit,
                    "$offset": offset
                }
                resp = await cls.fetch_with_retry(client, "https://opendata.rdw.nl/resource/m9d7-ebf2.json", params)
                if not resp:
                    break  # API unreachable after retries -> abort the sync
                raw_data = resp.json()
                if not raw_data:
                    completed = True  # paged past the final row: dataset exhausted
                    break
                logger.info(f"📊 Lapozás: {offset} - {offset + len(raw_data)} tételek analízise...")
                async with AsyncSessionLocal() as db:
                    for entry in raw_data:
                        make = str(entry.get("merk", "")).upper().strip()
                        model = str(entry.get("handelsbenaming", "")).upper().strip()
                        v_kind = entry.get("voertuigsoort", "")
                        total_count = int(entry.get("total", 0))
                        if not make or not model:
                            continue
                        # Map the Dutch RDW vehicle kind onto internal classes.
                        if "Personenauto" in v_kind:
                            v_class = 'car'
                        elif "Motorfiets" in v_kind:
                            v_class = 'motorcycle'
                        else:
                            v_class = 'truck'
                        # Differential-sync SQL: insert unless the model is already
                        # 'gold_enriched'; on conflict refresh priority only for
                        # rows not yet processed. RETURNING xmax lets us tell
                        # inserts (xmax == 0) apart from updates.
                        query = text("""
                            INSERT INTO data.catalog_discovery (make, model, vehicle_class, status, priority_score)
                            SELECT :make, :model, :v_class, 'pending', :priority
                            WHERE NOT EXISTS (
                                SELECT 1 FROM data.vehicle_model_definitions
                                WHERE make = :make AND marketing_name = :model AND status = 'gold_enriched'
                            )
                            ON CONFLICT (make, model)
                            DO UPDATE SET priority_score = EXCLUDED.priority_score
                            WHERE data.catalog_discovery.status != 'processed'
                            RETURNING xmax;
                        """)
                        result = await db.execute(query, {
                            "make": make, "model": model, "v_class": v_class, "priority": total_count
                        })
                        row = result.fetchone()
                        if row:
                            if row[0] == 0:
                                inserted_count += 1  # fresh insert
                            else:
                                updated_count += 1  # existing row refreshed
                    await db.commit()
                offset += limit
                await asyncio.sleep(1)
        if not completed:
            # BUGFIX: previously the state file was written even when the download
            # aborted mid-way, which silenced retries for the next 30 days.
            logger.warning("⚠️ RDW szinkron megszakadt, az állapotfájl nem frissül (újrapróbálkozás a következő ciklusban).")
            return
        logger.info(f"✅ RDW Szinkron kész! Új modellek a listán: {inserted_count} | Frissített prioritások: {updated_count}")
        # Record the successful run on the filesystem.
        os.makedirs(os.path.dirname(cls.SYNC_STATE_FILE), exist_ok=True)
        with open(cls.SYNC_STATE_FILE, 'w') as f:
            f.write(datetime.now().isoformat())

    @classmethod
    def should_run_rdw_sync(cls) -> bool:
        """Return True when 30+ days passed since the last successful RDW sync."""
        if not os.path.exists(cls.SYNC_STATE_FILE):
            return True
        try:
            with open(cls.SYNC_STATE_FILE, 'r') as f:
                last_sync = datetime.fromisoformat(f.read().strip())
            return datetime.now() - last_sync > timedelta(days=30)
        except Exception:
            return True  # unreadable/corrupt state file -> sync again to be safe

    @classmethod
    async def run(cls):
        """Main loop: monthly scheduler plus hourly watchdog heartbeat."""
        logger.info("🚀 ÉLES ÜZEM: Discovery Engine (Differential Sync) & Watchdog indítása...")
        await cls.seed_manual_bootstrap()
        while True:
            try:
                # 1. Hourly cleanup.
                await cls.run_watchdog()
                # 2. Monthly sync check.
                if cls.should_run_rdw_sync():
                    await cls.seed_from_rdw()
                else:
                    logger.info("🛌 Az RDW szinkronizáció már lefutott az elmúlt 30 napban. Ugrás...")
            except Exception as e:
                # BUGFIX: one transient DB/API failure must not kill the daemon loop.
                logger.error(f"💀 Kritikus hiba a Discovery ciklusban: {e}")
            # 3. Sleep one hour (heartbeat).
            logger.info("⏱️ A Discovery Engine most 1 órát pihen a következő Őrkutya futásig.")
            await asyncio.sleep(3600)
# Script entry point: start the discovery daemon loop.
if __name__ == "__main__":
    asyncio.run(DiscoveryEngine.run())

View File

@@ -0,0 +1,81 @@
# /app/app/workers/vehicle/vehicle_robot_0_gb_discovery.py
import asyncio
import logging
import csv
import os
import sys
from sqlalchemy import text
from app.database import AsyncSessionLocal
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-0-GB: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-0-GB-Discovery")
class GBDiscoveryEngine:
    """
    UK Open Data (CSV) importer.

    Extracts real British registration numbers (VRMs) so later API calls
    can be made against them.
    """
    # Drop the downloaded CSV at this path on the NAS.
    CSV_FILE_PATH = "/mnt/nas/app_data/uk_mot_data.csv"

    @classmethod
    async def process_csv(cls):
        """Load VRMs from the CSV into data.gb_catalog_discovery (differential)."""
        if not os.path.exists(cls.CSV_FILE_PATH):
            logger.warning(f"Nincs CSV fájl a {cls.CSV_FILE_PATH} útvonalon. Alvás...")
            return
        logger.info("🇬🇧 GB Discovery: CSV feldolgozás indítása...")
        inserted = 0
        # Create the GB work queue if it does not exist yet.
        async with AsyncSessionLocal() as db:
            await db.execute(text("""
                CREATE TABLE IF NOT EXISTS data.gb_catalog_discovery (
                    id SERIAL PRIMARY KEY,
                    vrm VARCHAR(20) UNIQUE NOT NULL,
                    make VARCHAR(100),
                    model VARCHAR(100),
                    status VARCHAR(20) DEFAULT 'pending'
                );
            """))
            await db.commit()
            # CSV read (expected columns: vrm, make, model).
            with open(cls.CSV_FILE_PATH, mode='r', encoding='utf-8') as file:
                reader = csv.DictReader(file)
                for row in reader:
                    vrm = row.get("vrm", "").strip().replace(" ", "").upper()
                    make = row.get("make", "").strip().upper()
                    model = row.get("model", "").strip().upper()
                    if not vrm or not make:
                        continue
                    # Filter: only enqueue the car if it is not already 'gold_enriched'.
                    query = text("""
                        INSERT INTO data.gb_catalog_discovery (vrm, make, model)
                        SELECT :vrm, :make, :model
                        WHERE NOT EXISTS (
                            SELECT 1 FROM data.vehicle_model_definitions
                            WHERE make = :make AND marketing_name = :model AND status = 'gold_enriched'
                        )
                        ON CONFLICT (vrm) DO NOTHING;
                    """)
                    res = await db.execute(query, {"vrm": vrm, "make": make, "model": model})
                    if res.rowcount > 0:
                        inserted += 1
                        # Commit periodically so memory usage stays bounded.
                        # BUGFIX: this check used to run for EVERY row, so while
                        # `inserted` sat at a multiple of 1000 (including 0, before
                        # the first insert) each skipped row triggered a commit and
                        # a log line. Now it fires only on every 1000th insert.
                        if inserted % 1000 == 0:
                            await db.commit()
                            logger.info(f"Eddig betöltve: {inserted} új GB rendszám...")
            await db.commit()
            logger.info(f"✅ GB CSV Feldolgozva. Új rendszámok a várólistán: {inserted}")

    @classmethod
    async def run(cls):
        """Daemon loop: process the CSV once per day."""
        while True:
            await cls.process_csv()
            await asyncio.sleep(86400)  # runs once every 24 hours
# Script entry point: start the GB CSV import daemon.
if __name__ == "__main__":
    asyncio.run(GBDiscoveryEngine.run())

View File

@@ -0,0 +1,108 @@
# /app/app/workers/vehicle/vehicle_robot_0_strategist.py
import asyncio
import httpx
import logging
import os
from sqlalchemy import text
from app.database import AsyncSessionLocal # MB 2.0 Standard import
# Sentinel rendszerhez illesztett logolás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
logger = logging.getLogger("Vehicle-Robot-0-Strategist")
class Robot0Strategist:
    """
    Market strategist: derives 'priority_score' for discovery tasks from real
    registration counts in the Dutch RDW open dataset.

    THOUGHT PROCESS:
    1. The score is based on live market data (RDW registration counts).
    2. First the schema is checked (self-healing) so the column exists.
    3. Categories (car, motorcycle, truck) are ranked separately for
       targeted priorities.
    4. The ON CONFLICT logic guarantees already 'processed' rows stay intact.
    5. The more registrations a type has, the higher it ranks in the queue.
    """
    RDW_API = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    # Dutch RDW vehicle types mapped onto internal categories. The values are
    # pre-quoted SoQL fragments consumed verbatim by get_popular_makes().
    CATEGORIES = [
        {"name": "car", "rdw_types": ["'Personenauto'"]},
        {"name": "motorcycle", "rdw_types": ["'Motorfiets'"]},
        {"name": "truck", "rdw_types": ["'Bedrijfsauto'", "'Vrachtwagen'", "'Opleggertrekker'"]},
        {"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfsauto', 'Vrachtwagen', 'Opleggertrekker')"]}
    ]

    async def get_popular_makes(self, vehicle_class: str, rdw_types: list):
        """Fetch makes for one category ordered by registration count (desc).

        Returns the decoded JSON list, or [] on any API/transport failure.
        NOTE(review): `vehicle_class` is unused inside this method; it is kept
        for signature stability with the caller.
        """
        # An entry starting with "NOT IN" is already a complete predicate;
        # otherwise build an OR-chain of equality filters from the quoted names.
        if "NOT IN" in rdw_types[0]:
            type_filter = f"voertuigsoort {rdw_types[0]}"
        else:
            type_filter = " OR ".join([f"voertuigsoort = {t}" for t in rdw_types])
        params = {
            "$select": "merk, count(*) AS darabszam",
            "$where": type_filter,
            "$group": "merk",
            "$order": "darabszam DESC",
            "$limit": 500
        }
        async with httpx.AsyncClient(timeout=45.0) as client:
            try:
                resp = await client.get(self.RDW_API, params=params, headers=self.HEADERS)
                if resp.status_code == 200:
                    return resp.json()
                logger.error(f"⚠️ RDW API Hiba: {resp.status_code}")
                return []
            except Exception as e:
                logger.error(f"❌ Kapcsolati hiba az RDW felé: {e}")
                return []

    async def run(self):
        """Entry point: self-heal the schema, then rank every category."""
        logger.info("🚀 Robot 0 (Strategist) ONLINE - Piaci elemzés indítása...")
        # --- Schema check (bullet-proof): add the column if it is missing ---
        async with AsyncSessionLocal() as db:
            try:
                await db.execute(text("ALTER TABLE data.catalog_discovery ADD COLUMN IF NOT EXISTS priority_score INTEGER DEFAULT 0;"))
                await db.commit()
                logger.info("✅ Adatbázis séma rendben (priority_score aktív).")
            except Exception as e:
                await db.rollback()
                logger.error(f"⚠️ Séma hiba: {e}")
        for category in self.CATEGORIES:
            v_class = category["name"]
            logger.info(f"📊 {v_class.upper()} hadosztály prioritásainak számítása...")
            makes = await self.get_popular_makes(v_class, category["rdw_types"])
            if not makes: continue
            added_count = 0
            for item in makes:
                make_name = str(item.get("merk", "")).upper().strip()
                if not make_name: continue
                count = int(item.get("darabszam", 0))
                # NOTE(review): a fresh session per make isolates failures to a
                # single row, at the cost of per-make connection churn.
                async with AsyncSessionLocal() as db:
                    try:
                        # UPSERT: set the priority without touching rows that are
                        # already processed or being worked on.
                        query = text("""
                            INSERT INTO data.catalog_discovery (make, model, vehicle_class, status, source, attempts, priority_score)
                            VALUES (:make, 'ALL_VARIANTS', :class, 'pending', 'STRATEGIST-V2', 0, :score)
                            ON CONFLICT (make, model, vehicle_class)
                            DO UPDATE SET priority_score = :score
                            WHERE data.catalog_discovery.status NOT IN ('processed', 'in_progress');
                        """)
                        await db.execute(query, {"make": make_name, "class": v_class, "score": count})
                        await db.commit()
                        # NOTE(review): this counts updated rows as well as inserts.
                        added_count += 1
                    except Exception as e:
                        await db.rollback()
                        logger.warning(f"❌ Hiba a márka rögzítésekor ({make_name}): {e}")
            logger.info(f"{v_class.upper()} kategória kész: {added_count} márka rangsorolva.")
# Script entry point: run the strategist once (it exits after ranking).
if __name__ == "__main__":
    asyncio.run(Robot0Strategist().run())

View File

@@ -0,0 +1,207 @@
import asyncio
import httpx
import logging
import os
import re
import sys
from sqlalchemy import text, select
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
    """
    Vehicle Robot 1.9.2: The Invincible Mega-Hunter (CONCURRENCY PATCH).

    Strict row locking (FOR UPDATE SKIP LOCKED) plus exponential API retry,
    so multiple hunter instances can run side by side without double work.
    """
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Lower-case alphanumeric skeleton used as a dedup key."""
        if not text_val: return ""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Best-effort int parse; None/empty/garbage becomes 0."""
        try:
            if value is None or str(value).strip() == "": return 0
            return int(float(value))
        except (ValueError, TypeError): return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Best-effort float parse; None/empty/garbage becomes 0.0."""
        try:
            if value is None or str(value).strip() == "": return 0.0
            return float(value)
        except (ValueError, TypeError): return 0.0

    @classmethod
    async def fetch_with_retry(cls, client: httpx.AsyncClient, url: str, retries: int = 3):
        """Fault-tolerant GET against API outages and rate limits.

        Retries 429s and transport errors with exponential backoff; other
        error statuses (e.g. 404) are returned as-is for the caller to judge.
        The final transport failure is re-raised.
        """
        for attempt in range(retries):
            try:
                resp = await client.get(url, headers=cls.HEADERS)
                if resp.status_code == 200:
                    return resp
                elif resp.status_code == 429:  # rate limit
                    await asyncio.sleep(2 ** attempt)  # 1s, 2s, 4s backoff
                else:
                    return resp  # other errors (e.g. 404) are not retried
            except httpx.RequestError as e:
                if attempt == retries - 1:
                    logger.debug(f"API Hiba végleges ({url}): {e}")
                    raise
                await asyncio.sleep(2 ** attempt)
        return None

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Fetch drivetrain extras (kW, fuel, CO2, engine code) for one plate."""
        results = {
            "power_kw": 0, "engine_code": None, "euro_class": None,
            "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0
        }
        try:
            f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
            if f_resp and f_resp.status_code == 200 and f_resp.json():
                f = f_resp.json()[0]
                # RDW has published both snake_case and collapsed field names.
                p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
                p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
                results.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
                })
            e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
            if e_resp and e_resp.status_code == 200 and e_resp.json():
                results["engine_code"] = e_resp.json()[0].get("motorcode")
        except Exception as e:
            logger.debug(f"Hiba a technikai részleteknél ({plate}): {e}")
        return results

    @classmethod
    async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
        """Mine every RDW variant of one make/model and upsert the definitions."""
        clean_make = make_name.strip().upper()
        clean_model = model_name.strip().upper()
        logger.info(f"🎯 IPARI ADATBÁNYÁSZAT INDUL: {clean_make} {clean_model}")
        offset = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = f"merk={clean_make}&handelsbenaming={clean_model}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
                try:
                    r = await cls.fetch_with_retry(client, f"{cls.RDW_MAIN}?{params}")
                    batch = r.json() if r and r.status_code == 200 else []
                except Exception: break
                if not batch: break
                for item in batch:
                    # BUGFIX: pre-bind plate so the except handler below cannot
                    # hit a NameError if the row fails before assignment.
                    plate = None
                    try:
                        plate = item.get("kenteken")
                        if not plate: continue
                        variant = item.get("variant") or "UNKNOWN"
                        version = item.get("uitvoering") or "UNKNOWN"
                        ccm = cls.parse_int(item.get("cilinderinhoud"))
                        norm_name = cls.normalize(clean_model.replace(clean_make, "").strip() or clean_model)
                        tech = await cls.fetch_tech_details(client, plate)
                        stmt = insert(VehicleModelDefinition).values(
                            make=clean_make,
                            marketing_name=clean_model,
                            normalized_name=norm_name,
                            variant_code=variant,
                            version_code=version,
                            type_approval_number=item.get("typegoedkeuringsnummer"),
                            technical_code=plate,
                            engine_capacity=ccm,
                            power_kw=tech["power_kw"],
                            fuel_type=tech["fuel_desc"],
                            engine_code=tech["engine_code"],
                            seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                            doors=cls.parse_int(item.get("aantal_deuren")),
                            width=cls.parse_int(item.get("breedte")),
                            wheelbase=cls.parse_int(item.get("wielbasis")),
                            list_price=cls.parse_int(item.get("catalogusprijs")),
                            max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                            towing_weight_unbraked=cls.parse_int(item.get("maximum_massa_trekken_ongeremd")),
                            towing_weight_braked=cls.parse_int(item.get("maximum_trekken_massa_geremd")),
                            curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                            max_weight=cls.parse_int(item.get("technische_max_massa_voertuig") or item.get("toegestane_maximum_massa_voertuig")),
                            body_type=item.get("inrichting"),
                            co2_emissions_combined=tech["co2"],
                            fuel_consumption_combined=tech["consumption"],
                            euro_classification=tech["euro_class"],
                            cylinders=cls.parse_int(item.get("aantal_cilinders")),
                            vehicle_class=v_class,
                            priority_score=priority,
                            status="ACTIVE",
                            source="MEGA-HUNTER-v1.9.2"
                        )
                        # Exact technical variant already stored -> skip silently.
                        do_nothing_stmt = stmt.on_conflict_do_nothing(
                            index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
                        )
                        await db.execute(do_nothing_stmt)
                    except Exception as e:
                        logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")
                try:
                    await db.commit()
                except Exception as e:
                    await db.rollback()
                    logger.error(f"❌ Batch commit hiba (Ignorálva): {e}")
                offset += len(batch)
                # Safety cap: enough rows to cover the variant space.
                if offset >= 500: break
                await asyncio.sleep(0.5)  # ease the load on the API
        # Mark the discovery task done.
        await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
        await db.commit()

    @classmethod
    async def run(cls):
        """Daemon loop: atomically claim the highest-priority pending task."""
        logger.info("🤖 Invincible Mega-Hunter v1.9.2 ONLINE (CONCURRENCY PATCHED)")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMIC CLAIM (race-condition antidote): pick one pending task,
                    # lock it with SKIP LOCKED and flip it to 'processing' in a
                    # single statement.
                    query = text("""
                        UPDATE data.catalog_discovery
                        SET status = 'processing'
                        WHERE id = (
                            SELECT id FROM data.catalog_discovery
                            WHERE status = 'pending'
                            ORDER BY priority_score DESC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, model, vehicle_class, priority_score;
                    """)
                    task = (await db.execute(query)).fetchone()
                    await db.commit()
                    if task:
                        await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                # BUGFIX: a crash while processing must not kill the daemon; the
                # watchdog robot recovers the row left in 'processing'.
                logger.error(f"💀 Kritikus hiba a futtatás során: {e}")
                await asyncio.sleep(10)
# Script entry point: start the hunter daemon.
if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,228 @@
import asyncio
import httpx
import logging
import os
import re
import sys
from sqlalchemy import text, select
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# MB 2.0 Szigorú naplózás
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
stream=sys.stdout
)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
    """
    Vehicle Robot 1.7.3: Mega-Hunter (Industrial Master Version).

    Full RDW data mining with variant/version-level granularity.
    NOTE(review): this version claims tasks with a plain SELECT + UPDATE
    (no row locking), so two concurrent instances can grab the same task.
    """
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Lower-case alphanumeric skeleton used as a dedup key."""
        if not text_val: return ""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Best-effort int parse; None/empty/garbage becomes 0."""
        try:
            if value is None or str(value).strip() == "": return 0
            return int(float(value))
        except (ValueError, TypeError): return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Best-effort float parse; None/empty/garbage becomes 0.0."""
        try:
            if value is None or str(value).strip() == "": return 0.0
            return float(value)
        except (ValueError, TypeError): return 0.0

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Fetch extra technical data (kW, Euro class, CO2, engine code) in parallel."""
        urls = {
            "fuel": f"{cls.RDW_FUEL}?kenteken={plate}",
            "engine": f"{cls.RDW_ENGINE}?kenteken={plate}"
        }
        results = {
            "power_kw": 0,
            "engine_code": None,
            "euro_class": None,
            "fuel_desc": "Unknown",
            "co2": 0,
            "consumption": 0.0
        }
        try:
            resps = await asyncio.gather(*[client.get(u, headers=cls.HEADERS) for u in urls.values()])
            # Fuel and emissions (dataset 8ys7-d773).
            if resps[0].status_code == 200 and resps[0].json():
                f = resps[0].json()[0]
                # RDW has published both snake_case and collapsed field names.
                p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
                p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
                results.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
                })
            # Engine code (dataset jh96-v4pq).
            if resps[1].status_code == 200 and resps[1].json():
                results["engine_code"] = resps[1].json()[0].get("motorcode")
        except Exception as e:
            logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
        return results

    @classmethod
    async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
        """Page through RDW for one make/model; update or create definitions.

        Marks the task 'processed' only when RDW returns an empty page; if an
        exception breaks the loop, the task stays 'processing' and is retried
        on the next run() pass.
        """
        clean_make = make_name.strip().upper()
        clean_model = model_name.strip().upper()
        logger.info(f"🎯 MEGA-VADÁSZAT INDUL: {clean_make} {clean_model}")
        current_offset = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = {
                    "merk": clean_make,
                    "handelsbenaming": clean_model,
                    "$limit": cls.BATCH_SIZE,
                    "$offset": current_offset,
                    "$order": "kenteken DESC"
                }
                try:
                    r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
                    batch = r.json() if r.status_code == 200 else []
                except Exception: break
                if not batch:
                    # Dataset exhausted: close out the discovery task.
                    await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
                    await db.commit()
                    logger.info(f"🏁 {clean_make} {clean_model} minden variánsa feldolgozva.")
                    return
                for item in batch:
                    try:
                        plate = item.get("kenteken")
                        if not plate: continue
                        # Base identifiers.
                        variant = item.get("variant")
                        version = item.get("uitvoering")  # the execution/version code
                        ccm = cls.parse_int(item.get("cilinderinhoud"))
                        raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
                        # Strip the make prefix from the trade name when present.
                        model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
                        norm_name = cls.normalize(model_name_clean)
                        # Deep technical drill-down (kW, fuel, engine code).
                        tech = await cls.fetch_tech_details(client, plate)
                        # Does this exact technical variant already exist?
                        stmt = select(VehicleModelDefinition).where(
                            VehicleModelDefinition.make == clean_make,
                            VehicleModelDefinition.normalized_name == norm_name,
                            VehicleModelDefinition.engine_capacity == ccm,
                            VehicleModelDefinition.variant_code == variant,
                            VehicleModelDefinition.version_code == version,
                            VehicleModelDefinition.fuel_type == tech["fuel_desc"]
                        ).limit(1)
                        existing = (await db.execute(stmt)).scalar_one_or_none()
                        if existing:
                            # Refresh the existing record with priority and any
                            # newly available data.
                            existing.priority_score = priority
                            if tech["power_kw"] > 0: existing.power_kw = tech["power_kw"]
                            if tech["engine_code"]: existing.engine_code = tech["engine_code"]
                            if tech["co2"] > 0: existing.co2_emissions_combined = tech["co2"]
                        else:
                            # Create a brand-new record with every data field.
                            db.add(VehicleModelDefinition(
                                make=clean_make,
                                marketing_name=model_name_clean,
                                normalized_name=norm_name,
                                variant_code=variant,
                                version_code=version,
                                type_approval_number=item.get("typegoedkeuringsnummer"),
                                technical_code=plate,  # required (NOT NULL) field
                                engine_capacity=ccm,
                                power_kw=tech["power_kw"],
                                fuel_type=tech["fuel_desc"],
                                engine_code=tech["engine_code"],
                                # Physical dimensions and weights (from RDW main).
                                seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                                doors=cls.parse_int(item.get("aantal_deuren")),
                                width=cls.parse_int(item.get("breedte")),
                                wheelbase=cls.parse_int(item.get("wielbasis")),
                                list_price=cls.parse_int(item.get("catalogusprijs")),
                                max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                                towing_weight_unbraked=cls.parse_int(item.get("maximum_massa_trekken_ongeremd")),
                                towing_weight_braked=cls.parse_int(item.get("maximum_trekken_massa_geremd")),
                                curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                                max_weight=cls.parse_int(item.get("technische_max_massa_voertuig") or item.get("toegestane_maximum_massa_voertuig")),
                                body_type=item.get("inrichting"),
                                # Emissions and environmental data (from RDW extra).
                                co2_emissions_combined=tech["co2"],
                                fuel_consumption_combined=tech["consumption"],
                                euro_classification=tech["euro_class"],
                                cylinders=cls.parse_int(item.get("aantal_cilinders")),
                                # Metadata.
                                vehicle_class=v_class,
                                priority_score=priority,
                                status="ACTIVE",
                                source="MEGA-HUNTER-v1.7.3"
                            ))
                    except Exception as e:
                        logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")
                # Batch commit every 50 rows.
                await db.commit()
                current_offset += len(batch)
                # Safety cap: no need to pull more rows than the variant space needs.
                if current_offset >= 300: break
                await asyncio.sleep(0.1)

    @classmethod
    async def run(cls):
        """Daemon loop: pick the highest-priority discovery task and process it."""
        logger.info("🤖 Mega-Hunter Robot v1.7.3 ONLINE")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Fetch the highest-priority model from the discovery list.
                    # NOTE(review): 'processing' rows are re-selected too (resume
                    # semantics), and there is no FOR UPDATE SKIP LOCKED here —
                    # concurrent hunters may race on the same task.
                    query = text("""
                        SELECT id, make, model, vehicle_class, priority_score
                        FROM data.catalog_discovery
                        WHERE status IN ('pending', 'processing')
                        ORDER BY priority_score DESC
                        LIMIT 1
                    """)
                    task = (await db.execute(query)).fetchone()
                    if task:
                        # Record the state to guard against duplicated work.
                        await db.execute(text("UPDATE data.catalog_discovery SET status = 'processing' WHERE id = :id"), {"id": task[0]})
                        await db.commit()
                        await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
                    else:
                        logger.info("😴 Nincs több feladat, pihenés 30 másodpercig...")
                        await asyncio.sleep(30)
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a futtatás során: {e}")
                await asyncio.sleep(10)
# Script entry point: start the hunter daemon.
if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,173 @@
# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
import asyncio
import httpx
import logging
import os
import re
from sqlalchemy import text, select, update
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# MB 2.0 Naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s')
logger = logging.getLogger("Robot-1")
class CatalogHunter:
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
if not text_val: return ""
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
@classmethod
def parse_int(cls, value) -> int:
try:
if value is None or str(value).strip() == "": return 0
return int(float(value))
except (ValueError, TypeError): return 0
@classmethod
async def fetch_extra_tech(cls, client, plate):
params = {"kenteken": plate}
results = {"power_kw": 0, "euro_klasse": None, "fuel_desc": "Unknown", "engine_code": None}
try:
resp_fuel, resp_eng = await asyncio.gather(
client.get(cls.RDW_FUEL, params=params, headers=cls.HEADERS),
client.get(cls.RDW_ENGINE, params=params, headers=cls.HEADERS)
)
if resp_fuel.status_code == 200:
fuel_rows = resp_fuel.json()
max_p = 0
f_types = []
for row in fuel_rows:
p1 = cls.parse_int(row.get("netto_maximum_vermogen") or row.get("nettomaximumvermogen"))
p2 = cls.parse_int(row.get("nominaal_continu_maximum_vermogen") or row.get("nominaalcontinuvermogen"))
p = max(p1, p2)
if p > max_p: max_p = p
f = row.get("brandstof_omschrijving")
if f and f not in f_types: f_types.append(f)
if not results["euro_klasse"]:
results["euro_klasse"] = row.get("uitlaatemissieniveau") or row.get("euro_klasse")
results["power_kw"] = max_p
results["fuel_desc"] = ", ".join(f_types) if f_types else "Unknown"
if resp_eng.status_code == 200:
eng_rows = resp_eng.json()
if eng_rows: results["engine_code"] = eng_rows[0].get("motorcode")
except Exception as e:
logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
return results
@classmethod
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
clean_make = make_name.strip().upper()
clean_model = model_name.strip().upper()
logger.info(f"🎯 VADÁSZAT INDUL: {clean_make} {clean_model} (Prio: {priority})")
current_offset = 0
async with httpx.AsyncClient(timeout=30.0) as client:
while True:
params = {
"merk": clean_make,
"handelsbenaming": clean_model,
"$limit": cls.BATCH_SIZE,
"$offset": current_offset,
"$order": "kenteken DESC"
}
try:
r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
batch = r.json() if r.status_code == 200 else []
except Exception: break
if not batch:
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
logger.info(f"🏁 {clean_make} {clean_model} feldolgozva.")
return
for item in batch:
try:
plate = item.get("kenteken")
if not plate: continue
raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
norm_name = cls.normalize(model_name_clean)
ccm = cls.parse_int(item.get("cilinderinhoud"))
tech = await cls.fetch_extra_tech(client, plate)
# Ellenőrizzük, van-e már ilyen technikai variációnk
stmt = select(VehicleModelDefinition).where(
VehicleModelDefinition.make == clean_make,
VehicleModelDefinition.normalized_name == norm_name,
VehicleModelDefinition.engine_capacity == ccm,
VehicleModelDefinition.fuel_type == tech["fuel_desc"]
).limit(1)
existing = (await db.execute(stmt)).scalar_one_or_none()
if existing:
# Csak frissítjük, ha találtunk pontosabb adatot
if tech["engine_code"]: existing.engine_code = tech["engine_code"]
if tech["power_kw"] > 0: existing.power_kw = tech["power_kw"]
existing.priority_score = priority # Prioritás frissítése
else:
# ÚJ REKORD LÉTREHOZÁSA
db.add(VehicleModelDefinition(
make=clean_make,
marketing_name=model_name_clean,
normalized_name=norm_name,
marketing_name_aliases=[raw_model],
technical_code=plate,
fuel_type=tech["fuel_desc"],
engine_capacity=ccm,
engine_code=tech["engine_code"],
power_kw=tech["power_kw"],
cylinders=cls.parse_int(item.get("aantal_cilinders")),
euro_classification=tech["euro_klasse"],
vehicle_class=v_class,
priority_score=priority,
status="ACTIVE", # <--- EZ KELL A RÖNTGENNEK!
source="PRECISION-HUNTER-v2.1"
))
except Exception as e:
logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")
await db.commit()
current_offset += len(batch)
# Ha már van elég variációnk ebből a típusból, nem kell mind a 100.000 autót átnézni
if current_offset >= 500: break
await asyncio.sleep(0.1)
@classmethod
async def run(cls):
    """Main worker loop: atomically claim the highest-priority discovery task,
    process it, and sleep when the queue is empty.

    Fix: the previous SELECT-then-UPDATE claim was a TOCTOU race — two hunter
    instances could pick the same task between the SELECT and the status
    UPDATE. We now claim atomically with
    UPDATE ... (SELECT ... FOR UPDATE SKIP LOCKED) RETURNING, the same
    pattern the other robots in this ecosystem (GBHunter, VehicleResearcher)
    already use.
    """
    logger.info("🤖 Vehicle Catalog Hunter ONLINE")
    while True:
        async with AsyncSessionLocal() as db:
            # Atomic claim: flips the row to 'processing' and returns its
            # payload in one statement, skipping rows locked by peer workers.
            # 'processing' rows stay eligible (as before) so work stranded by
            # a crashed worker is eventually retried.
            query = text("""
                UPDATE data.catalog_discovery
                SET status = 'processing'
                WHERE id = (
                    SELECT id FROM data.catalog_discovery
                    WHERE status IN ('pending', 'processing')
                    ORDER BY priority_score DESC
                    LIMIT 1
                    FOR UPDATE SKIP LOCKED
                )
                RETURNING id, make, model, vehicle_class, priority_score;
            """)
            task = (await db.execute(query)).fetchone()
            await db.commit()
            if task:
                await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
            else:
                await asyncio.sleep(30)
# Script entry point: run the hunter's infinite worker loop until the
# process is stopped externally (Docker stop / SIGINT).
if __name__ == "__main__":
    asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,192 @@
import asyncio
import httpx
import logging
import os
import sys
import json
from datetime import datetime
from sqlalchemy import text, func
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-GB: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1-GB-Hunter")
class QuotaManager:
    """Strict daily request-quota tracker for the DVLA API.

    The counter is persisted to a small JSON state file so the quota survives
    container restarts. The file holds ``{"date": "YYYY-MM-DD", "count": int}``;
    the count resets automatically on the first call of a new day.
    """
    def __init__(self, service_name: str, daily_limit: int):
        self.service_name = service_name
        self.daily_limit = daily_limit
        # State file lives on the mounted temp volume so restarts keep the count.
        self.state_file = f"/app/temp/.quota_{service_name}.json"
        self._ensure_file()

    def _ensure_file(self):
        # Create the state directory/file on first run.
        os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
        if not os.path.exists(self.state_file):
            with open(self.state_file, 'w') as f:
                json.dump({"date": datetime.now().strftime("%Y-%m-%d"), "count": 0}, f)

    def can_make_request(self) -> bool:
        """Reserve one request slot; return False once the daily limit is hit.

        Robustness fix: a truncated/corrupted state file (e.g. the process was
        killed mid-write) used to raise JSONDecodeError on every loop
        iteration, permanently wedging the robot. An unreadable file is now
        treated as "fresh day, zero count" instead of crashing.
        """
        today = datetime.now().strftime("%Y-%m-%d")
        try:
            with open(self.state_file, 'r') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # json.JSONDecodeError is a ValueError subclass.
            data = {"date": today, "count": 0}
        if data.get("date") != today:
            # New day: reset the quota.
            data = {"date": today, "count": 0}
        if data.get("count", 0) >= self.daily_limit:
            return False
        # Reserve the slot and persist the new count.
        data["count"] = data.get("count", 0) + 1
        with open(self.state_file, 'w') as f:
            json.dump(data, f)
        return True
class GBHunter:
    """
    Vehicle Robot 1-GB: The DVLA Sniper.

    Looks up authoritative vehicle data from the DVLA Vehicle Enquiry Service
    strictly by British registration numbers (VRM) and inserts the results
    into the master definitions table.
    """
    DVLA_URL = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"

    def __init__(self):
        self.api_key = os.getenv("DVLA_API_KEY")
        self.daily_limit = int(os.getenv("DVLA_DAILY_LIMIT", "1000"))
        self.quota = QuotaManager("dvla", self.daily_limit)
        self.headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json"
        }

    async def fetch_dvla_data(self, client: httpx.AsyncClient, vrm: str):
        """Call the DVLA API with fault tolerance (max 3 attempts).

        Returns the parsed JSON dict on success, the sentinel string
        "NOT_FOUND" for an unknown VRM, or None on any other failure.
        """
        if not self.api_key:
            logger.error("❌ HIÁNYZIK A DVLA_API_KEY a .env fájlból!")
            return None
        payload = {"registrationNumber": vrm}
        for attempt in range(3):
            try:
                resp = await client.post(self.DVLA_URL, json=payload, headers=self.headers)
                if resp.status_code == 200:
                    return resp.json()
                elif resp.status_code == 404:
                    logger.warning(f"⚠️ DVLA nem találta ezt a rendszámot: {vrm}")
                    return "NOT_FOUND"
                elif resp.status_code == 429:
                    # Rate limited / calling too fast: exponential backoff, retry.
                    await asyncio.sleep(2 ** attempt)
                elif resp.status_code == 403:
                    logger.error("❌ DVLA API hiba: 403 Forbidden (Hibás API kulcs!)")
                    return None
                else:
                    logger.error(f"❌ DVLA API hiba: {resp.status_code}")
                    return None
            except httpx.RequestError as e:
                if attempt == 2:
                    logger.error(f"Kritikus hálózati hiba: {e}")
                    return None
                await asyncio.sleep(2 ** attempt)
        return None

    async def process_record(self, db, record_id: int, vrm: str, make_csv: str, model_csv: str):
        """Process one claimed gb_catalog_discovery row end to end.

        Returns "DONE", "ERROR" or "QUOTA_EXCEEDED" so run() can decide how
        long to sleep.
        """
        logger.info(f"🇬🇧 GB Vadászat indul: {vrm} ({make_csv} {model_csv})")
        if not self.quota.can_make_request():
            logger.warning("🛑 NAPI DVLA KVÓTA ELÉRVE! A robot holnapig alvó módba lép.")
            # BUG FIX: run() has already flipped this row to 'processing'.
            # Nothing resets gb_catalog_discovery (the watchdog only covers
            # catalog_discovery), so without releasing the row here it would
            # be stuck in 'processing' forever. Put it back in the queue.
            await db.execute(text("UPDATE data.gb_catalog_discovery SET status = 'pending' WHERE id = :id"), {"id": record_id})
            await db.commit()
            return "QUOTA_EXCEEDED"
        async with httpx.AsyncClient(timeout=15.0) as client:
            data = await self.fetch_dvla_data(client, vrm)
        if data == "NOT_FOUND":
            # The CSV plate was invalid; close the task permanently.
            await db.execute(text("UPDATE data.gb_catalog_discovery SET status = 'invalid_vrm' WHERE id = :id"), {"id": record_id})
            await db.commit()
            return "DONE"
        if not data:
            # Network failure: requeue the row for a later retry.
            await db.execute(text("UPDATE data.gb_catalog_discovery SET status = 'pending' WHERE id = :id"), {"id": record_id})
            await db.commit()
            return "ERROR"
        # Successful lookup — extract the fields we care about.
        ccm = data.get("engineCapacity", 0)
        fuel = data.get("fuelType", "Unknown")
        year = data.get("yearOfManufacture", 0)
        co2 = data.get("co2Emissions", 0)
        approval = data.get("typeApproval", "")
        dvla_make = data.get("make", make_csv)
        # Insert into the master table (the Alchemist enriches missing power later).
        query_vmd = text("""
            INSERT INTO data.vehicle_model_definitions
            (make, marketing_name, vehicle_class, fuel_type, engine_capacity, co2_emissions_combined, year_from, type_approval_number, status, source)
            VALUES (:make, :model, 'car', :fuel, :ccm, :co2, :year, :approval, 'ACTIVE', 'GB-DVLA-API')
            ON CONFLICT (make, normalized_name, variant_code, version_code, fuel_type) DO NOTHING;
        """)
        try:
            await db.execute(query_vmd, {
                "make": dvla_make.upper(),
                "model": model_csv.upper(),
                "fuel": fuel,
                "ccm": ccm,
                "co2": co2,
                "year": year if year else None,
                "approval": approval
            })
            # Tick off the discovery task.
            await db.execute(text("UPDATE data.gb_catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": record_id})
            await db.commit()
            logger.info(f"✅ GB Rekord mentve a VMD táblába: {dvla_make} {model_csv} ({ccm}cc {fuel})")
            return "DONE"
        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Adatbázis hiba mentéskor: {e}")
            await db.execute(text("UPDATE data.gb_catalog_discovery SET status = 'pending' WHERE id = :id"), {"id": record_id})
            await db.commit()
            return "ERROR"

    @classmethod
    async def run(cls):
        """Main worker loop with atomic task claiming and quota awareness."""
        self_instance = cls()
        logger.info("🤖 Robot-1-GB (DVLA Sniper) ONLINE (Atomi Zárolás & Kvótamenedzser)")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMIC CLAIM: FOR UPDATE SKIP LOCKED prevents two workers
                    # from grabbing the same VRM.
                    query = text("""
                        UPDATE data.gb_catalog_discovery
                        SET status = 'processing'
                        WHERE id = (
                            SELECT id FROM data.gb_catalog_discovery
                            WHERE status = 'pending'
                            LIMIT 1
                            FOR UPDATE SKIP LOCKED
                        )
                        RETURNING id, vrm, make, model;
                    """)
                    result = await db.execute(query)
                    task = result.fetchone()
                    await db.commit()
                    if task:
                        status = await self_instance.process_record(db, task[0], task[1], task[2], task[3])
                        if status == "QUOTA_EXCEEDED":
                            await asyncio.sleep(3600)  # Sleep until the quota resets
                        else:
                            await asyncio.sleep(1.5)  # Throttle to be gentle with the API
                    else:
                        await asyncio.sleep(60)  # No new British plates queued
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
# Script entry point: instantiate the sniper and run its endless worker loop.
if __name__ == "__main__":
    asyncio.run(GBHunter().run())

View File

@@ -0,0 +1,194 @@
import asyncio
import logging
import warnings
import os
import json
from datetime import datetime
from sqlalchemy import text, update, func
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# MB 2.0 Szabvány naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-2-Researcher: %(message)s')
logger = logging.getLogger("Vehicle-Robot-2-Researcher")
class QuotaManager:
    """Strict daily limit tracker for paid / government APIs.

    Persists ``{"date": "YYYY-MM-DD", "count": int}`` to a JSON state file so
    the quota survives container restarts; the count resets on the first call
    of a new day.
    """
    def __init__(self, service_name: str, daily_limit: int):
        self.service_name = service_name
        self.daily_limit = daily_limit
        # State file on the mounted temp volume so restarts keep the count.
        self.state_file = f"/app/temp/.quota_{service_name}.json"
        self._ensure_file()

    def _ensure_file(self):
        # Create the state directory/file on first run.
        os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
        if not os.path.exists(self.state_file):
            with open(self.state_file, 'w') as f:
                json.dump({"date": datetime.now().strftime("%Y-%m-%d"), "count": 0}, f)

    def can_make_request(self) -> bool:
        """Reserve one request slot; return False once the daily limit is hit.

        Robustness fix: a truncated/corrupted state file (e.g. the process was
        killed mid-write) used to raise JSONDecodeError on every call, wedging
        the robot. An unreadable file is now treated as "fresh day, zero count".
        """
        today = datetime.now().strftime("%Y-%m-%d")
        try:
            with open(self.state_file, 'r') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # json.JSONDecodeError is a ValueError subclass.
            data = {"date": today, "count": 0}
        if data.get("date") != today:
            # New day: reset the quota.
            data = {"date": today, "count": 0}
        if data.get("count", 0) >= self.daily_limit:
            return False
        # Bump and persist the counter.
        data["count"] = data.get("count", 0) + 1
        with open(self.state_file, 'w') as f:
            json.dump(data, f)
        return True
class VehicleResearcher:
    """
    Vehicle Robot 2.5: Sniper Researcher.

    Builds a compact, structured research dossier ("Akta") per vehicle using a
    few highly targeted web searches, so the downstream AI robot has to burn
    as few GPU tokens as possible.
    """
    def __init__(self):
        # Give up on a vehicle after this many research attempts.
        self.max_attempts = 5
        # Hard timeout (seconds) for a single DuckDuckGo search call.
        self.search_timeout = 15.0
        # Quota managers, configured from .env.
        dvla_limit = int(os.getenv("DVLA_DAILY_LIMIT", "1000"))
        self.dvla_quota = QuotaManager("dvla", dvla_limit)
        # NOTE(review): dvla_token is stored but not used anywhere in this
        # class yet — presumably reserved for the Tier-2 DVLA branch below.
        self.dvla_token = os.getenv("DVLA_API_KEY")
    async def fetch_ddg_targeted(self, label: str, query: str) -> str:
        """Run one targeted DuckDuckGo search thread-safely.

        Returns a small labelled text block; on timeout or any search failure
        a placeholder block is returned instead of raising.
        """
        try:
            def search():
                with DDGS() as ddgs:
                    # max_results=2: keep the noise down, only the 2 most relevant hits
                    results = ddgs.text(query, max_results=2)
                    return [f"- {r.get('body', '')}" for r in results] if results else []
            results = await asyncio.wait_for(asyncio.to_thread(search), timeout=self.search_timeout)
            if not results:
                return f"[SOURCE: {label}]\nNincs érdemi találat.\n"
            content = f"[SOURCE: {label} | KERESÉS: {query}]\n"
            content += "\n".join(results) + "\n"
            return content
        except Exception as e:
            logger.debug(f"Keresési hiba ({label}): {e}")
            return f"[SOURCE: {label}]\nKERESÉSI HIBA.\n"
    async def research_vehicle(self, db, vehicle_id: int, make: str, model: str, engine: str, year: str, current_attempts: int):
        """Research one vehicle and store the structured dossier for the GPU step."""
        engine_safe = engine or ""
        year_safe = str(year) if year else ""
        logger.info(f"🔎 Mesterlövész Kutatás: {make} {model} (Motor: {engine_safe})")
        # TIER 1: free, targeted searches (the most reliable sources).
        queries = [
            ("ULTIMATE_SPECS", f"{make} {model} {engine_safe} {year_safe} site:ultimatespecs.com"),
            ("AUTO_DATA", f"{make} {model} {engine_safe} {year_safe} site:auto-data.net"),
            ("COMMON_ISSUES", f"{make} {model} {engine_safe} reliability common problems")
        ]
        tasks = [self.fetch_ddg_targeted(label, q) for label, q in queries]
        search_results = await asyncio.gather(*tasks)
        # TIER 2: paid / quota-limited APIs (placeholder for the DVLA hook).
        # If a British plate shows up in the future, call the DVLA here:
        # if has_uk_plate and self.dvla_quota.can_make_request():
        #     uk_data = await self.fetch_dvla_data(plate)
        #     search_results.append(uk_data)
        # TIER 3: assemble the dossier.
        # Cap the text length so the AI GPU does not choke on huge prompts!
        full_context = "\n".join(search_results)
        if len(full_context) > 2500:
            full_context = full_context[:2500] + "\n...[TRUNCATED TO SAVE GPU TOKENS]"
        try:
            if len(full_context.strip()) > 150:  # Lower bar — targeted search output is dense
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        raw_search_context=full_context,
                        status='awaiting_ai_synthesis',  # Dossier ready, off to the Alchemist!
                        last_research_at=func.now(),
                        attempts=current_attempts + 1
                    )
                )
                logger.info(f"✅ Akta rögzítve ({len(full_context)} karakter): {make} {model}")
            else:
                # Too little context: requeue, or suspend after max_attempts.
                new_status = 'suspended_research' if current_attempts + 1 >= self.max_attempts else 'unverified'
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        status=new_status,
                        attempts=current_attempts + 1,
                        last_research_at=func.now()
                    )
                )
                if new_status == 'suspended_research':
                    logger.warning(f"🛑 Felfüggesztve (Nincs nyom a weben): {make} {model}")
                else:
                    logger.warning(f"⚠️ Kevés adat: {make} {model}, visszatéve a sorba.")
            await db.commit()
        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Adatbázis hiba az eredmény mentésénél ({vehicle_id}): {e}")
    @classmethod
    async def run(cls):
        """Main worker loop: atomically claim one record, research it, repeat."""
        self_instance = cls()
        logger.info("🚀 Vehicle Researcher 2.5 ONLINE (Sniper & Quota Manager)")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMIC LOCK: claim exactly one eligible row via
                    # UPDATE ... (SELECT ... FOR UPDATE SKIP LOCKED).
                    query = text("""
                        UPDATE data.vehicle_model_definitions
                        SET status = 'research_in_progress'
                        WHERE id = (
                            SELECT id FROM data.vehicle_model_definitions
                            WHERE status IN ('unverified', 'awaiting_research')
                            AND attempts < :max_attempts
                            ORDER BY
                                CASE WHEN make = 'TOYOTA' THEN 1 ELSE 2 END,
                                attempts ASC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, marketing_name, engine_code, year_from, attempts;
                    """)
                    result = await db.execute(query, {"max_attempts": self_instance.max_attempts})
                    task = result.fetchone()
                    await db.commit()
                    if task:
                        v_id, v_make, v_model, v_engine, v_year, v_attempts = task
                        # Fresh session for the long-running research step.
                        async with AsyncSessionLocal() as process_db:
                            await self_instance.research_vehicle(process_db, v_id, v_make, v_model, v_engine, v_year, v_attempts)
                        await asyncio.sleep(2)  # Rate-limit protection towards DDG
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
# Script entry point: run the researcher until Ctrl+C / SIGINT.
if __name__ == "__main__":
    try:
        asyncio.run(VehicleResearcher.run())
    except KeyboardInterrupt:
        logger.info("🛑 Kutató robot leállítva.")

View File

@@ -0,0 +1,137 @@
# /opt/docker/dev/service_finder/backend/app/workers/researcher_v2_1.py
import asyncio
import logging
import warnings
import os
from datetime import datetime, timezone
from typing import Optional, List
from sqlalchemy import select, update, and_, func, or_, case
from app.db.session import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# DuckDuckGo search API hiba-elnyomás és import
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# Logolás beállítása
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
logger = logging.getLogger("Robot-Researcher-v2.1")
class ResearcherBot:
    """
    Robot 2.1: the internet vacuum cleaner.

    Collects technical context from DuckDuckGo to prepare vehicles for AI
    enrichment. Exploits the engine code and production year for more precise
    search hits.
    """
    def __init__(self):
        # Number of vehicles taken from the queue per cycle.
        self.batch_size = 5
        # Upper bound on concurrent searches.
        self.max_parallel_queries = 3

    async def fetch_source(self, label: str, query: str) -> str:
        """Fetch one labelled source from DuckDuckGo in a worker thread."""
        try:
            def search():
                with DDGS() as ddgs:
                    # Collect the bodies of the first 3 hits as raw context.
                    results = ddgs.text(query, max_results=3)
                    return [f"[{r.get('title', 'No Title')}] {r.get('body', '')}" for r in results] if results else []
            results = await asyncio.to_thread(search)
            if not results:
                return f"=== SOURCE: {label} | STATUS: EMPTY ===\n\n"
            content = f"=== SOURCE: {label} | QUERY: {query} ===\n"
            content += "\n---\n".join(results)
            content += "\n=== END SOURCE ===\n\n"
            return content
        except Exception as e:
            logger.error(f"❌ Keresési hiba ({label}): {str(e)}")
            return f"=== SOURCE: {label} | ERROR: {str(e)} ===\n\n"

    async def research_vehicle(self, vehicle_id: int):
        """Run a full research pass for a single vehicle."""
        async with AsyncSessionLocal() as db:
            # BUG FIX: claim the row with a SKIP LOCKED row lock and re-check
            # its status. The previous plain SELECT allowed two researcher
            # instances to pick up and process the same vehicle concurrently.
            res = await db.execute(
                select(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == vehicle_id)
                .with_for_update(skip_locked=True)
            )
            v = res.scalar_one_or_none()
            if not v or v.status not in ('unverified', 'awaiting_research'):
                return
            make = v.make
            model = v.marketing_name
            engine = v.engine_code or ""
            year = f"{v.year_from}" if v.year_from else ""
            # Lock the status so other workers skip this vehicle.
            v.status = 'research_in_progress'
            await db.commit()
        logger.info(f"🔎 Kutatás indul: {make} {model} (Motor: {engine}, Év: {year})")
        # Targeted search keywords (multi-channel strategy).
        queries = [
            ("TECH_SPECS", f"{make} {model} {engine} {year} technical specifications engine power kw torque"),
            ("MAINTENANCE", f"{make} {model} {engine} oil capacity coolant transmission fluid type capacity"),
            ("TIRES_PROD", f"{make} {model} {year} tire size load index production years status")
        ]
        # Gather all sources concurrently.
        tasks = [self.fetch_source(label, q) for label, q in queries]
        search_results = await asyncio.gather(*tasks)
        full_context = "".join(search_results)
        async with AsyncSessionLocal() as db:
            if len(full_context.strip()) > 200:  # Enough context collected
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        raw_search_context=full_context,
                        status='awaiting_ai_synthesis',  # Hand over to Robot 2.2
                        last_research_at=func.now(),
                        attempts=VehicleModelDefinition.attempts + 1
                    )
                )
                logger.info(f"✅ Kontextus rögzítve: {make} {model}")
            else:
                # Search came up empty; requeue for a later retry.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        status='unverified',
                        attempts=VehicleModelDefinition.attempts + 1,
                        last_research_at=func.now()
                    )
                )
                logger.warning(f"⚠️ Kevés adat: {make} {model} - Újrapróbálkozás később")
            await db.commit()

    async def run(self):
        """Main loop: fetch a prioritized batch of ids and research each one."""
        logger.info("🚀 Robot 2.1 (Researcher) ONLINE - Cél: 407 Toyota feldolgozása")
        while True:
            async with AsyncSessionLocal() as db:
                # Priority: Toyotas first, then fewest-attempts first.
                priorities = case(
                    (VehicleModelDefinition.make == 'TOYOTA', 1),
                    else_=2
                )
                stmt = select(VehicleModelDefinition.id).where(
                    or_(VehicleModelDefinition.status == 'unverified',
                        VehicleModelDefinition.status == 'awaiting_research')
                ).order_by(priorities, VehicleModelDefinition.attempts.asc()).limit(self.batch_size)
                res = await db.execute(stmt)
                ids = [r[0] for r in res.fetchall()]
            if not ids:
                await asyncio.sleep(30)
                continue
            # Sequential processing because of the search rate limit.
            for rid in ids:
                await self.research_vehicle(rid)
                await asyncio.sleep(5)  # 5 second pause between searches
# Script entry point: instantiate the researcher and run its endless loop.
if __name__ == "__main__":
    asyncio.run(ResearcherBot().run())

View File

@@ -0,0 +1,208 @@
import asyncio
import datetime
import json
import logging
import os
import random
import sys

from sqlalchemy import text, func, update, case

from app.database import AsyncSessionLocal
from app.models.asset import AssetCatalog
from app.models.vehicle_definitions import VehicleModelDefinition
from app.services.ai_service import AIService
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
class TechEnricher:
    """
    Vehicle Robot 3: Alchemist Pro (atomic-locking patch).

    Pure GPU focus: only performs AI synthesis and data merging for records
    prepared by the upstream robots — no web scraping here. Applies a strict
    sanity check against AI hallucination, and official RDW figures always
    override AI-generated ones.
    """
    def __init__(self):
        # Give up on a record after this many enrichment attempts.
        self.max_attempts = 5
        # Daily AI-call budget, configured from .env.
        self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()

    def check_budget(self) -> bool:
        """Return True while today's AI budget is not exhausted; resets daily."""
        if datetime.date.today() > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = datetime.date.today()
        return self.ai_calls_today < self.daily_ai_limit

    def is_data_sane(self, data: dict, base_info: dict) -> bool:
        """Strict anti-hallucination filter for AI output.

        Rejects physically implausible engine figures and empty results
        (except electric vehicles, where ccm == 0 is legitimate).
        """
        if not data:
            return False
        try:
            ccm = int(data.get("ccm", 0) or 0)
            kw = int(data.get("kw", 0) or 0)
            v_class = base_info.get("v_type", "car")
            # 1. Basic physical limits.
            if ccm > 18000 or (kw > 1500 and v_class != "truck"):
                return False
            # 2. Reject empty data (except EVs, where ccm = 0).
            fuel = data.get("fuel_type", base_info.get("rdw_fuel", "")).lower()
            if kw == 0:
                return False
            if ccm == 0 and "electric" not in fuel and "elektric" not in fuel and v_class != "trailer":
                return False
            return True
        except Exception as e:
            logger.debug(f"Sane check hiba: {e}")
            return False

    async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
        """Enrich one claimed record: AI call -> validate -> hybrid merge -> save."""
        try:
            logger.info(f"🧠 AI dúsítás indul: {base_info['make']} {base_info['m_name']}")
            # STEP 1: AI call (entrust the data to the model).
            ai_data = await AIService.get_clean_vehicle_data(
                base_info['make'],
                base_info['m_name'],
                base_info
            )
            # BUG FIX: count the AI call as soon as it has happened. Previously
            # the counter was bumped only after a fully successful save, so
            # failed or hallucinated syntheses consumed GPU without touching
            # the daily budget, allowing the limit to be exceeded unboundedly.
            self.ai_calls_today += 1
            # STEP 2: validation (if the AI is wrong we do NOT hit the web —
            # we drop the dossier instead!).
            if not ai_data or not self.is_data_sane(ai_data, base_info):
                raise ValueError("Az AI hiányos adatot adott vissza vagy hallucinált.")
            # STEP 3: HYBRID MERGE (official RDW data overrides the AI).
            final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else (ai_data.get("kw") or 0)
            final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else (ai_data.get("ccm") or 0)
            # Fuel-type cleanup.
            fuel_rdw = base_info.get('rdw_fuel', '')
            final_fuel = fuel_rdw if fuel_rdw and fuel_rdw != "Unknown" else ai_data.get("fuel_type", "petrol")
            final_engine = base_info['rdw_engine'] if base_info['rdw_engine'] else ai_data.get("engine_code", "Unknown")
            final_euro = base_info['rdw_euro'] or ai_data.get("euro_classification")
            final_cylinders = base_info['rdw_cylinders'] or ai_data.get("cylinders")
            # STEP 4: save into the Gold catalog.
            clean_model = str(ai_data.get("marketing_name", base_info['m_name']))[:50].upper()
            cat_stmt = text("""
                INSERT INTO data.vehicle_catalog
                (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
                VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
                ON CONFLICT (make, model, year_from, fuel_type) DO NOTHING
                RETURNING id;
            """)
            await db.execute(cat_stmt, {
                "m_id": record_id,
                "make": base_info['make'].upper(),
                "model": clean_model,
                "kw": final_kw,
                "ccm": final_ccm,
                "fuel": final_fuel,
                "factory": json.dumps(ai_data)
            })
            # STEP 5: close the staging (VMD) record.
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    status="gold_enriched",
                    engine_capacity=final_ccm,
                    power_kw=final_kw,
                    fuel_type=final_fuel,
                    engine_code=final_engine,
                    euro_classification=final_euro,
                    cylinders=final_cylinders,
                    specifications=ai_data,  # Keep the full AI output in the master table too
                    updated_at=func.now()
                )
            )
            await db.commit()
            logger.info(f"✨ ARANY REKORD KÉSZ: {base_info['make'].upper()} {clean_model}")
        except Exception as e:
            await db.rollback()
            logger.warning(f"⚠️ Alkimista hiba ({base_info['make']} {base_info['m_name']}): {e}")
            # Requeue, or suspend after too many attempts.
            new_status = 'suspended' if current_attempts + 1 >= self.max_attempts else 'unverified'
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    attempts=current_attempts + 1,
                    last_error=str(e)[:200],
                    status=new_status,
                    updated_at=func.now()
                )
            )
            await db.commit()
            if new_status == 'unverified':
                logger.info("♻️ Akta visszaküldve a Robot-2-nek (Kutató).")

    async def run(self):
        """Main loop: budget check, atomic claim, enrich, repeat."""
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Atomi Zárolás Patch)")
        while True:
            if not self.check_budget():
                logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
                await asyncio.sleep(3600); continue
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMIC CLAIM (the "holy grail" against race conditions).
                    # Picks up dossiers from Robot-1 (ACTIVE) and Robot-2
                    # (awaiting_ai_synthesis) alike.
                    query = text("""
                        UPDATE data.vehicle_model_definitions
                        SET status = 'ai_synthesis_in_progress'
                        WHERE id = (
                            SELECT id FROM data.vehicle_model_definitions
                            WHERE status IN ('awaiting_ai_synthesis', 'ACTIVE')
                            AND attempts < :max_attempts
                            ORDER BY
                                CASE WHEN status = 'awaiting_ai_synthesis' THEN 1 ELSE 2 END,
                                priority_score DESC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity,
                                  fuel_type, engine_code, euro_classification, cylinders, raw_search_context, attempts;
                    """)
                    result = await db.execute(query, {"max_attempts": self.max_attempts})
                    task = result.fetchone()
                    await db.commit()
                    if task:
                        # Unpack the claimed record into the base_info dict.
                        r_id = task[0]
                        base_info = {
                            "make": task[1], "m_name": task[2], "v_type": task[3] or "car",
                            "rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
                            "rdw_fuel": task[6] or "petrol", "rdw_engine": task[7] or "",
                            "rdw_euro": task[8], "rdw_cylinders": task[9],
                            "web_context": task[10] or ""
                        }
                        attempts = task[11]
                        # Separate DB connection for processing (the AI call is long).
                        async with AsyncSessionLocal() as process_db:
                            await self.process_single_record(process_db, r_id, base_info, attempts)
                        # GPU cool-down / Ollama rate limit.
                        await asyncio.sleep(random.uniform(1.5, 3.5))
                    else:
                        logger.info("😴 Nincs feldolgozandó akta, az Alkimista pihen...")
                        await asyncio.sleep(15)
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
# Script entry point.
# BUG FIX: `import os` used to live only inside this guard, yet
# TechEnricher.__init__ calls os.getenv() at construction time — importing
# this module from another process and instantiating the class raised
# NameError. `os` is now imported at the top of the file with the other
# stdlib modules, so the guard-local import hack is gone.
if __name__ == "__main__":
    asyncio.run(TechEnricher().run())

View File

@@ -0,0 +1,262 @@
# /app/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
import asyncio
import logging
import datetime
import random
import sys
import warnings
from sqlalchemy import select, and_, update, func, case
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.asset import AssetCatalog
from app.services.ai_service import AIService
# DuckDuckGo hiba-elnyomás
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# MB 2.0 Szigorú naplózás
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s',
stream=sys.stdout
)
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
class TechEnricher:
    """
    Vehicle Robot 3: Industrial Alchemist (Pro Edition).

    Produces the MDM 'Gold' records via hybrid (RDW + AI + web) logic:
    official RDW figures always take precedence over AI-generated ones.
    """
    def __init__(self):
        # Give up on a record after this many enrichment attempts.
        self.max_attempts = 5
        # Records taken from the queue per cycle.
        self.batch_size = 10
        # Daily AI-call budget.
        self.daily_ai_limit = 500
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()
        # Hard timeout (seconds) for a web search.
        self.search_timeout = 15.0

    def check_budget(self) -> bool:
        """Check the daily AI budget; the counter resets on date rollover."""
        if datetime.date.today() > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = datetime.date.today()
        return self.ai_calls_today < self.daily_ai_limit

    def is_data_sane(self, data: dict, rdw_kw: int, rdw_ccm: int) -> bool:
        """
        Anti-hallucination guard: technical sanity check.

        When official RDW figures exist we are lenient with the AI, because
        the key fields are taken from RDW anyway during the hybrid merge.
        """
        # Official data in hand means the record is "sane"; the hybrid merge
        # handles the rest.
        if rdw_kw > 0 or rdw_ccm > 0:
            return True
        try:
            if not data:
                return False
            ccm = int(data.get("ccm", 0) or 0)
            kw = int(data.get("kw", 0) or 0)
            # Reject completely empty data when there is no RDW backup either.
            if ccm == 0 and kw == 0 and data.get("vehicle_type") != "trailer":
                return False
            if ccm > 16000 or (kw > 1500 and data.get("vehicle_type") != "truck"):
                return False
            return True
        except Exception as e:
            logger.debug(f"Data Sane Error: {e}")
            return False

    async def get_web_wisdom(self, make: str, model: str) -> str:
        """Collect context from DuckDuckGo, thread-safe and timeout-guarded."""
        query = f"{make} {model} technical specifications engine code fuel"
        try:
            def sync_search():
                with DDGS() as ddgs:
                    results = ddgs.text(query, max_results=3)
                    return "\n".join([r['body'] for r in results]) if results else ""
            return await asyncio.wait_for(asyncio.to_thread(sync_search), timeout=self.search_timeout)
        except asyncio.TimeoutError:
            logger.warning(f"⏱️ Web keresési időtúllépés ({make} {model})")
            return ""
        except Exception as e:
            logger.warning(f"🌐 Keresési hiba ({make}): {e}")
            return ""

    async def process_single_record(self, record_id: int):
        """Enrich one record: read -> AI process -> hybrid gold-data merge."""
        make, m_name, v_type = "", "", "car"
        web_context = ""
        # Holders for the official RDW figures.
        rdw_kw, rdw_ccm, rdw_fuel, rdw_engine = 0, 0, "petrol", ""
        # STEP 1: read the record and flip its status.
        try:
            async with AsyncSessionLocal() as db:
                res = await db.execute(
                    select(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .with_for_update(skip_locked=True)
                )
                rec = res.scalar_one_or_none()
                if not rec:
                    return
                make = rec.make
                m_name = rec.marketing_name
                v_type = rec.vehicle_class or "car"
                web_context = rec.raw_search_context or ""
                # Stash the official RDW data gathered by the Hunter.
                rdw_kw = rec.power_kw or 0
                rdw_ccm = rec.engine_capacity or 0
                rdw_fuel = rec.fuel_type or "petrol"
                rdw_engine = rec.engine_code or ""
                rec.status = "ai_synthesis_in_progress"
                await db.commit()
        except Exception as e:
            logger.error(f"🚨 Adatbázis hiba olvasáskor (ID: {record_id}): {e}")
            return
        # STEP 2: AI and web work.
        try:
            logger.info(f"🧠 AI elemzés indul: {make} {m_name}")
            # Hand the RDW figures to the AI as extra context.
            sources_dict = {
                "web_context": web_context,
                "vehicle_class": v_type,
                "rdw_kw": rdw_kw,
                "rdw_ccm": rdw_ccm
            }
            ai_data = await AIService.get_clean_vehicle_data(make, m_name, sources_dict)
            # BUG FIX: count every AI invocation against the daily budget as it
            # happens. Previously a single increment occurred only after the
            # sanity check passed, so failed syntheses counted zero and the
            # deep-dive second call below was never counted.
            self.ai_calls_today += 1
            # If the AI result is weak AND the RDW data is incomplete, hit the web.
            if (not ai_data or not ai_data.get("kw")) and rdw_kw == 0:
                logger.info(f"🔍 Adathiány, extra webes mélyfúrás: {make} {m_name}")
                extra_web_info = await self.get_web_wisdom(make, m_name)
                sources_dict["web_context"] = extra_web_info
                ai_data = await AIService.get_clean_vehicle_data(make, m_name, sources_dict)
                self.ai_calls_today += 1
            # Hybrid sanity check.
            if not ai_data: ai_data = {}
            if not self.is_data_sane(ai_data, rdw_kw, rdw_ccm):
                raise ValueError("Az AI válasza hallucinált ÉS hivatalos RDW adatunk sincs.")
            # HYBRID MERGE (The Magic!): RDW (official) > AI (generated).
            final_kw = rdw_kw if rdw_kw > 0 else (ai_data.get("kw") or 0)
            final_ccm = rdw_ccm if rdw_ccm > 0 else (ai_data.get("ccm") or 0)
            # Fuel cleanup (RDW is sometimes Dutch; the AI may clarify it, but
            # without AI data the RDW value stands).
            final_fuel = rdw_fuel if (rdw_fuel and rdw_fuel != "Unknown") else ai_data.get("fuel_type", "petrol")
            final_engine = rdw_engine if rdw_engine else ai_data.get("engine_code", "Nincs adat")
            # Refresh the JSON payload with the confirmed figures too.
            ai_data["kw"] = final_kw
            ai_data["ccm"] = final_ccm
            ai_data["engine_code"] = final_engine
            # STEP 3: save the gold record.
            async with AsyncSessionLocal() as db:
                clean_model = str(ai_data.get("marketing_name", m_name))[:50].upper()
                cat_stmt = select(AssetCatalog).where(and_(
                    AssetCatalog.make == make.upper(),
                    AssetCatalog.model == clean_model,
                    AssetCatalog.power_kw == final_kw  # Unique by the exact kW
                )).limit(1)
                existing_cat = (await db.execute(cat_stmt)).scalar_one_or_none()
                if not existing_cat:
                    db.add(AssetCatalog(
                        make=make.upper(),
                        model=clean_model,
                        power_kw=final_kw,
                        engine_capacity=final_ccm,
                        fuel_type=final_fuel,
                        vehicle_class=v_type,
                        factory_data=ai_data  # Enriched JSON
                    ))
                    logger.info(f"✨ ÚJ ARANY REKORD (HIBRID): {make.upper()} {clean_model} ({final_ccm}ccm, {final_kw}kW)")
                # Update staging with the confirmed figures.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        status="gold_enriched",
                        technical_code=ai_data.get("technical_code") or f"REF-{record_id}",
                        engine_capacity=final_ccm,
                        power_kw=final_kw,
                        updated_at=func.now()
                    )
                )
                await db.commit()
        except Exception as e:
            # STEP 4: error handling — requeue, or suspend after max attempts.
            logger.error(f"🚨 Hiba a(z) {record_id} rekordnál ({make} {m_name}): {e}")
            try:
                async with AsyncSessionLocal() as db:
                    await db.execute(
                        update(VehicleModelDefinition)
                        .where(VehicleModelDefinition.id == record_id)
                        .values(
                            attempts=VehicleModelDefinition.attempts + 1,
                            last_error=str(e)[:200],
                            status=case(
                                (VehicleModelDefinition.attempts >= self.max_attempts - 1, "suspended"),
                                else_="unverified"
                            ),
                            updated_at=func.now()
                        )
                    )
                    await db.commit()
            except Exception as db_err:
                logger.critical(f"💀 Végzetes adatbázis hiba a fallback mentésnél: {db_err}")

    async def run(self):
        """Main loop: budget check, fetch a batch of ids, enrich sequentially."""
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Napi limit: {self.daily_ai_limit})")
        while True:
            try:
                if not self.check_budget():
                    logger.warning("💰 AI Keret kimerült. Alvás 1 órát.")
                    await asyncio.sleep(3600)
                    continue
                async with AsyncSessionLocal() as db:
                    stmt = select(VehicleModelDefinition.id).where(and_(
                        VehicleModelDefinition.status.in_(["unverified", "awaiting_ai_synthesis"]),
                        VehicleModelDefinition.attempts < self.max_attempts
                    )).limit(self.batch_size)
                    res = await db.execute(stmt)
                    ids = [r[0] for r in res.fetchall()]
                if not ids:
                    await asyncio.sleep(60)
                    continue
                for rid in ids:
                    await self.process_single_record(rid)
                    await asyncio.sleep(random.uniform(5.0, 15.0))  # Spare the GPU
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
# Script entry point: run the enricher until Ctrl+C / SIGINT.
if __name__ == "__main__":
    try:
        asyncio.run(TechEnricher().run())
    except KeyboardInterrupt:
        logger.info("🛑 Alchemist Pro leállítva.")

View File

@@ -0,0 +1,118 @@
import asyncio
import logging
import sys
from sqlalchemy import select, and_, text, update
from sqlalchemy.orm import joinedload
from app.database import AsyncSessionLocal
from app.models.asset import Asset, AssetCatalog
from app.services.ai_service import AIService
# Process-wide logging: timestamped single-line records straight to stdout
# (container-friendly; no file handlers).
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s [%(levelname)s] Vehicle-VIN-Auditor: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("Vehicle-Robot-4-VINAuditor")
class VINAuditor:
    """
    Vehicle Robot 4: VIN Auditor (Atomi Zárolás Patch)
    Egyedi járművek (Assets) alvázszám alapú hitelesítése.

    Lifecycle of a claimed row: `run` atomically flips one unverified asset
    to 'audit_in_progress', then `audit_asset` must always move it on to
    'active' (verified) or 'audit_failed'.  The claim query skips
    'audit_in_progress' rows, so any row left in that state would never be
    retried — hence the explicit claim release on every failure path.
    """

    # Minimum power difference (kW) treated as a real catalog discrepancy.
    KW_DISCREPANCY_THRESHOLD = 5

    @classmethod
    async def _release_claim(cls, db, asset_id: int):
        """Best-effort release of a claimed asset to 'audit_failed'.

        Prevents the row from being stuck in 'audit_in_progress' forever;
        a failure here is logged but never propagated.
        """
        try:
            await db.execute(
                update(Asset).where(Asset.id == asset_id).values(status="audit_failed")
            )
            await db.commit()
        except Exception as release_err:
            logger.critical(f"💀 Nem sikerült feloldani a zárolást (asset {asset_id}): {release_err}")

    @classmethod
    async def audit_asset(cls, db, asset_id: int):
        """ Egy konkrét eszköz hitelesítése alvázszám alapján. """
        # 1. Adatok begyűjtése
        stmt = select(Asset).options(joinedload(Asset.catalog)).where(Asset.id == asset_id)
        asset = (await db.execute(stmt)).scalar_one_or_none()
        if not asset or not asset.vin or not asset.catalog:
            # BUGFIX: a claimed row with a missing catalog/VIN used to stay in
            # 'audit_in_progress' forever; release it as failed instead.
            if asset:
                await cls._release_claim(db, asset_id)
            return
        make = asset.catalog.make
        vin = asset.vin
        current_kw = asset.catalog.power_kw
        # 2. AI FÁZIS
        try:
            logger.info(f"🛡️ VIN Ellenőrzés indítása: {vin}")
            truth = await AIService.get_clean_vehicle_data(make, vin, {"source": "vin_audit", "vin": vin})
            if truth and truth.get("kw"):
                real_kw = int(truth["kw"])
                # Ha jelentős (>=5 kW) eltérés van
                if abs(real_kw - (current_kw or 0)) >= cls.KW_DISCREPANCY_THRESHOLD:
                    logger.warning(f"⚠️ Eltérés észlelve! VIN: {vin} | Valóság: {real_kw}kW != Katalógus: {current_kw}kW")
                    # New catalog row carrying the audited (trusted) data;
                    # the asset is re-pointed at it below.
                    new_v = AssetCatalog(
                        make=make.upper(),
                        model=truth.get("marketing_name", asset.catalog.model),
                        power_kw=real_kw,
                        source=f"VIN-AUDIT-{vin}"
                    )
                    db.add(new_v)
                    await db.flush()  # need new_v.id before the UPDATE below
                    await db.execute(
                        update(Asset)
                        .where(Asset.id == asset_id)
                        .values(catalog_id=new_v.id, is_verified=True, status="active")
                    )
                else:
                    await db.execute(
                        update(Asset)
                        .where(Asset.id == asset_id)
                        .values(is_verified=True, status="active")
                    )
                await db.commit()
                logger.info(f"✅ VIN Audit sikeresen lezárva: {vin}")
            else:
                logger.warning(f"⚠️ AI nem tudta azonosítani a VIN-t: {vin}")
                # Visszaállítjuk, de megjelöljük, hogy már próbáltuk
                await db.execute(update(Asset).where(Asset.id == asset_id).values(status="audit_failed"))
                await db.commit()
        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Kritikus hiba az audit során: {e}")
            # BUGFIX: after the rollback the row stayed in 'audit_in_progress'
            # and was never picked up again — release the claim explicitly.
            await cls._release_claim(db, asset_id)

    async def run(self):
        """Main loop: atomically claim one unverified asset, audit it in a
        fresh session, then repeat; idles 60s when the queue is empty."""
        logger.info("🛡️ Vehicle VIN Auditor ONLINE (Atomi Zárolás)")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMI ZÁROLÁS (Asset táblán): SKIP LOCKED keeps multiple
                    # auditor instances from grabbing the same row.
                    query = text("""
                        UPDATE data.assets
                        SET status = 'audit_in_progress'
                        WHERE id = (
                            SELECT id FROM data.assets
                            WHERE is_verified = false
                            AND vin IS NOT NULL
                            AND status NOT IN ('audit_in_progress', 'audit_failed')
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id;
                    """)
                    result = await db.execute(query)
                    task = result.fetchone()
                    await db.commit()
                # Claim session is closed here, so the connection is not held
                # during the (potentially slow) AI-backed audit below.
                if task:
                    async with AsyncSessionLocal() as process_db:
                        await self.audit_asset(process_db, task[0])
                    await asyncio.sleep(2)
                else:
                    await asyncio.sleep(60)
            except Exception as e:
                logger.error(f"🚨 Hiba a főciklusban: {e}")
                await asyncio.sleep(30)
if __name__ == "__main__":
    # Entry point: start the auditor's infinite polling loop.
    asyncio.run(VINAuditor().run())