Files
service-finder/archive/2026.03.09/vehicle_robot_3_alchemist_pro_1.0.0.py
2026-03-10 07:34:01 +00:00

262 lines
11 KiB
Python
Executable File

# /app/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
import asyncio
import logging
import datetime
import random
import sys
import warnings
from sqlalchemy import select, and_, update, func, case
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.asset import AssetCatalog
from app.services.ai_service import AIService
# DuckDuckGo warning suppression: ignore RuntimeWarnings raised from the duckduckgo_search module.
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# MB 2.0 strict logging: INFO and above go to stdout with a fixed worker prefix.
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
class TechEnricher:
    """
    Vehicle Robot 3: Industrial Alchemist (Pro Edition).

    Produces the MDM "gold" records with hybrid (RDW + AI + Web) logic:
    official RDW values already stored on the staging row take precedence,
    AI-generated values fill the gaps, and a DuckDuckGo search supplies extra
    context when neither source yields an engine power.
    """

    def __init__(self):
        # A staging row is suspended once it has failed this many times.
        self.max_attempts = 5
        # Number of staging ids fetched per polling round.
        self.batch_size = 10
        # Daily cap on AI calls and the running counter checked against it.
        self.daily_ai_limit = 500
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()
        # Seconds to wait for a web search before giving up.
        self.search_timeout = 15.0

    def check_budget(self) -> bool:
        """Return True while today's AI budget is not exhausted.

        The counter is reset the first time this is called on a new calendar day.
        """
        today = datetime.date.today()
        if today > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = today
        return self.ai_calls_today < self.daily_ai_limit

    def is_data_sane(self, data: dict, rdw_kw: int, rdw_ccm: int) -> bool:
        """Hallucination guard: technical sanity check of the AI payload.

        When official RDW power or displacement is available the payload is
        always accepted, because the key figures are taken from RDW anyway.

        Args:
            data: AI payload; the keys "ccm", "kw" and "vehicle_type" are read.
            rdw_kw: Official engine power (0 or None when unknown).
            rdw_ccm: Official displacement (0 or None when unknown).

        Returns:
            True when the record may be promoted, False otherwise. Payloads
            that cannot be parsed are treated as not sane.
        """
        # Official data present: accept, the hybrid merge handles the rest.
        if (rdw_kw or 0) > 0 or (rdw_ccm or 0) > 0:
            return True
        try:
            if not data:
                return False
            ccm = int(data.get("ccm", 0) or 0)
            kw = int(data.get("kw", 0) or 0)
            vehicle_type = data.get("vehicle_type")
            # Negative figures are never physically meaningful.
            if ccm < 0 or kw < 0:
                return False
            # Do not admit a completely empty payload without RDW backing.
            if ccm == 0 and kw == 0 and vehicle_type != "trailer":
                return False
            if ccm > 16000 or (kw > 1500 and vehicle_type != "truck"):
                return False
            return True
        except Exception as e:
            # The payload is untrusted AI output, so any parsing problem
            # simply means "not sane" rather than a crash.
            logger.debug(f"Data Sane Error: {e}")
            return False

    async def get_web_wisdom(self, make: str, model: str) -> str:
        """Collect DuckDuckGo result snippets for the vehicle.

        The blocking search runs in a worker thread and is awaited with
        ``self.search_timeout``. On timeout the await is abandoned; the thread
        itself cannot be interrupted and finishes in the background.

        Returns:
            The result bodies joined by newlines, or "" on timeout, error or
            when nothing was found.
        """
        query = f"{make} {model} technical specifications engine code fuel"

        def sync_search() -> str:
            with DDGS() as ddgs:
                results = ddgs.text(query, max_results=3)
                if not results:
                    return ""
                # A result without a body is skipped instead of failing the
                # whole search with a KeyError.
                bodies = [r.get("body") for r in results]
                return "\n".join(b for b in bodies if b)

        try:
            return await asyncio.wait_for(
                asyncio.to_thread(sync_search), timeout=self.search_timeout
            )
        except asyncio.TimeoutError:
            logger.warning(f"⏱️ Web keresési időtúllépés ({make} {model})")
            return ""
        except Exception as e:
            logger.warning(f"🌐 Keresési hiba ({make}): {e}")
            return ""

    @staticmethod
    def _to_non_negative_int(value) -> int:
        """Coerce an AI-supplied number to an int >= 0; unusable values become 0."""
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return 0
        return number if number > 0 else 0

    @staticmethod
    def _merge_hybrid(ai_data: dict, rdw_kw, rdw_ccm, rdw_fuel, rdw_engine) -> tuple:
        """Merge official and generated values; RDW (official) beats AI (generated).

        Returns:
            A ``(kw, ccm, fuel_type, engine_code)`` tuple. Missing values fall
            back to 0, 0, "petrol" and "Nincs adat" respectively.
        """
        to_int = TechEnricher._to_non_negative_int
        kw = rdw_kw if rdw_kw and rdw_kw > 0 else to_int(ai_data.get("kw"))
        ccm = rdw_ccm if rdw_ccm and rdw_ccm > 0 else to_int(ai_data.get("ccm"))
        # Only a real RDW fuel value wins. A missing or "Unknown" RDW fuel must
        # let the AI value through; "petrol" is the last-resort default.
        if rdw_fuel and rdw_fuel != "Unknown":
            fuel = rdw_fuel
        else:
            fuel = ai_data.get("fuel_type") or "petrol"
        # `or` (not a .get default) so an explicit None from the AI is replaced too.
        engine = rdw_engine or ai_data.get("engine_code") or "Nincs adat"
        return kw, ccm, fuel, engine

    async def _ask_ai(self, make: str, model: str, sources: dict):
        """Call the AI service and charge the call against the daily budget."""
        # Count before awaiting so that failing calls and retries consume
        # budget as well; otherwise the daily limit is not enforced.
        self.ai_calls_today += 1
        return await AIService.get_clean_vehicle_data(make, model, sources)

    async def _register_failure(self, record_id: int, error: Exception) -> None:
        """Record a failed attempt on the staging row; suspend it at the limit."""
        try:
            async with AsyncSessionLocal() as db:
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        attempts=VehicleModelDefinition.attempts + 1,
                        last_error=str(error)[:200],
                        # NOTE(review): assumes the CASE sees the pre-update
                        # `attempts` value (standard SQL / PostgreSQL
                        # behaviour) - confirm for the target database.
                        status=case(
                            (
                                VehicleModelDefinition.attempts >= self.max_attempts - 1,
                                "suspended",
                            ),
                            else_="unverified",
                        ),
                        updated_at=func.now(),
                    )
                )
                await db.commit()
        except Exception as db_err:
            logger.critical(f"💀 Végzetes adatbázis hiba a fallback mentésnél: {db_err}")

    async def process_single_record(self, record_id: int) -> None:
        """Enrich one staging record: read -> AI process -> hybrid gold merge.

        Never raises: a read failure is logged and the record is skipped, any
        later failure is logged and recorded on the staging row.
        """
        # STEP 1: read the row, keep its RDW values, mark it as in progress.
        try:
            async with AsyncSessionLocal() as db:
                res = await db.execute(
                    select(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .with_for_update(skip_locked=True)
                )
                rec = res.scalar_one_or_none()
                if not rec:
                    # Row is gone or currently locked by another worker.
                    return
                make = rec.make
                m_name = rec.marketing_name
                v_type = rec.vehicle_class or "car"
                web_context = rec.raw_search_context or ""
                # Official RDW data collected by the Hunter stage.
                rdw_kw = rec.power_kw or 0
                rdw_ccm = rec.engine_capacity or 0
                # Keep "missing" distinguishable from a real value so the AI
                # fuel type can be used as a fallback during the merge.
                rdw_fuel = rec.fuel_type or ""
                rdw_engine = rec.engine_code or ""
                rec.status = "ai_synthesis_in_progress"
                await db.commit()
        except Exception as e:
            logger.error(f"🚨 Adatbázis hiba olvasáskor (ID: {record_id}): {e}")
            return

        try:
            # STEP 2: AI and web work.
            logger.info(f"🧠 AI elemzés indul: {make} {m_name}")
            # The RDW figures are handed to the AI as additional context.
            sources_dict = {
                "web_context": web_context,
                "vehicle_class": v_type,
                "rdw_kw": rdw_kw,
                "rdw_ccm": rdw_ccm,
            }
            ai_data = await self._ask_ai(make, m_name, sources_dict)

            # Weak AI answer and no official power either: search the web and retry.
            if (not ai_data or not ai_data.get("kw")) and rdw_kw == 0:
                logger.info(f"🔍 Adathiány, extra webes mélyfúrás: {make} {m_name}")
                extra_web_info = await self.get_web_wisdom(make, m_name)
                # NOTE(review): this replaces the stored search context, even
                # with "" when the search found nothing - confirm that is intended.
                sources_dict["web_context"] = extra_web_info
                ai_data = await self._ask_ai(make, m_name, sources_dict)

            # Hybrid sanity check.
            if not ai_data:
                ai_data = {}
            if not self.is_data_sane(ai_data, rdw_kw, rdw_ccm):
                raise ValueError("Az AI válasza hallucinált ÉS hivatalos RDW adatunk sincs.")

            # Hybrid merge: RDW (official) > AI (generated).
            final_kw, final_ccm, final_fuel, final_engine = self._merge_hybrid(
                ai_data, rdw_kw, rdw_ccm, rdw_fuel, rdw_engine
            )
            # Refresh the JSON payload with the trusted values as well.
            ai_data["kw"] = final_kw
            ai_data["ccm"] = final_ccm
            ai_data["engine_code"] = final_engine

            # STEP 3: store the gold record.
            async with AsyncSessionLocal() as db:
                # `or` guards against an explicit None, which would otherwise
                # be stored as the literal model name "NONE".
                model_source = ai_data.get("marketing_name") or m_name or ""
                clean_model = str(model_source)[:50].upper()
                cat_stmt = (
                    select(AssetCatalog)
                    .where(
                        and_(
                            AssetCatalog.make == make.upper(),
                            AssetCatalog.model == clean_model,
                            # Uniqueness is keyed on the exact kW value.
                            AssetCatalog.power_kw == final_kw,
                        )
                    )
                    .limit(1)
                )
                existing_cat = (await db.execute(cat_stmt)).scalar_one_or_none()
                if not existing_cat:
                    db.add(
                        AssetCatalog(
                            make=make.upper(),
                            model=clean_model,
                            power_kw=final_kw,
                            engine_capacity=final_ccm,
                            fuel_type=final_fuel,
                            vehicle_class=v_type,
                            factory_data=ai_data,  # enriched JSON
                        )
                    )
                    logger.info(
                        f"✨ ÚJ ARANY REKORD (HIBRID): {make.upper()} {clean_model} ({final_ccm}ccm, {final_kw}kW)"
                    )
                # Update the staging row with the trusted values.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        status="gold_enriched",
                        technical_code=ai_data.get("technical_code") or f"REF-{record_id}",
                        engine_capacity=final_ccm,
                        power_kw=final_kw,
                        updated_at=func.now(),
                    )
                )
                await db.commit()
        except Exception as e:
            # STEP 4: error handling - count the attempt on the staging row.
            logger.error(f"🚨 Hiba a(z) {record_id} rekordnál ({make} {m_name}): {e}")
            await self._register_failure(record_id, e)

    async def run(self) -> None:
        """Poll the staging table forever and enrich pending records in batches."""
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Napi limit: {self.daily_ai_limit})")
        while True:
            try:
                if not self.check_budget():
                    logger.warning("💰 AI Keret kimerült. Alvás 1 órát.")
                    await asyncio.sleep(3600)
                    continue

                async with AsyncSessionLocal() as db:
                    stmt = (
                        select(VehicleModelDefinition.id)
                        .where(
                            and_(
                                VehicleModelDefinition.status.in_(
                                    ["unverified", "awaiting_ai_synthesis"]
                                ),
                                VehicleModelDefinition.attempts < self.max_attempts,
                            )
                        )
                        .limit(self.batch_size)
                    )
                    res = await db.execute(stmt)
                    ids = list(res.scalars().all())

                if not ids:
                    await asyncio.sleep(60)
                    continue

                for rid in ids:
                    # Re-check per record so one batch cannot overshoot the
                    # daily limit; the outer loop then sleeps for an hour.
                    if not self.check_budget():
                        break
                    await self.process_single_record(rid)
                    # Random pause between records to spare the GPU.
                    await asyncio.sleep(random.uniform(5.0, 15.0))
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
# Script entry point: run the worker loop until interrupted with Ctrl+C.
if __name__ == "__main__":
    try:
        worker = TechEnricher()
        asyncio.run(worker.run())
    except KeyboardInterrupt:
        logger.info("🛑 Alchemist Pro leállítva.")