262 lines
11 KiB
Python
Executable File
262 lines
11 KiB
Python
Executable File
# /app/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
|
|
import asyncio
|
|
import logging
|
|
import datetime
|
|
import random
|
|
import sys
|
|
import warnings
|
|
from sqlalchemy import select, and_, update, func, case
|
|
from app.database import AsyncSessionLocal
|
|
from app.models.vehicle_definitions import VehicleModelDefinition
|
|
from app.models.asset import AssetCatalog
|
|
from app.services.ai_service import AIService
|
|
|
|
# DuckDuckGo warning suppression: the filter is registered before the package
# is imported, so it is already active when duckduckgo_search is loaded.
warnings.filterwarnings(
    action="ignore",
    category=RuntimeWarning,
    module="duckduckgo_search",
)
from duckduckgo_search import DDGS
|
|
|
|
# MB 2.0 strict logging: INFO and above goes to stdout with a fixed
# "Vehicle-Alchemist-Pro" prefix on every line.
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s",
    level=logging.INFO,
)

# Module-wide logger used by the worker class and the script entry point.
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
|
|
|
|
class TechEnricher:
    """
    Vehicle Robot 3: Industrial Alchemist (Pro Edition).

    Produces the MDM "gold" catalogue records with hybrid (RDW + AI + web)
    logic: values already stored on the staging row (the official RDW data)
    take precedence, and the AI answer only fills the gaps.
    """

    def __init__(self):
        # A record is retried until it has failed this many times, then it is
        # marked "suspended" and no longer selected by run().
        self.max_attempts = 5
        # Number of staging record ids fetched per polling round.
        self.batch_size = 10
        # Upper bound on AI calls per calendar day (see check_budget).
        self.daily_ai_limit = 500
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()
        # Timeout (seconds) applied to the DuckDuckGo lookup.
        self.search_timeout = 15.0

    def check_budget(self) -> bool:
        """
        Check the daily AI budget.

        Resets the call counter when the calendar day has changed since the
        last reset.

        Returns:
            True while fewer than ``daily_ai_limit`` calls were made today.
        """
        # Read the date once so the comparison and the stored reset date can
        # never disagree around midnight.
        today = datetime.date.today()
        if today > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = today
        return self.ai_calls_today < self.daily_ai_limit

    def is_data_sane(self, data: dict, rdw_kw: int, rdw_ccm: int) -> bool:
        """
        Anti-hallucination guard: technical sanity check of the AI answer.

        When an official RDW value is available the check is permissive,
        because the main figures are taken from RDW anyway.

        Args:
            data: AI answer; the keys read are "ccm", "kw" and "vehicle_type".
            rdw_kw: Power from the staging row, 0 when unknown.
            rdw_ccm: Engine capacity from the staging row, 0 when unknown.

        Returns:
            True when the data may be used, False otherwise (including when
            the AI values cannot be converted to integers).
        """
        # Official data present: accept, the hybrid merge handles the rest.
        if rdw_kw > 0 or rdw_ccm > 0:
            return True

        try:
            if not data:
                return False
            ccm = int(data.get("ccm", 0) or 0)
            kw = int(data.get("kw", 0) or 0)
            vehicle_type = data.get("vehicle_type")

            # Reject completely empty data when there is no RDW backing
            # either; trailers legitimately have no engine figures.
            if ccm == 0 and kw == 0 and vehicle_type != "trailer":
                return False

            # Implausibly large figures (the kW cap is waived for trucks).
            if ccm > 16000 or (kw > 1500 and vehicle_type != "truck"):
                return False
            return True
        except Exception as e:
            logger.debug(f"Data Sane Error: {e}")
            return False

    @staticmethod
    def _merge_hybrid_specs(ai_data: dict, rdw_kw, rdw_ccm, rdw_fuel, rdw_engine) -> tuple:
        """
        Merge official RDW values with the AI answer; RDW wins when present.

        Explicit ``None`` values in the AI answer are treated like missing
        keys, so they fall through to the defaults instead of leaking out.

        Args:
            ai_data: AI answer; keys read are "kw", "ccm", "fuel_type" and
                "engine_code".
            rdw_kw: Official power, 0 when unknown.
            rdw_ccm: Official engine capacity, 0 when unknown.
            rdw_fuel: Official fuel type; empty, None or "Unknown" when
                there is no usable value.
            rdw_engine: Official engine code; empty or None when unknown.

        Returns:
            Tuple ``(kw, ccm, fuel_type, engine_code)``.
        """
        final_kw = rdw_kw if rdw_kw > 0 else (ai_data.get("kw") or 0)
        final_ccm = rdw_ccm if rdw_ccm > 0 else (ai_data.get("ccm") or 0)

        if rdw_fuel and rdw_fuel != "Unknown":
            final_fuel = rdw_fuel
        else:
            final_fuel = ai_data.get("fuel_type") or "petrol"

        final_engine = rdw_engine or ai_data.get("engine_code") or "Nincs adat"
        return final_kw, final_ccm, final_fuel, final_engine

    async def get_web_wisdom(self, make: str, model: str) -> str:
        """
        Collect extra context from DuckDuckGo.

        The blocking search runs in a worker thread and is bounded by
        ``search_timeout``.

        Args:
            make: Vehicle make used in the search query.
            model: Vehicle model used in the search query.

        Returns:
            The "body" fields of up to three results joined by newlines, or
            an empty string on timeout, on any search error, or when nothing
            was found.
        """
        query = f"{make} {model} technical specifications engine code fuel"
        try:
            def sync_search():
                with DDGS() as ddgs:
                    results = ddgs.text(query, max_results=3)
                    return "\n".join([r['body'] for r in results]) if results else ""

            return await asyncio.wait_for(asyncio.to_thread(sync_search), timeout=self.search_timeout)
        except asyncio.TimeoutError:
            logger.warning(f"⏱️ Web keresési időtúllépés ({make} {model})")
            return ""
        except Exception as e:
            logger.warning(f"🌐 Keresési hiba ({make}): {e}")
            return ""

    async def process_single_record(self, record_id: int):
        """
        Enrich one staging record: read -> AI processing -> hybrid gold merge.

        Steps:
            1. Lock and read the staging row, mark it
               "ai_synthesis_in_progress".
            2. Ask the AI service (with one optional web-assisted retry) and
               merge its answer with the official values.
            3. Insert the gold catalogue row if it does not exist yet and mark
               the staging row "gold_enriched".
            4. On any failure in steps 2-3, bump ``attempts`` and set the
               status back to "unverified" (or "suspended" once
               ``max_attempts`` is reached).

        Args:
            record_id: Primary key of the VehicleModelDefinition row.
        """
        make, m_name, v_type = "", "", "car"
        web_context = ""
        # Official RDW values copied off the staging row. Fuel starts empty
        # (not "petrol") so the AI fuel type can be used when RDW has none.
        rdw_kw, rdw_ccm, rdw_fuel, rdw_engine = 0, 0, "", ""

        # STEP 1: read and flip the status.
        try:
            async with AsyncSessionLocal() as db:
                res = await db.execute(
                    select(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .with_for_update(skip_locked=True)
                )
                rec = res.scalar_one_or_none()
                # Row missing, or skipped because it is locked elsewhere.
                if not rec:
                    return

                make = rec.make
                m_name = rec.marketing_name
                v_type = rec.vehicle_class or "car"
                web_context = rec.raw_search_context or ""

                # Save the official RDW data obtained by the Hunter.
                rdw_kw = rec.power_kw or 0
                rdw_ccm = rec.engine_capacity or 0
                rdw_fuel = rec.fuel_type or ""
                rdw_engine = rec.engine_code or ""

                rec.status = "ai_synthesis_in_progress"
                await db.commit()
        except Exception as e:
            logger.error(f"🚨 Adatbázis hiba olvasáskor (ID: {record_id}): {e}")
            return

        # STEP 2: AI and web work.
        try:
            logger.info(f"🧠 AI elemzés indul: {make} {m_name}")

            # The RDW figures are passed along as context for the AI as well.
            sources_dict = {
                "web_context": web_context,
                "vehicle_class": v_type,
                "rdw_kw": rdw_kw,
                "rdw_ccm": rdw_ccm
            }
            # Count every call when it is issued, so failed or rejected
            # answers also consume the daily budget.
            self.ai_calls_today += 1
            ai_data = await AIService.get_clean_vehicle_data(make, m_name, sources_dict)

            # Weak AI answer and no RDW power either: retry with fresh web
            # search results as context.
            if (not ai_data or not ai_data.get("kw")) and rdw_kw == 0:
                logger.info(f"🔍 Adathiány, extra webes mélyfúrás: {make} {m_name}")
                extra_web_info = await self.get_web_wisdom(make, m_name)
                sources_dict["web_context"] = extra_web_info
                self.ai_calls_today += 1
                ai_data = await AIService.get_clean_vehicle_data(make, m_name, sources_dict)

            # Hybrid sanity check.
            if not ai_data:
                ai_data = {}
            if not self.is_data_sane(ai_data, rdw_kw, rdw_ccm):
                raise ValueError("Az AI válasza hallucinált ÉS hivatalos RDW adatunk sincs.")

            # Hybrid merge: RDW (official) > AI (generated).
            final_kw, final_ccm, final_fuel, final_engine = self._merge_hybrid_specs(
                ai_data, rdw_kw, rdw_ccm, rdw_fuel, rdw_engine
            )

            # Refresh the JSON payload with the trusted values as well.
            ai_data["kw"] = final_kw
            ai_data["ccm"] = final_ccm
            ai_data["engine_code"] = final_engine

            # STEP 3: store the gold record.
            make_upper = make.upper()
            # "or" (not a .get default) so an explicit null from the AI falls
            # back to the staging name instead of becoming the text "NONE".
            clean_model = str(ai_data.get("marketing_name") or m_name)[:50].upper()

            async with AsyncSessionLocal() as db:
                # A catalogue entry is identified by make + model + exact kW.
                cat_stmt = select(AssetCatalog).where(and_(
                    AssetCatalog.make == make_upper,
                    AssetCatalog.model == clean_model,
                    AssetCatalog.power_kw == final_kw
                )).limit(1)

                existing_cat = (await db.execute(cat_stmt)).scalar_one_or_none()

                if not existing_cat:
                    db.add(AssetCatalog(
                        make=make_upper,
                        model=clean_model,
                        power_kw=final_kw,
                        engine_capacity=final_ccm,
                        fuel_type=final_fuel,
                        vehicle_class=v_type,
                        factory_data=ai_data  # enriched JSON
                    ))
                    logger.info(f"✨ ÚJ ARANY REKORD (HIBRID): {make_upper} {clean_model} ({final_ccm}ccm, {final_kw}kW)")

                # Update the staging row with the trusted values.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == record_id)
                    .values(
                        status="gold_enriched",
                        technical_code=ai_data.get("technical_code") or f"REF-{record_id}",
                        engine_capacity=final_ccm,
                        power_kw=final_kw,
                        updated_at=func.now()
                    )
                )
                await db.commit()

        except Exception as e:
            # STEP 4: error handling.
            logger.error(f"🚨 Hiba a(z) {record_id} rekordnál ({make} {m_name}): {e}")
            try:
                async with AsyncSessionLocal() as db:
                    await db.execute(
                        update(VehicleModelDefinition)
                        .where(VehicleModelDefinition.id == record_id)
                        .values(
                            attempts=VehicleModelDefinition.attempts + 1,
                            last_error=str(e)[:200],
                            # The CASE compares the pre-update attempts value,
                            # hence "max_attempts - 1".
                            status=case(
                                (VehicleModelDefinition.attempts >= self.max_attempts - 1, "suspended"),
                                else_="unverified"
                            ),
                            updated_at=func.now()
                        )
                    )
                    await db.commit()
            except Exception as db_err:
                logger.critical(f"💀 Végzetes adatbázis hiba a fallback mentésnél: {db_err}")

    async def run(self):
        """
        Main polling loop; runs until the task is cancelled.

        Each round fetches up to ``batch_size`` ids of records that are
        "unverified" or "awaiting_ai_synthesis" and still below
        ``max_attempts``, then processes them one by one with a random pause
        in between. Sleeps one hour when the AI budget is used up and one
        minute when there is nothing to do.
        """
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Napi limit: {self.daily_ai_limit})")

        while True:
            try:
                if not self.check_budget():
                    logger.warning("💰 AI Keret kimerült. Alvás 1 órát.")
                    await asyncio.sleep(3600)
                    continue

                async with AsyncSessionLocal() as db:
                    stmt = select(VehicleModelDefinition.id).where(and_(
                        VehicleModelDefinition.status.in_(["unverified", "awaiting_ai_synthesis"]),
                        VehicleModelDefinition.attempts < self.max_attempts
                    )).limit(self.batch_size)

                    res = await db.execute(stmt)
                    ids = [r[0] for r in res.fetchall()]

                if not ids:
                    await asyncio.sleep(60)
                    continue

                for rid in ids:
                    # Re-check per record so one batch cannot overshoot the
                    # daily limit; the outer loop then logs and sleeps.
                    if not self.check_budget():
                        break
                    await self.process_single_record(rid)
                    await asyncio.sleep(random.uniform(5.0, 15.0))  # spare the GPU

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
|
|
|
|
# Script entry point: run the worker until it is interrupted from the keyboard.
if __name__ == "__main__":
    try:
        worker = TechEnricher()
        asyncio.run(worker.run())
    except KeyboardInterrupt:
        logger.info("🛑 Alchemist Pro leállítva.")