feat: Robot ecosystem v1.2.6 - Google Search RAG & Master-Merge logic stabilized

This commit is contained in:
2026-02-17 22:44:57 +00:00
parent 2def6b2201
commit b11b9bce87
25 changed files with 3192 additions and 789 deletions

View File

@@ -3,133 +3,113 @@ import httpx
import logging
import os
import datetime
import json
from sqlalchemy import text, select, update
from sqlalchemy import select, and_
from sqlalchemy.exc import IntegrityError
from app.db.session import SessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.audit import ProcessLog
from app.services.ai_service import AIService
from app.services.email_manager import EmailManager # Feltételezve, hogy létezik
# Configure the root logger to emit INFO and above.
logging.basicConfig(level=logging.INFO)
# NOTE(review): `logger` is bound twice in a row, so the second assignment wins
# and records are emitted under the "Robot-Bulk-Master" name. The two lines look
# like the old and new revision of the same statement — confirm which one should stay.
logger = logging.getLogger("Robot-v1.1.0-Master-Enricher")
logger = logging.getLogger("Robot-Bulk-Master")
class TechEnricher:
"""
Master Enricher v1.1.0 - Hybrid RDW & AI Clean Edition
- Goal: clean up and enrich the vehicle_model_definitions (Master) table.
- Keeps the v1.0.4 RDW logic, but supplements it with AI for noisy data (e.g. Yamaha 4HN).
"""
# RDW open-data endpoint queried by fetch_rdw_tech_data.
API_URL = "https://opendata.rdw.nl/resource/kyri-nuah.json"
# Optional app token read from the RDW_APP_TOKEN environment variable.
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
# Sent with every RDW request; empty when no token is configured.
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
@classmethod
def clean_num(cls, v):
    """Convert a loosely-typed numeric value to ``int``.

    The value is passed through ``float`` first, so numeric strings with a
    decimal part (e.g. ``"12.7"``) are accepted and truncated toward zero.

    Args:
        v: Any value; typically a string or number taken from an API payload.

    Returns:
        The truncated integer, or ``None`` when ``v`` is falsy (``None``,
        ``""``, ``0``) or cannot be converted to a finite number.
    """
    try:
        return int(float(v)) if v else None
    # Only conversion failures mean "no value": TypeError for unsupported
    # types, ValueError for non-numeric text or NaN, OverflowError for
    # infinities. Anything else (e.g. KeyboardInterrupt) must propagate.
    except (TypeError, ValueError, OverflowError):
        return None
# Look up a single RDW record for a make/model pair; returns the first row or None.
# NOTE(review): this span appears to interleave two revisions of the method
# (two consecutive `params` assignments, two `client.get` calls with different
# timeouts, statements after an unconditional `return None`, two `except`
# clauses). The original indentation is not available here, so the intended
# nesting must be confirmed against the real file.
@classmethod
async def fetch_rdw_tech_data(cls, make, model):
"""The v1.0.4 RDW lookup logic."""
# Remove the make from the upper-cased model name; names shorter than 2 characters are not queried.
clean_model = str(model).upper().replace(str(make).upper(), "").strip()
if len(clean_model) < 2: return None
# Query parameters: `merk`, `handelsbenaming`, and a `$limit` of 1 row.
params = {"merk": make.upper(), "handelsbenaming": clean_model, "$limit": 1}
# Variant that sends the whole stripped, upper-cased model string instead of `clean_model`.
params = {"merk": make.upper(), "handelsbenaming": str(model).strip().upper(), "$limit": 1}
async with httpx.AsyncClient(headers=cls.HEADERS) as client:
try:
await asyncio.sleep(1.1) # RDW rate-limit protection
resp = await client.get(cls.API_URL, params=params, timeout=20)
# On HTTP 200 return the first element of the JSON list, or None when the list is empty.
if resp.status_code == 200:
data = resp.json()
return data[0] if data else None
return None
except Exception as e:
logger.error(f"❌ RDW API Hiba: {e}")
return None
# Variant with a 15-second timeout; resp.json() is evaluated twice on the success path.
resp = await client.get(cls.API_URL, params=params, timeout=15)
return resp.json()[0] if resp.status_code == 200 and resp.json() else None
# NOTE(review): the bare `except` returns None for any error, without logging it.
except: return None
# Process one batch of up to 30 "unverified" records: fetch RDW data, optionally
# ask the AI service for cleaned data, commit per record, then store a ProcessLog
# row and call send_report_email with the collected statistics.
# NOTE(review): indentation is missing in this view, so the nesting described in
# the comments below is inferred from statement order — confirm against the real file.
@classmethod
async def run(cls):
logger.info("🚀 Master Enricher v1.1.0 INDUL...")
start_time = datetime.datetime.now()
# Counters plus a list of "old name -> new name" strings for the report.
stats = {"processed": 0, "failed": 0, "cleaned": []}
async with SessionLocal() as db:
# Only look at the Master records that have not been verified yet
stmt = select(VehicleModelDefinition).where(
VehicleModelDefinition.status == "unverified"
).limit(30) # Smaller batch for safety
res = await db.execute(stmt)
masters = res.scalars().all()
# Nothing to do: return before any log row is written or email is sent.
if not masters:
logger.info("😴 Nincs dúsításra váró adat.")
return
for master in masters:
try:
logger.info(f"🧪 Feldolgozás: {master.make} {master.marketing_name}")
# Step 1: fetch the RDW data (v1.0.4 logic)
rdw_data = await cls.fetch_rdw_tech_data(master.make, master.marketing_name)
# Step 2: ask the AI for help if RDW is not enough or the name is 'noisy' (e.g. 4HN)
# If the name contains suspicious codes, the AI cleans it up
if not rdw_data or "(" in master.marketing_name or len(master.marketing_name) < 5:
ai_data = await AIService.get_clean_vehicle_data(
master.make, master.marketing_name, master.vehicle_type
)
if ai_data:
old_name = master.marketing_name
# Each field falls back to its current value when the AI answer lacks the key,
# except `specifications`, which is replaced by {} when "maintenance" is missing.
master.marketing_name = ai_data.get("marketing_name", old_name)
master.technical_code = ai_data.get("technical_code", master.technical_code)
master.engine_capacity = ai_data.get("ccm", master.engine_capacity)
master.power_kw = ai_data.get("kw", master.power_kw)
master.specifications = ai_data.get("maintenance", {})
stats["cleaned"].append(f"{old_name} -> {master.marketing_name}")
# If we had RDW data but the AI did not overwrite it, load the RDW values
# NOTE(review): the status is only changed below, after this check, and the batch
# was selected with status == "unverified", so the status half of this condition
# looks always true — RDW values (possibly None from clean_num) would then
# overwrite the AI values. Confirm the intended behavior.
if rdw_data and master.status == "unverified":
master.power_kw = cls.clean_num(rdw_data.get("netto_maximum_vermogen_kw"))
master.engine_capacity = cls.clean_num(rdw_data.get("cilinderinhoud"))
master.axle_count = cls.clean_num(rdw_data.get("aantal_assen"))
master.status = "ai_enriched"
stats["processed"] += 1
await db.commit()
except Exception as e:
# Count the failure and roll the session back before the next record.
logger.error(f"❌ Hiba a(z) {master.id} rekordnál: {e}")
stats["failed"] += 1
await db.rollback()
# 3. SAVE THE REPORT AND SEND THE EMAIL
end_time = datetime.datetime.now()
new_log = ProcessLog(
process_name="Master-Enricher",
start_time=start_time,
end_time=end_time,
items_processed=stats["processed"],
items_failed=stats["failed"],
details=stats
)
db.add(new_log)
await db.commit()
# Send the email (dummy call to the existing EmailManager)
await cls.send_report_email(stats)
# Build a plain-text report from `stats` (keys 'processed', 'failed', 'cleaned')
# and log that it was sent. The actual send call below is commented out, so
# `report_body` is assembled but not passed anywhere.
@classmethod
async def send_report_email(cls, stats):
report_body = f"Reggeli Robot Jelentés - {datetime.date.today()}\n\n"
report_body += f"Sikeresen feldolgozva: {stats['processed']}\n"
report_body += f"Hibák: {stats['failed']}\n\n"
# One cleaned-name entry per line.
report_body += "Tisztított nevek:\n" + "\n".join(stats['cleaned'])
# NOTE(review): this "continuous mode starting" log line looks like it belongs to
# a different revision of `run` rather than to the email report — confirm.
logger.info("🚀 Master-Merge Robot FOLYAMATOS ÜZEMMÓD INDUL...")
logger.info("📧 Email jelentés elküldve az adminnak.")
# EmailManager.send_admin_notification("Robot Report", report_body)
# Continuous-mode processing loop: repeatedly select up to 50 IDs of "unverified"
# records and handle each ID in its own session.
# NOTE(review): these lines have no `def` header of their own in this view; they
# use `cls` and look like the body of a newer `run` classmethod. Indentation is
# missing, so the nesting described below is inferred — confirm against the real file.
while True: # Continuous loop until the data runs out
async with SessionLocal() as main_db:
stmt = select(VehicleModelDefinition.id).where(
VehicleModelDefinition.status == "unverified"
).limit(50) # Take 50 IDs at a time
res = await main_db.execute(stmt)
ids = res.scalars().all()
# The loop only ends when no "unverified" record is left.
# NOTE(review): a record that raises, or that gets no AI data, appears to keep its
# "unverified" status and would be selected again on the next pass — confirm the
# loop can terminate in those cases.
if not ids:
logger.info("🏁 Minden rekord feldolgozva. A robot megáll.")
break
logger.info(f"📦 Új csomag indítása: {len(ids)} rekord.")
for m_id in ids:
# A fresh session per record.
async with SessionLocal() as db:
try:
current = await db.get(VehicleModelDefinition, m_id)
if not current: continue
logger.info(f"🧪 Feldolgozás: {current.make} {current.marketing_name} (ID: {m_id})")
rdw_data = await cls.fetch_rdw_tech_data(current.make, current.marketing_name)
if rdw_data:
# Use the RDW value when it converts to a non-zero int, otherwise keep the stored one.
# NOTE(review): a non-numeric RDW value raises here instead of going through clean_num.
current.engine_capacity = int(float(rdw_data.get("cilinderinhoud", 0))) or current.engine_capacity
current.power_kw = int(float(rdw_data.get("netto_maximum_vermogen_kw", 0))) or current.power_kw
ai_data = await AIService.get_clean_vehicle_data(current.make, current.marketing_name, current.vehicle_type)
if ai_data:
# "N/A" marks a missing technical code; the capacity falls back to the stored value.
tech_code = ai_data.get("technical_code") or "N/A"
new_ccm = ai_data.get("ccm") or current.engine_capacity
master_record = None
if tech_code and tech_code != "N/A":
# Look for another, already enriched record with the same make, technical code and capacity.
stmt_master = select(VehicleModelDefinition).where(and_(
VehicleModelDefinition.make == current.make,
VehicleModelDefinition.technical_code == tech_code,
VehicleModelDefinition.engine_capacity == new_ccm,
VehicleModelDefinition.status == 'ai_enriched',
VehicleModelDefinition.id != m_id
))
# NOTE(review): scalar_one_or_none presumably raises when several rows match — verify.
master_record = (await db.execute(stmt_master)).scalar_one_or_none()
if master_record:
# Merge: fold this record's name and the AI synonyms into the master's synonyms,
# then mark this record as a duplicate pointing at the master.
logger.info(f"🔗 Merge: ID:{m_id} -> Master ID:{master_record.id}")
syns = set(master_record.synonyms or [])
syns.update(ai_data.get("synonyms", []))
syns.add(current.marketing_name)
master_record.synonyms = list(syns)
current.status = "duplicate"
current.parent_id = master_record.id
else:
# No master found: enrich this record in place. A missing technical code becomes "N/A-<id>".
current.technical_code = tech_code if tech_code != "N/A" else f"N/A-{m_id}"
current.marketing_name = ai_data.get("marketing_name", current.marketing_name)
current.engine_capacity = new_ccm
current.power_kw = ai_data.get("kw") or current.power_kw
# year_from / year_to become None and synonyms becomes [] when the AI answer lacks them.
current.year_from = ai_data.get("year_from")
current.year_to = ai_data.get("year_to")
current.synonyms = ai_data.get("synonyms", [])
if ai_data.get("maintenance"):
# NOTE(review): the existing dict is updated in place and the same object is
# assigned back — confirm the ORM detects this as a change.
old_spec = current.specifications or {}
old_spec.update(ai_data.get("maintenance"))
current.specifications = old_spec
current.status = "ai_enriched"
else:
# Presumably the no-AI-data branch: only a placeholder technical code is set.
if not current.technical_code:
current.technical_code = f"UNKNOWN-{m_id}"
current.updated_at = datetime.datetime.now()
await db.commit()
logger.info(f"✅ Mentve (ID: {m_id})")
except Exception as e:
# Undo this record's changes and carry on with the next ID.
await db.rollback()
logger.error(f"❌ Hiba ID:{m_id}: {e}")
finally:
# Explicit close in addition to the `async with` block above.
await db.close()
# Script entry point: run the TechEnricher.run coroutine to completion.
if __name__ == "__main__":
asyncio.run(TechEnricher.run())