refakotorálás előtti állapot
This commit is contained in:
24
backend/app/workers/vehicle/mapping_dictionary.py
Normal file
24
backend/app/workers/vehicle/mapping_dictionary.py
Normal file
@@ -0,0 +1,24 @@
|
||||
# MB 2.0: Strukturált adat-szótár a források egységesítéséhez
|
||||
VEHICLE_MAPPING = {
|
||||
"os-vehicle-db": {
|
||||
"brand": "make",
|
||||
"model": "model",
|
||||
"year": "year",
|
||||
"specs": {
|
||||
"engine_hp": "specs.engine.hp",
|
||||
"fuel": "specs.engine.fuel",
|
||||
"body": "specs.body.type"
|
||||
}
|
||||
}
|
||||
# Ide jönnek majd a carquery és fipe mappolások
|
||||
}
|
||||
|
||||
def normalize_make(make: str) -> str:
|
||||
""" Egységes márkanevek (pl. Mercedes-Benz vs Mercedes) """
|
||||
m = make.upper().strip()
|
||||
synonyms = {
|
||||
"MERCEDES": "MERCEDES-BENZ",
|
||||
"VW": "VOLKSWAGEN",
|
||||
"ALFA": "ALFA ROMEO"
|
||||
}
|
||||
return synonyms.get(m, m)
|
||||
26
backend/app/workers/vehicle/mapping_rules.py
Normal file
26
backend/app/workers/vehicle/mapping_rules.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# /app/app/workers/vehicle/mapping_rules.py
|
||||
|
||||
SOURCE_MAPPINGS = {
|
||||
"os-vehicle-db": {
|
||||
"make": "brand",
|
||||
"model": "model_name",
|
||||
"year": "release_year",
|
||||
"power": "specs.engine.hp"
|
||||
},
|
||||
"car-query": {
|
||||
"make": "model_make_id",
|
||||
"model": "model_name",
|
||||
"year": "model_year",
|
||||
"power": "model_engine_power_ps"
|
||||
}
|
||||
}
|
||||
|
||||
def unify_data(raw_data, source_name):
|
||||
mapping = SOURCE_MAPPINGS.get(source_name, {})
|
||||
unified = {
|
||||
"normalized_make": raw_data.get(mapping.get("make"), "").upper(),
|
||||
"normalized_model": raw_data.get(mapping.get("model"), "").upper(),
|
||||
"normalized_year": raw_data.get(mapping.get("year")),
|
||||
"raw_specs": raw_data # Megtartjuk az eredetit is
|
||||
}
|
||||
return unified
|
||||
128
backend/app/workers/vehicle/robot_report.py
Normal file
128
backend/app/workers/vehicle/robot_report.py
Normal file
@@ -0,0 +1,128 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/robot_report.py
|
||||
import asyncio
|
||||
import psutil
|
||||
import pynvml
|
||||
from datetime import datetime
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.panel import Panel
|
||||
from rich.layout import Layout
|
||||
from app.core.config import settings
|
||||
|
||||
console = Console()
|
||||
|
||||
async def get_data():
|
||||
engine = create_async_engine(settings.DATABASE_URL)
|
||||
async with engine.connect() as conn:
|
||||
# Pipeline adatok (R1-R4)
|
||||
pipe = await conn.execute(text("""
|
||||
SELECT
|
||||
(SELECT count(*) FROM data.catalog_discovery WHERE status = 'pending') as r1,
|
||||
(SELECT count(*) FROM data.vehicle_model_definitions WHERE status = 'unverified') as r2,
|
||||
(SELECT count(*) FROM data.vehicle_model_definitions WHERE status = 'awaiting_ai_synthesis') as r3,
|
||||
(SELECT count(*) FROM data.vehicle_model_definitions WHERE status = 'gold_enriched') as r4
|
||||
"""))
|
||||
p_res = pipe.fetchone()
|
||||
|
||||
# AI Termelés
|
||||
ai_hr = await conn.execute(text("SELECT count(*) FROM data.vehicle_model_definitions WHERE status = 'gold_enriched' AND updated_at > NOW() - INTERVAL '1 hour'"))
|
||||
ai_day = await conn.execute(text("SELECT count(*) FROM data.vehicle_model_definitions WHERE status = 'gold_enriched' AND updated_at > NOW() - INTERVAL '24 hours'"))
|
||||
|
||||
# Market Matrix (1.3)
|
||||
market_res = await conn.execute(text("SELECT vehicle_class, market, count(*) FROM data.catalog_discovery GROUP BY 1, 2"))
|
||||
m_data = market_res.fetchall()
|
||||
|
||||
# Robot Top listák (2.1 - 2.3)
|
||||
r1_top = await conn.execute(text("SELECT make, count(*) FROM data.catalog_discovery WHERE market = 'RDW' GROUP BY 1 ORDER BY 2 DESC LIMIT 5"))
|
||||
r12_top = await conn.execute(text("SELECT make, count(*) FROM data.catalog_discovery WHERE market = 'USA_IMPORT' GROUP BY 1 ORDER BY 2 DESC LIMIT 5"))
|
||||
r14_top = await conn.execute(text("SELECT make, count(*) FROM data.catalog_discovery WHERE vehicle_class = 'motorcycle' GROUP BY 1 ORDER BY 2 DESC LIMIT 5"))
|
||||
|
||||
# Általános Top (3.1 - 3.3)
|
||||
pending_top = await conn.execute(text("SELECT make, count(*) FROM data.catalog_discovery WHERE status = 'pending' GROUP BY 1 ORDER BY 2 DESC LIMIT 5"))
|
||||
gold_top = await conn.execute(text("SELECT make, count(*) FROM data.vehicle_model_definitions WHERE status = 'gold_enriched' GROUP BY 1 ORDER BY 2 DESC LIMIT 5"))
|
||||
status_stats = await conn.execute(text("SELECT status, count(*) FROM data.vehicle_model_definitions GROUP BY 1 ORDER BY 2 DESC LIMIT 5"))
|
||||
|
||||
# Kategória Top (4.1 - 4.3)
|
||||
cat_tops = {}
|
||||
for c in ['car', 'motorcycle', 'truck']:
|
||||
res = await conn.execute(text(f"SELECT make, count(*) FROM data.catalog_discovery WHERE vehicle_class = '{c}' GROUP BY 1 ORDER BY 2 DESC LIMIT 4"))
|
||||
total = await conn.execute(text(f"SELECT count(*) FROM data.catalog_discovery WHERE vehicle_class = '{c}'"))
|
||||
cat_tops[c] = {"list": res.fetchall(), "total": total.scalar() or 0}
|
||||
|
||||
return {
|
||||
"p": p_res, "ai": (ai_hr.scalar(), ai_day.scalar()), "markets": m_data,
|
||||
"r1": r1_top.fetchall(), "r12": r12_top.fetchall(), "r14": r14_top.fetchall(),
|
||||
"pending": pending_top.fetchall(), "gold": gold_top.fetchall(), "status": status_stats.fetchall(),
|
||||
"cat": cat_tops
|
||||
}
|
||||
|
||||
def get_hw():
|
||||
try:
|
||||
pynvml.nvmlInit()
|
||||
h = pynvml.nvmlDeviceGetHandleByIndex(0)
|
||||
info = pynvml.nvmlDeviceGetMemoryInfo(h)
|
||||
return {"cpu": psutil.cpu_percent(), "ram": psutil.virtual_memory().percent, "gpu": pynvml.nvmlDeviceGetUtilizationRates(h).gpu,
|
||||
"vram": f"{int(info.used/1024**2)}/{int(info.total/1024**2)}M", "temp": pynvml.nvmlDeviceGetTemperature(h, 0)}
|
||||
except: return None
|
||||
|
||||
def mini_table(data, color="white"):
|
||||
t = Table(show_header=False, box=None, expand=True, padding=(0,1))
|
||||
t.add_column("L", ratio=3); t.add_column("R", justify="right", ratio=2)
|
||||
for r in data: t.add_row(str(r[0])[:12], f"[{color}]{r[1]:,}[/]")
|
||||
return t
|
||||
|
||||
async def main():
|
||||
try:
|
||||
d = await get_data(); hw = get_hw()
|
||||
lay = Layout()
|
||||
lay.split_column(Layout(name="head", size=3), Layout(name="body"))
|
||||
lay["body"].split_column(Layout(name="row1"), Layout(name="row2"), Layout(name="row3"), Layout(name="row4"))
|
||||
for r in ["row1", "row2", "row3", "row4"]:
|
||||
lay[r].split_row(Layout(name=f"{r}1"), Layout(name=f"{r}2"), Layout(name=f"{r}3"))
|
||||
|
||||
# --- FEJLÉC ---
|
||||
h_str = f"💻 CPU: {hw['cpu']}% | 🧠 RAM: {hw['ram']}% | 📟 VRAM: {hw['vram']} | 🔥 GPU: {hw['gpu']}% ({hw['temp']}°C)" if hw else "Hardware adatok nem elérhetőek"
|
||||
lay["head"].update(Panel(h_str, title="[bold orange3]SZÁMÍTÓGÉP ADATOK[/]", style="orange3"))
|
||||
|
||||
# --- 1. SOR ---
|
||||
r11_t = Table(show_header=False, box=None, expand=True)
|
||||
r11_t.add_row("R1 (Hunter)", f"{d['p'][0]:,}"); r11_t.add_row("R2 (Res)", f"{d['p'][1]:,}")
|
||||
r11_t.add_row("R3 (Wait)", f"{d['p'][2]:,}"); r11_t.add_row("R4 (Gold)", f"[green]{d['p'][3]:,}[/]")
|
||||
r11_t.add_row("AI HR/DAY", f"[cyan]{d['ai'][0]}[/] / [yellow]{d['ai'][1]}[/]")
|
||||
lay["row11"].update(Panel(r11_t, title="1.1 PIPE & AI"))
|
||||
|
||||
lay["row12"].update(Panel(mini_table(d['gold'], "green"), title="1.2 TOP MÁRKÁK"))
|
||||
|
||||
r13_t = Table(show_header=True, box=None, expand=True, header_style="bold blue")
|
||||
r13_t.add_column("Típus", ratio=2); r13_t.add_column("EU", justify="right"); r13_t.add_column("USA", justify="right")
|
||||
m_map = {c: {"EU": 0, "USA": 0} for c in ['car', 'motorcycle', 'truck', 'bus']}
|
||||
for c, m, q in d['markets']:
|
||||
key = 'USA' if m == 'USA_IMPORT' else 'EU'
|
||||
if c in m_map: m_map[c][key] += q
|
||||
for c, v in m_map.items(): r13_t.add_row(c[:4].upper(), f"{v['EU']:,}", f"{v['USA']:,}")
|
||||
lay["row13"].update(Panel(r13_t, title="1.3 MARKET MATRIX"))
|
||||
|
||||
# --- 2. SOR (ROBOTOK) ---
|
||||
lay["row21"].update(Panel(mini_table(d['r1']), title="2.1 ROBOT 1 (RDW)"))
|
||||
lay["row22"].update(Panel(mini_table(d['r12']), title="2.2 ROBOT 1.2 (USA)"))
|
||||
lay["row23"].update(Panel(mini_table(d['r14']), title="2.3 ROBOT 1.4 (BIKE)"))
|
||||
|
||||
# --- 3. SOR (ÖSSZESÍTŐ) ---
|
||||
lay["row31"].update(Panel(mini_table(d['pending']), title="3.1 PENDING TOP"))
|
||||
lay["row32"].update(Panel(mini_table(d['gold'], "green"), title="3.2 GOLD TOP", border_style="green"))
|
||||
lay["row33"].update(Panel(mini_table(d['status'], "blue"), title="3.3 STATUS", border_style="blue"))
|
||||
|
||||
# --- 4. SOR (KATEGÓRIÁK) ---
|
||||
for i, (k, l) in enumerate([('car', '4.1 AUTO'), ('motorcycle', '4.2 MOTOR'), ('truck', '4.3 TEHER')], 1):
|
||||
t = mini_table(d['cat'][k]['list'], "yellow")
|
||||
t.add_row("[bold]TOTAL[/]", f"[bold yellow]{d['cat'][k]['total']:,}[/]")
|
||||
lay[f"row4{i}"].update(Panel(t, title=l, border_style="yellow"))
|
||||
|
||||
console.print(lay)
|
||||
except Exception as e:
|
||||
console.print(f"[bold red]HIBA TÖRTÉNT:[/] {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
121
backend/app/workers/vehicle/vehicle_data_loader.py
Normal file
121
backend/app/workers/vehicle/vehicle_data_loader.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
# MB 2.0 Naplózási beállítások
|
||||
logger = logging.getLogger("Data-Loader-Master")
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
||||
|
||||
class VehicleDataLoader:
|
||||
# Ellenőrzött, működő források
|
||||
SOURCES = {
|
||||
"car-data-kohut": "https://raw.githubusercontent.com/DanielKohut/car-data/master/car_data.json",
|
||||
"car-list-matth": "https://raw.githubusercontent.com/matthlavacka/car-list/master/car-list.json"
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def normalize_name(name: str) -> str:
|
||||
""" Alapvető szövegtisztítás: nagybetű, szóközmentesítés. """
|
||||
if not name: return ""
|
||||
n = str(name).upper().strip()
|
||||
# Gyakori szinonimák egységesítése
|
||||
synonyms = {"VW": "VOLKSWAGEN", "MERCEDES": "MERCEDES-BENZ", "ALFA": "ALFA ROMEO"}
|
||||
return synonyms.get(n, n)
|
||||
|
||||
async def fetch_json(self, url: str):
|
||||
""" Biztonságos letöltés időtúllépés kezeléssel. """
|
||||
async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
|
||||
try:
|
||||
resp = await client.get(url)
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
logger.error(f"❌ Letöltési hiba ({resp.status_code}): {url}")
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Kommunikációs hiba: {url} -> {e}")
|
||||
return None
|
||||
|
||||
def map_source_data(self, source_name, raw_data):
|
||||
"""
|
||||
Mapping Layer: Átfordítja a különböző források JSON szerkezetét
|
||||
a mi egységes data.reference_lookup sémánkra.
|
||||
"""
|
||||
unified_entries = []
|
||||
|
||||
try:
|
||||
if source_name == "car-data-kohut":
|
||||
# Szerkezet: list[{"brand": "...", "models": ["...", ...]}]
|
||||
for brand_item in raw_data:
|
||||
make = self.normalize_name(brand_item.get("brand"))
|
||||
for model in brand_item.get("models", []):
|
||||
unified_entries.append({
|
||||
"make": make, "model": self.normalize_name(model),
|
||||
"year": None, "specs": json.dumps({"source": "kohut"}),
|
||||
"source": source_name, "source_id": None
|
||||
})
|
||||
|
||||
elif source_name == "car-list-matth":
|
||||
# Szerkezet: list[{"brand": "...", "models": ["...", ...]}]
|
||||
for brand_item in raw_data:
|
||||
make = self.normalize_name(brand_item.get("brand"))
|
||||
for model in brand_item.get("models", []):
|
||||
unified_entries.append({
|
||||
"make": make, "model": self.normalize_name(model),
|
||||
"year": None, "specs": json.dumps({"source": "matthlavacka"}),
|
||||
"source": source_name, "source_id": None
|
||||
})
|
||||
|
||||
elif source_name == "os-vehicle-db":
|
||||
# Szerkezet: list[{"make": "...", "model": "...", "year": ...}]
|
||||
for item in raw_data:
|
||||
unified_entries.append({
|
||||
"make": self.normalize_name(item.get("make")),
|
||||
"model": self.normalize_name(item.get("model")),
|
||||
"year": item.get("year"),
|
||||
"specs": json.dumps(item),
|
||||
"source": source_name,
|
||||
"source_id": str(item.get("id", ""))
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"⚠️ Mapping hiba a(z) {source_name} feldolgozásakor: {e}")
|
||||
|
||||
return unified_entries
|
||||
|
||||
async def save_to_db(self, entries):
|
||||
""" Batch Upsert folyamat az adatbázisba. """
|
||||
if not entries: return
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
stmt = text("""
|
||||
INSERT INTO data.reference_lookup (make, model, year, specs, source, source_id)
|
||||
VALUES (:make, :model, :year, :specs, :source, :source_id)
|
||||
ON CONFLICT ON CONSTRAINT _ref_lookup_uc
|
||||
DO UPDATE SET specs = EXCLUDED.specs, updated_at = NOW()
|
||||
""")
|
||||
|
||||
try:
|
||||
# 1000-es csomagokban küldjük be
|
||||
for i in range(0, len(entries), 1000):
|
||||
batch = entries[i:i+1000]
|
||||
for row in batch:
|
||||
await db.execute(stmt, row)
|
||||
await db.commit()
|
||||
logger.info(f"💾 Mentve: {min(i + 1000, len(entries))} / {len(entries)}")
|
||||
except Exception as e:
|
||||
await db.rollback()
|
||||
logger.error(f"🚨 Adatbázis hiba: {e}")
|
||||
|
||||
async def run_sync(self):
|
||||
logger.info("🚀 Data Loader (Fast Lane Support) elindul...")
|
||||
for name, url in self.SOURCES.items():
|
||||
raw_json = await self.fetch_json(url)
|
||||
if raw_json:
|
||||
logger.info(f"🧹 {name} feldolgozása...")
|
||||
unified = self.map_source_data(name, raw_json)
|
||||
await self.save_to_db(unified)
|
||||
logger.info("🏁 Minden forrás szinkronizálva.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(VehicleDataLoader().run_sync())
|
||||
@@ -60,8 +60,8 @@ class DiscoveryEngine:
|
||||
async def seed_manual_bootstrap():
|
||||
""" 2. FÁZIS: Alapozó adatok rögzítése """
|
||||
initial_data = [
|
||||
{"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)", "vehicle_class": "car"},
|
||||
{"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)", "vehicle_class": "car"}
|
||||
{"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)"}, # vehicle_class törölve
|
||||
{"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)"}
|
||||
]
|
||||
try:
|
||||
async with AsyncSessionLocal() as db:
|
||||
@@ -131,13 +131,20 @@ class DiscoveryEngine:
|
||||
elif "Motorfiets" in v_kind: v_class = 'motorcycle'
|
||||
else: v_class = 'truck'
|
||||
|
||||
# A MÁGIA: Különbözeti Szinkronizáció SQL
|
||||
# A MÁGIA: Különbözeti Szinkronizáció SQL + Explicit Type Casting
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery (make, model, vehicle_class, status, priority_score)
|
||||
SELECT :make, :model, :v_class, 'pending', :priority
|
||||
SELECT
|
||||
CAST(:make AS VARCHAR),
|
||||
CAST(:model AS VARCHAR),
|
||||
CAST(:v_class AS VARCHAR),
|
||||
'pending',
|
||||
:priority
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM data.vehicle_model_definitions
|
||||
WHERE make = :make AND marketing_name = :model AND status = 'gold_enriched'
|
||||
WHERE make = CAST(:make AS VARCHAR)
|
||||
AND marketing_name = CAST(:model AS VARCHAR)
|
||||
AND status = 'gold_enriched'
|
||||
)
|
||||
ON CONFLICT (make, model)
|
||||
DO UPDATE SET priority_score = EXCLUDED.priority_score
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
# /app/app/workers/vehicle/vehicle_robot_1_2_nhtsa_fetcher.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
logger = logging.getLogger("Robot-1-2-NHTSA-EU-Only")
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
class NHTSAFetcher:
|
||||
API_URL = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}?format=json"
|
||||
|
||||
@classmethod
|
||||
async def get_eu_makes(cls):
|
||||
"""Lekéri azokat a márkákat, amik már benne vannak az adatbázisban EU-s forrásból."""
|
||||
async with AsyncSessionLocal() as db:
|
||||
# Csak azokat a márkákat keressük az USA-ban, amiket az EU-ban (RDW) már láttunk
|
||||
query = text("SELECT DISTINCT make FROM data.catalog_discovery WHERE market = 'EU' OR source = 'RDW'")
|
||||
res = await db.execute(query)
|
||||
return [row[0] for row in res.fetchall()]
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🚀 Robot 1.2 (EU-Guided NHTSA) indítása...")
|
||||
|
||||
while True:
|
||||
target_makes = await cls.get_eu_makes()
|
||||
if not target_makes:
|
||||
logger.warning("⚠️ Még nincs EU-s márka az adatbázisban. Várakozás...")
|
||||
await asyncio.sleep(60)
|
||||
continue
|
||||
|
||||
# 2026-tól megyünk vissza a múltba
|
||||
for year in range(2026, 1950, -1):
|
||||
async with AsyncSessionLocal() as db:
|
||||
for make in target_makes:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=20.0) as client:
|
||||
url = cls.API_URL.format(make=make, year=year)
|
||||
resp = await client.get(url)
|
||||
if resp.status_code != 200: continue
|
||||
|
||||
models = resp.json().get("Results", [])
|
||||
inserted = 0
|
||||
for m in models:
|
||||
model_name = m.get("Model_Name").upper().strip()
|
||||
# USA_IMPORT jelölés, de csak EU-s márkákhoz!
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery
|
||||
(make, model, vehicle_class, status, market, model_year, priority_score, source)
|
||||
VALUES (:make, :model, 'car', 'pending', 'USA_IMPORT', :year, 5, 'NHTSA-EU-FILTERED')
|
||||
ON CONFLICT ON CONSTRAINT _make_model_market_year_uc DO NOTHING
|
||||
""")
|
||||
res = await db.execute(query, {"make": make, "model": model_name, "year": year})
|
||||
if res.rowcount > 0: inserted += 1
|
||||
|
||||
if inserted > 0:
|
||||
logger.info(f"✅ {make} ({year}): {inserted} variáns dúsítva az USA-ból.")
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Hiba: {make} {year}: {e}")
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(NHTSAFetcher.run())
|
||||
56
backend/app/workers/vehicle/vehicle_robot_1_4_bike_hunter.py
Normal file
56
backend/app/workers/vehicle/vehicle_robot_1_4_bike_hunter.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
logger = logging.getLogger("Robot-1-4-Bike")
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
BIKE_MAKES = [
|
||||
"HONDA", "YAMAHA", "KAWASAKI", "SUZUKI", "HARLEY-DAVIDSON",
|
||||
"BMW", "DUCATI", "KTM", "TRIUMPH", "APRILIA", "MOTO GUZZI", "INDIAN"
|
||||
]
|
||||
|
||||
class BikeHunter:
|
||||
API_URL = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/{make}/modelyear/{year}/vehicleType/motorcycle?format=json"
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🏍️ Robot 1.4 (Bike Hunter) indítása...")
|
||||
# 2026-tól 1970-ig pörgetjük a motorokat
|
||||
years = range(2026, 1969, -1)
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
for year in years:
|
||||
for make in BIKE_MAKES:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=20.0) as client:
|
||||
resp = await client.get(cls.API_URL.format(make=make, year=year))
|
||||
if resp.status_code != 200: continue
|
||||
models = resp.json().get("Results", [])
|
||||
|
||||
inserted = 0
|
||||
for m in models:
|
||||
model_name = m.get("Model_Name").upper().strip()
|
||||
# TISZTA SQL - Nincs Simon!
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery
|
||||
(make, model, vehicle_class, status, market, model_year, priority_score, source)
|
||||
VALUES (:make, :model, 'motorcycle', 'pending', 'USA_IMPORT', :year, 8, 'NHTSA-V1-BIKE')
|
||||
ON CONFLICT ON CONSTRAINT _make_model_market_year_uc DO NOTHING
|
||||
""")
|
||||
await db.execute(query, {"make": make, "model": model_name, "year": year})
|
||||
inserted += 1
|
||||
|
||||
if inserted > 0:
|
||||
logger.info(f"🏍️ {make} ({year}): {inserted} új motor rögzítve.")
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Bike Error {make} ({year}): {e}")
|
||||
|
||||
# Évjáratonként egy pici pihenő az API-nak
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(BikeHunter.run())
|
||||
66
backend/app/workers/vehicle/vehicle_robot_1_5_heavy_eu.py
Normal file
66
backend/app/workers/vehicle/vehicle_robot_1_5_heavy_eu.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# /app/app/workers/vehicle/vehicle_robot_1_5_heavy_eu.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
logger = logging.getLogger("Robot-1-5-Heavy-EU")
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
class HeavyEUHunter:
|
||||
# RDW Open Data - Hollandia az EU kapuja
|
||||
RDW_URL = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
|
||||
@classmethod
|
||||
async def fetch_rdw_heavy(cls, vehicle_type: str):
|
||||
"""
|
||||
vehicle_type: 'Vrachtwagen' (Teher), 'Bus', 'Kampeerauto' (Lakóautó)
|
||||
"""
|
||||
# Lekérjük az összes egyedi márka-típus párost
|
||||
query_url = f"{cls.RDW_URL}?voertuigsoort={vehicle_type}&$select=merk,handelsbenaming&$limit=10000"
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
try:
|
||||
resp = await client.get(query_url)
|
||||
return resp.json() if resp.status_code == 200 else []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ RDW Error: {e}")
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🚛 Robot 1.5 (EU Heavy Duty) indítása...")
|
||||
# Definíciók: RDW név -> Mi kategóriánk
|
||||
job_list = {
|
||||
"Vrachtwagen": "truck",
|
||||
"Bus": "bus",
|
||||
"Kampeerauto": "rv"
|
||||
}
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
for rdw_name, internal_class in job_list.items():
|
||||
logger.info(f"📥 {rdw_name} adatok letöltése...")
|
||||
data = await cls.fetch_rdw_heavy(rdw_name)
|
||||
|
||||
inserted = 0
|
||||
for item in data:
|
||||
make = item.get('merk', '').upper().strip()
|
||||
model = item.get('handelsbenaming', '').upper().strip()
|
||||
|
||||
if not make or not model: continue
|
||||
|
||||
# Szűrés a kért EU márkákra + amik jönnek az RDW-ből
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery
|
||||
(make, model, vehicle_class, status, market, priority_score, source)
|
||||
VALUES (:make, :model, :v_class, 'pending', 'EU', 20, 'RDW-HEAVY')
|
||||
ON CONFLICT ON CONSTRAINT _make_model_market_year_uc DO NOTHING
|
||||
""")
|
||||
res = await db.execute(query, {"make": make, "model": model, "v_class": internal_class})
|
||||
if res.rowcount > 0: inserted += 1
|
||||
|
||||
await db.commit()
|
||||
logger.info(f"✅ {rdw_name}: {inserted} új EU-s nagygép rögzítve.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(HeavyEUHunter.run())
|
||||
0
backend/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
Executable file → Normal file
0
backend/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
Executable file → Normal file
@@ -1,228 +0,0 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from sqlalchemy import text, select
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# MB 2.0 Szigorú naplózás
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
|
||||
stream=sys.stdout
|
||||
)
|
||||
logger = logging.getLogger("Robot-1")
|
||||
|
||||
class CatalogHunter:
|
||||
"""
|
||||
Vehicle Robot 1.7.3: Mega-Hunter (Industrial Master Version)
|
||||
Teljes körű RDW adatbányászat Variant/Version szintű granuláltsággal.
|
||||
"""
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
|
||||
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
BATCH_SIZE = 50
|
||||
|
||||
@classmethod
|
||||
def normalize(cls, text_val: str) -> str:
|
||||
if not text_val: return ""
|
||||
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
|
||||
|
||||
@classmethod
|
||||
def parse_int(cls, value) -> int:
|
||||
try:
|
||||
if value is None or str(value).strip() == "": return 0
|
||||
return int(float(value))
|
||||
except (ValueError, TypeError): return 0
|
||||
|
||||
@classmethod
|
||||
def parse_float(cls, value) -> float:
|
||||
try:
|
||||
if value is None or str(value).strip() == "": return 0.0
|
||||
return float(value)
|
||||
except (ValueError, TypeError): return 0.0
|
||||
|
||||
@classmethod
|
||||
async def fetch_tech_details(cls, client, plate):
|
||||
""" Extra technikai adatok (kW, Euro, CO2, Motorkód) párhuzamos lekérése. """
|
||||
urls = {
|
||||
"fuel": f"{cls.RDW_FUEL}?kenteken={plate}",
|
||||
"engine": f"{cls.RDW_ENGINE}?kenteken={plate}"
|
||||
}
|
||||
results = {
|
||||
"power_kw": 0,
|
||||
"engine_code": None,
|
||||
"euro_class": None,
|
||||
"fuel_desc": "Unknown",
|
||||
"co2": 0,
|
||||
"consumption": 0.0
|
||||
}
|
||||
try:
|
||||
resps = await asyncio.gather(*[client.get(u, headers=cls.HEADERS) for u in urls.values()])
|
||||
|
||||
# Üzemanyag és emisszió (8ys7-d773)
|
||||
if resps[0].status_code == 200 and resps[0].json():
|
||||
f = resps[0].json()[0]
|
||||
p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
|
||||
p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
|
||||
results.update({
|
||||
"power_kw": max(p1, p2),
|
||||
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
|
||||
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
|
||||
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
|
||||
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
|
||||
})
|
||||
|
||||
# Motorkód (jh96-v4pq)
|
||||
if resps[1].status_code == 200 and resps[1].json():
|
||||
results["engine_code"] = resps[1].json()[0].get("motorcode")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
|
||||
clean_make = make_name.strip().upper()
|
||||
clean_model = model_name.strip().upper()
|
||||
logger.info(f"🎯 MEGA-VADÁSZAT INDUL: {clean_make} {clean_model}")
|
||||
|
||||
current_offset = 0
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
while True:
|
||||
params = {
|
||||
"merk": clean_make,
|
||||
"handelsbenaming": clean_model,
|
||||
"$limit": cls.BATCH_SIZE,
|
||||
"$offset": current_offset,
|
||||
"$order": "kenteken DESC"
|
||||
}
|
||||
try:
|
||||
r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
|
||||
batch = r.json() if r.status_code == 200 else []
|
||||
except Exception: break
|
||||
|
||||
if not batch:
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
logger.info(f"🏁 {clean_make} {clean_model} minden variánsa feldolgozva.")
|
||||
return
|
||||
|
||||
for item in batch:
|
||||
try:
|
||||
plate = item.get("kenteken")
|
||||
if not plate: continue
|
||||
|
||||
# Alapadatok azonosítása
|
||||
variant = item.get("variant")
|
||||
version = item.get("uitvoering") # Az Execution/Version kód
|
||||
ccm = cls.parse_int(item.get("cilinderinhoud"))
|
||||
raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
|
||||
model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
|
||||
norm_name = cls.normalize(model_name_clean)
|
||||
|
||||
# Extra technikai mélyfúrás (kW, Fuel, Engine)
|
||||
tech = await cls.fetch_tech_details(client, plate)
|
||||
|
||||
# Ellenőrzés: Létezik-e már ez a pontos technikai variáns?
|
||||
stmt = select(VehicleModelDefinition).where(
|
||||
VehicleModelDefinition.make == clean_make,
|
||||
VehicleModelDefinition.normalized_name == norm_name,
|
||||
VehicleModelDefinition.engine_capacity == ccm,
|
||||
VehicleModelDefinition.variant_code == variant,
|
||||
VehicleModelDefinition.version_code == version,
|
||||
VehicleModelDefinition.fuel_type == tech["fuel_desc"]
|
||||
).limit(1)
|
||||
|
||||
existing = (await db.execute(stmt)).scalar_one_or_none()
|
||||
|
||||
if existing:
|
||||
# Meglévő rekord frissítése a prioritással és hiányzó adatokkal
|
||||
existing.priority_score = priority
|
||||
if tech["power_kw"] > 0: existing.power_kw = tech["power_kw"]
|
||||
if tech["engine_code"]: existing.engine_code = tech["engine_code"]
|
||||
if tech["co2"] > 0: existing.co2_emissions_combined = tech["co2"]
|
||||
else:
|
||||
# ÚJ REKORD LÉTREHOZÁSA - MINDEN ADAT MEZŐVEL
|
||||
db.add(VehicleModelDefinition(
|
||||
make=clean_make,
|
||||
marketing_name=model_name_clean,
|
||||
normalized_name=norm_name,
|
||||
variant_code=variant,
|
||||
version_code=version,
|
||||
type_approval_number=item.get("typegoedkeuringsnummer"),
|
||||
technical_code=plate, # Kötelező mező!
|
||||
engine_capacity=ccm,
|
||||
power_kw=tech["power_kw"],
|
||||
fuel_type=tech["fuel_desc"],
|
||||
engine_code=tech["engine_code"],
|
||||
# Fizikai méretek és súlyok (RDW Main-ből)
|
||||
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
|
||||
doors=cls.parse_int(item.get("aantal_deuren")),
|
||||
width=cls.parse_int(item.get("breedte")),
|
||||
wheelbase=cls.parse_int(item.get("wielbasis")),
|
||||
list_price=cls.parse_int(item.get("catalogusprijs")),
|
||||
max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
|
||||
towing_weight_unbraked=cls.parse_int(item.get("maximum_massa_trekken_ongeremd")),
|
||||
towing_weight_braked=cls.parse_int(item.get("maximum_trekken_massa_geremd")),
|
||||
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
|
||||
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig") or item.get("toegestane_maximum_massa_voertuig")),
|
||||
body_type=item.get("inrichting"),
|
||||
# Emissziós és környezeti adatok (RDW Extra-ból)
|
||||
co2_emissions_combined=tech["co2"],
|
||||
fuel_consumption_combined=tech["consumption"],
|
||||
euro_classification=tech["euro_class"],
|
||||
cylinders=cls.parse_int(item.get("aantal_cilinders")),
|
||||
# Meta adatok
|
||||
vehicle_class=v_class,
|
||||
priority_score=priority,
|
||||
status="ACTIVE",
|
||||
source="MEGA-HUNTER-v1.7.3"
|
||||
))
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")
|
||||
|
||||
# Batch commit 50 soronként
|
||||
await db.commit()
|
||||
current_offset += len(batch)
|
||||
|
||||
# Biztonsági korlát: egy típusból ne szedjünk le többet, mint ami a variációkhoz kell
|
||||
if current_offset >= 300: break
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
@classmethod
async def run(cls):
    """Worker main loop: repeatedly claim and process the top-priority discovery row."""
    logger.info("🤖 Mega-Hunter Robot v1.7.3 ONLINE")
    # Hoisted statements: text() is pure, no need to rebuild them each cycle.
    pick_stmt = text("""
        SELECT id, make, model, vehicle_class, priority_score
        FROM data.catalog_discovery
        WHERE status IN ('pending', 'processing')
        ORDER BY priority_score DESC
        LIMIT 1
    """)
    claim_stmt = text("UPDATE data.catalog_discovery SET status = 'processing' WHERE id = :id")
    while True:
        try:
            async with AsyncSessionLocal() as db:
                row = (await db.execute(pick_stmt)).fetchone()
                if row is None:
                    logger.info("😴 Nincs több feladat, pihenés 30 másodpercig...")
                    await asyncio.sleep(30)
                    continue
                # Mark the row as claimed so a second worker will not pick it up.
                await db.execute(claim_stmt, {"id": row[0]})
                await db.commit()
                await cls.process_make_model(db, row[0], row[1], row[2], row[3], row[4])
        except Exception as e:
            logger.error(f"💀 Kritikus hiba a futtatás során: {e}")
            await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -1,173 +0,0 @@
|
||||
# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from sqlalchemy import text, select, update
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# MB 2.0 Naplózás
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s')
|
||||
logger = logging.getLogger("Robot-1")
|
||||
|
||||
class CatalogHunter:
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
|
||||
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
BATCH_SIZE = 50
|
||||
|
||||
@classmethod
|
||||
def normalize(cls, text_val: str) -> str:
|
||||
if not text_val: return ""
|
||||
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
|
||||
|
||||
@classmethod
|
||||
def parse_int(cls, value) -> int:
|
||||
try:
|
||||
if value is None or str(value).strip() == "": return 0
|
||||
return int(float(value))
|
||||
except (ValueError, TypeError): return 0
|
||||
|
||||
@classmethod
async def fetch_extra_tech(cls, client, plate):
    """Fetch the RDW fuel and engine side tables for one licence plate.

    Returns a dict with keys power_kw / euro_klasse / fuel_desc / engine_code.
    Safe defaults are returned on any HTTP or parsing failure.
    """
    query = {"kenteken": plate}
    tech = {"power_kw": 0, "euro_klasse": None, "fuel_desc": "Unknown", "engine_code": None}
    try:
        fuel_resp, engine_resp = await asyncio.gather(
            client.get(cls.RDW_FUEL, params=query, headers=cls.HEADERS),
            client.get(cls.RDW_ENGINE, params=query, headers=cls.HEADERS),
        )

        if fuel_resp.status_code == 200:
            best_power = 0
            fuels = []
            for entry in fuel_resp.json():
                # RDW exposes two power columns under two historic spellings each.
                net_kw = cls.parse_int(entry.get("netto_maximum_vermogen") or entry.get("nettomaximumvermogen"))
                cont_kw = cls.parse_int(entry.get("nominaal_continu_maximum_vermogen") or entry.get("nominaalcontinuvermogen"))
                best_power = max(best_power, net_kw, cont_kw)

                fuel_name = entry.get("brandstof_omschrijving")
                if fuel_name and fuel_name not in fuels:
                    fuels.append(fuel_name)

                # Keep the first non-empty emission class we encounter.
                if not tech["euro_klasse"]:
                    tech["euro_klasse"] = entry.get("uitlaatemissieniveau") or entry.get("euro_klasse")

            tech["power_kw"] = best_power
            tech["fuel_desc"] = ", ".join(fuels) if fuels else "Unknown"

        if engine_resp.status_code == 200:
            engine_rows = engine_resp.json()
            if engine_rows:
                tech["engine_code"] = engine_rows[0].get("motorcode")
    except Exception as e:
        logger.error(f"❌ RDW-Extra hiba ({plate}): {e}")
    return tech
|
||||
|
||||
@classmethod
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
    """Harvest RDW records for one make/model task and upsert technical variants.

    Pages through the RDW main registry BATCH_SIZE rows at a time, enriches each
    plate via the fuel/engine side tables, then either updates an existing
    VehicleModelDefinition variant or inserts a new one.  The discovery task is
    marked 'processed' once RDW returns no more rows.

    Fixes:
    - `plate` is bound before the per-item try block, so the except handler can
      no longer raise NameError and mask the real error.
    - A failed RDW page fetch is logged instead of being swallowed silently.
    """
    clean_make = make_name.strip().upper()
    clean_model = model_name.strip().upper()
    logger.info(f"🎯 VADÁSZAT INDUL: {clean_make} {clean_model} (Prio: {priority})")

    current_offset = 0
    async with httpx.AsyncClient(timeout=30.0) as client:
        while True:
            params = {
                "merk": clean_make,
                "handelsbenaming": clean_model,
                "$limit": cls.BATCH_SIZE,
                "$offset": current_offset,
                "$order": "kenteken DESC"
            }
            try:
                r = await client.get(cls.RDW_MAIN, params=params, headers=cls.HEADERS)
                batch = r.json() if r.status_code == 200 else []
            except Exception as fetch_err:
                # BUGFIX: this failure used to be silent; log it before giving up.
                logger.error(f"❌ RDW lekérdezési hiba ({clean_make} {clean_model}): {fetch_err}")
                break

            if not batch:
                await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
                await db.commit()
                logger.info(f"🏁 {clean_make} {clean_model} feldolgozva.")
                return

            for item in batch:
                # BUGFIX: bind plate before the try so the warning below is safe.
                plate = item.get("kenteken")
                try:
                    if not plate:
                        continue
                    raw_model = str(item.get("handelsbenaming", "Unknown")).upper()
                    model_name_clean = raw_model.replace(clean_make, "").strip() or raw_model
                    norm_name = cls.normalize(model_name_clean)
                    ccm = cls.parse_int(item.get("cilinderinhoud"))

                    tech = await cls.fetch_extra_tech(client, plate)

                    # Do we already hold this exact technical variant?
                    stmt = select(VehicleModelDefinition).where(
                        VehicleModelDefinition.make == clean_make,
                        VehicleModelDefinition.normalized_name == norm_name,
                        VehicleModelDefinition.engine_capacity == ccm,
                        VehicleModelDefinition.fuel_type == tech["fuel_desc"]
                    ).limit(1)

                    existing = (await db.execute(stmt)).scalar_one_or_none()

                    if existing:
                        # Only overwrite when we found more precise data.
                        if tech["engine_code"]:
                            existing.engine_code = tech["engine_code"]
                        if tech["power_kw"] > 0:
                            existing.power_kw = tech["power_kw"]
                        existing.priority_score = priority
                    else:
                        # Create a brand-new variant record.
                        db.add(VehicleModelDefinition(
                            make=clean_make,
                            marketing_name=model_name_clean,
                            normalized_name=norm_name,
                            marketing_name_aliases=[raw_model],
                            technical_code=plate,
                            fuel_type=tech["fuel_desc"],
                            engine_capacity=ccm,
                            engine_code=tech["engine_code"],
                            power_kw=tech["power_kw"],
                            cylinders=cls.parse_int(item.get("aantal_cilinders")),
                            euro_classification=tech["euro_klasse"],
                            vehicle_class=v_class,
                            priority_score=priority,
                            status="ACTIVE",  # required by the downstream X-ray/auditor robots
                            source="PRECISION-HUNTER-v2.1"
                        ))
                except Exception as e:
                    logger.warning(f"⚠️ Hiba a sor feldolgozásakor ({plate}): {e}")

            await db.commit()
            current_offset += len(batch)
            # Cap per type: enough variants exist well before 100k rows are scanned.
            if current_offset >= 500:
                break
            await asyncio.sleep(0.1)
|
||||
|
||||
@classmethod
async def run(cls):
    """Main loop: claim the highest-priority discovery task and process it.

    ROBUSTNESS FIX: the loop body is now wrapped in try/except so one transient
    database or network error no longer kills the whole worker — matching the
    error handling used by the sibling robots.
    """
    logger.info("🤖 Vehicle Catalog Hunter ONLINE")
    while True:
        try:
            async with AsyncSessionLocal() as db:
                # Fetch the single highest-priority pending task.
                query = text("""
                    SELECT id, make, model, vehicle_class, priority_score
                    FROM data.catalog_discovery
                    WHERE status IN ('pending', 'processing')
                    ORDER BY priority_score DESC
                    LIMIT 1
                """)
                task = (await db.execute(query)).fetchone()

                if task:
                    # Claim the task so another robot cannot pick it up too.
                    await db.execute(text("UPDATE data.catalog_discovery SET status = 'processing' WHERE id = :id"), {"id": task[0]})
                    await db.commit()

                    await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
                else:
                    await asyncio.sleep(30)
        except Exception as e:
            logger.error(f"💀 Kritikus hiba a futtatás során: {e}")
            await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
2
backend/app/workers/vehicle/vehicle_robot_2_researcher.py
Executable file → Normal file
2
backend/app/workers/vehicle/vehicle_robot_2_researcher.py
Executable file → Normal file
@@ -159,7 +159,7 @@ class VehicleResearcher:
|
||||
SET status = 'research_in_progress'
|
||||
WHERE id = (
|
||||
SELECT id FROM data.vehicle_model_definitions
|
||||
WHERE status IN ('unverified', 'awaiting_research')
|
||||
WHERE status IN ('unverified', 'awaiting_research', 'ACTIVE')
|
||||
AND attempts < :max_attempts
|
||||
ORDER BY
|
||||
CASE WHEN make = 'TOYOTA' THEN 1 ELSE 2 END,
|
||||
|
||||
@@ -1,137 +0,0 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/researcher_v2_1.py
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, List
|
||||
from sqlalchemy import select, update, and_, func, or_, case
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# DuckDuckGo search API hiba-elnyomás és import
|
||||
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
# Logolás beállítása
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Robot-Researcher-v2.1")
|
||||
|
||||
class ResearcherBot:
|
||||
"""
|
||||
Robot 2.1: Az internet porszívója.
|
||||
Technikai adatokat gyűjt (DuckDuckGo), hogy előkészítse az AI dúsítást.
|
||||
Kihasználja a motorkódot és a gyártási évet a pontosabb találatokért.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.batch_size = 5 # Egyszerre 5 járművet vesz ki
|
||||
self.max_parallel_queries = 3 # Párhuzamos keresések száma
|
||||
|
||||
async def fetch_source(self, label: str, query: str) -> str:
    """Run one DuckDuckGo query off-thread and return a tagged context section.

    Errors never propagate: the failure is encoded into the returned section
    header so the caller can always concatenate results.
    """
    try:
        def blocking_search():
            # DDGS is synchronous; executed via asyncio.to_thread below.
            with DDGS() as ddgs:
                hits = ddgs.text(query, max_results=3)
                if not hits:
                    return []
                # Collect title + body of the first 3 hits as context.
                return [f"[{h.get('title', 'No Title')}] {h.get('body', '')}" for h in hits]

        snippets = await asyncio.to_thread(blocking_search)

        if not snippets:
            return f"=== SOURCE: {label} | STATUS: EMPTY ===\n\n"

        parts = [f"=== SOURCE: {label} | QUERY: {query} ===\n"]
        parts.append("\n---\n".join(snippets))
        parts.append("\n=== END SOURCE ===\n\n")
        return "".join(parts)
    except Exception as e:
        logger.error(f"❌ Keresési hiba ({label}): {str(e)}")
        return f"=== SOURCE: {label} | ERROR: {str(e)} ===\n\n"
|
||||
|
||||
async def research_vehicle(self, vehicle_id: int):
    """Build the full web dossier for one vehicle.

    Locks the record ('research_in_progress'), gathers multi-channel DuckDuckGo
    context, then either hands the record to Robot 2.2 ('awaiting_ai_synthesis')
    or requeues it ('unverified') when too little data came back.

    ROBUSTNESS FIX: any unexpected crash between taking the lock and writing the
    final status used to leave the row stuck in 'research_in_progress' forever;
    the new except branch requeues it instead.
    """
    async with AsyncSessionLocal() as db:
        res = await db.execute(select(VehicleModelDefinition).where(VehicleModelDefinition.id == vehicle_id))
        v = res.scalar_one_or_none()
        if not v:
            return

        make = v.make
        model = v.marketing_name
        engine = v.engine_code or ""
        year = f"{v.year_from}" if v.year_from else ""

        # Lock the record against double processing.
        v.status = 'research_in_progress'
        await db.commit()

    logger.info(f"🔎 Kutatás indul: {make} {model} (Motor: {engine}, Év: {year})")

    try:
        # Targeted multi-channel search strategy.
        queries = [
            ("TECH_SPECS", f"{make} {model} {engine} {year} technical specifications engine power kw torque"),
            ("MAINTENANCE", f"{make} {model} {engine} oil capacity coolant transmission fluid type capacity"),
            ("TIRES_PROD", f"{make} {model} {year} tire size load index production years status")
        ]

        # Parallel source gathering (fetch_source handles per-query errors itself).
        tasks = [self.fetch_source(label, q) for label, q in queries]
        search_results = await asyncio.gather(*tasks)
        # NOTE(review): ERROR sections emitted by fetch_source also count toward
        # the 200-char threshold below — confirm that is acceptable.
        full_context = "".join(search_results)

        async with AsyncSessionLocal() as db:
            if len(full_context.strip()) > 200:  # enough context collected
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        raw_search_context=full_context,
                        status='awaiting_ai_synthesis',  # hand-off to Robot 2.2
                        last_research_at=func.now(),
                        attempts=VehicleModelDefinition.attempts + 1
                    )
                )
                logger.info(f"✅ Kontextus rögzítve: {make} {model}")
            else:
                # Too little data: requeue for a later retry.
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        status='unverified',
                        attempts=VehicleModelDefinition.attempts + 1,
                        last_research_at=func.now()
                    )
                )
                logger.warning(f"⚠️ Kevés adat: {make} {model} - Újrapróbálkozás később")
            await db.commit()
    except Exception as e:
        # Safety net: release the lock so the record is not orphaned.
        logger.error(f"💥 Kutatási hiba ({make} {model}): {e}")
        async with AsyncSessionLocal() as db:
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == vehicle_id)
                .values(
                    status='unverified',
                    attempts=VehicleModelDefinition.attempts + 1,
                    last_research_at=func.now()
                )
            )
            await db.commit()
|
||||
|
||||
async def run(self):
    """Scheduler loop: pull a small batch of due vehicles and research them one by one.

    ROBUSTNESS FIX: the loop body is guarded so one failed DB round-trip or
    research call no longer terminates the whole worker process.
    """
    logger.info("🚀 Robot 2.1 (Researcher) ONLINE - Cél: 407 Toyota feldolgozása")
    while True:
        try:
            async with AsyncSessionLocal() as db:
                # Priority: Toyota records first.
                priorities = case(
                    (VehicleModelDefinition.make == 'TOYOTA', 1),
                    else_=2
                )

                stmt = select(VehicleModelDefinition.id).where(
                    or_(VehicleModelDefinition.status == 'unverified',
                        VehicleModelDefinition.status == 'awaiting_research')
                ).order_by(priorities, VehicleModelDefinition.attempts.asc()).limit(self.batch_size)

                res = await db.execute(stmt)
                ids = [r[0] for r in res.fetchall()]

            if not ids:
                await asyncio.sleep(30)
                continue

            # Sequential processing because of the search-engine rate limit.
            for rid in ids:
                await self.research_vehicle(rid)
                await asyncio.sleep(5)  # pause between searches
        except Exception as e:
            logger.error(f"💀 Kritikus hiba a kutató főciklusban: {e}")
            await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(ResearcherBot().run())
|
||||
@@ -1,262 +0,0 @@
|
||||
# /app/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
|
||||
import asyncio
|
||||
import logging
|
||||
import datetime
|
||||
import random
|
||||
import sys
|
||||
import warnings
|
||||
from sqlalchemy import select, and_, update, func, case
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.models.asset import AssetCatalog
|
||||
from app.services.ai_service import AIService
|
||||
|
||||
# DuckDuckGo hiba-elnyomás
|
||||
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
# MB 2.0 Szigorú naplózás
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s',
|
||||
stream=sys.stdout
|
||||
)
|
||||
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
|
||||
|
||||
class TechEnricher:
|
||||
"""
|
||||
Vehicle Robot 3: Industrial Alchemist (Pro Edition).
|
||||
Felelős az MDM 'Arany' rekordjainak előállításáért hibrid (RDW + AI + Web) logikával.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.max_attempts = 5
|
||||
self.batch_size = 10
|
||||
self.daily_ai_limit = 500
|
||||
self.ai_calls_today = 0
|
||||
self.last_reset_date = datetime.date.today()
|
||||
self.search_timeout = 15.0
|
||||
|
||||
def check_budget(self) -> bool:
|
||||
""" Napi AI keret ellenőrzése. """
|
||||
if datetime.date.today() > self.last_reset_date:
|
||||
self.ai_calls_today = 0
|
||||
self.last_reset_date = datetime.date.today()
|
||||
return self.ai_calls_today < self.daily_ai_limit
|
||||
|
||||
def is_data_sane(self, data: dict, rdw_kw: int, rdw_ccm: int) -> bool:
|
||||
"""
|
||||
Hallucináció elleni védelem: technikai józansági vizsgálat.
|
||||
ÚJ: Ha az RDW-től van biztos adatunk, akkor megengedőbbek vagyunk az AI-val,
|
||||
mert a fő adatokat az RDW-ből vesszük.
|
||||
"""
|
||||
# Ha van hivatalos adat, akkor "Sane", a többit megoldjuk a hibrid logikával
|
||||
if rdw_kw > 0 or rdw_ccm > 0:
|
||||
return True
|
||||
|
||||
try:
|
||||
if not data: return False
|
||||
ccm = int(data.get("ccm", 0) or 0)
|
||||
kw = int(data.get("kw", 0) or 0)
|
||||
|
||||
# Ne engedjünk be teljesen üres adatot, ha nincs RDW támasz sem
|
||||
if ccm == 0 and kw == 0 and data.get("vehicle_type") != "trailer":
|
||||
return False
|
||||
|
||||
if ccm > 16000 or (kw > 1500 and data.get("vehicle_type") != "truck"):
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug(f"Data Sane Error: {e}")
|
||||
return False
|
||||
|
||||
async def get_web_wisdom(self, make: str, model: str) -> str:
|
||||
""" Adatgyűjtés a DuckDuckGo-ról szálbiztos és timeouttal védett módon. """
|
||||
query = f"{make} {model} technical specifications engine code fuel"
|
||||
try:
|
||||
def sync_search():
|
||||
with DDGS() as ddgs:
|
||||
results = ddgs.text(query, max_results=3)
|
||||
return "\n".join([r['body'] for r in results]) if results else ""
|
||||
|
||||
return await asyncio.wait_for(asyncio.to_thread(sync_search), timeout=self.search_timeout)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"⏱️ Web keresési időtúllépés ({make} {model})")
|
||||
return ""
|
||||
except Exception as e:
|
||||
logger.warning(f"🌐 Keresési hiba ({make}): {e}")
|
||||
return ""
|
||||
|
||||
async def process_single_record(self, record_id: int):
|
||||
""" Rekord dúsítás: Read -> AI Process -> Hybrid Gold Data Merge. """
|
||||
make, m_name, v_type = "", "", "car"
|
||||
web_context = ""
|
||||
# ÚJ: RDW adatok tárolója
|
||||
rdw_kw, rdw_ccm, rdw_fuel, rdw_engine = 0, 0, "petrol", ""
|
||||
|
||||
# 1. LÉPÉS: Olvasás és státuszváltás
|
||||
try:
|
||||
async with AsyncSessionLocal() as db:
|
||||
res = await db.execute(
|
||||
select(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == record_id)
|
||||
.with_for_update(skip_locked=True)
|
||||
)
|
||||
rec = res.scalar_one_or_none()
|
||||
if not rec:
|
||||
return
|
||||
|
||||
make = rec.make
|
||||
m_name = rec.marketing_name
|
||||
v_type = rec.vehicle_class or "car"
|
||||
web_context = rec.raw_search_context or ""
|
||||
|
||||
# ÚJ: Kimentjük a Hunter által szerzett hivatalos RDW adatokat!
|
||||
rdw_kw = rec.power_kw or 0
|
||||
rdw_ccm = rec.engine_capacity or 0
|
||||
rdw_fuel = rec.fuel_type or "petrol"
|
||||
rdw_engine = rec.engine_code or ""
|
||||
|
||||
rec.status = "ai_synthesis_in_progress"
|
||||
await db.commit()
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Adatbázis hiba olvasáskor (ID: {record_id}): {e}")
|
||||
return
|
||||
|
||||
# 2. LÉPÉS: AI és Web munka
|
||||
try:
|
||||
logger.info(f"🧠 AI elemzés indul: {make} {m_name}")
|
||||
|
||||
# Átadjuk az AI-nak az RDW adatokat is kontextusként, hogy "okosodjon" belőle
|
||||
sources_dict = {
|
||||
"web_context": web_context,
|
||||
"vehicle_class": v_type,
|
||||
"rdw_kw": rdw_kw,
|
||||
"rdw_ccm": rdw_ccm
|
||||
}
|
||||
ai_data = await AIService.get_clean_vehicle_data(make, m_name, sources_dict)
|
||||
|
||||
# Ha az AI gyenge adatot hoz vissza, és az RDW adatunk is hiányos, akkor webezünk
|
||||
if (not ai_data or not ai_data.get("kw")) and rdw_kw == 0:
|
||||
logger.info(f"🔍 Adathiány, extra webes mélyfúrás: {make} {m_name}")
|
||||
extra_web_info = await self.get_web_wisdom(make, m_name)
|
||||
sources_dict["web_context"] = extra_web_info
|
||||
ai_data = await AIService.get_clean_vehicle_data(make, m_name, sources_dict)
|
||||
|
||||
# ÚJ: Hibrid józansági vizsgálat
|
||||
if not ai_data: ai_data = {}
|
||||
if not self.is_data_sane(ai_data, rdw_kw, rdw_ccm):
|
||||
raise ValueError("Az AI válasza hallucinált ÉS hivatalos RDW adatunk sincs.")
|
||||
|
||||
self.ai_calls_today += 1
|
||||
|
||||
# ÚJ: HIBRID ADAT-ÖSSZEVONÁS (The Magic!)
|
||||
# RDW (hivatalos) > AI (generált)
|
||||
final_kw = rdw_kw if rdw_kw > 0 else (ai_data.get("kw") or 0)
|
||||
final_ccm = rdw_ccm if rdw_ccm > 0 else (ai_data.get("ccm") or 0)
|
||||
|
||||
# Üzemanyag tisztítás (az RDW néha hollandul írja, ezt az AI tisztázhatja, de ha nincs AI, marad az RDW)
|
||||
final_fuel = rdw_fuel if (rdw_fuel and rdw_fuel != "Unknown") else ai_data.get("fuel_type", "petrol")
|
||||
final_engine = rdw_engine if rdw_engine else ai_data.get("engine_code", "Nincs adat")
|
||||
|
||||
# Befrissítjük a JSON payloadot is a biztos adatokkal
|
||||
ai_data["kw"] = final_kw
|
||||
ai_data["ccm"] = final_ccm
|
||||
ai_data["engine_code"] = final_engine
|
||||
|
||||
# 3. LÉPÉS: Arany rekord mentése
|
||||
async with AsyncSessionLocal() as db:
|
||||
clean_model = str(ai_data.get("marketing_name", m_name))[:50].upper()
|
||||
|
||||
cat_stmt = select(AssetCatalog).where(and_(
|
||||
AssetCatalog.make == make.upper(),
|
||||
AssetCatalog.model == clean_model,
|
||||
AssetCatalog.power_kw == final_kw # A pontos KW alapján egyedi
|
||||
)).limit(1)
|
||||
|
||||
existing_cat = (await db.execute(cat_stmt)).scalar_one_or_none()
|
||||
|
||||
if not existing_cat:
|
||||
db.add(AssetCatalog(
|
||||
make=make.upper(),
|
||||
model=clean_model,
|
||||
power_kw=final_kw,
|
||||
engine_capacity=final_ccm,
|
||||
fuel_type=final_fuel,
|
||||
vehicle_class=v_type,
|
||||
factory_data=ai_data # Dúsított JSON
|
||||
))
|
||||
logger.info(f"✨ ÚJ ARANY REKORD (HIBRID): {make.upper()} {clean_model} ({final_ccm}ccm, {final_kw}kW)")
|
||||
|
||||
# Staging frissítése a biztos adatokkal
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == record_id)
|
||||
.values(
|
||||
status="gold_enriched",
|
||||
technical_code=ai_data.get("technical_code") or f"REF-{record_id}",
|
||||
engine_capacity=final_ccm,
|
||||
power_kw=final_kw,
|
||||
updated_at=func.now()
|
||||
)
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
except Exception as e:
|
||||
# 4. LÉPÉS: Hibakezelés
|
||||
logger.error(f"🚨 Hiba a(z) {record_id} rekordnál ({make} {m_name}): {e}")
|
||||
try:
|
||||
async with AsyncSessionLocal() as db:
|
||||
await db.execute(
|
||||
update(VehicleModelDefinition)
|
||||
.where(VehicleModelDefinition.id == record_id)
|
||||
.values(
|
||||
attempts=VehicleModelDefinition.attempts + 1,
|
||||
last_error=str(e)[:200],
|
||||
status=case(
|
||||
(VehicleModelDefinition.attempts >= self.max_attempts - 1, "suspended"),
|
||||
else_="unverified"
|
||||
),
|
||||
updated_at=func.now()
|
||||
)
|
||||
)
|
||||
await db.commit()
|
||||
except Exception as db_err:
|
||||
logger.critical(f"💀 Végzetes adatbázis hiba a fallback mentésnél: {db_err}")
|
||||
|
||||
async def run(self):
|
||||
logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Napi limit: {self.daily_ai_limit})")
|
||||
|
||||
while True:
|
||||
try:
|
||||
if not self.check_budget():
|
||||
logger.warning("💰 AI Keret kimerült. Alvás 1 órát.")
|
||||
await asyncio.sleep(3600)
|
||||
continue
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
stmt = select(VehicleModelDefinition.id).where(and_(
|
||||
VehicleModelDefinition.status.in_(["unverified", "awaiting_ai_synthesis"]),
|
||||
VehicleModelDefinition.attempts < self.max_attempts
|
||||
)).limit(self.batch_size)
|
||||
|
||||
res = await db.execute(stmt)
|
||||
ids = [r[0] for r in res.fetchall()]
|
||||
|
||||
if not ids:
|
||||
await asyncio.sleep(60)
|
||||
continue
|
||||
|
||||
for rid in ids:
|
||||
await self.process_single_record(rid)
|
||||
await asyncio.sleep(random.uniform(5.0, 15.0)) # GPU kímélés
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
|
||||
await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
asyncio.run(TechEnricher().run())
|
||||
except KeyboardInterrupt:
|
||||
logger.info("🛑 Alchemist Pro leállítva.")
|
||||
@@ -1,224 +0,0 @@
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import datetime
|
||||
import random
|
||||
import sys
|
||||
import json
|
||||
from sqlalchemy import text, func, update, case
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.models.asset import AssetCatalog
|
||||
from app.services.ai_service import AIService
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
|
||||
|
||||
class TechEnricher:
|
||||
"""
|
||||
Vehicle Robot 3: Alchemist Pro (Atomi Zárolás Patch)
|
||||
Tiszta GPU fókusz: Csak az AI elemzésre és adategyesítésre koncentrál.
|
||||
Nincs felesleges webkeresés. Szigorú Sane-Check.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.max_attempts = 5
|
||||
self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
|
||||
self.ai_calls_today = 0
|
||||
self.last_reset_date = datetime.date.today()
|
||||
|
||||
def check_budget(self) -> bool:
|
||||
if datetime.date.today() > self.last_reset_date:
|
||||
self.ai_calls_today = 0
|
||||
self.last_reset_date = datetime.date.today()
|
||||
return self.ai_calls_today < self.daily_ai_limit
|
||||
|
||||
ddef is_data_sane(self, data: dict, base_info: dict) -> bool:
|
||||
""" Szigorított, de intelligens AI Hallucináció szűrő """
|
||||
if not data:
|
||||
logger.warning("Sane-check: Teljesen üres AI válasz.")
|
||||
return False
|
||||
|
||||
try:
|
||||
# 1. Alapvető fizikai korlátok vizsgálata (csak az AI adatokon)
|
||||
ai_ccm = int(data.get("ccm", 0) or 0)
|
||||
ai_kw = int(data.get("kw", 0) or 0)
|
||||
v_class = base_info.get("v_type", "car")
|
||||
|
||||
if ai_ccm > 18000:
|
||||
logger.warning(f"Sane-check bukás: Irreális CCM érték ({ai_ccm})")
|
||||
return False
|
||||
if ai_kw > 1500 and v_class != "truck":
|
||||
logger.warning(f"Sane-check bukás: Irreális KW érték ({ai_kw})")
|
||||
return False
|
||||
|
||||
# 2. KOMBINÁLT Adat teljesség vizsgálata (RDW + AI)
|
||||
# Ha az RDW tudja, akkor nem baj, ha az AI nem találta meg!
|
||||
merged_kw = base_info.get('rdw_kw') or ai_kw
|
||||
merged_ccm = base_info.get('rdw_ccm') or ai_ccm
|
||||
fuel = data.get("fuel_type", base_info.get("rdw_fuel", "")).lower()
|
||||
|
||||
# Ha még kombinálva sincs meg a KW
|
||||
if merged_kw == 0:
|
||||
logger.warning("Sane-check figyelmeztetés: Hiányzó KW (se RDW, se AI). Engedélyezve részleges adatként.")
|
||||
# Nem térünk vissza False-al, inkább mentsük el, amit eddig tudunk!
|
||||
|
||||
# Ha még kombinálva sincs meg a CCM (és nem elektromos)
|
||||
if merged_ccm == 0 and "electric" not in fuel and "elektric" not in fuel and v_class != "trailer":
|
||||
logger.warning("Sane-check figyelmeztetés: Hiányzó CCM egy belsőégésű motornál. Engedélyezve részleges adatként.")
|
||||
# Ezt is átengedjük, hogy kitörjünk a végtelen hurokból.
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Sane check hiba: {e}")
|
||||
return False
|
||||
|
||||
async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
    """Enrich one locked record: AI call -> sanity gate -> hybrid merge -> gold insert.

    On any failure the record is requeued ('unverified') or, after
    max_attempts, suspended.  BUGFIX: the AI-budget counter is incremented
    right after the model call instead of only on full success — failed
    syntheses consume the same GPU time and must count against the daily limit.
    """
    try:
        logger.info(f"🧠 AI dúsítás indul: {base_info['make']} {base_info['m_name']}")

        # 1. AI call — the model receives every fact we already hold as context.
        ai_data = await AIService.get_clean_vehicle_data(
            base_info['make'],
            base_info['m_name'],
            base_info
        )
        self.ai_calls_today += 1  # count the call itself, success or not

        # 2. Validation — bad AI output drops the file (no web fallback here).
        if not ai_data or not self.is_data_sane(ai_data, base_info):
            raise ValueError("Az AI hiányos adatot adott vissza vagy hallucinált.")

        # 3. HYBRID MERGE — official RDW figures override the AI ones.
        final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else (ai_data.get("kw") or 0)
        final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else (ai_data.get("ccm") or 0)

        # Fuel cleanup: keep RDW's value unless it is missing/Unknown.
        fuel_rdw = base_info.get('rdw_fuel', '')
        final_fuel = fuel_rdw if fuel_rdw and fuel_rdw != "Unknown" else ai_data.get("fuel_type", "petrol")

        final_engine = base_info['rdw_engine'] if base_info['rdw_engine'] else ai_data.get("engine_code", "Unknown")
        final_euro = base_info['rdw_euro'] or ai_data.get("euro_classification")
        final_cylinders = base_info['rdw_cylinders'] or ai_data.get("cylinders")

        # 4. Persist into the gold catalog.
        clean_model = str(ai_data.get("marketing_name", base_info['m_name']))[:50].upper()

        cat_stmt = text("""
            INSERT INTO data.vehicle_catalog
            (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
            VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
            RETURNING id;
        """)

        await db.execute(cat_stmt, {
            "m_id": record_id,
            "make": base_info['make'].upper(),
            "model": clean_model,
            "kw": final_kw,
            "ccm": final_ccm,
            "fuel": final_fuel,
            "factory": json.dumps(ai_data)
        })

        # 5. Close the staging (VMD) row with the merged facts.
        await db.execute(
            update(VehicleModelDefinition)
            .where(VehicleModelDefinition.id == record_id)
            .values(
                status="gold_enriched",
                engine_capacity=final_ccm,
                power_kw=final_kw,
                fuel_type=final_fuel,
                engine_code=final_engine,
                euro_classification=final_euro,
                cylinders=final_cylinders,
                specifications=ai_data,  # full AI payload kept on the master row too
                updated_at=func.now()
            )
        )
        await db.commit()
        logger.info(f"✨ ARANY REKORD KÉSZ: {base_info['make'].upper()} {clean_model}")

    except Exception as e:
        await db.rollback()
        logger.warning(f"⚠️ Alkimista hiba ({base_info['make']} {base_info['m_name']}): {e}")

        # Requeue, or suspend once the attempt budget is exhausted.
        new_status = 'suspended' if current_attempts + 1 >= self.max_attempts else 'unverified'

        await db.execute(
            update(VehicleModelDefinition)
            .where(VehicleModelDefinition.id == record_id)
            .values(
                attempts=current_attempts + 1,
                last_error=str(e)[:200],
                status=new_status,
                updated_at=func.now()
            )
        )
        await db.commit()
        if new_status == 'unverified':
            logger.info("♻️ Akta visszaküldve a Robot-2-nek (Kutató).")
|
||||
|
||||
async def run(self):
    """Main loop with atomic claim: lock one record, enrich it, repeat.

    BUGFIX: in PostgreSQL the locking clause must come *after* LIMIT
    (`... ORDER BY ... LIMIT 1 FOR UPDATE SKIP LOCKED`).  The previous
    ordering (`FOR UPDATE SKIP LOCKED` before `LIMIT 1`) is a syntax error,
    so every claim attempt failed and the worker never processed anything.
    """
    logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Atomi Zárolás Patch)")
    while True:
        if not self.check_budget():
            logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
            await asyncio.sleep(3600)
            continue

        try:
            async with AsyncSessionLocal() as db:
                # ATOMIC CLAIM (the "holy grail" against race conditions).
                # Picks up files from Robot-1 (ACTIVE) and Robot-2
                # (awaiting_ai_synthesis) alike.
                query = text("""
                    UPDATE data.vehicle_model_definitions
                    SET status = 'ai_synthesis_in_progress'
                    WHERE id = (
                        SELECT id FROM data.vehicle_model_definitions
                        WHERE status IN ('awaiting_ai_synthesis', 'ACTIVE')
                          AND attempts < :max_attempts
                        ORDER BY
                          CASE WHEN status = 'awaiting_ai_synthesis' THEN 1 ELSE 2 END,
                          priority_score DESC
                        LIMIT 1
                        FOR UPDATE SKIP LOCKED
                    )
                    RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity,
                              fuel_type, engine_code, euro_classification, cylinders, raw_search_context, attempts;
                """)

                result = await db.execute(query, {"max_attempts": self.max_attempts})
                task = result.fetchone()
                await db.commit()

            if task:
                # Unpack the claimed row into base_info for the enricher.
                r_id = task[0]
                base_info = {
                    "make": task[1], "m_name": task[2], "v_type": task[3] or "car",
                    "rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
                    "rdw_fuel": task[6] or "petrol", "rdw_engine": task[7] or "",
                    "rdw_euro": task[8], "rdw_cylinders": task[9],
                    "web_context": task[10] or ""
                }
                attempts = task[11]

                # Separate DB session for the long-running AI call.
                async with AsyncSessionLocal() as process_db:
                    await self.process_single_record(process_db, r_id, base_info, attempts)

                # GPU cooling / Ollama rate limit.
                await asyncio.sleep(random.uniform(1.5, 3.5))
            else:
                logger.info("😴 Nincs feldolgozandó akta, az Alkimista pihen...")
                await asyncio.sleep(15)

        except Exception as e:
            logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
            await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os # Import az AI limit környezeti változóhoz
|
||||
asyncio.run(TechEnricher().run())
|
||||
0
backend/app/workers/vehicle/vehicle_robot_4_vin_auditor.py
Executable file → Normal file
0
backend/app/workers/vehicle/vehicle_robot_4_vin_auditor.py
Executable file → Normal file
Reference in New Issue
Block a user