checkpoint: DB baseline reset (v10) with perfect column order for n8n.
Integrated catalog_robot.py logic for structured data ingestion. Updated models and documentation for the Robot Ecosystem.
This commit is contained in:
Binary file not shown.
@@ -3,196 +3,177 @@ import httpx
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func, or_, text
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Robot1-Master-Fleet-DeepDive")
|
||||
logger = logging.getLogger("Robot1-Ghost-Commander-v1.1.9")
|
||||
|
||||
class CatalogScout:
|
||||
"""
|
||||
Robot 1: Univerzális Járműkatalógus Építő és Audit Robot.
|
||||
Logika: EU-Elsődlegesség (CarQuery) -> US-Kiegészítés (NHTSA).
|
||||
Kategóriák: Car, Motorcycle, Bus, Truck, Trailer, ATV, Marine, Aerial.
|
||||
Szekvenciák:
|
||||
1. Deep Dive (Motorvariánsok gyűjtése)
|
||||
2. Audit (Hiányos adatok pótlása)
|
||||
Robot 1.1.9: Environment Master.
|
||||
- .env alapú hitelesítés (RDW App Token)
|
||||
- Prioritás: RDW (EU) -> NHTSA (US) -> CarQuery (Ban-figyeléssel)
|
||||
- 2.5s lekérési frissítés a biztonságért
|
||||
"""
|
||||
|
||||
CQ_URL = "https://www.carqueryapi.com/api/0.3/"
|
||||
NHTSA_BASE = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/"
|
||||
|
||||
RDW_URL = "https://opendata.rdw.nl/resource/ed7h-m8uz.json"
|
||||
|
||||
# Adatok beolvasása környezeti változókból
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "application/json"
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
"Accept": "application/json",
|
||||
"X-App-Token": RDW_TOKEN
|
||||
}
|
||||
|
||||
# BAN FIGYELŐ ÁLLAPOT
|
||||
cq_banned_until = None
|
||||
|
||||
# --- KATEGÓRIA DEFINÍCIÓK (Szigorú flotta-szétválasztás) ---
|
||||
# --- KATEGÓRIA DEFINÍCIÓK (Szigorúan az eredeti lista szerint) ---
|
||||
MOTO_MAKES = ['ducati', 'ktm', 'triumph', 'aprilia', 'benelli', 'vespa', 'simson', 'mz', 'etz', 'jawa', 'husqvarna', 'gasgas', 'sherco']
|
||||
MARINE_IDS = ['DF', 'DT', 'OUTBOARD', 'MARINE', 'JET SKI', 'SEA-DOO', 'WAVERUNNER', 'YACHT', 'BOAT']
|
||||
AERIAL_IDS = ['CESSNA', 'PIPER', 'AIRBUS', 'BOEING', 'HELICOPTER', 'AIRCRAFT', 'BEECHCRAFT', 'EMBRAER', 'DRONE']
|
||||
ATV_IDS = ['LT-', 'LTZ', 'LTR', 'KINGQUAD', 'QUAD', 'POLARIS', 'CAN-AM', 'MULE', 'RZR', 'ARCTIC CAT', 'UTV', 'SIDE-BY-SIDE']
|
||||
|
||||
# Versenygépek (Motorkerékpárként, üzemóra alapú szervizhez)
|
||||
RACING_IDS = ['RM-Z', 'KX', 'CRF', 'YZ', 'SX-F', 'XC-W', 'RM125', 'RM250', 'CR125', 'CR250', 'MC450']
|
||||
MOTO_KEYWORDS = ['CBR', 'GSX', 'YZF', 'NINJA', 'Z1000', 'DR-Z', 'MT-0', 'V-STROM', 'ADVENTURE', 'SCRAMBLER', 'CBF', 'VFR', 'HAYABUSA']
|
||||
|
||||
# Flotta kategóriák szétválasztása
|
||||
BUS_KEYWORDS = ['BUS', 'COACH', 'INTERCITY', 'SHUTTLE', 'TRANSIT']
|
||||
TRUCK_KEYWORDS = ['TRUCK', 'SEMI', 'TRACTOR', 'HAULER', 'ACTROS', 'MAN', 'SCANIA', 'IVECO', 'VOLVO FH', 'DAF', 'TGX', 'RENAULT T']
|
||||
TRAILER_KEYWORDS = ['TRAILER', 'SEMITRAILER', 'PÓTKOCSI', 'UTÁNFUTÓ', 'SCHMITZ', 'KRONE', 'KÖGEL']
|
||||
|
||||
FALLBACK_BRANDS = ['Audi', 'BMW', 'Mercedes-Benz', 'Volkswagen', 'Toyota', 'Ford', 'Honda', 'Hyundai', 'Kia', 'Mazda', 'Nissan', 'Volvo', 'Skoda', 'Opel', 'Tesla', 'Lexus', 'Porsche', 'Dacia', 'Suzuki']
|
||||
|
||||
@classmethod
|
||||
def identify_class(cls, make: str, model: str) -> str:
|
||||
"""Kategória meghatározás flottakezelési szempontok alapján."""
|
||||
m_full = f"{make} {model}".upper()
|
||||
|
||||
m_full = f"{str(make)} {str(model)}".upper()
|
||||
if any(x in m_full for x in cls.AERIAL_IDS): return "aerial"
|
||||
if any(x in m_full for x in cls.MARINE_IDS): return "marine"
|
||||
if any(x in m_full for x in cls.ATV_IDS): return "atv"
|
||||
|
||||
# Motorkerékpárok (Versenygépekkel együtt)
|
||||
if any(x in m_full for x in cls.RACING_IDS) or make.lower() in cls.MOTO_MAKES:
|
||||
if any(x in m_full or str(make).lower() in cls.MOTO_MAKES for x in (cls.RACING_IDS + cls.MOTO_KEYWORDS)):
|
||||
return "motorcycle"
|
||||
if any(x in m_full for x in cls.MOTO_KEYWORDS):
|
||||
return "motorcycle"
|
||||
|
||||
# Flotta (Busz vs Teherautó vs Pótkocsi)
|
||||
if any(x in m_full for x in cls.BUS_KEYWORDS): return "bus"
|
||||
if any(x in m_full for x in cls.TRUCK_KEYWORDS): return "truck"
|
||||
if any(x in m_full for x in cls.TRAILER_KEYWORDS): return "trailer"
|
||||
|
||||
return "car"
|
||||
|
||||
@classmethod
|
||||
async def fetch_api(cls, url, params=None, is_cq=False):
|
||||
"""API hívó JSONP tisztítással és sebességkorlátozással."""
|
||||
async with httpx.AsyncClient(headers=cls.HEADERS) as client:
|
||||
if is_cq and cls.cq_banned_until and datetime.datetime.now() < cls.cq_banned_until:
|
||||
return "SILENT_SKIP"
|
||||
|
||||
async with httpx.AsyncClient(headers=cls.HEADERS, follow_redirects=True) as client:
|
||||
try:
|
||||
# 1.5s várakozás a Free API limitjei miatt
|
||||
await asyncio.sleep(1.5)
|
||||
# CarQuery: 5.0mp szünet (Hard Ban ellen), többi: 2.5mp (User kérése szerint)
|
||||
await asyncio.sleep(5.0 if is_cq else 2.5)
|
||||
resp = await client.get(url, params=params, timeout=35)
|
||||
if resp.status_code != 200: return None
|
||||
|
||||
if resp.status_code == 403 or "denied" in resp.text.lower():
|
||||
logger.error("🚫 CarQuery BAN! 2 óra kényszerpihenő aktiválva.")
|
||||
cls.cq_banned_until = datetime.datetime.now() + datetime.timedelta(hours=2)
|
||||
return "DENIED"
|
||||
|
||||
if resp.status_code != 200: return None
|
||||
content = resp.text.strip()
|
||||
if is_cq:
|
||||
# Robusztusabb JSONP tisztítás regexszel
|
||||
match = re.search(r'(\{.*\}|\[.*\])', content, re.DOTALL)
|
||||
if match:
|
||||
content = match.group(0)
|
||||
elif "(" in content and ")" in content:
|
||||
content = content[content.find("(") + 1 : content.rfind(")")]
|
||||
|
||||
if match: content = match.group(0)
|
||||
return json.loads(content)
|
||||
except Exception as e:
|
||||
logger.error(f"❌ API hiba: {e} | URL: {url}")
|
||||
logger.error(f"❌ API hiba: {e}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def enrich_missing_data(cls):
|
||||
"""
|
||||
SEQUENCE 2: Audit Robot.
|
||||
Keresi a hiányos technikai adatokat és próbálja dúsítani őket.
|
||||
"""
|
||||
logger.info("🔍 Audit szekvencia indítása (hiányos adatok keresése)...")
|
||||
async with SessionLocal() as db:
|
||||
# Keressük azokat a rekordokat, ahol hiányzik a köbcenti vagy a teljesítmény
|
||||
stmt = select(AssetCatalog).where(
|
||||
or_(
|
||||
AssetCatalog.factory_data == text("'{}'::jsonb"),
|
||||
AssetCatalog.engine_variant == 'Standard',
|
||||
AssetCatalog.fuel_type == None
|
||||
)
|
||||
).limit(100) # Egyszerre csak 100-at nézünk
|
||||
async def is_model_processed(cls, db: AsyncSession, make: str, model: str, year: int):
|
||||
stmt = select(AssetCatalog.id).where(AssetCatalog.make == make, AssetCatalog.model == model, AssetCatalog.year_from == year).limit(1)
|
||||
result = await db.execute(stmt)
|
||||
return result.scalars().first() is not None
|
||||
|
||||
@classmethod
|
||||
async def auto_heal(cls, db: AsyncSession, cq_active: bool):
|
||||
logger.info("🛠️ Auto-Heal: Hiányos rekordok dúsítása...")
|
||||
stmt = select(AssetCatalog).where(AssetCatalog.engine_variant == 'Standard', AssetCatalog.fuel_type == 'Unknown').limit(20)
|
||||
results = await db.execute(stmt)
|
||||
for r in results.scalars().all():
|
||||
# 1. RDW javítás (Holland Open Data + Token)
|
||||
rdw = await cls.fetch_api(cls.RDW_URL, {"merk": r.make.upper(), "handelsbenaming": r.model.upper(), "$limit": 1})
|
||||
if rdw and isinstance(rdw, list) and len(rdw) > 0:
|
||||
item = rdw[0]
|
||||
r.fuel_type = item.get("brandstof_omschrijving", "Unknown")
|
||||
r.factory_data.update({"hp": item.get("netto_maximum_vermogen"), "cc": item.get("cilinderinhoud"), "source": "heal_v1.9_rdw"})
|
||||
continue
|
||||
|
||||
results = await db.execute(stmt)
|
||||
incomplete_records = results.scalars().all()
|
||||
|
||||
for record in incomplete_records:
|
||||
logger.info(f"🛠 Audit: {record.make} {record.model} ({record.year_from}) dúsítása...")
|
||||
pass
|
||||
# 2. CQ javítás (Ha nem vagyunk kitiltva)
|
||||
if cq_active:
|
||||
t_data = await cls.fetch_api(cls.CQ_URL, {"cmd": "getTrims", "make": r.make.lower(), "model": r.model, "year": r.year_from}, is_cq=True)
|
||||
if t_data and t_data not in ["DENIED", "SILENT_SKIP"] and "Trims" in t_data:
|
||||
t = t_data["Trims"][0]
|
||||
r.engine_variant = t.get("model_trim") or "Standard"
|
||||
r.factory_data.update({"hp": t.get("model_engine_power_ps"), "cc": t.get("model_engine_cc"), "source": "heal_v1.9_cq"})
|
||||
await db.commit()
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot 1: EU-Elsődlegességű Deep Dive szinkron indítása...")
|
||||
|
||||
# 2026-tól visszafelé haladunk (Modern flották prioritása)
|
||||
for year in range(2026, 1989, -1):
|
||||
logger.info(f"📅 Feldolgozás alatt: {year} évjárat")
|
||||
|
||||
makes_data = await cls.fetch_api(cls.CQ_URL, {"cmd": "getMakes", "year": year}, is_cq=True)
|
||||
if not makes_data or "Makes" not in makes_data: continue
|
||||
logger.info(f"🤖 Robot 1.9.2 indítása (RDW Token: {'Aktív' if cls.RDW_TOKEN else 'HIÁNYZIK!'})")
|
||||
|
||||
for make_entry in makes_data.get("Makes", []):
|
||||
m_id = make_entry["make_id"]
|
||||
m_display = make_entry["make_display"]
|
||||
|
||||
# MODELL GYŰJTÉS: EU + US fúzió
|
||||
for year in range(2026, 1989, -1):
|
||||
logger.info(f"📅 --- CIKLUS: {year} ---")
|
||||
|
||||
cq_now_active = not (cls.cq_banned_until and datetime.datetime.now() < cls.cq_banned_until)
|
||||
|
||||
async with SessionLocal() as db:
|
||||
await cls.auto_heal(db, cq_now_active)
|
||||
|
||||
# 1. MÁRKALISTA (NHTSA + Fallback)
|
||||
makes_to_process = []
|
||||
for b in cls.FALLBACK_BRANDS:
|
||||
makes_to_process.append({"id": b.lower(), "display": b})
|
||||
|
||||
for make in makes_to_process:
|
||||
models_to_fetch = set()
|
||||
|
||||
# 🇪🇺 EU Forrás
|
||||
cq_models = await cls.fetch_api(cls.CQ_URL, {"cmd": "getModels", "make": m_id, "year": year}, is_cq=True)
|
||||
if cq_models and cq_models.get("Models"):
|
||||
for m in cq_models["Models"]: models_to_fetch.add(m["model_name"])
|
||||
|
||||
# 🇺🇸 US Forrás kiegészítés
|
||||
n_data = await cls.fetch_api(f"{cls.NHTSA_BASE}{m_display}/modelyear/{year}?format=json")
|
||||
# A: NHTSA (US)
|
||||
n_data = await cls.fetch_api(f"{cls.NHTSA_BASE}{make['display']}/modelyear/{year}?format=json")
|
||||
if n_data and n_data.get("Results"):
|
||||
for r in n_data["Results"]: models_to_fetch.add(r["Model_Name"])
|
||||
|
||||
# B: RDW (Holland) - Tokennel védve
|
||||
rdw_m = await cls.fetch_api(cls.RDW_URL, {"merk": make['display'].upper(), "$limit": 30})
|
||||
if rdw_m and isinstance(rdw_m, list):
|
||||
for r in rdw_m: models_to_fetch.add(r.get("handelsbenaming"))
|
||||
|
||||
async with SessionLocal() as db:
|
||||
for model_name in models_to_fetch:
|
||||
# DEEP DIVE: Motorvariánsok (Trims) lekérése
|
||||
trims_data = await cls.fetch_api(cls.CQ_URL, {
|
||||
"cmd": "getTrims", "make": m_id, "model": model_name, "year": year
|
||||
}, is_cq=True)
|
||||
if not model_name or await cls.is_model_processed(db, make["display"], model_name, year):
|
||||
continue
|
||||
|
||||
# C: CarQuery (Csak ha nincs ban)
|
||||
found_trims = []
|
||||
t_data = await cls.fetch_api(cls.CQ_URL, {"cmd": "getTrims", "make": make["id"], "model": model_name, "year": year}, is_cq=True)
|
||||
if t_data and t_data not in ["DENIED", "SILENT_SKIP"] and "Trims" in t_data:
|
||||
found_trims = t_data["Trims"]
|
||||
|
||||
found_trims = trims_data.get("Trims", []) if trims_data else []
|
||||
|
||||
# Ha nincs trim adat, egy standard sor mindenképpen kell
|
||||
if not found_trims:
|
||||
found_trims = [{"model_trim": "Standard", "model_engine_fuel": None}]
|
||||
found_trims = [{"model_trim": "Standard", "model_engine_fuel": "Unknown"}]
|
||||
|
||||
for t in found_trims:
|
||||
variant = t.get("model_trim") or "Standard"
|
||||
fuel = t.get("model_engine_fuel") or "Unknown"
|
||||
v_class = cls.identify_class(m_display, model_name)
|
||||
|
||||
# Szigorú duplikáció-ellenőrzés (UniqueConstraint alapú lekérdezés)
|
||||
stmt = select(AssetCatalog).where(
|
||||
AssetCatalog.make == m_display,
|
||||
AssetCatalog.model == model_name,
|
||||
AssetCatalog.year_from == year,
|
||||
AssetCatalog.engine_variant == variant,
|
||||
AssetCatalog.fuel_type == fuel
|
||||
)
|
||||
result = await db.execute(stmt)
|
||||
if not result.scalars().first():
|
||||
db.add(AssetCatalog(
|
||||
make=m_display,
|
||||
model=model_name,
|
||||
year_from=year,
|
||||
engine_variant=variant,
|
||||
fuel_type=fuel,
|
||||
vehicle_class=v_class,
|
||||
factory_data={
|
||||
"cc": t.get("model_engine_cc"),
|
||||
"hp": t.get("model_engine_power_ps"),
|
||||
"cylinders": t.get("model_engine_cyl"),
|
||||
"transmission": t.get("model_transmission_type"),
|
||||
"source": "master_v7_deep_dive",
|
||||
"sync_date": str(func.now())
|
||||
}
|
||||
))
|
||||
|
||||
# JAVÍTÁS: Márkánkénti véglegesítés az adatbázisban a session-ön belül
|
||||
db.add(AssetCatalog(
|
||||
make=make["display"], model=model_name, year_from=year,
|
||||
engine_variant=t.get("model_trim") or "Standard",
|
||||
fuel_type=t.get("model_engine_fuel") or "Unknown",
|
||||
vehicle_class=cls.identify_class(make["display"], model_name),
|
||||
factory_data={
|
||||
"hp": t.get("model_engine_power_ps"), "cc": t.get("model_engine_cc"),
|
||||
"source": "ghost_v1.9.2", "sync_date": str(datetime.datetime.now())
|
||||
}
|
||||
))
|
||||
await db.commit()
|
||||
logger.info(f"✅ {m_display} ({year}) összes variánsa rögzítve.")
|
||||
|
||||
# SEQUENCE 2: Miután végeztünk a fő listával, nézzük meg a hiányosakat
|
||||
await cls.enrich_missing_data()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogScout.run())
|
||||
Reference in New Issue
Block a user