feat: v1.7 overhaul - identity hash, triple wallet, financial ledger, and security audit system
This commit is contained in:
Binary file not shown.
Binary file not shown.
61
backend/app/workers/brand_seeder.py
Normal file
61
backend/app/workers/brand_seeder.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Smart-Seeder-v1.0.2")
|
||||
|
||||
async def seed_with_priority():
|
||||
# RDW lekérdezés: Márka, Fő kategória és darabszám
|
||||
# Olyan márkákat keresünk, amikből legalább 10 db van
|
||||
URL = "https://opendata.rdw.nl/resource/m9d7-ebf2.json?$select=merk,voertuigsoort,count(*)%20as%20total&$group=merk,voertuigsoort&$having=total%20>=%2010"
|
||||
|
||||
logger.info("📥 Adatok lekérése az RDW-től prioritásos besoroláshoz...")
|
||||
|
||||
async with httpx.AsyncClient(timeout=120) as client:
|
||||
try:
|
||||
resp = await client.get(URL)
|
||||
if resp.status_code != 200:
|
||||
logger.error(f"❌ API hiba: {resp.status_code}")
|
||||
return
|
||||
|
||||
raw_data = resp.json()
|
||||
async with SessionLocal() as db:
|
||||
for entry in raw_data:
|
||||
make = entry.get("merk", "").upper()
|
||||
v_kind = entry.get("voertuigsoort", "")
|
||||
|
||||
# --- PRIORITÁS LOGIKA ---
|
||||
# 1. Személyautó (Personenauto) -> 'pending' (Azonnali feldolgozás)
|
||||
# 2. Motor (Motorfiets) -> 'queued_motor'
|
||||
# 3. Minden más -> 'queued_heavy'
|
||||
|
||||
status = 'queued_heavy'
|
||||
if "Personenauto" in v_kind:
|
||||
status = 'pending'
|
||||
elif "Motorfiets" in v_kind:
|
||||
status = 'queued_motor'
|
||||
|
||||
query = text("""
|
||||
INSERT INTO data.catalog_discovery (make, model, vehicle_class, source, status)
|
||||
VALUES (:make, 'ALL_VARIANTS', :v_class, 'smart_seeder_v2_1', :status)
|
||||
ON CONFLICT (make, model, vehicle_class) DO UPDATE
|
||||
SET status = EXCLUDED.status WHERE data.catalog_discovery.status = 'pending';
|
||||
""")
|
||||
|
||||
await db.execute(query, {
|
||||
"make": make,
|
||||
"v_class": v_kind,
|
||||
"status": status
|
||||
})
|
||||
|
||||
await db.commit()
|
||||
logger.info("✅ A Discovery lista feltöltve és prioritizálva (Autók az élen)!")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Hiba: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(seed_with_priority())
|
||||
@@ -2,178 +2,207 @@ import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func, or_, text
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Robot1-Ghost-Commander-v1.1.9")
|
||||
logger = logging.getLogger("Robot-v1.0.13-Global-Hunter")
|
||||
|
||||
class CatalogScout:
|
||||
class CatalogMaster:
|
||||
"""
|
||||
Robot 1.1.9: Environment Master.
|
||||
- .env alapú hitelesítés (RDW App Token)
|
||||
- Prioritás: RDW (EU) -> NHTSA (US) -> CarQuery (Ban-figyeléssel)
|
||||
- 2.5s lekérési frissítés a biztonságért
|
||||
Master Hunter Robot v1.0.13 - Global Hunter Edition
|
||||
- Holland (RDW), Brit (DVLA) és Amerikai (NHTSA) adatbázis integráció.
|
||||
- Ratio-Filter: Kiszűri a 0.19-es kW/kg arányszámokat.
|
||||
- Multi-field Power Discovery: Minden lehetséges mezőből kinyeri a kW-ot.
|
||||
- Dinamikus évjárat kezelés a duplikációk ellen.
|
||||
"""
|
||||
|
||||
CQ_URL = "https://www.carqueryapi.com/api/0.3/"
|
||||
NHTSA_BASE = "https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeYear/make/"
|
||||
RDW_URL = "https://opendata.rdw.nl/resource/ed7h-m8uz.json"
|
||||
# API Végpontok
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
|
||||
RDW_AXLE = "https://opendata.rdw.nl/resource/3huj-srit.json"
|
||||
RDW_BODY = "https://opendata.rdw.nl/resource/vezc-m2t6.json"
|
||||
|
||||
# Adatok beolvasása környezeti változókból
|
||||
UK_DVLA = "https://driver-vehicle-licensing.api.gov.uk/vehicle-enquiry/v1/vehicles"
|
||||
US_NHTSA = "https://vpic.nhtsa.dot.gov/api/vehicles/DecodeVinValuesBatch/"
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
UK_API_KEY = os.getenv("UK_DVLA_API_KEY")
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
"Accept": "application/json",
|
||||
"X-App-Token": RDW_TOKEN
|
||||
HEADERS_RDW = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
HEADERS_UK = {"x-api-key": UK_API_KEY, "Content-Type": "application/json"} if UK_API_KEY else {}
|
||||
|
||||
CATEGORY_MAP = {
|
||||
"Personenauto": "car",
|
||||
"Motorfiets": "motorcycle",
|
||||
"Bedrijfsauto": "truck",
|
||||
"Vrachtwagen": "truck",
|
||||
"Opleggertrekker": "truck",
|
||||
"Bus": "bus",
|
||||
"Aanhangwagen": "trailer",
|
||||
"Oplegger": "trailer",
|
||||
"Landbouw- of bosbouwtrekker": "agricultural",
|
||||
"camper": "camper"
|
||||
}
|
||||
|
||||
# BAN FIGYELŐ ÁLLAPOT
|
||||
cq_banned_until = None
|
||||
|
||||
# --- KATEGÓRIA DEFINÍCIÓK (Szigorúan az eredeti lista szerint) ---
|
||||
MOTO_MAKES = ['ducati', 'ktm', 'triumph', 'aprilia', 'benelli', 'vespa', 'simson', 'mz', 'etz', 'jawa', 'husqvarna', 'gasgas', 'sherco']
|
||||
MARINE_IDS = ['DF', 'DT', 'OUTBOARD', 'MARINE', 'JET SKI', 'SEA-DOO', 'WAVERUNNER', 'YACHT', 'BOAT']
|
||||
AERIAL_IDS = ['CESSNA', 'PIPER', 'AIRBUS', 'BOEING', 'HELICOPTER', 'AIRCRAFT', 'BEECHCRAFT', 'EMBRAER', 'DRONE']
|
||||
ATV_IDS = ['LT-', 'LTZ', 'LTR', 'KINGQUAD', 'QUAD', 'POLARIS', 'CAN-AM', 'MULE', 'RZR', 'ARCTIC CAT', 'UTV', 'SIDE-BY-SIDE']
|
||||
RACING_IDS = ['RM-Z', 'KX', 'CRF', 'YZ', 'SX-F', 'XC-W', 'RM125', 'RM250', 'CR125', 'CR250', 'MC450']
|
||||
MOTO_KEYWORDS = ['CBR', 'GSX', 'YZF', 'NINJA', 'Z1000', 'DR-Z', 'MT-0', 'V-STROM', 'ADVENTURE', 'SCRAMBLER', 'CBF', 'VFR', 'HAYABUSA']
|
||||
BUS_KEYWORDS = ['BUS', 'COACH', 'INTERCITY', 'SHUTTLE', 'TRANSIT']
|
||||
TRUCK_KEYWORDS = ['TRUCK', 'SEMI', 'TRACTOR', 'HAULER', 'ACTROS', 'MAN', 'SCANIA', 'IVECO', 'VOLVO FH', 'DAF', 'TGX', 'RENAULT T']
|
||||
TRAILER_KEYWORDS = ['TRAILER', 'SEMITRAILER', 'PÓTKOCSI', 'UTÁNFUTÓ', 'SCHMITZ', 'KRONE', 'KÖGEL']
|
||||
|
||||
FALLBACK_BRANDS = ['Audi', 'BMW', 'Mercedes-Benz', 'Volkswagen', 'Toyota', 'Ford', 'Honda', 'Hyundai', 'Kia', 'Mazda', 'Nissan', 'Volvo', 'Skoda', 'Opel', 'Tesla', 'Lexus', 'Porsche', 'Dacia', 'Suzuki']
|
||||
|
||||
@classmethod
|
||||
def identify_class(cls, make: str, model: str) -> str:
|
||||
m_full = f"{str(make)} {str(model)}".upper()
|
||||
if any(x in m_full for x in cls.AERIAL_IDS): return "aerial"
|
||||
if any(x in m_full for x in cls.MARINE_IDS): return "marine"
|
||||
if any(x in m_full for x in cls.ATV_IDS): return "atv"
|
||||
if any(x in m_full or str(make).lower() in cls.MOTO_MAKES for x in (cls.RACING_IDS + cls.MOTO_KEYWORDS)):
|
||||
return "motorcycle"
|
||||
if any(x in m_full for x in cls.BUS_KEYWORDS): return "bus"
|
||||
if any(x in m_full for x in cls.TRUCK_KEYWORDS): return "truck"
|
||||
if any(x in m_full for x in cls.TRAILER_KEYWORDS): return "trailer"
|
||||
return "car"
|
||||
def clean_kw(cls, val):
|
||||
"""Speciális kW tisztító: ignorálja az 1.0 alatti arányszámokat."""
|
||||
try:
|
||||
if val is None: return None
|
||||
f_val = float(str(val).replace(',', '.'))
|
||||
if 0 < f_val < 1.0: return None # Ez csak arányszám (kW/kg)
|
||||
v = int(f_val)
|
||||
return v if v > 0 else None
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def fetch_api(cls, url, params=None, is_cq=False):
|
||||
if is_cq and cls.cq_banned_until and datetime.datetime.now() < cls.cq_banned_until:
|
||||
return "SILENT_SKIP"
|
||||
def clean_int(cls, val):
|
||||
"""Általános egész szám tisztító."""
|
||||
try:
|
||||
if val is None: return None
|
||||
return int(float(str(val).replace(',', '.')))
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
async with httpx.AsyncClient(headers=cls.HEADERS, follow_redirects=True) as client:
|
||||
@classmethod
|
||||
async def fetch_api(cls, url, params=None, headers=None, method="GET", json_data=None):
|
||||
"""Univerzális API hívó sebességkorlátozással."""
|
||||
async with httpx.AsyncClient(headers=headers, follow_redirects=True) as client:
|
||||
try:
|
||||
# CarQuery: 5.0mp szünet (Hard Ban ellen), többi: 2.5mp (User kérése szerint)
|
||||
await asyncio.sleep(5.0 if is_cq else 2.5)
|
||||
resp = await client.get(url, params=params, timeout=35)
|
||||
|
||||
if resp.status_code == 403 or "denied" in resp.text.lower():
|
||||
logger.error("🚫 CarQuery BAN! 2 óra kényszerpihenő aktiválva.")
|
||||
cls.cq_banned_until = datetime.datetime.now() + datetime.timedelta(hours=2)
|
||||
return "DENIED"
|
||||
|
||||
if resp.status_code != 200: return None
|
||||
content = resp.text.strip()
|
||||
if is_cq:
|
||||
match = re.search(r'(\{.*\}|\[.*\])', content, re.DOTALL)
|
||||
if match: content = match.group(0)
|
||||
return json.loads(content)
|
||||
await asyncio.sleep(1.2) # Biztonsági késleltetés
|
||||
if method == "POST":
|
||||
resp = await client.post(url, json=json_data, timeout=30)
|
||||
else:
|
||||
resp = await client.get(url, params=params, timeout=30)
|
||||
return resp.json() if resp.status_code in [200, 201] else []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ API hiba: {e}")
|
||||
return None
|
||||
logger.error(f"❌ API Hiba ({url}): {e}")
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
async def is_model_processed(cls, db: AsyncSession, make: str, model: str, year: int):
|
||||
stmt = select(AssetCatalog.id).where(AssetCatalog.make == make, AssetCatalog.model == model, AssetCatalog.year_from == year).limit(1)
|
||||
result = await db.execute(stmt)
|
||||
return result.scalars().first() is not None
|
||||
async def get_deep_tech(cls, plate, main_kw=None, vin=None):
|
||||
"""Nemzetközi dúsítás: Holland -> Brit -> Amerikai sorrendben."""
|
||||
res = {"kw": cls.clean_kw(main_kw), "fuel": "Unknown", "axles": None, "body": "Standard", "euro": None}
|
||||
|
||||
# 1. HOLLAND (RDW) DÚSÍTÁS
|
||||
fuel_data = await cls.fetch_api(cls.RDW_FUEL, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
if fuel_data:
|
||||
f0 = fuel_data[0]
|
||||
if not res["kw"]:
|
||||
res["kw"] = cls.clean_kw(f0.get("nettomaximumvermogen") or f0.get("netto_maximum_vermogen"))
|
||||
res["fuel"] = f0.get("brandstof_omschrijving", "Unknown")
|
||||
res["euro"] = f0.get("uitlaatemissieniveau")
|
||||
|
||||
# 2. BRIT (DVLA) ELLENŐRZÉS (Ha van UK kulcs és még hiányzik adat)
|
||||
if cls.UK_API_KEY and (not res["kw"] or not res["euro"]):
|
||||
uk_data = await cls.fetch_api(cls.UK_DVLA, method="POST", json_data={"registrationNumber": plate}, headers=cls.HEADERS_UK)
|
||||
if uk_data:
|
||||
res["kw"] = res["kw"] or cls.clean_kw(uk_data.get("engineCapacity")) # Brit adatok finomítása
|
||||
res["euro"] = res["euro"] or uk_data.get("euroStatus")
|
||||
|
||||
# 3. AMERIKAI (NHTSA) KUTATÁS (Ha van alvázszám)
|
||||
if vin and len(vin) == 17:
|
||||
us_data = await cls.fetch_api(cls.US_NHTSA, params={"format": "json", "data": vin})
|
||||
if us_data and "Results" in us_data:
|
||||
# Az amerikai adatbázisból kinyerjük a lóerőt (HP), ha a kW még mindig nincs meg
|
||||
hp = us_data["Results"][0].get("EngineHP")
|
||||
if hp and not res["kw"]:
|
||||
res["kw"] = int(float(hp) * 0.7457) # HP -> kW konverzió
|
||||
|
||||
# RDW Extra adatok (Tengely, Karosszéria)
|
||||
axle = await cls.fetch_api(cls.RDW_AXLE, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
if axle: res["axles"] = cls.clean_int(axle[0].get("aantal_assen"))
|
||||
|
||||
body = await cls.fetch_api(cls.RDW_BODY, {"kenteken": plate}, headers=cls.HEADERS_RDW)
|
||||
if body: res["body"] = body[0].get("carrosserie_omschrijving", "Standard")
|
||||
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
async def auto_heal(cls, db: AsyncSession, cq_active: bool):
|
||||
logger.info("🛠️ Auto-Heal: Hiányos rekordok dúsítása...")
|
||||
stmt = select(AssetCatalog).where(AssetCatalog.engine_variant == 'Standard', AssetCatalog.fuel_type == 'Unknown').limit(20)
|
||||
results = await db.execute(stmt)
|
||||
for r in results.scalars().all():
|
||||
# 1. RDW javítás (Holland Open Data + Token)
|
||||
rdw = await cls.fetch_api(cls.RDW_URL, {"merk": r.make.upper(), "handelsbenaming": r.model.upper(), "$limit": 1})
|
||||
if rdw and isinstance(rdw, list) and len(rdw) > 0:
|
||||
item = rdw[0]
|
||||
r.fuel_type = item.get("brandstof_omschrijving", "Unknown")
|
||||
r.factory_data.update({"hp": item.get("netto_maximum_vermogen"), "cc": item.get("cilinderinhoud"), "source": "heal_v1.9_rdw"})
|
||||
async def process_make(cls, db, task_id, make_name):
|
||||
logger.info(f"🚀 >>> {make_name} GlobalHunter v1.0.13 INDUL...")
|
||||
offset, limit, total_saved = 0, 1000, 0
|
||||
unique_variants = {}
|
||||
|
||||
while True:
|
||||
params = {"merk": make_name.upper(), "$limit": limit, "$offset": offset}
|
||||
main_data = await cls.fetch_api(cls.RDW_MAIN, params, headers=cls.HEADERS_RDW)
|
||||
if not main_data: break
|
||||
|
||||
for item in main_data:
|
||||
plate = item.get("kenteken")
|
||||
if not plate: continue
|
||||
|
||||
model = str(item.get("handelsbenaming", "Unknown")).upper()
|
||||
ccm = cls.clean_int(item.get("cilinderinhoud"))
|
||||
weight = cls.clean_int(item.get("massa_ledig_voertuig") or item.get("massa_rijklaar"))
|
||||
kw_candidate = item.get("netto_maximum_vermogen") or item.get("vermogen_massarijklaar")
|
||||
|
||||
raw_date = item.get("datum_eerste_toelating")
|
||||
prod_year = int(str(raw_date)[:4]) if raw_date else 2024
|
||||
|
||||
v_class = cls.CATEGORY_MAP.get(item.get("voertuigsoort"), "other")
|
||||
if "kampeerwagen" in str(item.get("inrichting", "")).lower(): v_class = "camper"
|
||||
|
||||
# Variáns kulcs: Modell + CCM + Súly + kW + Év = Egyedi technikai ujjlenyomat
|
||||
variant_key = f"{model}-{ccm}-{weight}-{v_class}-{kw_candidate}-{prod_year}"
|
||||
|
||||
if variant_key not in unique_variants:
|
||||
unique_variants[variant_key] = {
|
||||
"model": model, "ccm": ccm, "weight": weight, "v_class": v_class,
|
||||
"plate": plate, "main_kw": kw_candidate, "prod_year": prod_year,
|
||||
"vin": item.get("vin") # Ha az RDW-ben benne van a VIN
|
||||
}
|
||||
|
||||
if len(main_data) < limit or offset > 90000: break
|
||||
offset += limit
|
||||
|
||||
logger.info(f"📊 {len(unique_variants)} egyedi variáns kutatása indul...")
|
||||
|
||||
for key, v in unique_variants.items():
|
||||
deep = await cls.get_deep_tech(v["plate"], main_kw=v["main_kw"], vin=v["vin"])
|
||||
try:
|
||||
db_item = AssetCatalog(
|
||||
make=make_name.upper(), model=v["model"], vehicle_class=v["v_class"],
|
||||
fuel_type=deep["fuel"], power_kw=deep["kw"], engine_capacity=v["ccm"],
|
||||
max_weight_kg=v["weight"], axle_count=deep["axles"], body_type=deep["body"],
|
||||
year_from=v["prod_year"], euro_class=deep["euro"],
|
||||
factory_data={
|
||||
"source": "GlobalHunter-v1.0.13",
|
||||
"sample_plate": v["plate"],
|
||||
"enriched_at": str(datetime.datetime.now())
|
||||
}
|
||||
)
|
||||
db.add(db_item)
|
||||
await db.commit()
|
||||
total_saved += 1
|
||||
if total_saved % 50 == 0: logger.info(f"✅ {total_saved} variáns elmentve.")
|
||||
except Exception:
|
||||
await db.rollback()
|
||||
continue
|
||||
|
||||
# 2. CQ javítás (Ha nem vagyunk kitiltva)
|
||||
if cq_active:
|
||||
t_data = await cls.fetch_api(cls.CQ_URL, {"cmd": "getTrims", "make": r.make.lower(), "model": r.model, "year": r.year_from}, is_cq=True)
|
||||
if t_data and t_data not in ["DENIED", "SILENT_SKIP"] and "Trims" in t_data:
|
||||
t = t_data["Trims"][0]
|
||||
r.engine_variant = t.get("model_trim") or "Standard"
|
||||
r.factory_data.update({"hp": t.get("model_engine_power_ps"), "cc": t.get("model_engine_cc"), "source": "heal_v1.9_cq"})
|
||||
|
||||
await db.execute(text("UPDATE data.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
|
||||
await db.commit()
|
||||
logger.info(f"🏁 {make_name} KÉSZ. {total_saved} rekord rögzítve.")
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info(f"🤖 Robot 1.9.2 indítása (RDW Token: {'Aktív' if cls.RDW_TOKEN else 'HIÁNYZIK!'})")
|
||||
|
||||
for year in range(2026, 1989, -1):
|
||||
logger.info(f"📅 --- CIKLUS: {year} ---")
|
||||
|
||||
cq_now_active = not (cls.cq_banned_until and datetime.datetime.now() < cls.cq_banned_until)
|
||||
|
||||
logger.info("🤖 Robot 1.0.13 (Global Hunter) ONLINE")
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
await cls.auto_heal(db, cq_now_active)
|
||||
|
||||
# 1. MÁRKALISTA (NHTSA + Fallback)
|
||||
makes_to_process = []
|
||||
for b in cls.FALLBACK_BRANDS:
|
||||
makes_to_process.append({"id": b.lower(), "display": b})
|
||||
|
||||
for make in makes_to_process:
|
||||
models_to_fetch = set()
|
||||
|
||||
# A: NHTSA (US)
|
||||
n_data = await cls.fetch_api(f"{cls.NHTSA_BASE}{make['display']}/modelyear/{year}?format=json")
|
||||
if n_data and n_data.get("Results"):
|
||||
for r in n_data["Results"]: models_to_fetch.add(r["Model_Name"])
|
||||
|
||||
# B: RDW (Holland) - Tokennel védve
|
||||
rdw_m = await cls.fetch_api(cls.RDW_URL, {"merk": make['display'].upper(), "$limit": 30})
|
||||
if rdw_m and isinstance(rdw_m, list):
|
||||
for r in rdw_m: models_to_fetch.add(r.get("handelsbenaming"))
|
||||
|
||||
async with SessionLocal() as db:
|
||||
for model_name in models_to_fetch:
|
||||
if not model_name or await cls.is_model_processed(db, make["display"], model_name, year):
|
||||
continue
|
||||
|
||||
# C: CarQuery (Csak ha nincs ban)
|
||||
found_trims = []
|
||||
t_data = await cls.fetch_api(cls.CQ_URL, {"cmd": "getTrims", "make": make["id"], "model": model_name, "year": year}, is_cq=True)
|
||||
if t_data and t_data not in ["DENIED", "SILENT_SKIP"] and "Trims" in t_data:
|
||||
found_trims = t_data["Trims"]
|
||||
|
||||
if not found_trims:
|
||||
found_trims = [{"model_trim": "Standard", "model_engine_fuel": "Unknown"}]
|
||||
|
||||
for t in found_trims:
|
||||
db.add(AssetCatalog(
|
||||
make=make["display"], model=model_name, year_from=year,
|
||||
engine_variant=t.get("model_trim") or "Standard",
|
||||
fuel_type=t.get("model_engine_fuel") or "Unknown",
|
||||
vehicle_class=cls.identify_class(make["display"], model_name),
|
||||
factory_data={
|
||||
"hp": t.get("model_engine_power_ps"), "cc": t.get("model_engine_cc"),
|
||||
"source": "ghost_v1.9.2", "sync_date": str(datetime.datetime.now())
|
||||
}
|
||||
))
|
||||
await db.commit()
|
||||
res = await db.execute(text("SELECT id, make FROM data.catalog_discovery WHERE status = 'pending' LIMIT 1"))
|
||||
task = res.fetchone()
|
||||
if task:
|
||||
await cls.process_make(db, task[0], task[1])
|
||||
else:
|
||||
logger.info("😴 Várólista üres. Alvás 60 mp...")
|
||||
await asyncio.sleep(60)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogScout.run())
|
||||
asyncio.run(CatalogMaster.run())
|
||||
@@ -1,282 +1,161 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import uuid
|
||||
import os
|
||||
import sys
|
||||
import csv
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.orm import selectinload
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
# Modellek importálása
|
||||
from app.models.service import ServiceProfile, ExpertiseTag
|
||||
from app.models.organization import Organization, OrganizationFinancials, OrgType, OrgUserRole, OrganizationMember
|
||||
from app.models.identity import Person
|
||||
from app.models.address import Address, GeoPostalCode
|
||||
from geoalchemy2.elements import WKTElement
|
||||
from datetime import datetime, timezone
|
||||
# Modellek - Az új v1.3 struktúra
|
||||
from app.models.service import ServiceStaging, DiscoveryParameter
|
||||
|
||||
# Naplózás beállítása
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("Robot2-Dunakeszi-Detective")
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("Robot-v1.3-ContinentalScout")
|
||||
|
||||
class ServiceHunter:
|
||||
"""
|
||||
Robot 2.7.2: Dunakeszi Detective - Deep Model Integration.
|
||||
Logika:
|
||||
1. Helyi CSV (Saját beküldés - Cím alapú Geocoding-al - 50 pont Trust)
|
||||
2. OSM (Közösségi adat - 10 pont Trust)
|
||||
3. Google (Adatpótlás/Fallback - 30 pont Trust)
|
||||
Robot v1.3.0: Continental Scout.
|
||||
EU-szintű felderítő motor, Discovery tábla alapú vezérléssel.
|
||||
"""
|
||||
OVERPASS_URL = "http://overpass-api.de/api/interpreter"
|
||||
PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
|
||||
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
|
||||
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
||||
LOCAL_CSV_PATH = "/app/app/workers/local_services.csv"
|
||||
|
||||
@classmethod
|
||||
async def geocode_address(cls, address_text):
|
||||
"""Cím szövegből GPS koordinátát és címkomponenseket csinál."""
|
||||
if not cls.GOOGLE_API_KEY:
|
||||
logger.warning("⚠️ Google API kulcs hiányzik!")
|
||||
return None
|
||||
|
||||
params = {"address": address_text, "key": cls.GOOGLE_API_KEY}
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.get(cls.GEOCODE_URL, params=params, timeout=10)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
if data.get("results"):
|
||||
result = data["results"][0]
|
||||
loc = result["geometry"]["location"]
|
||||
|
||||
# Címkomponensek kinyerése a kötelező mezőkhöz
|
||||
components = result.get("address_components", [])
|
||||
parsed = {"lat": loc["lat"], "lng": loc["lng"], "zip": "", "city": "", "street": "Ismeretlen", "type": "utca", "number": "1"}
|
||||
|
||||
for c in components:
|
||||
types = c.get("types", [])
|
||||
if "postal_code" in types: parsed["zip"] = c["long_name"]
|
||||
if "locality" in types: parsed["city"] = c["long_name"]
|
||||
if "route" in types: parsed["street"] = c["long_name"]
|
||||
if "street_number" in types: parsed["number"] = c["long_name"]
|
||||
|
||||
logger.info(f"📍 Geocoding sikeres: {address_text}")
|
||||
return parsed
|
||||
else:
|
||||
logger.error(f"❌ Geocoding hiba: {resp.status_code}")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Geocoding hiba: {e}")
|
||||
return None
|
||||
async def get_coordinates(cls, city, country_code):
|
||||
"""Város központjának lekérése a keresés indításához."""
|
||||
params = {"address": f"{city}, {country_code}", "key": cls.GOOGLE_API_KEY}
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.get(cls.GEOCODE_URL, params=params)
|
||||
if resp.status_code == 200:
|
||||
results = resp.json().get("results")
|
||||
if results:
|
||||
loc = results[0]["geometry"]["location"]
|
||||
return loc["lat"], loc["lng"]
|
||||
return None, None
|
||||
|
||||
@classmethod
|
||||
async def get_google_place_details_new(cls, lat, lon):
|
||||
"""Google Places API (New) - Adatpótlás FieldMask használatával."""
|
||||
if not cls.GOOGLE_API_KEY:
|
||||
return None
|
||||
async def get_google_places(cls, lat, lon, keyword):
|
||||
"""Google Places New API - Javított, 400-as hiba elleni védelemmel."""
|
||||
if not cls.GOOGLE_API_KEY: return []
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.types,places.internationalPhoneNumber,places.websiteUri"
|
||||
"X-Goog-FieldMask": "places.displayName,places.id,places.types,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
|
||||
}
|
||||
|
||||
# A 'keyword' a TextQuery-hez kellene, a SearchNearby-nél típusokat (includedTypes) használunk.
|
||||
# EU szintű trükk: Ha nincs pontos típus, a 'car_repair' az alapértelmezett.
|
||||
payload = {
|
||||
"includedTypes": ["car_repair", "gas_station", "ev_charging_station", "car_wash", "motorcycle_repair"],
|
||||
"maxResultCount": 1,
|
||||
"includedTypes": ["car_repair", "gas_station", "car_wash", "motorcycle_repair"],
|
||||
"maxResultCount": 20,
|
||||
"locationRestriction": {
|
||||
"circle": {
|
||||
"center": {"latitude": lat, "longitude": lon},
|
||||
"radius": 40.0
|
||||
"radius": 5000.0 # 5km körzet
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers, timeout=10)
|
||||
if resp.status_code == 200:
|
||||
places = resp.json().get("places", [])
|
||||
if places:
|
||||
p = places[0]
|
||||
return {
|
||||
"name": p.get("displayName", {}).get("text"),
|
||||
"google_id": p.get("id"),
|
||||
"types": p.get("types", []),
|
||||
"phone": p.get("internationalPhoneNumber"),
|
||||
"website": p.get("websiteUri")
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Google kiegészítő hívás hiba: {e}")
|
||||
return None
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
|
||||
if resp.status_code == 200:
|
||||
return resp.json().get("places", [])
|
||||
else:
|
||||
logger.error(f"❌ Google API hiba ({resp.status_code}): {resp.text}")
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
async def import_local_csv(cls, db: AsyncSession):
|
||||
"""Manuális adatok betöltése CSV-ből."""
|
||||
if not os.path.exists(cls.LOCAL_CSV_PATH):
|
||||
return
|
||||
async def save_to_staging(cls, db: AsyncSession, data: dict):
|
||||
"""Mentés a Staging táblába 9-mezős bontással."""
|
||||
stmt = select(ServiceStaging).where(ServiceStaging.external_id == str(data['external_id']))
|
||||
if (await db.execute(stmt)).scalar_one_or_none(): return
|
||||
|
||||
try:
|
||||
with open(cls.LOCAL_CSV_PATH, mode='r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
geo_data = None
|
||||
if row.get('cim'):
|
||||
geo_data = await cls.geocode_address(row['cim'])
|
||||
|
||||
if geo_data:
|
||||
element = {
|
||||
"tags": {
|
||||
"name": row['nev'], "phone": row.get('telefon'),
|
||||
"website": row.get('web'), "amenity": row.get('tipus', 'car_repair'),
|
||||
"addr:full": row.get('cim'),
|
||||
"addr:city": geo_data["city"], "addr:zip": geo_data["zip"],
|
||||
"addr:street": geo_data["street"], "addr:type": geo_data["type"],
|
||||
"addr:number": geo_data["number"]
|
||||
},
|
||||
"lat": geo_data["lat"], "lon": geo_data["lng"]
|
||||
}
|
||||
await cls.save_service_deep(db, element, source="local_manual")
|
||||
logger.info("✅ Helyi CSV adatok feldolgozva.")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ CSV feldolgozási hiba: {e}")
|
||||
|
||||
@classmethod
|
||||
async def get_or_create_person(cls, db: AsyncSession, name: str) -> Person:
|
||||
"""Ghost Person kezelése."""
|
||||
names = name.split(' ', 1)
|
||||
last_name = names[0]
|
||||
first_name = names[1] if len(names) > 1 else "Ismeretlen"
|
||||
stmt = select(Person).where(Person.last_name == last_name, Person.first_name == first_name)
|
||||
result = await db.execute(stmt); person = result.scalar_one_or_none()
|
||||
if not person:
|
||||
person = Person(last_name=last_name, first_name=first_name, is_ghost=True, is_active=False)
|
||||
db.add(person); await db.flush()
|
||||
return person
|
||||
|
||||
@classmethod
|
||||
async def enrich_financials(cls, db: AsyncSession, org_id: int):
|
||||
"""Pénzügyi rekord inicializálása."""
|
||||
financial = OrganizationFinancials(
|
||||
organization_id=org_id, year=datetime.now(timezone.utc).year - 1, source="bot_discovery"
|
||||
new_entry = ServiceStaging(
|
||||
name=data['name'],
|
||||
source=data['source'],
|
||||
external_id=str(data['external_id']),
|
||||
# Itt történik a 9-mezős bontás (ha érkezik adat)
|
||||
postal_code=data.get('zip'),
|
||||
city=data.get('city'),
|
||||
street_name=data.get('street'),
|
||||
street_type=data.get('street_type', 'utca'),
|
||||
house_number=data.get('number'),
|
||||
full_address=data.get('full_address'),
|
||||
contact_phone=data.get('phone'),
|
||||
website=data.get('website'),
|
||||
raw_data=data.get('raw', {}),
|
||||
status="pending",
|
||||
trust_score=data.get('trust', 10)
|
||||
)
|
||||
db.add(financial)
|
||||
|
||||
@classmethod
|
||||
async def save_service_deep(cls, db: AsyncSession, element: dict, source="osm"):
|
||||
"""Mély mentés a modelled specifikus mezőneveivel és kötelező értékeivel."""
|
||||
tags = element.get("tags", {})
|
||||
lat, lon = element.get("lat"), element.get("lon")
|
||||
if not lat or not lon: return
|
||||
|
||||
osm_name = tags.get("name") or tags.get("brand") or tags.get("operator")
|
||||
google_data = None
|
||||
if not osm_name or osm_name.lower() in ['aprilia', 'bosch', 'shell', 'mol', 'omv', 'ismeretlen']:
|
||||
google_data = await cls.get_google_place_details_new(lat, lon)
|
||||
|
||||
final_name = (google_data["name"] if google_data else osm_name) or "Ismeretlen Szolgáltató"
|
||||
|
||||
stmt = select(Organization).where(Organization.full_name == final_name)
|
||||
result = await db.execute(stmt); org = result.scalar_one_or_none()
|
||||
|
||||
if not org:
|
||||
# 1. Address létrehozása (a kötelező mezőket kitöltjük az átadott tags-ből vagy alapértékkel)
|
||||
new_addr = Address(
|
||||
latitude=lat,
|
||||
longitude=lon,
|
||||
full_address_text=tags.get("addr:full") or f"2120 Dunakeszi, {tags.get('addr:street', 'Ismeretlen')} {tags.get('addr:housenumber', '1')}",
|
||||
street_name=tags.get("addr:street") or "Ismeretlen",
|
||||
street_type=tags.get("addr:type") or "utca",
|
||||
house_number=tags.get("addr:number") or tags.get("addr:housenumber") or "1"
|
||||
)
|
||||
db.add(new_addr); await db.flush()
|
||||
|
||||
# 2. Organization létrehozása (a modelled alapján ezek a mezők itt vannak)
|
||||
org = Organization(
|
||||
full_name=final_name,
|
||||
name=final_name[:50],
|
||||
org_type=OrgType.service,
|
||||
address_id=new_addr.id,
|
||||
address_city=tags.get("addr:city") or "Dunakeszi",
|
||||
address_zip=tags.get("addr:zip") or "2120",
|
||||
address_street_name=new_addr.street_name,
|
||||
address_street_type=new_addr.street_type,
|
||||
address_house_number=new_addr.house_number
|
||||
)
|
||||
db.add(org); await db.flush()
|
||||
|
||||
# 3. Service Profile
|
||||
trust = 50 if source == "local_manual" else (30 if google_data else 10)
|
||||
spec = {"brands": [], "types": google_data["types"] if google_data else [], "osm_tags": tags}
|
||||
if tags.get("brand"): spec["brands"].append(tags.get("brand"))
|
||||
|
||||
profile = ServiceProfile(
|
||||
organization_id=org.id,
|
||||
location=WKTElement(f'POINT({lon} {lat})', srid=4326),
|
||||
status="ghost",
|
||||
trust_score=trust,
|
||||
google_place_id=google_data["google_id"] if google_data else None,
|
||||
specialization_tags=spec,
|
||||
website=google_data["website"] if google_data else tags.get("website"),
|
||||
contact_phone=google_data["phone"] if google_data else tags.get("phone")
|
||||
)
|
||||
db.add(profile)
|
||||
|
||||
# 4. Tulajdonos rögzítése
|
||||
owner_name = tags.get("operator") or tags.get("contact:person")
|
||||
if owner_name and len(owner_name) > 3:
|
||||
person = await cls.get_or_create_person(db, owner_name)
|
||||
db.add(OrganizationMember(
|
||||
organization_id=org.id,
|
||||
person_id=person.id,
|
||||
role=OrgUserRole.OWNER,
|
||||
is_verified=False
|
||||
))
|
||||
|
||||
await cls.enrich_financials(db, org.id)
|
||||
await db.flush()
|
||||
logger.info(f"✨ [{source.upper()}] Mentve: {final_name} (Bizalom: {trust})")
|
||||
db.add(new_entry)
|
||||
|
||||
@classmethod
|
||||
async def run(cls):
|
||||
logger.info("🤖 Robot 2.7.2: Dunakeszi Detective indítása...")
|
||||
logger.info("🤖 Robot v1.3.0: Continental Scout elindult...")
|
||||
|
||||
# Kapcsolódási védelem
|
||||
connected = False
|
||||
while not connected:
|
||||
try:
|
||||
async with SessionLocal() as db:
|
||||
await db.execute(text("SELECT 1"))
|
||||
connected = True
|
||||
except Exception as e:
|
||||
logger.warning(f"⏳ Várakozás a hálózatra (shared-postgres host?): {e}")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
while True:
|
||||
async with SessionLocal() as db:
|
||||
try:
|
||||
await db.execute(text("SET search_path TO data, public"))
|
||||
# 1. Beküldött CSV feldolgozása (Geocoding-al)
|
||||
await cls.import_local_csv(db)
|
||||
await db.commit()
|
||||
|
||||
# 2. OSM Szkennelés
|
||||
query = """[out:json][timeout:120];area["name"="Dunakeszi"]->.city;(nwr["shop"~"car_repair|motorcycle_repair|tyres|car_parts|motorcycle"](area.city);nwr["amenity"~"car_repair|vehicle_inspection|motorcycle_repair|fuel|charging_station|car_wash"](area.city);nwr["amenity"~"car_repair|fuel|charging_station"](around:5000, 47.63, 19.13););out center;"""
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.OVERPASS_URL, data={"data": query}, timeout=120)
|
||||
if resp.status_code == 200:
|
||||
elements = resp.json().get("elements", [])
|
||||
for el in elements:
|
||||
await cls.save_service_deep(db, el, source="osm")
|
||||
await db.commit()
|
||||
# 1. Paraméterek lekérése a táblából
|
||||
stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
|
||||
tasks = (await db.execute(stmt)).scalars().all()
|
||||
|
||||
for task in tasks:
|
||||
logger.info(f"🔎 Felderítés: {task.city} ({task.country_code}) -> {task.keyword}")
|
||||
|
||||
# Koordináták beszerzése a kereséshez
|
||||
lat, lon = await cls.get_coordinates(task.city, task.country_code)
|
||||
if not lat: continue
|
||||
|
||||
# --- GOOGLE FÁZIS ---
|
||||
google_places = await cls.get_google_places(lat, lon, task.keyword)
|
||||
for p in google_places:
|
||||
await cls.save_to_staging(db, {
|
||||
"external_id": p.get('id'),
|
||||
"name": p.get('displayName', {}).get('text'),
|
||||
"full_address": p.get('formattedAddress'),
|
||||
"phone": p.get('internationalPhoneNumber'),
|
||||
"website": p.get('websiteUri'),
|
||||
"source": "google",
|
||||
"raw": p,
|
||||
"trust": 30
|
||||
})
|
||||
|
||||
# --- OSM FÁZIS (EU kompatibilis lekérdezés) ---
|
||||
osm_query = f"""[out:json][timeout:60];
|
||||
(nwr["amenity"~"car_repair|fuel"](around:5000, {lat}, {lon}););
|
||||
out center;"""
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(cls.OVERPASS_URL, data={"data": osm_query})
|
||||
if resp.status_code == 200:
|
||||
for el in resp.json().get("elements", []):
|
||||
t = el.get("tags", {})
|
||||
await cls.save_to_staging(db, {
|
||||
"external_id": f"osm_{el['id']}",
|
||||
"name": t.get('name', 'Ismeretlen szerviz'),
|
||||
"city": t.get('addr:city', task.city),
|
||||
"zip": t.get('addr:postcode'),
|
||||
"street": t.get('addr:street'),
|
||||
"number": t.get('addr:housenumber'),
|
||||
"source": "osm",
|
||||
"raw": el,
|
||||
"trust": 15
|
||||
})
|
||||
|
||||
task.last_run_at = datetime.now(timezone.utc)
|
||||
await db.commit()
|
||||
logger.info(f"✅ {task.city} felderítve.")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Futáshiba: {e}")
|
||||
logger.error(f"💥 Kritikus hiba a ciklusban: {e}")
|
||||
|
||||
logger.info("😴 Scan kész, 24 óra pihenő...")
|
||||
await asyncio.sleep(86400)
|
||||
logger.info("😴 Minden aktív feladat kész. Alvás 1 órán át...")
|
||||
await asyncio.sleep(3600)
|
||||
|
||||
# Entry point when executed directly (e.g. as a container worker process).
if __name__ == "__main__":
    asyncio.run(ServiceHunter.run())
|
||||
282
backend/app/workers/service_hunter_old.py
Normal file
282
backend/app/workers/service_hunter_old.py
Normal file
@@ -0,0 +1,282 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import uuid
|
||||
import os
|
||||
import sys
|
||||
import csv
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.orm import selectinload
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
# Modellek importálása
|
||||
from app.models.service import ServiceProfile, ExpertiseTag
|
||||
from app.models.organization import Organization, OrganizationFinancials, OrgType, OrgUserRole, OrganizationMember
|
||||
from app.models.identity import Person
|
||||
from app.models.address import Address, GeoPostalCode
|
||||
from geoalchemy2.elements import WKTElement
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Logging setup: module-level logger used by the whole worker.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot2-Dunakeszi-Detective")
|
||||
|
||||
class ServiceHunter:
    """
    Robot 2.7.2: Dunakeszi Detective - Deep Model Integration.

    Discovers vehicle-service businesses and persists them as "ghost"
    records (Address + Organization + ServiceProfile + optional owner).

    Data sources, in priority order, with the trust score assigned to each:
    1. Local CSV (own submissions, geocoded from the address text - trust 50)
    2. OSM / Overpass (community data - trust 10)
    3. Google Places (data enrichment / fallback - trust 30)
    """
    # External endpoints and runtime configuration.
    OVERPASS_URL = "http://overpass-api.de/api/interpreter"
    PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
    GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    # NOTE(review): path is container-absolute — assumes the /app layout; verify in deployment.
    LOCAL_CSV_PATH = "/app/app/workers/local_services.csv"

    @classmethod
    async def geocode_address(cls, address_text):
        """Turn a free-text address into GPS coordinates plus address components.

        Returns a dict with lat/lng/zip/city/street/type/number on success,
        or None when the key is missing, the call fails, or nothing matches.
        """
        if not cls.GOOGLE_API_KEY:
            logger.warning("⚠️ Google API kulcs hiányzik!")
            return None

        params = {"address": address_text, "key": cls.GOOGLE_API_KEY}
        try:
            async with httpx.AsyncClient() as client:
                resp = await client.get(cls.GEOCODE_URL, params=params, timeout=10)
                if resp.status_code == 200:
                    data = resp.json()
                    if data.get("results"):
                        result = data["results"][0]
                        loc = result["geometry"]["location"]

                        # Extract address components for the mandatory model fields;
                        # defaults cover components Google did not return.
                        components = result.get("address_components", [])
                        parsed = {"lat": loc["lat"], "lng": loc["lng"], "zip": "", "city": "", "street": "Ismeretlen", "type": "utca", "number": "1"}

                        for c in components:
                            types = c.get("types", [])
                            if "postal_code" in types: parsed["zip"] = c["long_name"]
                            if "locality" in types: parsed["city"] = c["long_name"]
                            if "route" in types: parsed["street"] = c["long_name"]
                            if "street_number" in types: parsed["number"] = c["long_name"]

                        logger.info(f"📍 Geocoding sikeres: {address_text}")
                        return parsed
                else:
                    logger.error(f"❌ Geocoding hiba: {resp.status_code}")
        except Exception as e:
            logger.error(f"❌ Geocoding hiba: {e}")
        return None

    @classmethod
    async def get_google_place_details_new(cls, lat, lon):
        """Google Places API (New) - enrichment lookup using a FieldMask.

        Queries a tight 40 m circle around (lat, lon) for at most one vehicle-
        related place; returns a small dict of its fields, or None.
        """
        if not cls.GOOGLE_API_KEY:
            return None

        headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": cls.GOOGLE_API_KEY,
            # FieldMask keeps the response (and billing tier) minimal.
            "X-Goog-FieldMask": "places.displayName,places.id,places.types,places.internationalPhoneNumber,places.websiteUri"
        }

        payload = {
            "includedTypes": ["car_repair", "gas_station", "ev_charging_station", "car_wash", "motorcycle_repair"],
            "maxResultCount": 1,
            "locationRestriction": {
                "circle": {
                    "center": {"latitude": lat, "longitude": lon},
                    "radius": 40.0
                }
            }
        }

        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers, timeout=10)
                if resp.status_code == 200:
                    places = resp.json().get("places", [])
                    if places:
                        p = places[0]
                        return {
                            "name": p.get("displayName", {}).get("text"),
                            "google_id": p.get("id"),
                            "types": p.get("types", []),
                            "phone": p.get("internationalPhoneNumber"),
                            "website": p.get("websiteUri")
                        }
        except Exception as e:
            logger.error(f"❌ Google kiegészítő hívás hiba: {e}")
        return None

    @classmethod
    async def import_local_csv(cls, db: AsyncSession):
        """Load manually collected rows from the local CSV file.

        Each row with a 'cim' (address) column is geocoded and, on success,
        converted to an OSM-like element and saved via save_service_deep.
        """
        if not os.path.exists(cls.LOCAL_CSV_PATH):
            return

        try:
            with open(cls.LOCAL_CSV_PATH, mode='r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    geo_data = None
                    if row.get('cim'):
                        geo_data = await cls.geocode_address(row['cim'])

                    # Rows that cannot be geocoded are skipped silently.
                    if geo_data:
                        element = {
                            "tags": {
                                "name": row['nev'], "phone": row.get('telefon'),
                                "website": row.get('web'), "amenity": row.get('tipus', 'car_repair'),
                                "addr:full": row.get('cim'),
                                "addr:city": geo_data["city"], "addr:zip": geo_data["zip"],
                                "addr:street": geo_data["street"], "addr:type": geo_data["type"],
                                "addr:number": geo_data["number"]
                            },
                            "lat": geo_data["lat"], "lon": geo_data["lng"]
                        }
                        await cls.save_service_deep(db, element, source="local_manual")
            logger.info("✅ Helyi CSV adatok feldolgozva.")
        except Exception as e:
            logger.error(f"❌ CSV feldolgozási hiba: {e}")

    @classmethod
    async def get_or_create_person(cls, db: AsyncSession, name: str) -> Person:
        """Ghost-Person handling: find an existing Person by name or create one.

        Splits *name* as "<last> <first...>" (Hungarian name order); a newly
        created Person is flagged as an inactive ghost record.
        """
        names = name.split(' ', 1)
        last_name = names[0]
        first_name = names[1] if len(names) > 1 else "Ismeretlen"
        stmt = select(Person).where(Person.last_name == last_name, Person.first_name == first_name)
        result = await db.execute(stmt); person = result.scalar_one_or_none()
        if not person:
            person = Person(last_name=last_name, first_name=first_name, is_ghost=True, is_active=False)
            db.add(person); await db.flush()
        return person

    @classmethod
    async def enrich_financials(cls, db: AsyncSession, org_id: int):
        """Initialise an (empty) financial record for the previous calendar year."""
        financial = OrganizationFinancials(
            organization_id=org_id, year=datetime.now(timezone.utc).year - 1, source="bot_discovery"
        )
        db.add(financial)

    @classmethod
    async def save_service_deep(cls, db: AsyncSession, element: dict, source="osm"):
        """Deep save using the project model's field names and required values.

        Creates Address, Organization, ServiceProfile, optional owner member
        and a financials stub for a discovered element — but only when no
        Organization with the same full_name exists yet (name-based dedupe).
        """
        tags = element.get("tags", {})
        lat, lon = element.get("lat"), element.get("lon")
        if not lat or not lon: return

        # Prefer the explicit name; fall back to brand/operator tags.
        osm_name = tags.get("name") or tags.get("brand") or tags.get("operator")
        google_data = None
        # Ask Google only when OSM gives no name or just a generic brand name.
        if not osm_name or osm_name.lower() in ['aprilia', 'bosch', 'shell', 'mol', 'omv', 'ismeretlen']:
            google_data = await cls.get_google_place_details_new(lat, lon)

        final_name = (google_data["name"] if google_data else osm_name) or "Ismeretlen Szolgáltató"

        stmt = select(Organization).where(Organization.full_name == final_name)
        result = await db.execute(stmt); org = result.scalar_one_or_none()

        if not org:
            # 1. Create the Address (required fields filled from tags or defaults).
            new_addr = Address(
                latitude=lat,
                longitude=lon,
                full_address_text=tags.get("addr:full") or f"2120 Dunakeszi, {tags.get('addr:street', 'Ismeretlen')} {tags.get('addr:housenumber', '1')}",
                street_name=tags.get("addr:street") or "Ismeretlen",
                street_type=tags.get("addr:type") or "utca",
                house_number=tags.get("addr:number") or tags.get("addr:housenumber") or "1"
            )
            db.add(new_addr); await db.flush()

            # 2. Create the Organization (these denormalised address fields
            # live on the Organization per the project model).
            org = Organization(
                full_name=final_name,
                name=final_name[:50],
                org_type=OrgType.service,
                address_id=new_addr.id,
                address_city=tags.get("addr:city") or "Dunakeszi",
                address_zip=tags.get("addr:zip") or "2120",
                address_street_name=new_addr.street_name,
                address_street_type=new_addr.street_type,
                address_house_number=new_addr.house_number
            )
            db.add(org); await db.flush()

            # 3. Service profile: trust score depends on the data source.
            trust = 50 if source == "local_manual" else (30 if google_data else 10)
            spec = {"brands": [], "types": google_data["types"] if google_data else [], "osm_tags": tags}
            if tags.get("brand"): spec["brands"].append(tags.get("brand"))

            profile = ServiceProfile(
                organization_id=org.id,
                # WKT point is (lon lat) order, per the WKT convention.
                location=WKTElement(f'POINT({lon} {lat})', srid=4326),
                status="ghost",
                trust_score=trust,
                google_place_id=google_data["google_id"] if google_data else None,
                specialization_tags=spec,
                website=google_data["website"] if google_data else tags.get("website"),
                contact_phone=google_data["phone"] if google_data else tags.get("phone")
            )
            db.add(profile)

            # 4. Record the owner when OSM tags name a plausible person.
            owner_name = tags.get("operator") or tags.get("contact:person")
            if owner_name and len(owner_name) > 3:
                person = await cls.get_or_create_person(db, owner_name)
                db.add(OrganizationMember(
                    organization_id=org.id,
                    person_id=person.id,
                    role=OrgUserRole.OWNER,
                    is_verified=False
                ))

            await cls.enrich_financials(db, org.id)
            await db.flush()
            logger.info(f"✨ [{source.upper()}] Mentve: {final_name} (Bizalom: {trust})")

    @classmethod
    async def run(cls):
        """Main loop: wait for the DB, then CSV import + OSM scan every 24 h."""
        logger.info("🤖 Robot 2.7.2: Dunakeszi Detective indítása...")

        # Connection guard: block until the database answers a trivial query.
        connected = False
        while not connected:
            try:
                async with SessionLocal() as db:
                    await db.execute(text("SELECT 1"))
                    connected = True
            except Exception as e:
                logger.warning(f"⏳ Várakozás a hálózatra (shared-postgres host?): {e}")
                await asyncio.sleep(5)

        while True:
            async with SessionLocal() as db:
                try:
                    await db.execute(text("SET search_path TO data, public"))
                    # 1. Process the submitted CSV (with geocoding).
                    await cls.import_local_csv(db)
                    await db.commit()

                    # 2. OSM scan: named Dunakeszi area plus a 5 km radius fallback.
                    query = """[out:json][timeout:120];area["name"="Dunakeszi"]->.city;(nwr["shop"~"car_repair|motorcycle_repair|tyres|car_parts|motorcycle"](area.city);nwr["amenity"~"car_repair|vehicle_inspection|motorcycle_repair|fuel|charging_station|car_wash"](area.city);nwr["amenity"~"car_repair|fuel|charging_station"](around:5000, 47.63, 19.13););out center;"""
                    async with httpx.AsyncClient() as client:
                        resp = await client.post(cls.OVERPASS_URL, data={"data": query}, timeout=120)
                        if resp.status_code == 200:
                            elements = resp.json().get("elements", [])
                            for el in elements:
                                await cls.save_service_deep(db, el, source="osm")
                            await db.commit()
                except Exception as e:
                    logger.error(f"❌ Futáshiba: {e}")

            logger.info("😴 Scan kész, 24 óra pihenő...")
            await asyncio.sleep(86400)
|
||||
|
||||
# Entry point when executed directly (e.g. as a container worker process).
if __name__ == "__main__":
    asyncio.run(ServiceHunter.run())
|
||||
125
backend/app/workers/technical_enricher.py
Normal file
125
backend/app/workers/technical_enricher.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import datetime
|
||||
from sqlalchemy import text
|
||||
from app.db.session import SessionLocal
|
||||
|
||||
# Logging setup: module-level logger used by the enricher worker.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-v1.0.4-Master-Enricher")
|
||||
|
||||
class TechEnricher:
    """
    Master Enricher v1.0.4

    - Target: kyri-nuah (RDW Technical Catalogue)
    - Enriches data.vehicle_catalog rows with fuel type, power, engine
      capacity, max weight, axle count and Euro class fetched from RDW.
    """

    API_URL = "https://opendata.rdw.nl/resource/kyri-nuah.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    # An app token lifts the anonymous rate limit; optional.
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    @classmethod
    async def fetch_tech_data(cls, make, model):
        """Fetch one technical record for (make, model) from the RDW API.

        Returns the first matching JSON record as a dict, or None when the
        model name is unusable, nothing matches, or the request fails.
        """
        # Cleanup: if the model name repeats the make, strip it out.
        clean_model = str(model).upper().replace(str(make).upper(), "").strip()

        # RDW rejects model names that are empty or too short after cleanup.
        if len(clean_model) < 2:
            return None

        # ATTEMPT 1: query by the 'merk' field (the most common schema).
        params = {
            "merk": make.upper(),
            "handelsbenaming": clean_model,
            "$limit": 1
        }

        async with httpx.AsyncClient(headers=cls.HEADERS) as client:
            try:
                # Throttle to stay well under the RDW rate limit.
                await asyncio.sleep(1.1)
                resp = await client.get(cls.API_URL, params=params, timeout=20)

                # If 'merk' is rejected (HTTP 400), retry with 'merknaam'.
                # BUGFIX: the retry previously used cls.TECH_API_URL, which is
                # not defined on this class (the attribute is API_URL), so the
                # fallback path raised AttributeError and never ran.
                if resp.status_code == 400:
                    params = {"merknaam": make.upper(), "handelsbenaming": clean_model, "$limit": 1}
                    resp = await client.get(cls.API_URL, params=params, timeout=20)

                if resp.status_code == 200:
                    data = resp.json()
                    return data[0] if data else None

                return None
            except Exception as e:
                logger.error(f"❌ API Hiba: {e}")
                return None

    @classmethod
    async def run(cls):
        """Endless worker loop.

        Picks up to 20 catalogue rows whose technical columns are still
        empty (or previously failed), enriches each from the RDW API and
        persists the result; sleeps 5 minutes when there is nothing to do.
        """
        logger.info("🚀 Master Enricher v1.0.4 - Új oszlopok töltése indul...")

        while True:
            async with SessionLocal() as db:
                # Rows whose new columns are still unfilled.
                query = text("""
                    SELECT id, make, model
                    FROM data.vehicle_catalog
                    WHERE fuel_type IS NULL OR fuel_type = 'Pending' OR fuel_type LIKE 'No-Tech%'
                    LIMIT 20
                """)
                res = await db.execute(query)
                tasks = res.fetchall()

                if not tasks:
                    logger.info("😴 Minden adat kész. Alvás 5 perc...")
                    await asyncio.sleep(300)
                    continue

                for t_id, make, model in tasks:
                    logger.info(f"🧪 Gazdagítás: {make} | {model}")
                    tech = await cls.fetch_tech_data(make, model)

                    if tech:
                        # Extract the RDW fields of interest.
                        kw = tech.get("netto_maximum_vermogen_kw")
                        ccm = tech.get("cilinderinhoud")
                        weight = tech.get("technisch_toelaatbare_maximum_massa")
                        axles = tech.get("aantal_assen")
                        euro = tech.get("milieuklasse_eg_goedkeuring_licht")
                        fuel = tech.get("brandstof_omschrijving_brandstof_stam", "Standard")

                        # Safe numeric conversion; RDW delivers numbers as strings.
                        # Narrowed from a bare except: only conversion errors
                        # should yield None, anything else should surface.
                        def clean_num(v):
                            try:
                                return int(float(v)) if v else None
                            except (TypeError, ValueError):
                                return None

                        update_query = text("""
                            UPDATE data.vehicle_catalog
                            SET fuel_type = :fuel,
                                power_kw = :kw,
                                engine_capacity = :ccm,
                                max_weight_kg = :weight,
                                axle_count = :axles,
                                euro_class = :euro,
                                factory_data = factory_data || jsonb_build_object('enriched_at', :now)
                            WHERE id = :id
                        """)

                        await db.execute(update_query, {
                            "fuel": fuel, "kw": clean_num(kw), "ccm": clean_num(ccm),
                            "weight": clean_num(weight), "axles": clean_num(axles),
                            "euro": str(euro) if euro else None,
                            "id": t_id, "now": str(datetime.datetime.now())
                        })
                        await db.commit()
                        logger.info(f"✅ OK: {make} {model} -> {kw}kW")
                    else:
                        # No match: mark the row so we do not retry it immediately.
                        await db.execute(text("UPDATE data.vehicle_catalog SET fuel_type = 'No-Tech-V4' WHERE id = :id"), {"id": t_id})
                        await db.commit()

                    # Small extra pause between items to be polite to the APIs.
                    await asyncio.sleep(0.5)
|
||||
|
||||
# Entry point when executed directly (e.g. as a container worker process).
if __name__ == "__main__":
    asyncio.run(TechEnricher.run())
|
||||
Reference in New Issue
Block a user