refaktorálás előtti állapot
This commit is contained in:
121
backend/app/workers/vehicle/vehicle_data_loader.py
Normal file
121
backend/app/workers/vehicle/vehicle_data_loader.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
# MB 2.0 logging settings (named logger + basic INFO-level console format).
logger = logging.getLogger("Data-Loader-Master")
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
||||
|
||||
class VehicleDataLoader:
    """Downloads public vehicle make/model datasets and upserts them into
    the ``data.reference_lookup`` table.

    Flow: ``run_sync`` fetches each source URL, ``map_source_data`` converts
    the source-specific JSON shape into unified row dicts, and ``save_to_db``
    batch-upserts them.
    """

    # Verified, working sources (source name -> raw JSON URL).
    SOURCES = {
        "car-data-kohut": "https://raw.githubusercontent.com/DanielKohut/car-data/master/car_data.json",
        "car-list-matth": "https://raw.githubusercontent.com/matthlavacka/car-list/master/car-list.json"
    }

    # Both feeds above share the same list[{"brand", "models"}] shape and
    # differ only in the tag written into the "specs" JSON column.
    _SPEC_TAGS = {"car-data-kohut": "kohut", "car-list-matth": "matthlavacka"}

    # Number of rows sent to the database per executemany/commit cycle.
    _BATCH_SIZE = 1000

    @staticmethod
    def normalize_name(name: str) -> str:
        """Basic text cleanup: uppercase, strip whitespace, unify synonyms.

        Falsy input (None, "") yields the empty string.
        """
        if not name:
            return ""
        n = str(name).upper().strip()
        # Unify common brand synonyms onto a single canonical spelling.
        synonyms = {"VW": "VOLKSWAGEN", "MERCEDES": "MERCEDES-BENZ", "ALFA": "ALFA ROMEO"}
        return synonyms.get(n, n)

    async def fetch_json(self, url: str):
        """Safe download with timeout handling.

        Returns the parsed JSON body on HTTP 200, otherwise logs the
        failure and returns None.
        """
        async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
            try:
                resp = await client.get(url)
                if resp.status_code == 200:
                    return resp.json()
                logger.error(f"❌ Letöltési hiba ({resp.status_code}): {url}")
            except Exception as e:
                logger.error(f"🚨 Kommunikációs hiba: {url} -> {e}")
        return None

    def _map_brand_models(self, source_name, raw_data, spec_tag):
        """Map the list[{"brand": ..., "models": [...]}] feed shape shared by
        the kohut and matthlavacka sources into unified row dicts."""
        # The specs payload is identical for every row of one source;
        # serialize it once outside the loop.
        specs = json.dumps({"source": spec_tag})
        entries = []
        for brand_item in raw_data:
            make = self.normalize_name(brand_item.get("brand"))
            for model in brand_item.get("models", []):
                entries.append({
                    "make": make, "model": self.normalize_name(model),
                    "year": None, "specs": specs,
                    "source": source_name, "source_id": None
                })
        return entries

    def map_source_data(self, source_name, raw_data):
        """
        Mapping layer: translates the JSON structure of the different sources
        into our unified data.reference_lookup schema.

        Returns a list of row dicts (keys: make, model, year, specs, source,
        source_id). Mapping errors are logged and whatever was collected so
        far is returned; unknown source names yield an empty list.
        """
        unified_entries = []
        try:
            if source_name in self._SPEC_TAGS:
                # Shared shape: list[{"brand": "...", "models": ["...", ...]}]
                unified_entries = self._map_brand_models(
                    source_name, raw_data, self._SPEC_TAGS[source_name])

            elif source_name == "os-vehicle-db":
                # Shape: list[{"make": "...", "model": "...", "year": ...}]
                for item in raw_data:
                    unified_entries.append({
                        "make": self.normalize_name(item.get("make")),
                        "model": self.normalize_name(item.get("model")),
                        "year": item.get("year"),
                        "specs": json.dumps(item),
                        "source": source_name,
                        "source_id": str(item.get("id", ""))
                    })
        except Exception as e:
            logger.error(f"⚠️ Mapping hiba a(z) {source_name} feldolgozásakor: {e}")

        return unified_entries

    async def save_to_db(self, entries):
        """Batch upsert into data.reference_lookup.

        Sends the rows in executemany batches of ``_BATCH_SIZE`` and commits
        per batch; on error the current batch's transaction is rolled back
        and the error is logged.
        """
        if not entries:
            return

        async with AsyncSessionLocal() as db:
            stmt = text("""
                INSERT INTO data.reference_lookup (make, model, year, specs, source, source_id)
                VALUES (:make, :model, :year, :specs, :source, :source_id)
                ON CONFLICT ON CONSTRAINT _ref_lookup_uc
                DO UPDATE SET specs = EXCLUDED.specs, updated_at = NOW()
            """)

            try:
                for i in range(0, len(entries), self._BATCH_SIZE):
                    batch = entries[i:i + self._BATCH_SIZE]
                    # executemany: pass the whole batch as a list of parameter
                    # dicts instead of one execute() round-trip per row.
                    await db.execute(stmt, batch)
                    await db.commit()
                    logger.info(f"💾 Mentve: {min(i + self._BATCH_SIZE, len(entries))} / {len(entries)}")
            except Exception as e:
                await db.rollback()
                logger.error(f"🚨 Adatbázis hiba: {e}")

    async def run_sync(self):
        """Full synchronization pass: fetch, map and persist every source."""
        logger.info("🚀 Data Loader (Fast Lane Support) elindul...")
        for name, url in self.SOURCES.items():
            raw_json = await self.fetch_json(url)
            if raw_json:
                logger.info(f"🧹 {name} feldolgozása...")
                unified = self.map_source_data(name, raw_json)
                await self.save_to_db(unified)
        logger.info("🏁 Minden forrás szinkronizálva.")
|
||||
if __name__ == "__main__":
    # Script entry point: run a single full synchronization pass.
    loader = VehicleDataLoader()
    asyncio.run(loader.run_sync())
|
||||
Reference in New Issue
Block a user