admin first step
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/ultimatespecs/vehicle_ultimate_r0_spider.py
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Worker: vehicle_ultimate_r0_spider
|
||||
@@ -32,7 +33,7 @@ logging.basicConfig(
|
||||
logger = logging.getLogger("R0-SPIDER")
|
||||
|
||||
# Konfiguráció
|
||||
SLEEP_INTERVAL = random.uniform(3, 6) # 3-6 mp között várakozás
|
||||
SLEEP_INTERVAL = random.uniform(1, 2) # 1-2 mp között várakozás
|
||||
MAX_RETRIES = 3
|
||||
BASE_URL = "https://www.ultimatespecs.com/index.php?q={query}"
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/ultimatespecs/vehicle_ultimate_r1_scraper.py
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Worker: vehicle_ultimate_r1_scraper
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/ultimatespecs/vehicle_ultimate_r2_enricher.py
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Worker: vehicle_ultimate_r2_enricher
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/ultimatespecs/vehicle_ultimate_r3_finalizer.py
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Worker: vehicle_ultimate_r3_finalizer
|
||||
@@ -389,7 +390,7 @@ def main():
|
||||
# Fő ciklus indítása - korlátozott számú iterációval teszteléshez
|
||||
try:
|
||||
# Teszteléshez: maximum 5 iteráció
|
||||
asyncio.run(finalizer.run(max_iterations=5))
|
||||
asyncio.run(finalizer.run(max_iterations=sys.maxsize))
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Keyboard interrupt received, shutting down...")
|
||||
finally:
|
||||
|
||||
60
backend/app/workers/vehicle/vehicle_efficiency_optimizer.py
Normal file
60
backend/app/workers/vehicle/vehicle_efficiency_optimizer.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import asyncio
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [OPTIMIZER] %(message)s')
|
||||
logger = logging.getLogger("Efficiency-Optimizer")
|
||||
|
||||
async def optimize_queue():
    """Run the two-phase, AI-free clean-up of the vehicle definition queue.

    Phase 1 ("auto-gold") promotes rows in ``awaiting_ai_synthesis`` to
    ``gold_enriched`` when power, engine capacity, fuel type, body type and
    trim level are already filled in, so they never need an AI call.

    Phase 2 marks the rows still waiting as ``merged_duplicate`` when a
    ``gold_enriched`` row with the same make, normalized name, start year,
    fuel type and market already exists.

    Both statements run in a single transaction that is committed after
    phase 2. If anything raises, the transaction is rolled back and the
    error is logged with its traceback; the exception is not propagated.
    """
    async with AsyncSessionLocal() as db:
        try:
            # PHASE 1: AUTO-GOLD (what is already complete must not go to the AI).
            # If UltimateSpecs or RDW already filled in the essentials, promote it.
            logger.info("🚀 1. Fázis: Auto-Gold ellenőrzés indítása...")
            # concat_ws() skips NULL arguments, so a row whose source is NULL
            # still receives the AUTO_GOLD tag (plain `source || '...'` would
            # evaluate to NULL). For non-NULL sources the result is unchanged.
            auto_gold_query = text("""
                UPDATE vehicle.vehicle_model_definitions
                SET status = 'gold_enriched',
                    updated_at = NOW(),
                    source = concat_ws(' + ', source, 'AUTO_GOLD')
                WHERE status = 'awaiting_ai_synthesis'
                  AND power_kw > 0
                  AND engine_capacity > 0
                  AND fuel_type != 'Unknown'
                  AND body_type IS NOT NULL
                  AND trim_level != ''
                RETURNING id;
            """)
            promoted = (await db.execute(auto_gold_query)).fetchall()
            logger.info(f"✅ {len(promoted)} járművet automatikusan ARANY státuszba emeltem (AI megspórolva).")

            # PHASE 2: DEDUPLICATION (catalogue comparison).
            # Find waiting rows that already have a gold counterpart.
            logger.info("🚀 2. Fázis: Duplikációk szűrése a katalógus alapján...")
            dedup_query = text("""
                UPDATE vehicle.vehicle_model_definitions AS pending
                SET status = 'merged_duplicate',
                    updated_at = NOW()
                FROM vehicle.vehicle_model_definitions AS gold
                WHERE pending.status = 'awaiting_ai_synthesis'
                  AND gold.status = 'gold_enriched'
                  AND pending.make = gold.make
                  AND pending.normalized_name = gold.normalized_name
                  AND pending.year_from = gold.year_from
                  AND pending.fuel_type = gold.fuel_type
                  AND pending.market = gold.market
                  AND pending.id != gold.id
                RETURNING pending.id;
            """)
            merged = (await db.execute(dedup_query)).fetchall()
            logger.info(f"🗑️ {len(merged)} duplikált várakozót töröltem a sorból (Már van Arany párjuk).")

            await db.commit()
            logger.info("🏆 Optimalizálás befejezve. A sor megtisztítva!")

        except Exception as e:
            # Top-level boundary of the worker: undo both phases and keep the
            # traceback in the log instead of only the exception text.
            await db.rollback()
            logger.exception(f"❌ Hiba az optimalizálás során: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(optimize_queue())
|
||||
108
backend/app/workers/vehicle/vehicle_master_cleaner.py
Normal file
108
backend/app/workers/vehicle/vehicle_master_cleaner.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
from sqlalchemy import text, update
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [MASTER-CLEANER] %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Master-Cleaner")
|
||||
|
||||
# --- REGEX PATTERNS (used to pull figures out of free-text context) ---
# Power in kW: a standalone 2-3 digit number, optionally followed by a 1-2 digit
# decimal part that is discarded. The look-behind keeps the match from starting
# inside a longer number ("1150 kW", "1,150 kW"), and the trailing \b rejects
# energy units such as "kWh".
KW_PATTERN = re.compile(r'(?<![\d.,])(\d{2,3})(?:[.,]\d{1,2})?\s*(?:kw|kilowatts?)\b', re.IGNORECASE)
# Displacement in cubic centimetres: a standalone 3-4 digit number followed by
# one of the unit spellings as a whole word.
CCM_PATTERN = re.compile(r'(?<![\d.,])(\d{3,4})\s*(?:ccm|cm3|cc|cubic)\b', re.IGNORECASE)
|
||||
|
||||
class MasterCleaner:
    """AI-free clean-up pass over ``vehicle.vehicle_model_definitions``.

    The audit runs three steps inside a single transaction:

    1. Auto-gold: rows that already have power, engine capacity, fuel type
       and body type are promoted to ``gold_enriched``.
    2. Regex extraction: missing kW / ccm values are recovered from the
       ``raw_search_context`` text.
    3. Deduplication: rows that mirror an existing ``gold_enriched`` row are
       marked ``merged_duplicate``. The original design note ties this step
       to the ``uix_vmd_precision_v2`` constraint; the query itself matches
       on make, normalized name, start year and fuel type.
    """

    @staticmethod
    def _extract_updates(context, power_kw, engine_capacity):
        """Return the column values that can be recovered from *context*.

        Only fields whose current value is 0 are looked up. The result maps
        column name to the extracted integer and is empty when nothing
        was found.
        """
        found = {}
        if power_kw == 0:
            kw_match = KW_PATTERN.search(context)
            if kw_match:
                found["power_kw"] = int(kw_match.group(1))
        if engine_capacity == 0:
            ccm_match = CCM_PATTERN.search(context)
            if ccm_match:
                found["engine_capacity"] = int(ccm_match.group(1))
        return found

    async def run_audit(self):
        """Execute the three audit steps and commit them as one transaction.

        On any exception the transaction is rolled back and the error is
        logged with its traceback; the exception is not propagated.
        """
        async with AsyncSessionLocal() as db:
            try:
                logger.info("🔍 Audit indítása a teljes állományon...")

                # 1. AUTO-GOLD: every key field is already filled in.
                # This is the cheapest step: kW, ccm, fuel and body present
                # means the row is done.
                # concat_ws() skips NULLs, so a NULL source still gets tagged
                # (plain `source || '...'` would evaluate to NULL).
                gold_query = text("""
                    UPDATE vehicle.vehicle_model_definitions
                    SET status = 'gold_enriched',
                        updated_at = NOW(),
                        source = concat_ws(' + ', source, 'AUDITOR_FIX')
                    WHERE status IN ('awaiting_ai_synthesis', 'unverified')
                      AND power_kw > 0 AND engine_capacity > 0
                      AND fuel_type != 'Unknown' AND body_type IS NOT NULL
                    RETURNING id;
                """)
                res_gold = await db.execute(gold_query)
                logger.info(f"✨ {len(res_gold.fetchall())} járművet találtam, ami már eleve 'Arany' volt.")

                # 2. REGEX EXTRACTION: read the 'raw_search_context' text of
                # rows where power_kw or engine_capacity is still 0.
                # At most 10000 rows are processed per run.
                # NOTE(review): rows completed here are not promoted to gold
                # until the next run, because step 1 has already executed.
                logger.info("🧪 Regex extrakció indítása a szöveges kontextusból...")
                fetch_query = text("""
                    SELECT id, raw_search_context, power_kw, engine_capacity
                    FROM vehicle.vehicle_model_definitions
                    WHERE (power_kw = 0 OR engine_capacity = 0)
                      AND raw_search_context != ''
                      AND status != 'gold_enriched'
                    LIMIT 10000;
                """)
                rows = (await db.execute(fetch_query)).fetchall()

                # Built once; a NULL parameter leaves the column untouched.
                update_stmt = text("""
                    UPDATE vehicle.vehicle_model_definitions
                    SET power_kw = COALESCE(:kw, power_kw),
                        engine_capacity = COALESCE(:ccm, engine_capacity),
                        source = concat_ws(' + ', source, 'REGEX_EXTRACT')
                    WHERE id = :id
                """)

                extracted_count = 0
                for r_id, context, p_kw, e_ccm in rows:
                    updates = self._extract_updates(context, p_kw, e_ccm)
                    if not updates:
                        continue
                    # Something was found: write it back to the record.
                    await db.execute(
                        update_stmt,
                        {
                            "kw": updates.get("power_kw"),
                            "ccm": updates.get("engine_capacity"),
                            "id": r_id,
                        },
                    )
                    extracted_count += 1

                logger.info(f"📝 {extracted_count} járműnél találtam meg az adatokat a szöveges kontextusban.")

                # 3. DEDUPLICATION: by make + name + fuel + start year.
                # fuel_type is part of the key so petrol/diesel variants of
                # the same model year are not merged into each other, and
                # rows already marked merged_duplicate are skipped so reruns
                # do not touch or re-count them.
                # NOTE(review): unlike the optimizer's dedup this does not
                # compare `market` — confirm whether that is intended.
                logger.info("✂️ Duplikációk összeolvasztása...")
                dedup_query = text("""
                    UPDATE vehicle.vehicle_model_definitions AS p
                    SET status = 'merged_duplicate'
                    FROM vehicle.vehicle_model_definitions AS g
                    WHERE p.status NOT IN ('gold_enriched', 'merged_duplicate')
                      AND g.status = 'gold_enriched'
                      AND p.make = g.make
                      AND p.normalized_name = g.normalized_name
                      AND p.year_from = g.year_from
                      AND p.fuel_type = g.fuel_type
                      AND p.id != g.id
                    RETURNING p.id;
                """)
                res_dedup = await db.execute(dedup_query)
                logger.info(f"🗑️ {len(res_dedup.fetchall())} duplikációt távolítottam el.")

                await db.commit()
                logger.info("🏆 A 126k rekord átvizsgálása befejeződött!")

            except Exception as e:
                # Top-level boundary of the worker: undo all three steps and
                # keep the traceback in the log.
                await db.rollback()
                logger.exception(f"❌ Kritikus hiba az audit során: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
cleaner = MasterCleaner()
|
||||
asyncio.run(cleaner.run_audit())
|
||||
@@ -29,7 +29,7 @@ OLLAMA_URL = "http://sf_ollama:11434/api/generate"
|
||||
OLLAMA_MODEL = "qwen2.5-coder:14b" # A 14b paraméteres modell az agy
|
||||
MAX_ATTEMPTS = 3
|
||||
TIMEOUT_SECONDS = 45 # Megemelt timeout a 14b modell lassabb válaszideje miatt
|
||||
BATCH_SIZE = 3 # Maximum 3 párhuzamos AI hívás a CPU fagyás elkerülésére
|
||||
BATCH_SIZE = 10 # Maximum 10 párhuzamos AI hívás a CPU fagyás elkerülésére
|
||||
|
||||
class AlchemistPro:
|
||||
def __init__(self):
|
||||
|
||||
Reference in New Issue
Block a user