Initial commit: Robot ökoszisztéma v2.0 - Stabilizált jármű és szerviz robotok
This commit is contained in:
173
backend/app/workers/service/service_robot_0_hunter.py
Executable file
173
backend/app/workers/service/service_robot_0_hunter.py
Executable file
@@ -0,0 +1,173 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/service_hunter.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, text, update
|
||||
from app.db.session import AsyncSessionLocal
|
||||
from app.models.staged_data import ServiceStaging, DiscoveryParameter
|
||||
|
||||
# Naplózás beállítása a Sentinel monitorozáshoz
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Robot-Continental-Scout-v1.3")
|
||||
|
||||
class ServiceHunter:
    """
    Robot v1.3.1: Continental Scout (Grid Search Edition)

    Responsibility: discovering new service points from external APIs.
    Walks a coordinate grid over each configured city, queries the Google
    Places "Nearby Search" API per grid cell, and stages the results for
    the downstream enrichment/validation robots.
    """
    # Google Places API (New) nearby-search endpoint.
    PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
    # Read once at import time; a missing key makes get_google_places() a no-op.
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

    @classmethod
    def _generate_fingerprint(cls, name: str, city: str, address: str) -> str:
        """
        Generate an MD5 fingerprint.

        This guarantees that if the same place is found from several
        grid cells, no duplicate record is created.
        """
        # Only the first 10 characters of the address are used, so small
        # formatting differences in the address tail do not defeat dedup.
        raw = f"{str(name).lower()}|{str(city).lower()}|{str(address).lower()[:10]}"
        return hashlib.md5(raw.encode()).hexdigest()

    @classmethod
    async def _get_city_bounds(cls, city: str, country_code: str):
        """Nominatim API call to fetch the city's bounding box.

        Returns [min_lat, max_lat, min_lon, max_lon] as floats, or None on
        any error or empty result.
        """
        url = "https://nominatim.openstreetmap.org/search"
        params = {"city": city, "country": country_code, "format": "json"}
        # Nominatim's usage policy requires an identifying User-Agent.
        headers = {"User-Agent": "ServiceFinder-Scout-v1.3/2.0 (contact@servicefinder.com)"}

        async with httpx.AsyncClient(headers=headers, timeout=10) as client:
            try:
                resp = await client.get(url, params=params)
                if resp.status_code == 200 and resp.json():
                    bbox = resp.json()[0].get("boundingbox")  # [min_lat, max_lat, min_lon, max_lon]
                    return [float(x) for x in bbox]
            except Exception as e:
                logger.error(f"⚠️ Városhatár lekérdezési hiba ({city}): {e}")
        return None

    @classmethod
    async def get_google_places(cls, lat: float, lon: float):
        """Google Places V1 (New) API call.

        Returns the raw list of place dicts found in a 1.2 km circle around
        (lat, lon); an empty list on missing key or any API failure.
        """
        if not cls.GOOGLE_API_KEY:
            logger.error("❌ Google API Key hiányzik!")
            return []

        headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": cls.GOOGLE_API_KEY,
            # The field mask limits the response (and billing) to the fields we store.
            "X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress,places.location"
        }
        # MB 2.0 filters: only relevant place types
        payload = {
            "includedTypes": ["car_repair", "motorcycle_repair", "car_wash", "tire_shop"],
            "maxResultCount": 20,
            "locationRestriction": {
                "circle": {
                    "center": {"latitude": lat, "longitude": lon},
                    "radius": 1200.0  # 1.2 km radius circles for good overlap between cells
                }
            }
        }

        async with httpx.AsyncClient(timeout=15) as client:
            try:
                resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
                if resp.status_code == 200:
                    return resp.json().get("places", [])
                logger.warning(f"Google API hiba: {resp.status_code} - {resp.text}")
            except Exception as e:
                logger.error(f"Google API hívás hiba: {e}")
        return []

    @classmethod
    async def _save_to_staging(cls, db: AsyncSession, task, p_data: dict):
        """Save one place into the staging table with deduplication.

        Known fingerprints only get their trust score and last-seen
        timestamp bumped; unseen ones are inserted as 'pending'.
        Does not commit — the caller commits once per grid cell.
        """
        name = p_data.get('displayName', {}).get('text')
        addr = p_data.get('formattedAddress', '')
        f_print = cls._generate_fingerprint(name, task.city, addr)

        # Check whether it already exists (by fingerprint)
        stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == f_print)
        existing = (await db.execute(stmt)).scalar_one_or_none()

        if existing:
            # Only refresh the trust score and the last-seen timestamp
            existing.trust_score += 2
            existing.updated_at = datetime.now(timezone.utc)
            return

        # Create a new record
        new_entry = ServiceStaging(
            name=name,
            source="google_scout_v1.3",
            external_id=p_data.get('id'),
            fingerprint=f_print,
            city=task.city,
            full_address=addr,
            contact_phone=p_data.get('internationalPhoneNumber'),
            website=p_data.get('websiteUri'),
            raw_data=p_data,
            status="pending",
            trust_score=30  # default trust level
        )
        db.add(new_entry)

    @classmethod
    async def run_grid_search(cls, db: AsyncSession, task: DiscoveryParameter):
        """Walk the city on a coordinate grid.

        Each cell triggers one Places query; results are staged and
        committed cell by cell so partial progress survives a crash.
        """
        bbox = await cls._get_city_bounds(task.city, task.country_code or 'HU')
        if not bbox:
            return

        # Step sizes (roughly 1 km = 0.01 degrees)
        lat_step = 0.012
        lon_step = 0.018

        curr_lat = bbox[0]
        while curr_lat < bbox[1]:
            curr_lon = bbox[2]
            while curr_lon < bbox[3]:
                logger.info(f"🛰️ Cella pásztázása: {curr_lat:.4f}, {curr_lon:.4f} ({task.city})")

                places = await cls.get_google_places(curr_lat, curr_lon)
                for p in places:
                    await cls._save_to_staging(db, task, p)

                await db.commit()  # commit per cell so no work is lost on failure
                curr_lon += lon_step
                await asyncio.sleep(0.3)  # rate-limit protection
            curr_lat += lat_step

    @classmethod
    async def run(cls):
        """The robot's main loop: poll for active discovery tasks forever."""
        logger.info("🤖 Continental Scout ONLINE - Grid Engine Indul...")
        while True:
            async with AsyncSessionLocal() as db:
                try:
                    # Fetch the active search tasks
                    stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
                    tasks = (await db.execute(stmt)).scalars().all()

                    for task in tasks:
                        # Only run if it has not run recently (e.g. every 30 days)
                        # NOTE(review): assumes last_run_at is timezone-aware —
                        # the subtraction raises on naive datetimes; confirm the model.
                        if not task.last_run_at or (datetime.now(timezone.utc) - task.last_run_at).days >= 30:
                            logger.info(f"🔎 Felderítés indítása: {task.city}")
                            await cls.run_grid_search(db, task)

                            task.last_run_at = datetime.now(timezone.utc)
                            await db.commit()

                except Exception as e:
                    logger.error(f"💥 Kritikus hiba a Scout robotban: {e}")
                    await db.rollback()

            # Check for new tasks every 6 hours
            await asyncio.sleep(21600)
|
||||
|
||||
if __name__ == "__main__":
    # Standalone entry point: run the hunter's endless discovery loop.
    asyncio.run(ServiceHunter.run())
|
||||
136
backend/app/workers/service/service_robot_1_scout_osm.py
Executable file
136
backend/app/workers/service/service_robot_1_scout_osm.py
Executable file
@@ -0,0 +1,136 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/service/service_robot_1_scout_osm.py
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import httpx
|
||||
from urllib.parse import quote
|
||||
from sqlalchemy import select, text
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.service import ServiceStaging # JAVÍTOTT IMPORT ÚTVONAL!
|
||||
import re
|
||||
|
||||
# Logolás MB 2.0 szabvány szerint
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Service-Robot-1-OSM")
|
||||
|
||||
class OSMScout:
    """Service Robot 1: OSM Scout.

    Harvests new service-shop candidates from OpenStreetMap via the
    Overpass API and inserts previously unseen ones into the staging table.
    """

    HUNGARY_BBOX = "45.7,16.1,48.6,22.9"
    OVERPASS_URL = "http://overpass-api.de/api/interpreter?data="

    @staticmethod
    def normalize_name(text_val: str) -> str:
        """Lower-case, trim and collapse inner whitespace for stable fingerprints."""
        if not text_val:
            return ""
        return re.sub(r'\s+', ' ', text_val.lower().strip())

    @staticmethod
    def generate_fingerprint(name: str, city: str) -> str:
        """Build a unique deduplication key from the normalized name and city."""
        parts = (OSMScout.normalize_name(name), OSMScout.normalize_name(city))
        return hashlib.md5("|".join(parts).encode()).hexdigest()

    async def fetch_osm_data(self, query_part: str):
        """Query the Overpass API with up to three attempts; [] on failure."""
        query = f'[out:json][timeout:120];(node{query_part}({self.HUNGARY_BBOX});way{query_part}({self.HUNGARY_BBOX}););out center;'
        async with httpx.AsyncClient(timeout=150.0) as client:
            for retry in range(3):
                try:
                    response = await client.get(self.OVERPASS_URL + quote(query))
                    if response.status_code == 200:
                        return response.json().get('elements', [])
                    if response.status_code == 429:  # OSM is throttling us
                        logger.warning(f"⚠️ OSM Rate Limit, várakozás...")
                        await asyncio.sleep(5 * (retry + 1))
                    else:
                        logger.warning(f"⚠️ OSM API válasz: {response.status_code}")
                except Exception as e:
                    if retry == 2:
                        logger.error(f"❌ Overpass hiba végleges: {e}")
                    await asyncio.sleep(2)
        return []

    async def run_once(self):
        """One complete harvesting pass: download, deduplicate, stage, commit."""
        logger.info("🛰️ OSM Scout porszívózás indítása...")

        # Pull every car-related shop and amenity
        queries = ['["shop"~"car_repair|tyres|car_parts"]', '["amenity"~"car_wash|fuel"]']
        collected = []
        for selector in queries:
            batch = await self.fetch_osm_data(selector)
            collected.extend(batch)
            logger.info(f"🔍 Lekérdezés kész: {selector} -> {len(batch)} találat")
            await asyncio.sleep(2)  # be gentle to the OSM servers

        async with AsyncSessionLocal() as db:
            added, skipped = 0, 0

            for element in collected:
                meta = element.get('tags', {})
                name = meta.get('name', meta.get('operator'))
                if not name:
                    continue

                city = meta.get('addr:city', 'Ismeretlen')
                postcode = meta.get('addr:postcode', '')
                f_print = self.generate_fingerprint(name, city)

                # Skip services that already exist in the staging table
                stmt = select(ServiceStaging.id).where(ServiceStaging.fingerprint == f_print)
                existing = (await db.execute(stmt)).scalar_one_or_none()

                if existing is not None:
                    skipped += 1
                    continue

                full_addr = f"{postcode} {city}, {meta.get('addr:street', '')} {meta.get('addr:housenumber', '')}".strip(" ,")

                # Extended raw-data JSON: the model has no source/trust columns,
                # so those travel inside raw_data instead.
                raw_payload = {
                    "osm_tags": meta,
                    "source": "osm_scout_v2",
                    "trust_score": 20
                }

                db.add(ServiceStaging(
                    name=name,
                    postal_code=postcode,
                    city=city,
                    full_address=full_addr,
                    fingerprint=f_print,
                    status="pending",
                    raw_data=raw_payload
                ))
                added += 1

            try:
                await db.commit()
                logger.info(f"✅ Kör véget ért. Új szervizek: {added}, Ismert (kihagyva): {skipped}")
            except Exception as e:
                await db.rollback()
                logger.error(f"❌ Adatbázis mentési hiba: {e}")

    async def loop(self):
        """Continuous operation (refreshes once a week)."""
        logger.info("🤖 OSM Scout ONLINE")
        while True:
            try:
                await self.run_once()
            except Exception as e:
                logger.error(f"Kritikus hiba a főciklusban: {e}")

            logger.info("😴 Robot elalvás (7 nap)... OSM adatok ritkán változnak.")
            await asyncio.sleep(86400 * 7)  # OSM data changes rarely; weekly is plenty
|
||||
|
||||
if __name__ == "__main__":
    # Standalone entry point: run the OSM harvesting loop forever.
    scout = OSMScout()
    asyncio.run(scout.loop())
|
||||
106
backend/app/workers/service/service_robot_2_researcher.py
Normal file
106
backend/app/workers/service/service_robot_2_researcher.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
from sqlalchemy import text, update
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.service import ServiceStaging
|
||||
|
||||
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-2-Service-Researcher: %(message)s')
|
||||
logger = logging.getLogger("Service-Robot-2-Researcher")
|
||||
|
||||
class ServiceResearcher:
    """
    Service Robot 2: web data collector (with atomic locking).

    Claims one 'pending' staging row at a time (via FOR UPDATE SKIP LOCKED
    in run()), searches the open web for traces of the service, and either
    stores the found context for Robot-3 or marks the row as having no web
    presence.
    """
    def __init__(self):
        # Hard ceiling for one DuckDuckGo search, in seconds.
        self.search_timeout = 15.0

    async def fetch_source(self, query: str) -> str:
        """Run a targeted DuckDuckGo search.

        Returns up to three result snippets joined by newlines, or "" on
        timeout / any search error (best-effort by design).
        """
        try:
            def search():
                # DDGS is synchronous; executed in a worker thread below.
                with DDGS() as ddgs:
                    results = ddgs.text(query, max_results=3)
                    return [f"- {r.get('body', '')}" for r in results] if results else []

            results = await asyncio.wait_for(asyncio.to_thread(search), timeout=self.search_timeout)
            if not results: return ""
            return "\n".join(results)
        except Exception as e:
            logger.debug(f"Keresési hiba: {e}")
            return ""

    async def process_service(self, db, service_id: int, name: str, city: str):
        """Research one service on the web and persist the verdict.

        Rows with a meaningful amount of found text move to 'enrich_ready'
        (web context stored inside raw_data); the rest become
        'no_web_presence'.
        """
        # BUGFIX: `func` was referenced below but never imported in this
        # module (only `text, update` are imported) — every successful web
        # hit raised NameError. Imported locally to keep the fix contained.
        import json
        from sqlalchemy import func

        logger.info(f"🔎 Szerviz kutatása weben: {name} ({city})")

        # Look for traces of the service on the web
        query = f"{name} autó szerviz {city} szolgáltatások vélemények"
        web_context = await self.fetch_source(query)

        try:
            if len(web_context) > 50:
                # We have data — hand it over to Robot-3 for analysis!
                # BUGFIX: jsonb_set() requires a valid JSON literal; the old
                # f'"{web_context}"' broke on quotes/backslashes/newlines —
                # and the snippets are always newline-joined. json.dumps()
                # escapes correctly.
                await db.execute(
                    update(ServiceStaging)
                    .where(ServiceStaging.id == service_id)
                    .values(
                        raw_data=func.jsonb_set(
                            ServiceStaging.raw_data,
                            '{web_context}',
                            json.dumps(web_context, ensure_ascii=False)
                        ),
                        status='enrich_ready'
                    )
                )
                logger.info(f"✅ Webtalálat rögzítve: {name}")
            else:
                # No data — a "ghost" service
                await db.execute(
                    update(ServiceStaging)
                    .where(ServiceStaging.id == service_id)
                    .values(status='no_web_presence')
                )
                logger.warning(f"⚠️ Nincs webes nyoma: {name}, jegelve.")

            await db.commit()
        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Mentési hiba ({service_id}): {e}")

    @classmethod
    async def run(cls):
        """Main worker loop: atomically claim one pending row and process it."""
        self_instance = cls()
        logger.info("🚀 Service Researcher ONLINE (Atomi Zárolás Patch)")

        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMIC LOCKING: claim exactly one pending row, safe
                    # against concurrently running researcher instances.
                    query = text("""
                        UPDATE data.service_staging
                        SET status = 'research_in_progress'
                        WHERE id = (
                            SELECT id FROM data.service_staging
                            WHERE status = 'pending'
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, name, city;
                    """)
                    result = await db.execute(query)
                    task = result.fetchone()
                    await db.commit()

                    if task:
                        s_id, s_name, s_city = task
                        # Process in a fresh session so a failure there cannot
                        # poison the claiming session.
                        async with AsyncSessionLocal() as process_db:
                            await self_instance.process_service(process_db, s_id, s_name, s_city)
                        await asyncio.sleep(2)  # be gentle to the search engine
                    else:
                        await asyncio.sleep(30)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
    # Standalone entry point: run the researcher's endless claim/process loop.
    asyncio.run(ServiceResearcher.run())
|
||||
115
backend/app/workers/service/service_robot_3_enricher.py
Executable file
115
backend/app/workers/service/service_robot_3_enricher.py
Executable file
@@ -0,0 +1,115 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import json
|
||||
from sqlalchemy import select, text, update, func
|
||||
from app.database import AsyncSessionLocal # JAVÍTVA
|
||||
from app.models.service import ServiceProfile, ExpertiseTag, ServiceExpertise, ServiceStaging
|
||||
|
||||
# Logolás MB 2.0 szabvány
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
|
||||
logger = logging.getLogger("Service-Robot-3-Enricher")
|
||||
|
||||
class ServiceEnricher:
    """
    Service Robot 3: Professional Classifier (with atomic locking)

    Consumes 'enrich_ready' staging rows, promotes them into the final
    service_profiles table, and attaches expertise tags found in the
    scraped web context.
    """

    @staticmethod
    async def match_expertise_to_service(db, service_profile_id: int, scraped_text: str):
        """Keyword-based analysis engine driven by the ExpertiseTag table.

        For every official tag whose keywords occur in scraped_text, links
        the tag to the profile (unless already linked). Commits only when
        at least one new link was added.
        """
        if not scraped_text: return

        tags_query = await db.execute(select(ExpertiseTag).where(ExpertiseTag.is_official == True))
        all_tags = tags_query.scalars().all()

        found_any = False
        for tag in all_tags:
            # Count how many of this tag's keywords appear in the text.
            match_count = 0
            for kw in (tag.search_keywords or []):
                if kw.lower() in scraped_text.lower():
                    match_count += 1

            if match_count > 0:
                # Avoid creating a duplicate link for an already-tagged profile.
                existing_check = await db.execute(
                    select(ServiceExpertise).where(
                        ServiceExpertise.service_id == service_profile_id,
                        ServiceExpertise.expertise_id == tag.id
                    )
                )

                if not existing_check.scalar():
                    new_link = ServiceExpertise(
                        service_id=service_profile_id,
                        expertise_id=tag.id,
                        confidence_level=min(match_count, 2)  # confidence capped at 2
                    )
                    db.add(new_link)
                    found_any = True
                    logger.info(f"✅ {tag.key} szakma azonosítva a szerviznél.")

        if found_any:
            await db.commit()

    @classmethod
    async def run_worker(cls):
        """Main loop: atomically claim one 'enrich_ready' row, promote and classify it."""
        logger.info("🧠 Service Enricher ONLINE - Szakmai elemzés indítása (Atomi Zárolás)")

        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # 1. Lock one "enrich_ready" service in the staging table
                    query = text("""
                        UPDATE data.service_staging
                        SET status = 'enriching'
                        WHERE id = (
                            SELECT id FROM data.service_staging
                            WHERE status = 'enrich_ready'
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, name, city, full_address, fingerprint, raw_data;
                    """)
                    result = await db.execute(query)
                    task = result.fetchone()
                    await db.commit()

                    if task:
                        s_id, name, city, address, fprint, raw_data = task
                        # web_context was stored inside raw_data by Robot-2.
                        web_context = raw_data.get('web_context', '') if isinstance(raw_data, dict) else ''

                        async with AsyncSessionLocal() as process_db:
                            try:
                                # 2. Upsert into the final ServiceProfile table
                                #    (we already have enough data from the web)
                                profile_stmt = text("""
                                    INSERT INTO data.service_profiles
                                    (fingerprint, status, trust_score, location, is_verified, bio)
                                    VALUES (:fp, 'active', 40, ST_SetSRID(ST_MakePoint(19.04, 47.49), 4326), false, :bio)
                                    ON CONFLICT (fingerprint) DO UPDATE SET bio = EXCLUDED.bio
                                    RETURNING id;
                                """)  # Note: the GPS coordinate (19.04, 47.49) is a placeholder that the Validator (Robot-4) refines later!

                                p_result = await process_db.execute(profile_stmt, {"fp": fprint, "bio": name + " - " + city})
                                profile_id = p_result.scalar()
                                await process_db.commit()

                                # 3. Run the keyword analysis
                                await cls.match_expertise_to_service(process_db, profile_id, web_context)

                                # 4. Close out the staging task
                                await process_db.execute(text("UPDATE data.service_staging SET status = 'processed' WHERE id = :id"), {"id": s_id})
                                await process_db.commit()

                            except Exception as e:
                                await process_db.rollback()
                                logger.error(f"Hiba a dúsítás során ({s_id}): {e}")
                                # Park the row in 'error' so it is not re-claimed in a loop.
                                await process_db.execute(text("UPDATE data.service_staging SET status = 'error' WHERE id = :id"), {"id": s_id})
                                await process_db.commit()
                    else:
                        await asyncio.sleep(15)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
    # Standalone entry point: run the enricher's endless worker loop.
    asyncio.run(ServiceEnricher.run_worker())
|
||||
199
backend/app/workers/service/service_robot_4_validator_google.py
Normal file
199
backend/app/workers/service/service_robot_4_validator_google.py
Normal file
@@ -0,0 +1,199 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from datetime import datetime
|
||||
from sqlalchemy import text, update, func
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.service import ServiceProfile
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-4-Validator: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Service-Robot-4-Google-Validator")
|
||||
|
||||
class QuotaManager:
    """Strict daily limit watcher for the Google API, so there is never again a $250 bill!

    The state (date + count) lives in a small JSON file, so the limit
    survives process restarts.
    """
    def __init__(self, service_name: str, daily_limit: int):
        self.service_name = service_name
        self.daily_limit = daily_limit
        self.state_file = f"/app/temp/.quota_{service_name}.json"
        self._ensure_file()

    def _ensure_file(self):
        """Create the state file (and its directory) on first use."""
        os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
        if os.path.exists(self.state_file):
            return
        fresh_state = {"date": datetime.now().strftime("%Y-%m-%d"), "count": 0}
        with open(self.state_file, 'w') as fh:
            json.dump(fresh_state, fh)

    def can_make_request(self) -> bool:
        """Consume one unit of today's quota; False once the daily limit is hit."""
        with open(self.state_file, 'r') as fh:
            state = json.load(fh)

        today = datetime.now().strftime("%Y-%m-%d")
        if state["date"] != today:
            # A new day has started — reset the counter.
            state = {"date": today, "count": 0}

        if state["count"] >= self.daily_limit:
            return False

        state["count"] += 1
        with open(self.state_file, 'w') as fh:
            json.dump(state, fh)
        return True
|
||||
|
||||
class GoogleValidator:
    """
    Service Robot 4: sniper validator.

    Makes individual, targeted Google Text Search calls to refine existing
    service profiles (GPS position, rating, phone, website), guarded by a
    strict daily quota.
    """
    PLACES_TEXT_URL = "https://places.googleapis.com/v1/places:searchText"

    def __init__(self):
        self.api_key = os.getenv("GOOGLE_API_KEY")
        # Daily limit: e.g. 100 queries = at most about $3/day!
        self.daily_limit = int(os.getenv("GOOGLE_DAILY_LIMIT", "100"))
        self.quota = QuotaManager("google_places", self.daily_limit)
        self.headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": self.api_key,
            # Request only the essential fields to keep the API call cheap!
            "X-Goog-FieldMask": "places.id,places.location,places.rating,places.userRatingCount,places.regularOpeningHours,places.internationalPhoneNumber,places.websiteUri"
        }

    async def fetch_place_details(self, client: httpx.AsyncClient, name: str, bio_context: str):
        """Text-search Google for one service.

        Returns the first matching place dict, the sentinel string
        "NOT_FOUND" when Google has no match, or None on API/network error.
        """
        if not self.api_key:
            logger.error("❌ HIÁNYZIK A GOOGLE_API_KEY a .env fájlból!")
            return None

        # The search phrase, e.g. "Kovács Autószerviz Budapest"
        query_text = f"{name} {bio_context}"
        payload = {"textQuery": query_text, "maxResultCount": 1}

        for attempt in range(2):
            try:
                resp = await client.post(self.PLACES_TEXT_URL, json=payload, headers=self.headers)
                if resp.status_code == 200:
                    places = resp.json().get("places", [])
                    return places[0] if places else "NOT_FOUND"
                elif resp.status_code == 429:
                    await asyncio.sleep(2)
                else:
                    logger.error(f"Google API hiba: {resp.status_code}")
                    return None
            except Exception as e:
                logger.debug(f"Hálózati hiba a Google felé: {e}")
                await asyncio.sleep(1)
        return None

    async def validate_service(self, db, profile_id: int, fingerprint: str, bio: str):
        """Validate one profile against Google.

        Returns "DONE", "ERROR" (caller should release the row and retry
        later) or "QUOTA_EXCEEDED" (caller sleeps and must also release
        the row — see run()).
        """
        logger.info(f"📍 Validálás indul: {fingerprint}")

        if not self.quota.can_make_request():
            logger.warning("🛑 NAPI GOOGLE KVÓTA ELÉRVE! A Validátor holnapig alszik.")
            return "QUOTA_EXCEEDED"

        # NOTE(review): upstream fingerprints are MD5 hex digests, which never
        # contain '|' — so this always falls back to the raw hash and the real
        # search signal comes from `bio` ("name - city", set by Robot-3).
        # Confirm whether the name should be parsed from bio instead.
        name = fingerprint.split('|')[0] if '|' in fingerprint else fingerprint

        async with httpx.AsyncClient(timeout=10.0) as client:
            place_data = await self.fetch_place_details(client, name, bio)

        try:
            if place_data == "NOT_FOUND":
                logger.warning(f"⚠️ A Google nem ismeri: {name}. Szellem szerviz?")
                await db.execute(
                    update(ServiceProfile)
                    .where(ServiceProfile.id == profile_id)
                    .values(status='ghost', last_audit_at=func.now())
                )
            elif place_data:
                # Extract the precise GPS coordinates
                loc = place_data.get("location", {})
                lat, lon = loc.get("latitude"), loc.get("longitude")

                # Assemble the update payload
                updates = {
                    "google_place_id": place_data.get("id"),
                    "rating": place_data.get("rating"),
                    "user_ratings_total": place_data.get("userRatingCount"),
                    "contact_phone": place_data.get("internationalPhoneNumber"),
                    "website": place_data.get("websiteUri"),
                    "opening_hours": place_data.get("regularOpeningHours", {}),
                    "is_verified": True,
                    "status": "active",
                    "trust_score": ServiceProfile.trust_score + 50,  # Google confirmed it!
                    "last_audit_at": func.now()
                }

                # Refresh the PostGIS geometry when GPS data is present.
                # BUGFIX: compare against None — the previous truthiness test
                # (`if lat and lon`) silently dropped legitimate 0.0 coordinates.
                if lat is not None and lon is not None:
                    logger.info(f"🗺️ Pontos koordináta megvan: {lat}, {lon}")
                    updates["location"] = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)

                await db.execute(
                    update(ServiceProfile)
                    .where(ServiceProfile.id == profile_id)
                    .values(**updates)
                )
                logger.info(f"✅ Szerviz hitelesítve és GPS pozicionálva: {name}")
            else:
                # API error — retry later
                return "ERROR"

            await db.commit()
            return "DONE"

        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Adatbázis hiba a validálásnál: {e}")
            return "ERROR"

    @classmethod
    async def run(cls):
        """Main loop: atomically claim one unverified profile and validate it."""
        self_instance = cls()
        logger.info("🎯 Service Validator (Robot-4) ONLINE - Várakozás dúsított szervizekre...")

        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # ATOMIC LOCK: pick one active-but-unverified service
                    query = text("""
                        UPDATE data.service_profiles
                        SET status = 'validation_in_progress'
                        WHERE id = (
                            SELECT id FROM data.service_profiles
                            WHERE is_verified = false
                            AND status NOT IN ('validation_in_progress', 'ghost')
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, fingerprint, bio;
                    """)

                    result = await db.execute(query)
                    task = result.fetchone()
                    await db.commit()

                    if task:
                        p_id, fprint, bio = task
                        async with AsyncSessionLocal() as process_db:
                            status = await self_instance.validate_service(process_db, p_id, fprint, bio)

                            # BUGFIX: also release the row on QUOTA_EXCEEDED —
                            # previously such rows stayed 'validation_in_progress'
                            # forever and the picker above never claimed them again.
                            if status in ("ERROR", "QUOTA_EXCEEDED"):
                                await process_db.execute(text("UPDATE data.service_profiles SET status = 'active' WHERE id = :id"), {"id": p_id})
                                await process_db.commit()

                        if status == "QUOTA_EXCEEDED":
                            await asyncio.sleep(3600)  # sleep an hour when the daily limit is spent
                        else:
                            await asyncio.sleep(1)  # rate-limit protection
                    else:
                        await asyncio.sleep(30)  # no new service

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a Validator főciklusban: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
    # Standalone entry point: run the validator's endless claim/validate loop.
    asyncio.run(GoogleValidator().run())
|
||||
Reference in New Issue
Block a user