Files
service-finder/backend/app/workers/service_hunter.py

161 lines
6.6 KiB
Python

import asyncio
import httpx
import logging
import os
import hashlib
from datetime import datetime, timezone
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, text, update
from app.db.session import SessionLocal
from app.models.service import ServiceStaging, DiscoveryParameter
# Naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("Robot-v1.3.1-ContinentalScout")
class ServiceHunter:
"""
Robot v1.3.1: Continental Scout (Grid Search Edition)
- Dinamikus rácsbejárás a sűrű területek lefedésére.
- Ujjlenyomat-alapú deduplikáció.
- Bővített kulcsszókezelés.
"""
PLACES_NEW_URL = "https://places.googleapis.com/v1/places:searchNearby"
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
@classmethod
def generate_fingerprint(cls, name: str, city: str, street: str) -> str:
"""Egyedi ujjlenyomat készítése a duplikációk kiszűrésére."""
raw_string = f"{str(name).lower()}|{str(city).lower()}|{str(street).lower()[:5]}"
return hashlib.md5(raw_string.encode()).hexdigest()
@classmethod
async def get_city_bounds(cls, city, country_code):
"""Város befoglaló téglalapjának (Bounding Box) lekérése Nominatim-al."""
url = "https://nominatim.openstreetmap.org/search"
params = {"city": city, "country": country_code, "format": "json"}
async with httpx.AsyncClient(headers={"User-Agent": "ServiceFinder-Scout/1.0"}) as client:
resp = await client.get(url, params=params)
if resp.status_code == 200 and resp.json():
bbox = resp.json()[0].get("boundingbox") # [min_lat, max_lat, min_lon, max_lon]
return [float(x) for x in bbox]
return None
@classmethod
async def run_grid_search(cls, db, task):
"""Rács-alapú bejárás a városon belül."""
bbox = await cls.get_city_bounds(task.city, task.country_code)
if not bbox: return
# 1km-es lépések generálása (kb. 0.01 fok)
lat_step = 0.015
lon_step = 0.02
curr_lat = bbox[0]
while curr_lat < bbox[1]:
curr_lon = bbox[2]
while curr_lon < bbox[3]:
logger.info(f"🛰️ Rács-cella pásztázása: {curr_lat}, {curr_lon} - Kulcsszó: {task.keyword}")
places = await cls.get_google_places(curr_lat, curr_lon, task.keyword)
for p in places:
# Adatok kinyerése és tisztítása
name = p.get('displayName', {}).get('text')
full_addr = p.get('formattedAddress', '')
# Ujjlenyomat generálás
f_print = cls.generate_fingerprint(name, task.city, full_addr)
await cls.save_to_staging(db, {
"external_id": p.get('id'),
"name": name,
"full_address": full_addr,
"phone": p.get('internationalPhoneNumber'),
"website": p.get('websiteUri'),
"fingerprint": f_print,
"city": task.city,
"source": "google",
"raw": p,
"trust": 30
})
curr_lon += lon_step
await asyncio.sleep(0.5) # API védelem
curr_lat += lat_step
@classmethod
async def get_google_places(cls, lat, lon, keyword):
"""Google Places New API hívás rács-pontra."""
if not cls.GOOGLE_API_KEY: return []
headers = {
"Content-Type": "application/json",
"X-Goog-Api-Key": cls.GOOGLE_API_KEY,
"X-Goog-FieldMask": "places.displayName,places.id,places.internationalPhoneNumber,places.websiteUri,places.formattedAddress"
}
payload = {
"includedTypes": ["car_repair", "motorcycle_repair"],
"maxResultCount": 20,
"locationRestriction": {
"circle": {
"center": {"latitude": lat, "longitude": lon},
"radius": 1500.0 # 1.5km sugarú kör a fedés érdekében
}
}
}
async with httpx.AsyncClient() as client:
resp = await client.post(cls.PLACES_NEW_URL, json=payload, headers=headers)
return resp.json().get("places", []) if resp.status_code == 200 else []
@classmethod
async def save_to_staging(cls, db: AsyncSession, data: dict):
"""Mentés ujjlenyomat ellenőrzéssel."""
# 1. Megnézzük, létezik-e már ez az ujjlenyomat
stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == data['fingerprint'])
existing = (await db.execute(stmt)).scalar_one_or_none()
if existing:
# Csak a bizalmi pontot növeljük és az utolsó észlelést frissítjük
existing.trust_score += 5
return
new_entry = ServiceStaging(
name=data['name'],
source=data['source'],
external_id=str(data['external_id']),
fingerprint=data['fingerprint'],
city=data['city'],
full_address=data['full_address'],
contact_phone=data['phone'],
website=data['website'],
raw_data=data.get('raw', {}),
status="pending",
trust_score=data.get('trust', 30)
)
db.add(new_entry)
await db.flush()
@classmethod
async def run(cls):
logger.info("🤖 Continental Scout v1.3.1 - Grid Engine INDUL...")
while True:
async with SessionLocal() as db:
try:
await db.execute(text("SET search_path TO data, public"))
stmt = select(DiscoveryParameter).where(DiscoveryParameter.is_active == True)
tasks = (await db.execute(stmt)).scalars().all()
for task in tasks:
logger.info(f"🔎 Mélyfúrás indítása: {task.city} -> {task.keyword}")
await cls.run_grid_search(db, task)
task.last_run_at = datetime.now(timezone.utc)
await db.commit()
except Exception as e:
logger.error(f"💥 Hiba: {e}")
await db.rollback()
await asyncio.sleep(3600)
if __name__ == "__main__":
asyncio.run(ServiceHunter.run())