111 lines
4.3 KiB
Python
Executable File
111 lines
4.3 KiB
Python
Executable File
# /opt/docker/dev/service_finder/backend/discovery_bot.py
|
|
import asyncio
|
|
import json
|
|
import httpx
|
|
import os
|
|
import hashlib
|
|
import logging
|
|
from urllib.parse import quote
|
|
from sqlalchemy import select
|
|
from app.database import AsyncSessionLocal
|
|
from app.models.staged_data import ServiceStaging
|
|
|
|
# Logging setup: one process-wide format, plus a dedicated logger for this bot.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
logger = logging.getLogger("OSM-Discovery")

# Configuration
# Bounding box used to restrict every Overpass query to Hungary
# (Overpass QL order: south, west, north, east).
HUNGARY_BBOX = "45.7,16.1,48.6,22.9"
# Overpass interpreter endpoint; the URL-encoded query text is appended after
# "data=". HTTPS is used so the request and response are not sent in cleartext.
OVERPASS_URL = "https://overpass-api.de/api/interpreter?data="
|
|
|
|
class OSMDiscoveryBot:
    """Discover car-service points in OpenStreetMap and stage them for review.

    The bot queries the Overpass API for repair shops, tyre shops and car
    washes inside ``HUNGARY_BBOX`` and inserts every previously unseen, named
    element into the ``ServiceStaging`` table.
    """

    @staticmethod
    def generate_fingerprint(name: str, city: str) -> str:
        """Return the deduplication fingerprint for a name/city pair.

        The fingerprint is the MD5 hex digest of the lower-cased ``name`` and
        ``city`` joined by ``|``. Only name and city are used (no street
        address) because OSM address data is sometimes incomplete.
        """
        raw = f"{str(name).lower()}|{str(city).lower()}"
        return hashlib.md5(raw.encode()).hexdigest()

    @staticmethod
    def get_service_type(tags: dict, name: str) -> str:
        """Map OSM tags and the display name to an internal category.

        Rules are evaluated in order and the first match wins; anything that
        matches no rule is classified as ``'mechanic'``.

        Args:
            tags: OSM tag dictionary of the element (``None`` is treated as empty).
            name: Display name of the element (``None`` is treated as empty).

        Returns:
            One of ``'tire_shop'``, ``'car_wash'``, ``'electrician'``,
            ``'body_shop'`` or ``'mechanic'``.
        """
        tags = tags or {}
        name = (name or '').lower()
        shop = tags.get('shop', '')
        amenity = tags.get('amenity', '')

        if shop == 'tyres' or 'gumi' in name:
            return 'tire_shop'
        if amenity == 'car_wash' or 'mosó' in name:
            return 'car_wash'
        if any(x in name for x in ['villamos', 'autóvill', 'elektro']):
            return 'electrician'
        if any(x in name for x in ['fényez', 'lakatos', 'karosszéria']):
            return 'body_shop'
        return 'mechanic'

    @staticmethod
    def _format_address(city: str, street: str, housenumber: str) -> str:
        """Join the address parts as ``"city, street housenumber"``, skipping empty parts."""
        street_part = f"{street} {housenumber}".strip()
        return ", ".join(part for part in (city, street_part) if part)

    async def fetch_osm_data(self, query_part: str) -> list:
        """Fetch matching nodes and ways from the Overpass API.

        Args:
            query_part: Overpass tag filter, e.g. ``'["amenity"="car_wash"]'``;
                it is applied to both nodes and ways inside ``HUNGARY_BBOX``.

        Returns:
            The ``elements`` list of the JSON response, or an empty list when
            the request fails, the response is not HTTP 200, or the body
            cannot be parsed. Failures are logged, never raised.
        """
        # "out center" makes Overpass attach a centre point to each way.
        query = (
            f'[out:json][timeout:120];'
            f'(node{query_part}({HUNGARY_BBOX});way{query_part}({HUNGARY_BBOX}););'
            f'out center;'
        )
        # The client timeout is longer than the server-side timeout in the query.
        async with httpx.AsyncClient(timeout=150) as client:
            try:
                resp = await client.get(OVERPASS_URL + quote(query))
                if resp.status_code == 200:
                    return resp.json().get('elements', [])
                # Log rejected requests instead of silently returning nothing.
                logger.error(f"❌ Overpass hiba: HTTP {resp.status_code}")
                return []
            except Exception as e:
                # Deliberate best effort: one failed query must not abort the sync.
                logger.error(f"❌ Overpass hiba: {e}")
                return []

    async def sync(self) -> None:
        """Run one discovery pass and stage every new, named element.

        Elements without a ``name`` tag are skipped. An element is considered
        new when its fingerprint was neither seen earlier in this run nor is
        already present in ``ServiceStaging``. All inserts are committed in a
        single transaction at the end.
        """
        logger.info("🛰️ OSM Országos szinkronizáció indítása...")

        # 1. Tag filters to query.
        queries = [
            '["shop"~"car_repair|tyres"]',
            '["amenity"="car_wash"]',
        ]

        all_elements = []
        for q in queries:
            all_elements.extend(await self.fetch_osm_data(q))

        logger.info(f"📊 {len(all_elements)} potenciális szervizpont érkezett.")

        async with AsyncSessionLocal() as db:
            added_count = 0
            # Fingerprints already handled in this run. Without this, duplicates
            # inside one batch are only caught if the session autoflushes
            # pending rows before each SELECT.
            seen_fingerprints = set()

            for node in all_elements:
                tags = node.get('tags', {})
                name = tags.get('name')
                if not name:
                    continue

                # NOTE: element coordinates are not persisted; only the tag
                # dictionary is stored as raw_data.
                city = tags.get('addr:city', 'Ismeretlen')
                street = tags.get('addr:street', '')
                housenumber = tags.get('addr:housenumber', '')

                f_print = self.generate_fingerprint(name, city)
                if f_print in seen_fingerprints:
                    continue
                seen_fingerprints.add(f_print)

                # Deduplicate against rows that are already staged. first() is
                # used so that pre-existing duplicate rows do not raise.
                stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == f_print)
                existing = (await db.execute(stmt)).scalars().first()
                if existing:
                    continue

                db.add(ServiceStaging(
                    name=name,
                    source="osm_discovery_v2",
                    fingerprint=f_print,
                    city=city,
                    full_address=self._format_address(city, street, housenumber),
                    status="pending",
                    trust_score=20,  # OSM data is trusted less than Google data
                    raw_data=tags,
                ))
                added_count += 1

            await db.commit()
            logger.info(f"✅ Szinkron kész. {added_count} új elem került a Staging táblába.")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run a single synchronisation pass to completion.
    asyncio.run(OSMDiscoveryBot().sync())