chore: Backend codebase cleanup and archiving of legacy scripts
This commit is contained in:
111
backend/archive_v1_scripts/discovery_bot.py.old
Executable file
111
backend/archive_v1_scripts/discovery_bot.py.old
Executable file
@@ -0,0 +1,111 @@
|
||||
# /opt/docker/dev/service_finder/backend/discovery_bot.py
|
||||
import asyncio
|
||||
import json
|
||||
import httpx
|
||||
import os
|
||||
import hashlib
|
||||
import logging
|
||||
from urllib.parse import quote
|
||||
from sqlalchemy import select
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.staged_data import ServiceStaging
|
||||
|
||||
# Logging setup: INFO level, timestamped messages, one named logger for this bot.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s]: %(message)s',
)
logger = logging.getLogger("OSM-Discovery")

# Configuration
# Bounding box interpolated into the Overpass QL filters; Overpass expects the
# order south,west,north,east (here: latitude 45.7-48.6, longitude 16.1-22.9).
HUNGARY_BBOX = "45.7,16.1,48.6,22.9"
# Overpass interpreter endpoint; the URL-encoded query is appended after "data=".
# HTTPS is used so the staged data cannot be tampered with in transit.
OVERPASS_URL = "https://overpass-api.de/api/interpreter?data="
|
||||
|
||||
class OSMDiscoveryBot:
    """Discover car-service points in OpenStreetMap and stage them for review.

    ``sync()`` queries the Overpass API inside ``HUNGARY_BBOX`` and inserts
    every named element whose fingerprint is not yet present into the
    ``ServiceStaging`` table.
    """

    @staticmethod
    def generate_fingerprint(name: str, city: str) -> str:
        """Return the deduplication fingerprint for a service.

        The fingerprint is the MD5 hex digest of ``"<name>|<city>"`` with both
        parts lower-cased. It is deliberately a bit looser than the Hunter
        fingerprint, because OSM address data is sometimes incomplete.

        Args:
            name: Service name (converted with ``str()`` before hashing).
            city: City name (converted with ``str()`` before hashing).

        Returns:
            A 32-character hexadecimal digest.
        """
        raw = f"{str(name).lower()}|{str(city).lower()}"
        return hashlib.md5(raw.encode()).hexdigest()

    @staticmethod
    def get_service_type(tags: dict, name: str) -> str:
        """Map OSM tags and the service name to an internal category.

        Rules are checked in order and the first match wins; anything that
        matches no rule is classified as ``'mechanic'``.

        NOTE: this helper is not called by ``sync()`` in this class.

        Args:
            tags: OSM tag dictionary of the element.
            name: Display name of the service; ``None`` is treated as empty.

        Returns:
            One of ``'tire_shop'``, ``'car_wash'``, ``'electrician'``,
            ``'body_shop'`` or ``'mechanic'``.
        """
        # Guard against a missing name so the keyword checks cannot crash.
        name = (name or '').lower()
        shop = tags.get('shop', '')
        amenity = tags.get('amenity', '')

        if shop == 'tyres' or 'gumi' in name:
            return 'tire_shop'
        if amenity == 'car_wash' or 'mosó' in name:
            return 'car_wash'
        if any(x in name for x in ['villamos', 'autóvill', 'elektro']):
            return 'electrician'
        if any(x in name for x in ['fényez', 'lakatos', 'karosszéria']):
            return 'body_shop'
        return 'mechanic'

    @staticmethod
    def _format_address(city: str, street: str, housenumber: str) -> str:
        """Join the address parts as ``"city, street housenumber"``, skipping empty parts."""
        street_part = " ".join(part for part in (street, housenumber) if part)
        # The final strip keeps the original trimming of stray commas/spaces.
        return ", ".join(part for part in (city, street_part) if part).strip(", ")

    async def fetch_osm_data(self, query_part: str) -> list:
        """Fetch nodes and ways matching a tag filter from the Overpass API.

        This is best-effort: any failure is logged and an empty list is
        returned, so one failing query does not abort the whole sync.

        Args:
            query_part: Overpass QL tag filter, e.g. ``'["amenity"="car_wash"]'``.

        Returns:
            The ``elements`` list of the JSON response, or ``[]`` on a
            non-200 status or any error.
        """
        query = (
            f'[out:json][timeout:120];'
            f'(node{query_part}({HUNGARY_BBOX});way{query_part}({HUNGARY_BBOX}););'
            f'out center;'
        )
        # Client timeout (150 s) is longer than the server-side query timeout (120 s).
        async with httpx.AsyncClient(timeout=150) as client:
            try:
                resp = await client.get(OVERPASS_URL + quote(query))
                if resp.status_code == 200:
                    return resp.json().get('elements', [])
                # Make rate limiting / server errors visible instead of
                # silently returning nothing.
                logger.warning(f"⚠️ Overpass váratlan HTTP státusz: {resp.status_code}")
                return []
            except Exception as e:
                logger.error(f"❌ Overpass hiba: {e}")
                return []

    async def sync(self):
        """Run a full discovery pass and stage previously unseen services.

        Elements without a ``name`` tag are skipped. Every other element is
        fingerprinted from its name and city and is inserted with status
        ``pending`` only if the fingerprint is neither already stored nor
        already handled earlier in this run. All inserts are committed in a
        single transaction at the end.
        """
        logger.info("🛰️ OSM Országos szinkronizáció indítása...")

        # 1. Build and run the queries.
        queries = [
            '["shop"~"car_repair|tyres"]',
            '["amenity"="car_wash"]',
        ]

        all_elements = []
        for q in queries:
            all_elements.extend(await self.fetch_osm_data(q))

        logger.info(f"📊 {len(all_elements)} potenciális szervizpont érkezett.")

        async with AsyncSessionLocal() as db:
            added_count = 0
            # Fingerprints handled in this run. The database lookup alone may
            # not see rows added earlier in the same run: they are uncommitted,
            # and whether they are flushed before the query depends on the
            # session's autoflush setting, which is not visible here.
            seen_fingerprints = set()

            for node in all_elements:
                tags = node.get('tags', {})
                name = tags.get('name')
                if not name:
                    continue

                # NOTE: element coordinates (lat/lon, or "center" for ways) are
                # available on the element but are not stored by this script.
                city = tags.get('addr:city', 'Ismeretlen')
                street = tags.get('addr:street', '')
                housenumber = tags.get('addr:housenumber', '')

                f_print = self.generate_fingerprint(name, city)
                if f_print in seen_fingerprints:
                    continue
                seen_fingerprints.add(f_print)

                # Deduplication check against already staged rows. first() is
                # used so pre-existing duplicate fingerprints cannot abort the
                # run with MultipleResultsFound.
                stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == f_print)
                existing = (await db.execute(stmt)).scalars().first()
                if existing:
                    continue

                db.add(ServiceStaging(
                    name=name,
                    source="osm_discovery_v2",
                    fingerprint=f_print,
                    city=city,
                    full_address=self._format_address(city, street, housenumber),
                    status="pending",
                    trust_score=20,  # OSM data is trusted less than Google data
                    raw_data=tags,
                ))
                added_count += 1

            await db.commit()
            logger.info(f"✅ Szinkron kész. {added_count} új elem került a Staging táblába.")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run one full discovery pass on a fresh event loop.
    asyncio.run(OSMDiscoveryBot().sync())
|
||||
Reference in New Issue
Block a user