# File-viewer metadata captured with this listing (not code): 106 lines, 4.1 KiB, Python.
import asyncio
import logging
import warnings

from sqlalchemy import text, update

from app.database import AsyncSessionLocal
from app.models.service import ServiceStaging

# The filter is registered before duckduckgo_search is imported -- presumably so
# that RuntimeWarnings raised while the package itself loads are silenced too.
# Keep this statement ahead of the import below.
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS  # noqa: E402  (deliberately late import)

# Root logging configuration for the worker process: INFO and above, with a
# fixed prefix identifying this robot in the log stream.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-2-Service-Researcher: %(message)s')
logger = logging.getLogger("Service-Robot-2-Researcher")
class ServiceResearcher:
    """Service Robot 2: web data collector (with atomic row claiming).

    The worker repeatedly claims one ``pending`` row of
    ``marketplace.service_staging``, searches DuckDuckGo for the service and
    stores the outcome on the row:

    * enough text found -> the text is written to ``raw_data.web_context`` and
      the status becomes ``enrich_ready``;
    * otherwise -> the status becomes ``no_web_presence``.
    """

    # A search result must be longer than this many characters to count as
    # usable web context.
    MIN_CONTEXT_LENGTH = 50

    # Pauses (seconds) used by the main loop: after a processed service (to go
    # easy on the search engine), when the queue is empty, and after a loop error.
    SEARCH_DELAY = 2
    IDLE_DELAY = 30
    ERROR_DELAY = 10

    # Atomic claim: the inner SELECT locks a single pending row and skips rows
    # that are already locked, and the UPDATE flips that row's status in the
    # same statement. Kept as a plain string so that building the class does
    # not depend on SQLAlchemy being importable.
    CLAIM_SQL = """
        UPDATE marketplace.service_staging
        SET status = 'research_in_progress'
        WHERE id = (
            SELECT id FROM marketplace.service_staging
            WHERE status = 'pending'
            FOR UPDATE SKIP LOCKED
            LIMIT 1
        )
        RETURNING id, name, city;
    """

    def __init__(self):
        # Upper bound, in seconds, that fetch_source waits for one search.
        self.search_timeout = 15.0

    @staticmethod
    def _format_results(results) -> str:
        """Render search hits as one ``- <body>`` line each, joined by newlines.

        ``results`` is an iterable of mappings (or ``None``); a hit without a
        ``body`` key contributes an empty bullet. No hits yields ``""``.
        """
        return "\n".join(f"- {hit.get('body', '')}" for hit in (results or []))

    async def fetch_source(self, query: str) -> str:
        """Run a targeted DuckDuckGo text search.

        Args:
            query: Search phrase passed to ``DDGS.text``.

        Returns:
            The bodies of up to three hits as a bulleted, newline-separated
            string, or ``""`` when nothing was found, the search raised, or it
            exceeded ``self.search_timeout``.
        """
        def search():
            # Blocking client call; executed in a worker thread below.
            with DDGS() as ddgs:
                return ddgs.text(query, max_results=3)

        try:
            # wait_for only stops *waiting* after the timeout; the worker
            # thread itself cannot be interrupted and finishes on its own.
            hits = await asyncio.wait_for(asyncio.to_thread(search), timeout=self.search_timeout)
            return self._format_results(hits)
        except Exception as e:
            # Logged at WARNING (was DEBUG, i.e. invisible under the INFO
            # configuration): a failed search is indistinguishable from
            # "no results" for the caller and ends in 'no_web_presence'.
            logger.warning(f"Keresési hiba: {e}")
            return ""

    async def process_service(self, db, service_id: int, name: str, city: str):
        """Search the web for one claimed service and persist the outcome.

        Args:
            db: Session used for the update; it is committed on success and
                rolled back on failure (presumably an ``AsyncSession`` from
                ``AsyncSessionLocal`` -- that is what ``run`` passes in).
            service_id: Primary key of the ``ServiceStaging`` row.
            name: Service name, used in the search phrase and in log lines.
            city: Service city, used in the search phrase and in log lines.

        Database errors are logged and swallowed; nothing is raised to the caller.
        """
        # Function-scope import: the module header only brings in `text` and
        # `update`, and the original reference to `func` raised NameError.
        from sqlalchemy import Text, cast, func, literal_column

        logger.info(f"🔎 Szerviz kutatása weben: {name} ({city})")

        # Look for traces of the service on the web.
        query = f"{name} autó szerviz {city} szolgáltatások vélemények"
        web_context = await self.fetch_source(query)

        try:
            if len(web_context) > self.MIN_CONTEXT_LENGTH:
                # Data found: hand the row over to Robot-3 for analysis.
                # to_jsonb() lets PostgreSQL encode the text as a JSON string,
                # so quotes, backslashes and newlines in the snippets cannot
                # produce invalid JSON. COALESCE keeps jsonb_set from returning
                # NULL (and dropping the text) when raw_data is NULL.
                merged_raw_data = func.jsonb_set(
                    func.coalesce(ServiceStaging.raw_data, literal_column("'{}'::jsonb")),
                    literal_column("'{web_context}'"),
                    func.to_jsonb(cast(web_context, Text)),
                )
                await db.execute(
                    update(ServiceStaging)
                    .where(ServiceStaging.id == service_id)
                    .values(raw_data=merged_raw_data, status='enrich_ready')
                )
                logger.info(f"✅ Webtalálat rögzítve: {name}")
            else:
                # No data: a "ghost" service, put on ice.
                await db.execute(
                    update(ServiceStaging)
                    .where(ServiceStaging.id == service_id)
                    .values(status='no_web_presence')
                )
                logger.warning(f"⚠️ Nincs webes nyoma: {name}, jegelve.")

            await db.commit()
        except Exception as e:
            await db.rollback()
            # NOTE(review): after a failed save the row keeps the
            # 'research_in_progress' status set by the claim; nothing in the
            # visible code moves it back to 'pending'. Confirm whether another
            # job requeues such rows.
            logger.error(f"🚨 Mentési hiba ({service_id}): {e}")

    async def _claim_next(self):
        """Atomically claim one pending row.

        Returns the ``(id, name, city)`` row of the claimed service, or
        ``None`` when no pending row could be claimed. The claim is committed
        and the session closed before returning, so no session is held open
        during the web search or the loop's sleeps.
        """
        async with AsyncSessionLocal() as db:
            result = await db.execute(text(self.CLAIM_SQL))
            claimed = result.fetchone()
            await db.commit()
        return claimed

    @classmethod
    async def run(cls):
        """Run the worker loop forever: claim a service, research it, repeat.

        Any exception raised inside one iteration is logged and followed by a
        pause; the loop itself never returns.
        """
        researcher = cls()
        logger.info("🚀 Service Researcher ONLINE (Atomi Zárolás Patch)")

        while True:
            try:
                task = await researcher._claim_next()
                if task is None:
                    # Queue is empty: back off before polling again.
                    await asyncio.sleep(cls.IDLE_DELAY)
                    continue

                s_id, s_name, s_city = task
                async with AsyncSessionLocal() as process_db:
                    await researcher.process_service(process_db, s_id, s_name, s_city)
                await asyncio.sleep(cls.SEARCH_DELAY)  # go easy on the search engine
            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(cls.ERROR_DELAY)
if __name__ == "__main__":
    # Script entry point: start the researcher loop (it runs until the process
    # is stopped, since ServiceResearcher.run never returns).
    asyncio.run(ServiceResearcher.run())