2026.03.29 20:00 Gitea_manager javítás előtt
This commit is contained in:
114
archive/old_files/.env_old
Executable file
114
archive/old_files/.env_old
Executable file
@@ -0,0 +1,114 @@
|
||||
COMPOSE_PROJECT_NAME=service_finder
|
||||
|
||||
# --- ADATBÁZIS KAPCSOLAT (Központi) ---
|
||||
# Itt a 'shared-postgres' nevet használjuk, ami a központi konténer neve
|
||||
APP_DB_HOST=shared-postgres
|
||||
APP_DB_PORT=5432
|
||||
APP_DB_NAME=service_finder
|
||||
APP_DB_USER=service_finder_app
|
||||
# IDE ÍRD AZT A JELSZÓT, amit a pgAdminban/ALTER USER parancsnál adtál a 'service_finder_app'-nak!
|
||||
APP_DB_PASSWORD='MiskociA74'
|
||||
|
||||
# Ez a legfontosabb sor, ezt használja a Python program:
|
||||
DATABASE_URL=postgresql+asyncpg://service_finder_app:MiskociA74@shared-postgres:5432/service_finder
|
||||
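# Migrációhoz (megjegyzés: az alábbi URL is az aszinkron asyncpg drivert használja; szinkron driverhez pl. postgresql+psycopg2:// séma kellene):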
|
||||
MIGRATION_DATABASE_URL=postgresql+asyncpg://service_finder_app:MiskociA74@shared-postgres:5432/service_finder
|
||||
|
||||
# --- ALKALMAZÁS BEÁLLÍTÁSOK ---
|
||||
ALLOWED_ORIGINS="https://app.profibot.hu,https://dev.profibot.hu,http://localhost:3000"
|
||||
PYTHONPATH=/app
|
||||
|
||||
# --- MINIO (Fájltárolás) ---
|
||||
# Ez maradhat helyi konténer, vagy köthetjük a központihoz is, de most hagyjuk a projektben
|
||||
MINIO_ENDPOINT=minio:9000
|
||||
MINIO_ROOT_USER=kincses
|
||||
MINIO_ROOT_PASSWORD='MiskociA74'
|
||||
MINIO_ACCESS_KEY=kincses
|
||||
MINIO_SECRET_KEY='MiskociA74'
|
||||
|
||||
# --- EGYÉB API KULCSOK ---
|
||||
SENDGRID_API_KEY=SG.SENDGRID_API_KEY=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
|
||||
FROM_EMAIL=info@profibot.hu
|
||||
|
||||
# Biztonsági kulcs a tokenekhez (KÖTELEZŐ!)
|
||||
SECRET_KEY=2dca2ff3bf9b8184e14038d5d08e646b31bd4a5f5ffc7e19d28e294f3bb3760b
|
||||
# _______________________________________________________________
|
||||
# ==============================================================================
|
||||
# 🛠️ INFRASTRUKTÚRA (Docker & Database)
|
||||
# ==============================================================================
|
||||
# Adatbázis alapok
|
||||
POSTGRES_USER=kincses
|
||||
POSTGRES_PASSWORD='MiskociA74'
|
||||
POSTGRES_DB=service_finder
|
||||
|
||||
# Kapcsolati URL a Python számára (Központi shared-postgres)
|
||||
DATABASE_URL=postgresql+asyncpg://service_finder_app:MiskociA74@shared-postgres:5432/service_finder
|
||||
|
||||
# Migrációhoz használt URL (Alembic számára)
|
||||
MIGRATION_DATABASE_URL=postgresql+asyncpg://service_finder_app:MiskociA74@shared-postgres:5432/service_finder
|
||||
|
||||
# Redis elérés
|
||||
REDIS_URL=redis://service_finder_redis:6379/0
|
||||
|
||||
# ==============================================================================
|
||||
# 🚀 ALKALMAZÁS BEÁLLÍTÁSOK (FastAPI)
|
||||
# ==============================================================================
|
||||
ENV=development
|
||||
DEBUG=True
|
||||
PYTHONPATH=/app
|
||||
|
||||
# Biztonsági kulcs a JWT tokenekhez (Generálj egy hosszú véletlen sort!)
|
||||
# Példa generáláshoz: openssl rand -hex 32
|
||||
SECRET_KEY='2dca2ff3bf9b8184e14038d5d08e646b31bd4a5f5ffc7e19d28e294f3bb3760b'
|
||||
ALGORITHM=HS256
|
||||
|
||||
# CORS: Milyen címekről érhető el az API? (Vesszővel elválasztva)
|
||||
CORS_ORIGINS=https://app.profibot.hu,https://dev.profibot.hu,http://localhost:3000,http://192.168.100.10:3000
|
||||
|
||||
# Frontend címe a kiküldött linkekhez (Visszaigazolás, jelszó-visszaállítás)
|
||||
FRONTEND_BASE_URL=http://192.168.100.10:3000
|
||||
|
||||
# ==============================================================================
|
||||
# 📧 EMAIL RENDSZER (SMTP / SendGrid)
|
||||
# ==============================================================================
|
||||
# EMAIL_PROVIDER lehet: 'smtp' vagy 'sendgrid' vagy 'disabled'
|
||||
EMAIL_PROVIDER=sendgrid
|
||||
EMAILS_FROM_EMAIL=info@profibot.hu
|
||||
EMAILS_FROM_NAME='Service Finder'
|
||||
|
||||
# SendGrid beállítások
|
||||
SENDGRID_API_KEY=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
|
||||
|
||||
# SMTP Fallback (Csak ha az EMAIL_PROVIDER=smtp)
|
||||
SMTP_HOST=smtp.gmail.com
|
||||
SMTP_PORT=587
|
||||
SMTP_USER=info@profibot.hu
|
||||
SMTP_PASSWORD='SAJÁT_APP_PASSWORD'
|
||||
|
||||
# ==============================================================================
|
||||
# 📦 MINIO (Fájltárolás - NAS-ra kivezetve)
|
||||
# ==============================================================================
|
||||
MINIO_ENDPOINT=minio:9000
|
||||
MINIO_ROOT_USER=kincses
|
||||
MINIO_ROOT_PASSWORD='MiskociA74'
|
||||
MINIO_ACCESS_KEY=kincses
|
||||
MINIO_SECRET_KEY='MiskociA74'
|
||||
|
||||
# --- n8n CONFIG ---
|
||||
N8N_DB_PASSWORD=MiskociA74
|
||||
# Az n8n-en belül a központi DB elérése:
|
||||
# Host: shared-postgres
|
||||
# User: service_finder_app
|
||||
|
||||
# --- Frontend ---
|
||||
FRONTEND_BASE_URL=https://dev.profibot.hu/docs
|
||||
|
||||
|
||||
|
||||
# Holland autó adatbázis free token
|
||||
RDW_APP_TOKEN=kSMUn0tvnmoM6TMSegLpFvKI8
|
||||
|
||||
# gemini service_finder_robot
|
||||
GEMINI_API_KEY=AIzaSyAaCVNPwf8PCphu_pt6spjAa2OVu8Exug8
|
||||
|
||||
DeepSeek API key = sk-1871b668aac44b50859ee6c54fe95e21
|
||||
175
archive/old_files/backend/app/models/marketplace/service.py.old
Executable file
175
archive/old_files/backend/app/models/marketplace/service.py.old
Executable file
@@ -0,0 +1,175 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/models/marketplace/service.py
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, List, Optional
|
||||
from sqlalchemy import Integer, String, Boolean, DateTime, ForeignKey, text, Text, Float, Index, Numeric, BigInteger
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
from sqlalchemy.dialects.postgresql import UUID as PG_UUID, JSONB
|
||||
from geoalchemy2 import Geometry
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
# MB 2.0: Központi aszinkron adatbázis motorból húzzuk be a Base-t
|
||||
from app.database import Base
|
||||
|
||||
class ServiceProfile(Base):
    """Service provider profile stored in ``marketplace.service_profiles`` (v1.3.1).

    Holds the provider's fingerprint and location, external and verified
    rating aggregates, trust/verification state, contact details and the
    relationships to its organization, expertises and reviews.
    """

    __tablename__ = "service_profiles"
    __table_args__ = (
        # NOTE(review): ``fingerprint`` below also sets ``index=True``, which
        # appears to yield a second, non-unique index next to this unique one.
        # Left untouched here because removing either changes the schema.
        Index('idx_service_fingerprint', 'fingerprint', unique=True),
        {"schema": "marketplace"}
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
    organization_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("fleet.organizations.id"), unique=True)
    # Self-referencing foreign key to another service profile.
    parent_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("marketplace.service_profiles.id"))

    fingerprint: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
    location: Mapped[Any] = mapped_column(Geometry(geometry_type='POINT', srid=4326, spatial_index=False), index=True)

    status: Mapped[str] = mapped_column(String(20), server_default=text("'ghost'"), index=True)
    last_audit_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())

    google_place_id: Mapped[Optional[str]] = mapped_column(String(100), unique=True)
    rating: Mapped[Optional[float]] = mapped_column(Float)
    user_ratings_total: Mapped[Optional[int]] = mapped_column(Integer)

    # Aggregated verified review ratings (Social 3)
    rating_verified_count: Mapped[Optional[int]] = mapped_column(Integer, server_default=text("0"))
    rating_price_avg: Mapped[Optional[float]] = mapped_column(Float)
    rating_quality_avg: Mapped[Optional[float]] = mapped_column(Float)
    rating_time_avg: Mapped[Optional[float]] = mapped_column(Float)
    rating_communication_avg: Mapped[Optional[float]] = mapped_column(Float)
    rating_overall: Mapped[Optional[float]] = mapped_column(Float)
    last_review_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))

    vibe_analysis: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
    social_links: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
    specialization_tags: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))

    trust_score: Mapped[int] = mapped_column(Integer, default=30)
    is_verified: Mapped[bool] = mapped_column(Boolean, default=False)
    verification_log: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))

    opening_hours: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
    contact_phone: Mapped[Optional[str]] = mapped_column(String)
    contact_email: Mapped[Optional[str]] = mapped_column(String)
    website: Mapped[Optional[str]] = mapped_column(String)
    bio: Mapped[Optional[str]] = mapped_column(Text)

    # Relationships
    # ``organization_id`` is nullable, so the related object may be absent;
    # the annotation is Optional to match.
    organization: Mapped[Optional["Organization"]] = relationship("Organization", back_populates="service_profile")
    expertises: Mapped[List["ServiceExpertise"]] = relationship("ServiceExpertise", back_populates="service")
    reviews: Mapped[List["ServiceReview"]] = relationship("ServiceReview", back_populates="service")

    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
    updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), onupdate=func.now())
|
||||
|
||||
class ExpertiseTag(Base):
    """Master list of expertise tags (MB 2.0).

    According to the original author's notes, this table drives both the
    robots' search and the gamification scoring.
    """

    __tablename__ = "expertise_tags"
    __table_args__ = {"schema": "marketplace"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Unique identifier key (e.g. 'ENGINE_REBUILD').
    key: Mapped[str] = mapped_column(
        String(50),
        unique=True,
        index=True,
    )

    # Display names (Hungarian / English).
    name_hu: Mapped[Optional[str]] = mapped_column(String(100))
    name_en: Mapped[Optional[str]] = mapped_column(String(100))

    # Main group (e.g. 'MECHANICS', 'ELECTRICAL', 'EMERGENCY').
    category: Mapped[Optional[str]] = mapped_column(String(30), index=True)

    # --- Gamification and discovery ---

    # Official tag (True) or one suggested by a user/robot (False).
    is_official: Mapped[bool] = mapped_column(
        Boolean,
        default=True,
        server_default=text("true"),
    )

    # When a user suggested the tag, this records who it was (for XP credit).
    suggested_by_id: Mapped[Optional[int]] = mapped_column(
        BigInteger,
        ForeignKey("identity.persons.id"),
    )

    # Adjustable point value: it lives in the database so it can be changed
    # at any time (higher for rare trades, lower for common ones).
    discovery_points: Mapped[int] = mapped_column(
        Integer,
        default=10,
        server_default=text("10"),
    )

    # Robot keywords (JSONB), e.g. ["fék", "betét", "tárcsa", "fékfolyadék"].
    # Per the original note, the Scout robot matches a service's website
    # against these to identify it.
    search_keywords: Mapped[Any] = mapped_column(
        JSONB,
        server_default=text("'[]'::jsonb"),
    )

    # Popularity indicator (how many times the tag was used in the system).
    usage_count: Mapped[int] = mapped_column(
        Integer,
        default=0,
        server_default=text("0"),
    )

    # UI icon identifier (e.g. 'wrench', 'tire-flat', 'car-electric').
    icon: Mapped[Optional[str]] = mapped_column(String(50))

    # Description of the trade (for administrative purposes).
    description: Mapped[Optional[str]] = mapped_column(Text)

    # Timestamps
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    updated_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        onupdate=func.now(),
    )

    # --- Relationships ---
    services: Mapped[List["ServiceExpertise"]] = relationship(
        "ServiceExpertise",
        back_populates="tag",
    )
    # Reference to the person who submitted the tag (if any).
    suggested_by: Mapped[Optional["Person"]] = relationship("Person")
|
||||
|
||||
class ServiceExpertise(Base):
    """Association table linking a service profile to its expertise tags.

    Each row also stores how well-validated the given expertise is for that
    particular service (see ``confidence_level``).
    """

    __tablename__ = "service_expertises"
    __table_args__ = {"schema": "marketplace"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    service_id: Mapped[int] = mapped_column(Integer, ForeignKey("marketplace.service_profiles.id", ondelete="CASCADE"))
    expertise_id: Mapped[int] = mapped_column(Integer, ForeignKey("marketplace.expertise_tags.id", ondelete="CASCADE"))

    # How certain is this expertise?
    # (0: found by a robot, 1: stated by a user, 2: verified expertise)
    confidence_level: Mapped[int] = mapped_column(Integer, default=0, server_default=text("0"))

    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=text("now()"))

    # Back-references. Annotated with ``Mapped[...]`` so they are typed the
    # same way as the ``expertises`` / ``services`` sides they back-populate.
    service: Mapped["ServiceProfile"] = relationship("ServiceProfile", back_populates="expertises")
    tag: Mapped["ExpertiseTag"] = relationship("ExpertiseTag", back_populates="services")
|
||||
|
||||
class ServiceStaging(Base):
    """Staging storage for data gathered by the Hunter robot."""

    __tablename__ = "service_staging"
    __table_args__ = (
        Index('idx_staging_fingerprint', 'fingerprint', unique=True),
        {"schema": "marketplace"},
    )

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        index=True,
    )
    name: Mapped[str] = mapped_column(
        String,
        index=True,
        nullable=False,
    )
    postal_code: Mapped[Optional[str]] = mapped_column(String(10), index=True)
    city: Mapped[Optional[str]] = mapped_column(String(100), index=True)
    full_address: Mapped[Optional[str]] = mapped_column(String)
    # Uniqueness of the fingerprint is enforced by the index in __table_args__.
    fingerprint: Mapped[str] = mapped_column(String(255), nullable=False)
    raw_data: Mapped[Any] = mapped_column(
        JSONB,
        server_default=text("'{}'::jsonb"),
    )

    # Additional contact and identification fields
    contact_phone: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
    website: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    external_id: Mapped[Optional[str]] = mapped_column(
        String(100),
        nullable=True,
        index=True,
    )

    status: Mapped[str] = mapped_column(
        String(20),
        server_default=text("'pending'"),
        index=True,
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
|
||||
|
||||
class DiscoveryParameter(Base):
    """Robot control parameters, maintained from the admin side."""

    __tablename__ = "discovery_parameters"
    __table_args__ = {"schema": "marketplace"}

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
    )
    # City / keyword pair stored for a discovery run.
    city: Mapped[str] = mapped_column(String(100))
    keyword: Mapped[str] = mapped_column(String(100))
    is_active: Mapped[bool] = mapped_column(
        Boolean,
        default=True,
    )
    last_run_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
    )
|
||||
73
archive/old_files/backend/app/models/marketplace/staged_data1.2_.py.old
Executable file
73
archive/old_files/backend/app/models/marketplace/staged_data1.2_.py.old
Executable file
@@ -0,0 +1,73 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/models/marketplace/staged_data.py
|
||||
from datetime import datetime
from typing import Optional, Any

from sqlalchemy import String, Integer, DateTime, text, Boolean, Float, Text, ForeignKey
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.sql import func

from app.db.base_class import Base
|
||||
|
||||
class StagedVehicleData(Base):
    """Raw data collector table for Robot 2.1 (Researcher)."""

    __tablename__ = "staged_vehicle_data"
    __table_args__ = {"schema": "system"}

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
    )
    source_url: Mapped[Optional[str]] = mapped_column(String)
    raw_data: Mapped[dict] = mapped_column(
        JSONB,
        server_default=text("'{}'::jsonb"),
    )

    # The status default is applied on the Python side only
    # (no server_default is declared for it).
    status: Mapped[str] = mapped_column(
        String(20),
        default="PENDING",
        index=True,
    )
    error_log: Mapped[Optional[str]] = mapped_column(String)

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
|
||||
|
||||
class ServiceStaging(Base):
    """Raw service data found by Robot 1.3 (Scout) plus the Robot 5 (Auditor) log.

    The class uses ``Text`` and ``ForeignKey`` from ``sqlalchemy``; both must
    be imported at module level or the class body raises ``NameError`` when
    the module is imported.
    """

    __tablename__ = "service_staging"
    __table_args__ = {"schema": "marketplace"}

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String(255), index=True)
    source: Mapped[Optional[str]] = mapped_column(String(50))
    external_id: Mapped[Optional[str]] = mapped_column(String(100), index=True)
    fingerprint: Mapped[str] = mapped_column(String(64), unique=True, index=True)

    # Contact details
    city: Mapped[str] = mapped_column(String(100), index=True)
    postal_code: Mapped[Optional[str]] = mapped_column(String(10))
    full_address: Mapped[Optional[str]] = mapped_column(String(500))
    contact_phone: Mapped[Optional[str]] = mapped_column(String(50))
    website: Mapped[Optional[str]] = mapped_column(String(255))
    contact_email: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)

    # Submission and trust
    description: Mapped[Optional[str]] = mapped_column(Text)
    submitted_by: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("identity.users.id"))
    trust_score: Mapped[int] = mapped_column(Integer, default=0, server_default=text("0"))

    # Raw data and status
    raw_data: Mapped[dict] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
    status: Mapped[str] = mapped_column(String(20), default="pending", index=True)

    # --- Robot 5 (Auditor) technical fields ---
    # Needed for logging the auditor's work.
    rejection_reason: Mapped[Optional[str]] = mapped_column(String(500))
    published_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
    # Plain integers: no ForeignKey is declared for these two ids.
    service_profile_id: Mapped[Optional[int]] = mapped_column(Integer)
    organization_id: Mapped[Optional[int]] = mapped_column(Integer)
    audit_trail: Mapped[Optional[dict]] = mapped_column(JSONB)

    # Timestamps
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
    updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), onupdate=func.now())
|
||||
|
||||
class DiscoveryParameter(Base):
    """Discovery parameters (the cities in which the Scout searches)."""

    __tablename__ = "discovery_parameters"
    __table_args__ = {"schema": "marketplace"}

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
    )
    # One row per city: the column is declared unique.
    city: Mapped[str] = mapped_column(
        String(100),
        unique=True,
        index=True,
    )
    keyword: Mapped[Optional[str]] = mapped_column(
        String(100),
        nullable=True,
    )
    is_active: Mapped[bool] = mapped_column(
        Boolean,
        default=True,
    )
    last_run_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
    )
|
||||
58
archive/old_files/backend/app/scripts/move_tables.py.old
Normal file
58
archive/old_files/backend/app/scripts/move_tables.py.old
Normal file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Move tables from system schema to gamification schema.
|
||||
"""
|
||||
import asyncio
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy import text
|
||||
|
||||
async def move_tables():
    """Move ``competitions`` and ``user_scores`` from ``system`` to ``gamification``.

    Prints where the two tables currently live, attempts each
    ``ALTER TABLE ... SET SCHEMA`` on a best-effort basis, then prints the
    table locations again for verification.

    Each ``ALTER TABLE`` runs inside its own SAVEPOINT. Without it, a failed
    statement leaves the surrounding PostgreSQL transaction aborted, so the
    next move and the verification query would fail as well.
    """
    # Imported lazily so that merely importing this module does not require
    # the application configuration to be loadable.
    from sqlalchemy.exc import SQLAlchemyError

    # Use the same DATABASE_URL as sync_engine
    from app.core.config import settings

    list_tables = text("""
        SELECT table_schema, table_name
        FROM information_schema.tables
        WHERE table_name IN ('competitions', 'user_scores')
        ORDER BY table_schema;
    """)
    moves = (
        (
            "\nMoving system.competitions to gamification.competitions...",
            'ALTER TABLE system.competitions SET SCHEMA gamification;',
        ),
        (
            "Moving system.user_scores to gamification.user_scores...",
            'ALTER TABLE system.user_scores SET SCHEMA gamification;',
        ),
    )

    engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))
    try:
        async with engine.begin() as conn:
            # Check where the tables currently are
            result = await conn.execute(list_tables)
            print("Current tables:")
            for row in result.fetchall():
                print(f" {row.table_schema}.{row.table_name}")

            for announcement, statement in moves:
                print(announcement)
                try:
                    # SAVEPOINT: a failure rolls back only this statement.
                    async with conn.begin_nested():
                        await conn.execute(text(statement))
                except SQLAlchemyError as e:
                    print(f" Error: {e}")
                else:
                    print(" OK")

            # Verify
            result = await conn.execute(list_tables)
            print("\nAfter moving:")
            for row in result.fetchall():
                print(f" {row.table_schema}.{row.table_name}")
    finally:
        # Release pooled connections even if something above raised.
        await engine.dispose()


if __name__ == "__main__":
    asyncio.run(move_tables())
|
||||
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rename tables in system schema to deprecated to avoid extra detection.
|
||||
"""
|
||||
import asyncio
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy import text
|
||||
|
||||
async def rename():
    """Rename ``system.competitions`` / ``system.user_scores`` to ``*_deprecated``.

    Prints the matching tables found in the ``system`` schema, attempts each
    ``ALTER TABLE ... RENAME TO`` on a best-effort basis, then lists the
    ``system`` tables whose name ends in ``deprecated``.

    Each rename runs inside its own SAVEPOINT. Without it, a failed statement
    leaves the surrounding PostgreSQL transaction aborted, so the following
    rename and the verification query would fail too.
    """
    # Imported lazily so that merely importing this module does not require
    # the application configuration to be loadable.
    from sqlalchemy.exc import SQLAlchemyError

    from app.core.config import settings

    renames = (
        ("competitions", "competitions_deprecated"),
        ("user_scores", "user_scores_deprecated"),
    )

    engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))
    try:
        async with engine.begin() as conn:
            # Check if tables exist
            result = await conn.execute(text("""
                SELECT table_schema, table_name
                FROM information_schema.tables
                WHERE table_schema = 'system' AND table_name IN ('competitions', 'user_scores');
            """))
            print("Tables to rename:")
            for row in result.fetchall():
                print(f" {row.table_schema}.{row.table_name}")

            for old_name, new_name in renames:
                try:
                    # SAVEPOINT: a failure rolls back only this statement.
                    async with conn.begin_nested():
                        await conn.execute(
                            text(f'ALTER TABLE system.{old_name} RENAME TO {new_name};')
                        )
                except SQLAlchemyError as e:
                    print(f"Error renaming {old_name}: {e}")
                else:
                    print(f"Renamed system.{old_name} -> system.{new_name}")

            # Verify
            result = await conn.execute(text("""
                SELECT table_schema, table_name
                FROM information_schema.tables
                WHERE table_schema = 'system' AND table_name LIKE '%deprecated';
            """))
            print("\nAfter rename:")
            for row in result.fetchall():
                print(f" {row.table_schema}.{row.table_name}")
    finally:
        # Release pooled connections even if something above raised.
        await engine.dispose()


if __name__ == "__main__":
    asyncio.run(rename())
|
||||
170
archive/old_files/backend/app/scripts/sync_engine1.0.py.old
Normal file
170
archive/old_files/backend/app/scripts/sync_engine1.0.py.old
Normal file
@@ -0,0 +1,170 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/scripts/sync_engine.py
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Universal Schema Synchronizer
|
||||
|
||||
Dynamically imports all SQLAlchemy models from app.models, compares them with the live database,
|
||||
and creates missing tables/columns without dropping anything.
|
||||
|
||||
Safety First:
|
||||
- NEVER drops tables or columns.
|
||||
- Prints planned SQL before execution.
|
||||
- Requires confirmation for destructive operations (none in this script).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy import inspect, text
|
||||
from sqlalchemy.schema import CreateTable, AddConstraint
|
||||
from sqlalchemy.sql.ddl import CreateColumn
|
||||
|
||||
# Add backend to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from app.database import Base
|
||||
from app.core.config import settings
|
||||
|
||||
def dynamic_import_models():
    """Import every model module under ``app/models`` to populate ``Base.metadata``.

    The directory is walked recursively so that modules inside subpackages
    (for example ``app/models/marketplace/service.py``) are imported too.
    Files are processed in sorted order so the output is deterministic.
    A module that fails to import is reported and skipped.

    Returns:
        list[str]: dotted names of the modules that were imported successfully.
    """
    models_dir = Path(__file__).parent.parent / "models"
    imported = []

    for py_file in sorted(models_dir.rglob("*.py")):
        if py_file.name == "__init__.py":
            continue
        # models/marketplace/service.py -> app.models.marketplace.service
        relative = py_file.relative_to(models_dir).with_suffix("")
        module_name = ".".join(("app", "models", *relative.parts))
        try:
            importlib.import_module(module_name)
        except Exception as e:
            # Deliberately broad: importing a module can fail with any
            # exception, and one broken model file must not stop the sync.
            print(f"⚠️ Could not import {module_name}: {e}")
        else:
            imported.append(module_name)
            print(f"✅ Imported {module_name}")

    # Also ensure the __init__ is loaded (it imports many models manually)
    import app.models
    print(f"📦 Total tables in Base.metadata: {len(Base.metadata.tables)}")
    return imported
|
||||
|
||||
def _sql_literal(value):
    """Render a Python scalar as a SQL literal; return None for other types."""
    if isinstance(value, bool):  # checked before int: bool subclasses int
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, str):
        return "'" + value.replace("'", "''") + "'"
    return None


def _column_default_sql(col, dialect):
    """Return the SQL for the column's DEFAULT clause, or None if there is none.

    A declared ``server_default`` wins. Otherwise a scalar Python-side
    ``default`` is rendered as a literal so existing rows can be back-filled.
    Callable and SQL-expression Python defaults are skipped.
    """
    server_default = col.server_default
    if server_default is not None:
        arg = getattr(server_default, "arg", None)
        if arg is None:
            return None
        if isinstance(arg, str):
            return _sql_literal(arg)
        # text(...) / func.xxx() expressions: compile them for this dialect.
        return str(arg.compile(dialect=dialect, compile_kwargs={"literal_binds": True}))

    default = col.default
    if default is not None and getattr(default, "is_scalar", False):
        return _sql_literal(default.arg)
    return None


def _add_column_sql(schema, table_name, col, dialect):
    """Build the ``ALTER TABLE ... ADD COLUMN`` statement for one model column.

    Only the type, NOT NULL and DEFAULT are emitted; constraints such as
    foreign keys or unique indexes are not added by this statement.
    """
    col_type = col.type.compile(dialect=dialect)
    sql = f'ALTER TABLE "{schema}"."{table_name}" ADD COLUMN "{col.name}" {col_type}'
    if col.nullable is False:
        sql += " NOT NULL"
    default_sql = _column_default_sql(col, dialect)
    if default_sql is not None:
        sql += f" DEFAULT {default_sql}"
    return sql


async def compare_and_repair():
    """Compare SQLAlchemy metadata with the live database and add what is missing.

    Missing schemas are created first, then the enum types in the
    ``marketplace`` schema, then missing tables and missing columns. Creating
    the schemas up front ensures ``CREATE TYPE marketplace.*`` cannot run
    against a schema that does not exist yet. Nothing is dropped.

    All statements run in one transaction; the engine is disposed afterwards
    even if a statement fails.
    """
    print("🔗 Connecting to database...")
    engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))

    def get_diff_and_repair(connection):
        inspector = inspect(connection)
        dialect = connection.dialect

        # Get all schemas from models
        expected_schemas = sorted({t.schema for t in Base.metadata.sorted_tables if t.schema})
        print(f"📋 Expected schemas: {expected_schemas}")

        # Create missing schemas before anything that lives inside them.
        db_schemas = set(inspector.get_schema_names())
        for schema in expected_schemas:
            if schema not in db_schemas:
                print(f"❌ Schema '{schema}' missing. Creating...")
                connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
                print(f"✅ Schema '{schema}' created.")

        # Ensure enum types exist in marketplace schema
        if 'marketplace' in expected_schemas:
            print("\n🔧 Ensuring enum types in marketplace schema...")
            # moderation_status enum
            connection.execute(text("""
                DO $$
                BEGIN
                    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'moderation_status' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'marketplace')) THEN
                        CREATE TYPE marketplace.moderation_status AS ENUM ('pending', 'approved', 'rejected');
                    END IF;
                END $$;
            """))
            # source_type enum
            connection.execute(text("""
                DO $$
                BEGIN
                    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'source_type' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'marketplace')) THEN
                        CREATE TYPE marketplace.source_type AS ENUM ('manual', 'ocr', 'import');
                    END IF;
                END $$;
            """))
            print("✅ Enum types ensured.")

        for schema in expected_schemas:
            print(f"\n--- 🔍 Checking schema '{schema}' ---")

            # Get tables in this schema from models
            model_tables = [t for t in Base.metadata.sorted_tables if t.schema == schema]
            db_tables = set(inspector.get_table_names(schema=schema))

            for table in model_tables:
                if table.name not in db_tables:
                    print(f"❌ Missing table: {schema}.{table.name}")
                    create_stmt = CreateTable(table)
                    # Print SQL for debugging
                    sql_str = str(create_stmt.compile(dialect=dialect))
                    print(f" SQL: {sql_str}")
                    connection.execute(create_stmt)
                    print(f"✅ Table {schema}.{table.name} created.")
                    continue

                # Check columns
                db_columns = {c['name'] for c in inspector.get_columns(table.name, schema=schema)}
                missing_cols = [col for col in table.columns if col.name not in db_columns]

                if not missing_cols:
                    print(f"✅ Table {schema}.{table.name} is up‑to‑date.")
                    continue

                print(f"⚠️ Table {schema}.{table.name} missing columns: {[c.name for c in missing_cols]}")
                for col in missing_cols:
                    sql = _add_column_sql(schema, table.name, col, dialect)
                    print(f" SQL: {sql}")
                    connection.execute(text(sql))
                    print(f"✅ Column {col.name} added.")

        print("\n--- ✅ Schema synchronization complete. ---")

    try:
        async with engine.begin() as conn:
            await conn.run_sync(get_diff_and_repair)
    finally:
        # Release pooled connections even if a statement above failed.
        await engine.dispose()
|
||||
|
||||
async def main():
    """Run the three synchronizer steps in order: import, repair, verify."""
    print("🚀 Universal Schema Synchronizer", "=" * 50, sep="\n")

    # Step 1: Dynamic import
    print("\n📥 Step 1: Dynamically importing all models...")
    dynamic_import_models()

    # Step 2: Compare and repair
    print("\n🔧 Step 2: Comparing with database and repairing...")
    await compare_and_repair()

    # Step 3: Final verification
    print("\n📊 Step 3: Final verification...")
    # Run the compare_schema.py diagnostics; the import stays here so it
    # happens only after the first two steps have completed.
    from app.tests_internal.diagnostics.compare_schema import compare as run_schema_diagnostics
    await run_schema_diagnostics()

    print("\n✨ Synchronization finished successfully!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
232
archive/old_files/backend/app/scripts/unified_db_sync_1.0.py.old
Normal file
232
archive/old_files/backend/app/scripts/unified_db_sync_1.0.py.old
Normal file
@@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unified Database Synchronizer with Deep Constraint & Index Support
|
||||
|
||||
Dynamically imports all SQLAlchemy models, compares metadata with live database,
|
||||
and creates missing tables, columns, unique constraints, and indexes.
|
||||
|
||||
Safety First:
|
||||
- NEVER drops tables, columns, constraints, or indexes.
|
||||
- Prints planned SQL before execution.
|
||||
- Requires confirmation for destructive operations (none in this script).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
from sqlalchemy import inspect, text, UniqueConstraint, Index
|
||||
from sqlalchemy.schema import CreateTable, AddConstraint, CreateIndex
|
||||
from sqlalchemy.sql.ddl import CreateColumn
|
||||
|
||||
# Add backend to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from app.database import Base
|
||||
from app.core.config import settings
|
||||
|
||||
def dynamic_import_models():
    """Import every model module under ``app/models`` to populate ``Base.metadata``.

    The directory is walked recursively so that modules inside subpackages
    are imported too, not only the files directly in ``app/models``. Files
    are processed in sorted order so the output is deterministic. A module
    that fails to import is reported and skipped.

    Returns:
        list[str]: dotted names of the modules that were imported successfully.
    """
    models_dir = Path(__file__).parent.parent / "models"
    imported = []

    for py_file in sorted(models_dir.rglob("*.py")):
        if py_file.name == "__init__.py":
            continue
        # models/<pkg>/<module>.py -> app.models.<pkg>.<module>
        relative = py_file.relative_to(models_dir).with_suffix("")
        module_name = ".".join(("app", "models", *relative.parts))
        try:
            importlib.import_module(module_name)
        except Exception as e:
            # Deliberately broad: importing a module can fail with any
            # exception, and one broken model file must not stop the sync.
            print(f"⚠️ Could not import {module_name}: {e}")
        else:
            imported.append(module_name)
            print(f"✅ Imported {module_name}")

    # Also ensure the __init__ is loaded (it imports many models manually)
    import app.models
    print(f"📦 Total tables in Base.metadata: {len(Base.metadata.tables)}")
    return imported
|
||||
|
||||
async def compare_and_repair(apply: bool = False):
    """
    Compare SQLAlchemy metadata with the live database and create what is missing.

    The run is strictly additive: schemas, the marketplace enum types, tables,
    columns, unique constraints and indexes are created when absent. Nothing
    is dropped or altered.

    Args:
        apply: When False (default) the planned SQL is only printed and no
            statement is sent to the database. When True every statement is
            executed inside a single transaction that is committed at the end.
    """
    print("🔗 Connecting to database...")
    engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))

    def sql_literal(value):
        """Render a plain Python scalar as a SQL literal; None if unsupported."""
        if isinstance(value, bool):  # bool first: it is a subclass of int
            return "TRUE" if value else "FALSE"
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, str):
            return "'" + value.replace("'", "''") + "'"
        return None

    def default_sql(col):
        """Return the SQL DEFAULT expression for *col*, or None if none can be rendered."""
        if col.server_default is not None:
            arg = getattr(col.server_default, "arg", None)
            if isinstance(arg, str):
                return sql_literal(arg)
            # NOTE(review): SQL expressions (text(), func.now(), ...) are rendered
            # with their generic string form - confirm for dialect-specific ones.
            return str(arg) if arg is not None else None
        # Python-side defaults: only scalars can be expressed in DDL; callables
        # and sequences cannot.
        if col.default is not None and getattr(col.default, "is_scalar", False):
            return sql_literal(col.default.arg)
        return None

    def get_diff_and_repair(connection):
        inspector = inspect(connection)
        dialect = connection.dialect

        def run(statement, done_message):
            """Print a statement (SQL string or DDL element); execute it only in apply mode."""
            if isinstance(statement, str):
                rendered, executable = statement, text(statement)
            else:
                rendered = str(statement.compile(dialect=dialect)).strip()
                executable = statement
            print(f"   SQL: {rendered}")
            if apply:
                connection.execute(executable)
                print(done_message)

        def ensure_enum(schema, name, labels):
            """Create the enum type schema.name unless it already exists."""
            values = ", ".join(f"'{label}'" for label in labels)
            run(
                f"""DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = '{name}' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = '{schema}')) THEN
        CREATE TYPE {schema}.{name} AS ENUM ({values});
    END IF;
END $$;""",
                f"✅ Enum type {schema}.{name} ensured.",
            )

        def create_index(table, idx):
            """Emit CREATE INDEX for a model index, keeping its declared definition."""
            if idx.name:
                # CreateIndex keeps column order, uniqueness and dialect options.
                run(CreateIndex(idx), f"✅ Index {idx.name} added.")
                return
            column_names = [col.name for col in idx.columns]
            index_name = f"idx_{table.name}_{'_'.join(sorted(column_names))}"
            columns_sql = ', '.join(f'"{name}"' for name in column_names)
            unique_sql = "UNIQUE " if idx.unique else ""
            run(
                f'CREATE {unique_sql}INDEX "{index_name}" ON "{table.schema}"."{table.name}" ({columns_sql})',
                f"✅ Index {index_name} added.",
            )

        # Get all schemas from models
        expected_schemas = sorted({t.schema for t in Base.metadata.sorted_tables if t.schema})
        print(f"📋 Expected schemas: {expected_schemas}")

        # 1) Schemas first: enum types and tables below live inside them.
        db_schemas = set(inspector.get_schema_names())
        for schema in expected_schemas:
            print(f"\n--- 🔍 Checking schema '{schema}' ---")
            if schema not in db_schemas:
                print(f"❌ Schema '{schema}' missing. Creating...")
                run(f'CREATE SCHEMA IF NOT EXISTS "{schema}"', f"✅ Schema '{schema}' created.")

        # 2) Enum types used by marketplace tables (CreateTable does not emit them).
        if 'marketplace' in expected_schemas:
            print("\n🔧 Ensuring enum types in marketplace schema...")
            ensure_enum('marketplace', 'moderation_status', ('pending', 'approved', 'rejected'))
            ensure_enum('marketplace', 'source_type', ('manual', 'ocr', 'import'))

        # 3) Tables, walked in global dependency order so that a foreign key
        #    pointing into another schema finds its target already created.
        db_tables = {
            schema: set(inspector.get_table_names(schema=schema))
            for schema in expected_schemas
        }

        for table in Base.metadata.sorted_tables:
            schema = table.schema
            if not schema:
                continue

            if table.name not in db_tables[schema]:
                print(f"❌ Missing table: {schema}.{table.name}")
                run(CreateTable(table), f"✅ Table {schema}.{table.name} created.")
                # CREATE TABLE carries constraints but no indexes; add them now.
                for idx in table.indexes:
                    create_index(table, idx)
                continue

            # Check columns
            db_columns = {c['name'] for c in inspector.get_columns(table.name, schema=schema)}
            missing_cols = [col for col in table.columns if col.name not in db_columns]

            if missing_cols:
                print(f"⚠️ Table {schema}.{table.name} missing columns: {[c.name for c in missing_cols]}")
                for col in missing_cols:
                    col_type = col.type.compile(dialect=dialect)
                    sql = f'ALTER TABLE "{schema}"."{table.name}" ADD COLUMN "{col.name}" {col_type}'
                    default = default_sql(col)
                    if default is not None:
                        sql += f" DEFAULT {default}"
                    elif col.default is not None or col.server_default is not None:
                        print(f"   ℹ️ Default of {col.name} cannot be rendered as SQL; adding the column without DEFAULT.")
                    if col.nullable is False:
                        sql += " NOT NULL"
                    run(sql, f"✅ Column {col.name} added.")
            else:
                print(f"✅ Table {schema}.{table.name} columns are up‑to‑date.")

            # Check Unique Constraints - compared by column set, since names may differ
            db_unique_keys = {
                tuple(sorted(uc['column_names']))
                for uc in inspector.get_unique_constraints(table.name, schema=schema)
            }
            for uc in table.constraints:
                if not isinstance(uc, UniqueConstraint):
                    continue
                declared = [col.name for col in uc.columns]
                uc_columns = tuple(sorted(declared))
                if uc_columns in db_unique_keys:
                    print(f"✅ Unique constraint on {uc_columns} exists.")
                    continue
                constraint_name = uc.name or f"uq_{table.name}_{'_'.join(uc_columns)}"
                # DDL uses the declared column order, not the sorted comparison key.
                columns_sql = ', '.join(f'"{name}"' for name in declared)
                print(f"⚠️ Missing unique constraint on {schema}.{table.name} columns {uc_columns}")
                run(
                    f'ALTER TABLE "{schema}"."{table.name}" ADD CONSTRAINT "{constraint_name}" UNIQUE ({columns_sql})',
                    f"✅ Unique constraint {constraint_name} added.",
                )

            # Check Indexes - expression indexes report None column names, skip those
            db_index_keys = {
                tuple(sorted(name for name in idx['column_names'] if name is not None))
                for idx in inspector.get_indexes(table.name, schema=schema)
            }
            for idx in table.indexes:
                idx_columns = tuple(sorted(col.name for col in idx.columns))
                if idx_columns in db_index_keys:
                    print(f"✅ Index on {idx_columns} exists.")
                    continue
                print(f"⚠️ Missing index on {schema}.{table.name} columns {idx_columns}")
                create_index(table, idx)

        print("\n--- ✅ Schema synchronization complete. ---")

    try:
        async with engine.begin() as conn:
            await conn.run_sync(get_diff_and_repair)
    finally:
        # Release the connection pool even when the sync raised.
        await engine.dispose()
|
||||
|
||||
async def main():
    """
    Command-line entry point of the synchronizer.

    Parses the ``--apply`` flag, imports all models, runs the compare/repair
    step and finally calls the optional ``compare_schema`` diagnostic when
    that module can be imported.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Unified Database Synchronizer")
    cli.add_argument(
        '--apply',
        action='store_true',
        help='Apply changes to database (otherwise dry‑run)',
    )
    options = cli.parse_args()

    print("🚀 Unified Database Synchronizer")
    print("=" * 50)

    # Step 1: Dynamic import
    print("\n📥 Step 1: Dynamically importing all models...")
    dynamic_import_models()

    # Step 2: Compare and repair
    print("\n🔧 Step 2: Comparing with database and repairing...")
    await compare_and_repair(apply=options.apply)

    # Step 3: Final verification (skipped when the diagnostic module is absent)
    print("\n📊 Step 3: Final verification...")
    try:
        from app.tests_internal.diagnostics.compare_schema import compare
        await compare()
    except ImportError:
        print("⚠️ compare_schema module not found, skipping verification.")

    print("\n✨ Synchronization finished successfully!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
111
archive/old_files/backend/app/services/ai_service_googleApi_old.py.old
Executable file
111
archive/old_files/backend/app/services/ai_service_googleApi_old.py.old
Executable file
@@ -0,0 +1,111 @@
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
import asyncio
|
||||
import re
|
||||
from typing import Dict, Any, Optional
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from sqlalchemy import select
|
||||
from app.db.session import SessionLocal
|
||||
from app.models import SystemParameter
|
||||
|
||||
logger = logging.getLogger("AI-Service")
|
||||
|
||||
class AIService:
    """
    AI Service v1.2.5 - Final Integrated Edition

    - Robot 2: technical enrichment (Google Search tool + tolerant JSON parsing)
    - Robot 3: OCR (controlled JSON generation)

    All entry points return ``None`` when no GEMINI_API_KEY is configured or
    when the model call / response parsing fails.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    client = genai.Client(api_key=api_key) if api_key else None
    PRIMARY_MODEL = "gemini-2.0-flash"

    @staticmethod
    def _parse_json_payload(raw: str) -> Optional[Dict[str, Any]]:
        """
        Turn a model response into a dict.

        Markdown code fences are stripped; if the remainder is still not valid
        JSON, the outermost ``{...}`` span is tried. A JSON list yields its
        first element. Anything that is not a dict in the end yields None.

        Raises:
            json.JSONDecodeError: if no JSON document can be decoded.
        """
        cleaned = re.sub(r'```json\s*|```', '', raw).strip()
        try:
            payload = json.loads(cleaned)
        except json.JSONDecodeError:
            # The model sometimes wraps the JSON in prose despite the instructions.
            embedded = re.search(r'\{.*\}', cleaned, re.DOTALL)
            if embedded is None:
                raise
            payload = json.loads(embedded.group(0))
        if isinstance(payload, list) and payload:
            payload = payload[0]
        return payload if isinstance(payload, dict) else None

    @classmethod
    async def get_config_delay(cls) -> float:
        """Return the AI_REQUEST_DELAY system parameter in seconds (1.0 if unset or unreadable)."""
        try:
            async with SessionLocal() as db:
                stmt = select(SystemParameter).where(SystemParameter.key == "AI_REQUEST_DELAY")
                res = await db.execute(stmt)
                param = res.scalar_one_or_none()
                return float(param.value) if param else 1.0
        except Exception as e:
            # Best effort: the delay is only a throttle, so fall back to the default.
            logger.warning(f"AI_REQUEST_DELAY could not be read, using 1.0s: {e}")
            return 1.0

    @classmethod
    async def get_clean_vehicle_data(cls, make: str, raw_model: str, v_type: str) -> Optional[Dict[str, Any]]:
        """
        Robot 2: data mining with the help of Google Search.

        Args:
            make: Vehicle make.
            raw_model: Raw model name as found in the source data.
            v_type: Vehicle type, inserted into the prompt as context.

        Returns:
            The parsed JSON object, or None when the client is not configured
            or the request / parsing fails.
        """
        if not cls.client:
            return None
        await asyncio.sleep(await cls.get_config_delay())

        search_tool = types.Tool(google_search=types.GoogleSearch())

        prompt = f"""
            KERESS RÁ az interneten: {make} {raw_model} ({v_type}) pontos gyári modellkódja és technikai adatai.
            Adj választ szigorúan csak egy JSON blokkban:
            {{
                "marketing_name": "tiszta név",
                "synonyms": ["név1", "név2"],
                "technical_code": "gyári kód",
                "year_from": int,
                "year_to": int_vagy_null,
                "ccm": int,
                "kw": int,
                "maintenance": {{ "oil_type": "string", "oil_qty": float, "spark_plug": "string", "coolant": "string" }}
            }}
            FONTOS: A 'technical_code' NEM lehet üres. Ha nem találod, adj 'N/A' értéket!
        """

        # response_mime_type must not be set when the Search tool is used
        config = types.GenerateContentConfig(
            system_instruction="Profi járműtechnikai adatbányász vagy. Csak tiszta JSON-t válaszolsz markdown kódblokk nélkül.",
            tools=[search_tool],
            temperature=0.1
        )

        try:
            # generate_content is a blocking call: run it in a worker thread so
            # the event loop keeps serving other tasks meanwhile.
            response = await asyncio.to_thread(
                cls.client.models.generate_content,
                model=cls.PRIMARY_MODEL,
                contents=prompt,
                config=config,
            )
            return cls._parse_json_payload(response.text)
        except Exception as e:
            logger.error(f"❌ AI hiba ({make} {raw_model}): {e}")
            return None

    @classmethod
    async def analyze_document_image(
        cls, image_data: bytes, doc_type: str, mime_type: str = "image/jpeg"
    ) -> Optional[Dict[str, Any]]:
        """
        Robot 3: OCR - registration certificate, identity document, invoice, odometer.

        Args:
            image_data: Raw image bytes.
            doc_type: One of "identity", "vehicle_reg", "invoice", "odometer";
                any other value falls back to a generic OCR instruction.
            mime_type: MIME type of ``image_data`` (defaults to JPEG).

        Returns:
            The parsed JSON object, or None when the client is not configured
            or the request / parsing fails.
        """
        if not cls.client:
            return None
        await asyncio.sleep(await cls.get_config_delay())

        prompts = {
            "identity": "Személyes okmány adatok (név, szám, lejárat).",
            "vehicle_reg": "Forgalmi adatok (rendszám, alvázszám, kW, ccm).",
            "invoice": "Számla adatok (partner, végösszeg, dátum).",
            "odometer": "Csak a kilométeróra állása számként."
        }

        # response_mime_type is allowed here because the Search tool is not used
        config = types.GenerateContentConfig(
            system_instruction="Profi OCR dokumentum-elemző vagy. Csak tiszta JSON-t válaszolsz.",
            response_mime_type="application/json"
        )

        try:
            # Blocking SDK call moved off the event loop (see get_clean_vehicle_data).
            response = await asyncio.to_thread(
                cls.client.models.generate_content,
                model=cls.PRIMARY_MODEL,
                contents=[
                    f"Elemezd ezt a képet ({doc_type}): {prompts.get(doc_type, 'OCR')}",
                    types.Part.from_bytes(data=image_data, mime_type=mime_type)
                ],
                config=config,
            )
            return cls._parse_json_payload(response.text)
        except Exception as e:
            logger.error(f"❌ OCR hiba: {e}")
            return None
|
||||
@@ -0,0 +1,208 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy import text, select
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.asset import AssetCatalog
|
||||
|
||||
# MB 2.0 Szigorú naplózás
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-0-Discovery: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Vehicle-Robot-0-Discovery")
|
||||
|
||||
class DiscoveryEngine:
    """
    Discovery engine (industrial mode 2.0).

    1. Watchdog: hourly, finds and releases stuck tasks.
    2. Differential sync: records only missing or new models and skips the
       ones already marked 'gold_enriched'.
    3. Monthly scheduler: downloads the full RDW model list page by page
       once every 30 days.
    """

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    # State file, so that a Docker restart does not immediately start the sync over
    SYNC_STATE_FILE = "/app/temp/.last_rdw_sync"

    @staticmethod
    def _classify_vehicle_kind(v_kind) -> str:
        """Map an RDW 'voertuigsoort' value to 'car', 'motorcycle' or 'truck' (the fallback)."""
        kind = str(v_kind or "")  # the API may send null
        if "Personenauto" in kind:
            return 'car'
        if "Motorfiets" in kind:
            return 'motorcycle'
        return 'truck'

    @staticmethod
    async def run_watchdog():
        """Phase 1: the watchdog - puts stuck tasks back into their waiting state."""
        logger.info("🐕 Őrkutya: Beragadt feladatok keresése a rendszerben...")
        try:
            async with AsyncSessionLocal() as db:
                # A) Hunter clean-up (back to 'pending' in case the Hunter froze)
                # NOTE(review): there is no age filter here, so tasks a Hunter is
                # actively working on are reset as well - confirm this is intended.
                res1 = await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'pending' WHERE status = 'processing' RETURNING id;"))
                hunter_resets = len(res1.fetchall())
                if hunter_resets > 0:
                    logger.warning(f"🔄 {hunter_resets} db beragadt Hunter feladat (processing) visszaállítva 'pending'-re.")

                # B) AI robot clean-up (2 hour timeout)
                query2 = text("""
                    UPDATE vehicle.vehicle_model_definitions
                    SET status = CASE
                        WHEN status = 'research_in_progress' THEN 'unverified'
                        WHEN status = 'ai_synthesis_in_progress' THEN 'awaiting_ai_synthesis'
                    END
                    WHERE status IN ('research_in_progress', 'ai_synthesis_in_progress')
                    AND updated_at < NOW() - INTERVAL '2 hours'
                    RETURNING id;
                """)
                res2 = await db.execute(query2)
                ai_resets = len(res2.fetchall())
                if ai_resets > 0:
                    logger.warning(f"🔄 {ai_resets} db beragadt AI feladat visszaállítva.")

                await db.commit()
        except Exception as e:
            logger.error(f"❌ Őrkutya hiba: {e}")

    @staticmethod
    async def seed_manual_bootstrap():
        """Phase 2: insert the hand-written bootstrap rows that are not in the catalog yet."""
        initial_data = [
            {"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)"},
            {"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)"}
        ]
        try:
            async with AsyncSessionLocal() as db:
                for item in initial_data:
                    stmt = select(AssetCatalog).where(AssetCatalog.make == item["make"], AssetCatalog.model == item["model"])
                    if not (await db.execute(stmt)).scalar_one_or_none():
                        db.add(AssetCatalog(**item))
                await db.commit()
        except Exception as e:
            logger.warning(f"Manual bootstrap hiba (Ignorálható, ha az adatbázis már tele van): {e}")

    @classmethod
    async def fetch_with_retry(cls, client: "httpx.AsyncClient", url: str, params: dict, retries: int = 3):
        """
        Fault-tolerant HTTP GET.

        Retries with exponential back-off on HTTP 429 and on request errors.

        Returns:
            The response on HTTP 200, otherwise None (other status code or
            retries exhausted).
        """
        for attempt in range(retries):
            last_attempt = attempt == retries - 1
            try:
                resp = await client.get(url, params=params, headers=cls.HEADERS)
            except httpx.RequestError:
                if last_attempt:
                    return None
                await asyncio.sleep(2 ** attempt)
                continue
            if resp.status_code == 200:
                return resp
            if resp.status_code != 429:
                return None
            if not last_attempt:  # no point in waiting when nothing follows
                await asyncio.sleep(2 ** attempt)
        return None

    @classmethod
    async def seed_from_rdw(cls):
        """
        Phase 3: remote discovery - differential synchronisation.

        Pages through the RDW make/model aggregation and upserts every pair
        into vehicle.catalog_discovery unless a 'gold_enriched' definition
        already exists. The sync-state file is written only when the download
        reached the end of the data set, so an aborted run is retried on the
        next cycle instead of being skipped for 30 days.
        """
        logger.info("📥 RDW TÖMEGES LETÖLTÉS: Új modellek keresése (Differential Sync)...")

        limit = 10000
        offset = 0
        inserted_count = 0
        updated_count = 0
        completed = False

        # Differential sync SQL with explicit type casts (loop-invariant, built once).
        # xmax is 0 for a freshly inserted row and non-zero for an updated one.
        query = text("""
            INSERT INTO vehicle.catalog_discovery (make, model, vehicle_class, status, priority_score)
            SELECT
                CAST(:make AS VARCHAR),
                CAST(:model AS VARCHAR),
                CAST(:v_class AS VARCHAR),
                'pending',
                :priority
            WHERE NOT EXISTS (
                SELECT 1 FROM vehicle.vehicle_model_definitions
                WHERE make = CAST(:make AS VARCHAR)
                AND marketing_name = CAST(:model AS VARCHAR)
                AND status = 'gold_enriched'
            )
            ON CONFLICT (make, model)
            DO UPDATE SET priority_score = EXCLUDED.priority_score
            WHERE vehicle.catalog_discovery.status != 'processed'
            RETURNING xmax;
        """)

        async with httpx.AsyncClient(timeout=60.0) as client:
            while True:
                params = {
                    "$select": "merk,handelsbenaming,voertuigsoort,count(*) as total",
                    "$group": "merk,handelsbenaming,voertuigsoort",
                    "$order": "total DESC",
                    "$limit": limit,
                    "$offset": offset
                }

                resp = await cls.fetch_with_retry(client, "https://opendata.rdw.nl/resource/m9d7-ebf2.json", params)
                if resp is None:
                    logger.error(f"RDW page at offset {offset} could not be fetched; aborting this sync run.")
                    break
                raw_data = resp.json()
                if not raw_data:
                    completed = True  # an empty page marks the end of the data set
                    break

                logger.info(f"📊 Lapozás: {offset} - {offset + len(raw_data)} tételek analízise...")

                async with AsyncSessionLocal() as db:
                    for entry in raw_data:
                        # `or ""` guards against JSON nulls (str(None) would give "NONE")
                        make = str(entry.get("merk") or "").upper().strip()
                        model = str(entry.get("handelsbenaming") or "").upper().strip()
                        if not make or not model:
                            continue

                        result = await db.execute(query, {
                            "make": make,
                            "model": model,
                            "v_class": cls._classify_vehicle_kind(entry.get("voertuigsoort")),
                            "priority": int(entry.get("total") or 0),
                        })

                        row = result.fetchone()
                        if row:
                            if row[0] == 0:
                                inserted_count += 1  # new insert
                            else:
                                updated_count += 1  # existing row updated

                    await db.commit()
                offset += limit
                await asyncio.sleep(1)

        if not completed:
            logger.warning(
                f"RDW sync incomplete (inserted: {inserted_count}, updated: {updated_count}); "
                "sync state not recorded, the run will be retried."
            )
            return

        logger.info(f"✅ RDW Szinkron kész! Új modellek a listán: {inserted_count} | Frissített prioritások: {updated_count}")

        # Record the successful run on the file system
        state_dir = os.path.dirname(cls.SYNC_STATE_FILE)
        if state_dir:
            os.makedirs(state_dir, exist_ok=True)
        with open(cls.SYNC_STATE_FILE, 'w') as f:
            f.write(datetime.now().isoformat())

    @classmethod
    def should_run_rdw_sync(cls) -> bool:
        """Return True when no successful RDW sync is recorded within the last 30 days."""
        if not os.path.exists(cls.SYNC_STATE_FILE):
            return True
        try:
            with open(cls.SYNC_STATE_FILE, 'r') as f:
                last_sync = datetime.fromisoformat(f.read().strip())
        except (OSError, ValueError):
            # Unreadable or corrupt state file: treat as "never synced".
            return True
        return datetime.now() - last_sync > timedelta(days=30)

    @classmethod
    async def run(cls):
        """Main loop: monthly scheduler and hourly watchdog."""
        logger.info("🚀 ÉLES ÜZEM: Discovery Engine (Differential Sync) & Watchdog indítása...")
        await cls.seed_manual_bootstrap()

        while True:
            # 1. Hourly clean-up
            await cls.run_watchdog()

            # 2. Monthly synchronisation check
            if cls.should_run_rdw_sync():
                try:
                    await cls.seed_from_rdw()
                except Exception:
                    # A failed sync must not stop the watchdog; it is retried next cycle.
                    logger.exception("RDW sync failed; it will be retried on the next cycle.")
            else:
                logger.info("🛌 Az RDW szinkronizáció már lefutott az elmúlt 30 napban. Ugrás...")

            # 3. Sleep for one hour (heartbeat)
            logger.info("⏱️ A Discovery Engine most 1 órát pihen a következő Őrkutya futásig.")
            await asyncio.sleep(3600)


if __name__ == "__main__":
    asyncio.run(DiscoveryEngine.run())
|
||||
@@ -0,0 +1,108 @@
|
||||
# /app/app/workers/vehicle/vehicle_robot_0_strategist.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal # MB 2.0 Standard import
|
||||
|
||||
# Sentinel rendszerhez illesztett logolás
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
|
||||
logger = logging.getLogger("Vehicle-Robot-0-Strategist")
|
||||
|
||||
class Robot0Strategist:
    """
    Robot 0 (Strategist): derives 'priority_score' from real market data (RDW).

    1. The schema is checked first (self-healing): the priority_score column
       is added when it does not exist.
    2. Categories (car, motorcycle, truck, other) are queried separately to
       obtain targeted priorities.
    3. The ON CONFLICT clause leaves rows alone that are already processed or
       currently being worked on.
    4. The priority is the registration count: the more vehicles of a make
       exist, the earlier it is handled.
    """
    RDW_API = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}

    # Mapping of Dutch vehicle kinds to internal categories
    CATEGORIES = [
        {"name": "car", "rdw_types": ["'Personenauto'"]},
        {"name": "motorcycle", "rdw_types": ["'Motorfiets'"]},
        {"name": "truck", "rdw_types": ["'Bedrijfsauto'", "'Vrachtwagen'", "'Opleggertrekker'"]},
        {"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfsauto', 'Vrachtwagen', 'Opleggertrekker')"]}
    ]

    @staticmethod
    def _build_type_filter(rdw_types: list) -> str:
        """
        Build the ``$where`` expression for a category.

        A first entry containing "NOT IN" is used verbatim as the operator
        part; otherwise the quoted type names are OR-ed together. An empty
        list yields an empty string (no filter).
        """
        if not rdw_types:
            return ""
        if "NOT IN" in rdw_types[0]:
            return f"voertuigsoort {rdw_types[0]}"
        return " OR ".join(f"voertuigsoort = {t}" for t in rdw_types)

    async def get_popular_makes(self, vehicle_class: str, rdw_types: list):
        """
        Fetch the makes of one category ordered by registration count.

        Args:
            vehicle_class: Internal category name (not used in the request).
            rdw_types: RDW type expressions, see CATEGORIES.

        Returns:
            list: The decoded JSON rows, or an empty list on any error.
        """
        params = {
            "$select": "merk, count(*) AS darabszam",
            "$group": "merk",
            "$order": "darabszam DESC",
            "$limit": 500
        }
        type_filter = self._build_type_filter(rdw_types)
        if type_filter:
            params["$where"] = type_filter

        async with httpx.AsyncClient(timeout=45.0) as client:
            try:
                resp = await client.get(self.RDW_API, params=params, headers=self.HEADERS)
                if resp.status_code == 200:
                    return resp.json()
                logger.error(f"⚠️ RDW API Hiba: {resp.status_code}")
                return []
            except Exception as e:
                logger.error(f"❌ Kapcsolati hiba az RDW felé: {e}")
                return []

    async def run(self):
        """Ensure the schema, then upsert a prioritised ALL_VARIANTS task for every make."""
        logger.info("🚀 Robot 0 (Strategist) ONLINE - Piaci elemzés indítása...")

        # --- Schema check ---
        async with AsyncSessionLocal() as db:
            try:
                await db.execute(text("ALTER TABLE vehicle.catalog_discovery ADD COLUMN IF NOT EXISTS priority_score INTEGER DEFAULT 0;"))
                await db.commit()
                logger.info("✅ Adatbázis séma rendben (priority_score aktív).")
            except Exception as e:
                await db.rollback()
                logger.error(f"⚠️ Séma hiba: {e}")

        # UPSERT: set the priority without touching finished or in-flight rows.
        # 'processing' is the in-flight status the Hunter and the watchdog use;
        # 'in_progress' is kept for backward compatibility.
        query = text("""
            INSERT INTO vehicle.catalog_discovery (make, model, vehicle_class, status, source, attempts, priority_score)
            VALUES (:make, 'ALL_VARIANTS', :class, 'pending', 'STRATEGIST-V2', 0, :score)
            ON CONFLICT (make, model, vehicle_class)
            DO UPDATE SET priority_score = :score
            WHERE vehicle.catalog_discovery.status NOT IN ('processed', 'in_progress', 'processing');
        """)

        for category in self.CATEGORIES:
            v_class = category["name"]
            logger.info(f"📊 {v_class.upper()} hadosztály prioritásainak számítása...")

            makes = await self.get_popular_makes(v_class, category["rdw_types"])
            if not makes:
                continue

            added_count = 0
            # One session per category; every row is still committed on its own
            # so a failing make only rolls back itself.
            async with AsyncSessionLocal() as db:
                for item in makes:
                    make_name = str(item.get("merk") or "").upper().strip()
                    if not make_name:
                        continue
                    count = int(item.get("darabszam") or 0)

                    try:
                        await db.execute(query, {"make": make_name, "class": v_class, "score": count})
                        await db.commit()
                        added_count += 1
                    except Exception as e:
                        await db.rollback()
                        logger.warning(f"❌ Hiba a márka rögzítésekor ({make_name}): {e}")

            logger.info(f"✅ {v_class.upper()} kategória kész: {added_count} márka rangsorolva.")


if __name__ == "__main__":
    asyncio.run(Robot0Strategist().run())
|
||||
@@ -0,0 +1,224 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# Naplózás beállítása a standard kimenetre
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
|
||||
stream=sys.stdout
|
||||
)
|
||||
logger = logging.getLogger("Robot-1-Hunter")
|
||||
|
||||
class CatalogHunter:
|
||||
"""
|
||||
Vehicle Robot 1.9.3: The Truly Invincible Hunter (SAVEPOINT PATCH)
|
||||
Kezeli az ALL_VARIANTS utasítást és row-level tranzakcióvédelmet használ.
|
||||
"""
|
||||
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
|
||||
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
|
||||
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
|
||||
|
||||
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
|
||||
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
|
||||
BATCH_SIZE = 50
|
||||
|
||||
@classmethod
|
||||
def normalize(cls, text_val: str) -> str:
|
||||
if not text_val: return ""
|
||||
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
|
||||
|
||||
@classmethod
|
||||
def parse_int(cls, value) -> int:
|
||||
try:
|
||||
if value is None or str(value).strip() == "": return 0
|
||||
return int(float(value))
|
||||
except (ValueError, TypeError): return 0
|
||||
|
||||
@classmethod
|
||||
def parse_float(cls, value) -> float:
|
||||
try:
|
||||
if value is None or str(value).strip() == "": return 0.0
|
||||
return float(value)
|
||||
except (ValueError, TypeError): return 0.0
|
||||
|
||||
@classmethod
|
||||
async def fetch_with_retry(cls, client: httpx.AsyncClient, url: str, retries: int = 3):
|
||||
""" Hibatűrő HTTP lekérdezés exponenciális várakozással. """
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
resp = await client.get(url, headers=cls.HEADERS)
|
||||
if resp.status_code == 200:
|
||||
return resp
|
||||
elif resp.status_code == 429: # Rate limit
|
||||
await asyncio.sleep(2 ** attempt)
|
||||
else:
|
||||
return resp
|
||||
except httpx.RequestError as e:
|
||||
if attempt == retries - 1:
|
||||
logger.debug(f"Hálózati hiba: {e}")
|
||||
raise
|
||||
await asyncio.sleep(2 ** attempt)
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
async def fetch_tech_details(cls, client, plate):
|
||||
""" Technikai adatok (üzemanyag, teljesítmény, motorkód) begyűjtése. """
|
||||
results = {
|
||||
"power_kw": 0, "engine_code": None, "euro_class": None,
|
||||
"fuel_desc": "Unknown", "co2": 0, "consumption": 0.0
|
||||
}
|
||||
try:
|
||||
# Üzemanyag adatok
|
||||
f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
|
||||
if f_resp and f_resp.status_code == 200 and f_resp.json():
|
||||
f = f_resp.json()[0]
|
||||
p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
|
||||
p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
|
||||
results.update({
|
||||
"power_kw": max(p1, p2),
|
||||
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
|
||||
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
|
||||
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
|
||||
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
|
||||
})
|
||||
|
||||
# Motorkód adatok
|
||||
e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
|
||||
if e_resp and e_resp.status_code == 200 and e_resp.json():
|
||||
results["engine_code"] = e_resp.json()[0].get("motorcode")
|
||||
except Exception:
|
||||
pass
|
||||
return results
|
||||
|
||||
@classmethod
|
||||
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
|
||||
""" Egy adott márka/modell (vagy wildcard) feldolgozása. """
|
||||
clean_make = make_name.strip().upper()
|
||||
clean_model = model_name.strip().upper()
|
||||
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
|
||||
|
||||
offset = 0
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
while True:
|
||||
# Dinamikus paraméterezés: ALL_VARIANTS esetén nem szűrünk modellre
|
||||
if clean_model == 'ALL_VARIANTS':
|
||||
params = f"merk={clean_make}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
|
||||
else:
|
||||
params = f"merk={clean_make}&handelsbenaming={clean_model}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
|
||||
|
||||
try:
|
||||
r = await cls.fetch_with_retry(client, f"{cls.RDW_MAIN}?{params}")
|
||||
batch = r.json() if r and r.status_code == 200 else []
|
||||
except Exception as e:
|
||||
logger.error(f"❌ API hiba: {e}")
|
||||
break
|
||||
|
||||
if not batch:
|
||||
break
|
||||
|
||||
for item in batch:
|
||||
plate = item.get("kenteken", "UNKNOWN")
|
||||
try:
|
||||
# SAVEPOINT: Ha egy rekord mentése hibás, a tranzakció blokk nem sérül
|
||||
async with db.begin_nested():
|
||||
tech = await cls.fetch_tech_details(client, plate)
|
||||
|
||||
# Valódi modellnév kinyerése (Wildcard esetén fontos)
|
||||
actual_model = (item.get("handelsbenaming") or clean_model).upper()
|
||||
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
|
||||
|
||||
stmt = insert(VehicleModelDefinition).values(
|
||||
make=clean_make,
|
||||
marketing_name=actual_model,
|
||||
normalized_name=norm_name,
|
||||
variant_code=item.get("variant", "UNKNOWN"),
|
||||
version_code=item.get("uitvoering", "UNKNOWN"),
|
||||
type_approval_number=item.get("typegoedkeuringsnummer"),
|
||||
technical_code=plate,
|
||||
engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
|
||||
power_kw=tech["power_kw"],
|
||||
fuel_type=tech["fuel_desc"],
|
||||
engine_code=tech["engine_code"],
|
||||
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
|
||||
doors=cls.parse_int(item.get("aantal_deuren")),
|
||||
width=cls.parse_int(item.get("breedte")),
|
||||
wheelbase=cls.parse_int(item.get("wielbasis")),
|
||||
list_price=cls.parse_int(item.get("catalogusprijs")),
|
||||
max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
|
||||
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
|
||||
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
|
||||
body_type=item.get("inrichting"),
|
||||
co2_emissions_combined=tech["co2"],
|
||||
fuel_consumption_combined=tech["consumption"],
|
||||
euro_classification=tech["euro_class"],
|
||||
cylinders=cls.parse_int(item.get("aantal_cilinders")),
|
||||
vehicle_class=v_class,
|
||||
priority_score=priority,
|
||||
status="unverified", # R2 Researcher számára előkészítve
|
||||
source="MEGA-HUNTER-v1.9.3"
|
||||
).on_conflict_do_nothing(
|
||||
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
|
||||
)
|
||||
await db.execute(stmt)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Sor eldobva ({plate}): {e}")
|
||||
|
||||
# Batch commit a sikeres sorok után
|
||||
await db.commit()
|
||||
|
||||
offset += len(batch)
|
||||
if offset >= 500: # Biztonsági korlát egy-egy márkánál
|
||||
break
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
# Discovery feladat lezárása
|
||||
await db.execute(
|
||||
text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"),
|
||||
{"id": task_id}
|
||||
)
|
||||
await db.commit()
|
||||
|
||||
@classmethod
async def run(cls):
    """
    Worker main loop: claim one pending discovery task at a time and process it.

    Each iteration opens a fresh session, claims the highest-priority
    ``pending`` row of ``vehicle.catalog_discovery`` (flipping it to
    ``processing``) and hands it to ``process_make_model``. When nothing is
    queued the loop rests for 30 seconds; after any error it rests for 10
    seconds and carries on. The loop never returns.
    """
    logger.info("🤖 Mega-Hunter v1.9.3 ONLINE (SAVEPOINT ENABLED)")
    while True:
        try:
            async with AsyncSessionLocal() as session:
                # Atomic claim: lookup, row lock and status change in one statement.
                claimed = await session.execute(text("""
                    UPDATE vehicle.catalog_discovery
                    SET status = 'processing'
                    WHERE id = (
                        SELECT id FROM vehicle.catalog_discovery
                        WHERE status = 'pending'
                        ORDER BY priority_score DESC
                        FOR UPDATE SKIP LOCKED
                        LIMIT 1
                    )
                    RETURNING id, make, model, vehicle_class, priority_score;
                """))
                row = claimed.fetchone()
                await session.commit()

                if not row:
                    # Nothing to do: rest for 30 seconds before polling again.
                    await asyncio.sleep(30)
                    continue

                # Columns arrive in the order listed in the RETURNING clause.
                task_id, make, model, v_class, priority = row[0], row[1], row[2], row[3], row[4]
                await cls.process_make_model(session, task_id, make, model, v_class, priority)
        except Exception as e:
            logger.error(f"💀 Főciklus hiba: {e}")
            await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -0,0 +1,179 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
|
||||
# version: 1.9.6
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from sqlalchemy import text, func
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
# MB 2.0 Standard Naplózás
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
|
||||
stream=sys.stdout
|
||||
)
|
||||
logger = logging.getLogger("Robot-1-Hunter")
|
||||
|
||||
class CatalogHunter:
    """
    Vehicle Robot 1.9.6: Mega-Hunter (timestamp & integrity patch).

    Claims queued rows from ``vehicle.catalog_discovery``, pages through the
    RDW open-data endpoints for the requested make/model and stages every
    vehicle found as a ``VehicleModelDefinition`` row. Handles the
    ``ALL_VARIANTS`` pseudo-model, per-row savepoints and all mandatory columns.
    """
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50
    # Safety cap on how many RDW rows are pulled for a single discovery task.
    MAX_ROWS_PER_TASK = 500

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return ``text_val`` lower-cased with everything except ASCII letters
        and digits removed; falsy input yields ``""``."""
        if not text_val:
            return ""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert ``value`` to ``int`` (truncating any fraction).

        Returns 0 for ``None``, blank strings, non-numeric input and for
        values that overflow to infinity (e.g. ``"1e999"``).
        """
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) -- must not abort the whole row.
            return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Convert ``value`` to ``float``; ``None``, blank or non-numeric input yields 0.0."""
        try:
            if value is None or str(value).strip() == "":
                return 0.0
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    @classmethod
    def _build_query(cls, make: str, model: str, offset: int) -> str:
        """Build the query string for one page of the main RDW dataset.

        ``make`` and ``model`` are percent-encoded so that characters such as
        ``&``, ``#`` or ``+`` inside a name cannot corrupt the query. The
        ``ALL_VARIANTS`` pseudo-model drops the model filter entirely.
        """
        from urllib.parse import quote

        parts = [f"merk={quote(make, safe='')}"]
        if model != 'ALL_VARIANTS':
            parts.append(f"handelsbenaming={quote(model, safe='')}")
        parts.append(f"$limit={cls.BATCH_SIZE}")
        parts.append(f"$offset={offset}")
        parts.append("$order=kenteken DESC")
        return "&".join(parts)

    @classmethod
    async def fetch_with_retry(cls, client: "httpx.AsyncClient", url: str, retries: int = 3):
        """GET ``url`` with exponential back-off.

        Retries on HTTP 429 and on ``httpx.RequestError``. Any other response
        (success or error status) is returned as-is.

        Returns:
            The response, or ``None`` when every attempt was answered with 429.

        Raises:
            httpx.RequestError: when the final attempt fails at transport level.
        """
        for attempt in range(retries):
            is_last = attempt == retries - 1
            try:
                resp = await client.get(url, headers=cls.HEADERS)
            except httpx.RequestError:
                if is_last:
                    raise
                await asyncio.sleep(2 ** attempt)
                continue
            if resp.status_code != 429:
                return resp
            # Rate limited: back off, but do not sleep after the last attempt.
            if not is_last:
                await asyncio.sleep(2 ** attempt)
        return None

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Look up fuel/engine details for one licence plate (best effort).

        Queries the RDW fuel and engine datasets by ``kenteken`` and uses the
        first row of each answer. Any failure is logged and the defaults
        collected so far are returned, so a lookup problem never drops a row.

        Returns:
            dict with keys ``power_kw``, ``engine_code``, ``euro_class``,
            ``fuel_desc``, ``co2`` and ``consumption``.
        """
        results = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
        try:
            f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
            fuel_rows = f_resp.json() if f_resp and f_resp.status_code == 200 else None
            if fuel_rows:
                f = fuel_rows[0]
                # Either spelling of the field name is accepted; the larger power figure wins.
                p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
                p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
                results.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd")),
                })
            e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
            engine_rows = e_resp.json() if e_resp and e_resp.status_code == 200 else None
            if engine_rows:
                results["engine_code"] = engine_rows[0].get("motorcode")
        except Exception as exc:
            # Still best effort, but no longer silent.
            logger.warning("⚠️ Műszaki adatok lekérése sikertelen (%s): %s", plate, exc)
        return results

    @classmethod
    async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
        """Harvest RDW rows for one discovery task and stage them.

        Pages through the main RDW dataset (``BATCH_SIZE`` rows per request, at
        most ``MAX_ROWS_PER_TASK`` rows in total), inserts each vehicle inside
        its own savepoint so one bad row cannot poison the batch, commits after
        every page and finally marks the discovery row ``processed``.

        Args:
            db: async SQLAlchemy session used for all writes.
            task_id: id of the ``vehicle.catalog_discovery`` row being served.
            make_name: make to search for (stripped and upper-cased here).
            model_name: model to search for, or ``ALL_VARIANTS`` for every model.
            v_class: vehicle class copied onto each staged row.
            priority: priority score copied onto each staged row.
        """
        clean_make = make_name.strip().upper()
        clean_model = model_name.strip().upper()
        logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")

        offset = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            while True:
                params = cls._build_query(clean_make, clean_model, offset)
                try:
                    r = await cls.fetch_with_retry(client, f"{cls.RDW_MAIN}?{params}")
                    batch = r.json() if r and r.status_code == 200 else []
                except Exception as exc:
                    logger.warning(f"⚠️ RDW lekérdezés sikertelen ({clean_make} {clean_model}, offset={offset}): {exc}")
                    break

                if not batch:
                    break

                for item in batch:
                    plate = item.get("kenteken", "UNKNOWN")
                    try:
                        # Savepoint per row: a failing insert rolls back only itself.
                        async with db.begin_nested():
                            tech = await cls.fetch_tech_details(client, plate)
                            actual_model = (item.get("handelsbenaming") or clean_model).upper()
                            # Strip the make from the trade name unless that leaves nothing.
                            norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)

                            stmt = insert(VehicleModelDefinition).values(
                                make=clean_make,
                                marketing_name=actual_model,
                                normalized_name=norm_name,
                                variant_code=item.get("variant", "UNKNOWN"),
                                version_code=item.get("uitvoering", "UNKNOWN"),
                                technical_code=plate,
                                engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
                                power_kw=tech["power_kw"],
                                fuel_type=tech["fuel_desc"],
                                engine_code=tech["engine_code"],
                                seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                                doors=cls.parse_int(item.get("aantal_deuren")),
                                curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                                max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                                vehicle_class=v_class,
                                priority_score=priority,
                                market='EU',  # mandatory column
                                status="unverified",
                                is_manual=False,
                                created_at=func.now(),  # mandatory timestamps
                                updated_at=func.now(),
                                source="MEGA-HUNTER-v1.9.6",
                            ).on_conflict_do_nothing(
                                index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
                            )
                            await db.execute(stmt)
                    except Exception as e:
                        logger.warning(f"⚠️ Sor eldobva ({plate}): {e}")

                # Commit the rows of this page that made it through.
                await db.commit()
                offset += len(batch)
                if offset >= cls.MAX_ROWS_PER_TASK:
                    break
                await asyncio.sleep(0.5)

        # Close the discovery task.
        await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
        await db.commit()

    @classmethod
    async def run(cls):
        """Worker main loop: claim the highest-priority pending task and process it.

        Sleeps 30 seconds when the queue is empty and 10 seconds after an
        error; never returns.
        """
        logger.info("🤖 Mega-Hunter v1.9.6 ONLINE (TIMESTAMP PATCH)")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select, lock and flip the status in one statement.
                    query = text("""
                        UPDATE vehicle.catalog_discovery SET status = 'processing'
                        WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending'
                        ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1)
                        RETURNING id, make, model, vehicle_class, priority_score;
                    """)
                    result = await db.execute(query)
                    task = result.fetchone()
                    await db.commit()
                    if task:
                        await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                # NOTE(review): a task that fails here stays in 'processing'; confirm
                # whether something else re-queues such rows.
                logger.exception(f"💀 Főciklus hiba: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -0,0 +1,168 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Robot-1")
|
||||
|
||||
class CatalogHunter:
    """
    Vehicle Robot 2.1.2: the final hunter.

    Claims queued rows from ``vehicle.catalog_discovery``, pages through the
    RDW open-data endpoints and stages each vehicle as a
    ``VehicleModelDefinition`` row with column types matching the model
    (``raw_search_context`` is written as a string).
    """
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50
    # Safety cap on how many RDW rows are pulled for a single discovery task.
    MAX_ROWS_PER_TASK = 500

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return ``text_val`` lower-cased with everything except ASCII letters
        and digits removed; falsy input yields ``"UNKNOWN"``."""
        if not text_val:
            return "UNKNOWN"
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert ``value`` to ``int`` (truncating any fraction).

        Returns 0 for ``None``, blank strings, non-numeric input and for
        values that overflow to infinity (e.g. ``"1e999"``).
        """
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) -- must not abort the whole row.
            return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Convert ``value`` to ``float``; ``None``, blank or non-numeric input yields 0.0."""
        try:
            if value is None or str(value).strip() == "":
                return 0.0
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    @classmethod
    def _build_query(cls, make: str, model: str, offset: int) -> str:
        """Build the query string for one page of the main RDW dataset.

        ``make`` and ``model`` are percent-encoded so that characters such as
        ``&``, ``#`` or ``+`` inside a name cannot corrupt the query. The
        ``ALL_VARIANTS`` pseudo-model drops the model filter entirely.
        """
        from urllib.parse import quote

        parts = [f"merk={quote(make, safe='')}"]
        if model != 'ALL_VARIANTS':
            parts.append(f"handelsbenaming={quote(model, safe='')}")
        parts.append(f"$limit={cls.BATCH_SIZE}")
        parts.append(f"$offset={offset}")
        parts.append("$order=kenteken DESC")
        return "&".join(parts)

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Look up fuel/engine details for one licence plate (best effort).

        Queries the RDW fuel and engine datasets by ``kenteken`` and uses the
        first row of each answer. Any failure is logged and the defaults
        collected so far are returned, so a lookup problem never drops a row.

        Returns:
            dict with keys ``power_kw``, ``engine_code``, ``euro_class``,
            ``fuel_desc``, ``co2`` and ``consumption``.
        """
        res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
        try:
            f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
            fuel_rows = f_resp.json() if f_resp.status_code == 200 else None
            if fuel_rows:
                f = fuel_rows[0]
                # The larger of the two reported power figures wins.
                p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
                p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
                res.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd")),
                })
            e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
            engine_rows = e_resp.json() if e_resp.status_code == 200 else None
            if engine_rows:
                res["engine_code"] = engine_rows[0].get("motorcode")
        except Exception as exc:
            # Still best effort, but no longer silent.
            logger.warning("⚠️ Műszaki adatok lekérése sikertelen (%s): %s", plate, exc)
        return res

    @classmethod
    async def process_task(cls, db, task):
        """Harvest RDW rows for one claimed discovery task and stage them.

        Pages through the main RDW dataset (``BATCH_SIZE`` rows per request, at
        most ``MAX_ROWS_PER_TASK`` rows in total), inserts each vehicle inside
        its own savepoint, commits after every page and finally marks the
        discovery row ``processed``.

        Args:
            db: async SQLAlchemy session used for all writes.
            task: claimed row exposing ``id``, ``make``, ``model``,
                ``vehicle_class`` and ``priority_score``.
        """
        clean_make = task.make.strip().upper()
        clean_model = task.model.strip().upper()
        logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")

        async with httpx.AsyncClient(timeout=30.0) as client:
            offset = 0
            while True:
                params = cls._build_query(clean_make, clean_model, offset)
                try:
                    r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
                    batch = r.json() if r.status_code == 200 else []
                except Exception as exc:
                    logger.warning(f"⚠️ RDW lekérdezés sikertelen ({clean_make} {clean_model}, offset={offset}): {exc}")
                    break
                if not batch:
                    break

                for item in batch:
                    plate = item.get("kenteken", "UNKNOWN")
                    try:
                        # Savepoint per row: a failing insert rolls back only itself.
                        async with db.begin_nested():
                            tech = await cls.fetch_tech_details(client, plate)
                            actual_model = (item.get("handelsbenaming") or clean_model).upper()
                            # Strip the make from the trade name unless that leaves nothing.
                            norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)

                            # First four characters of the first-admission date are used as the year.
                            datum_eerste_toelating = str(item.get("datum_eerste_toelating", ""))
                            year_from = cls.parse_int(datum_eerste_toelating[:4]) if len(datum_eerste_toelating) >= 4 else 0

                            stmt = insert(VehicleModelDefinition).values(
                                market='EU',
                                make=clean_make,
                                marketing_name=actual_model,
                                normalized_name=norm_name,
                                variant_code=item.get("variant", "UNKNOWN"),
                                version_code=item.get("uitvoering", "UNKNOWN"),
                                technical_code=plate,
                                type_approval_number=item.get("typegoedkeuringsnummer"),
                                seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                                doors=cls.parse_int(item.get("aantal_deuren")),
                                width=cls.parse_int(item.get("breedte")),
                                wheelbase=cls.parse_int(item.get("wielbasis")),
                                list_price=cls.parse_int(item.get("catalogusprijs")),
                                max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                                curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                                max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                                fuel_consumption_combined=tech["consumption"],
                                co2_emissions_combined=tech["co2"],
                                vehicle_class=task.vehicle_class,
                                body_type=item.get("inrichting"),
                                fuel_type=tech["fuel_desc"],
                                engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
                                power_kw=tech["power_kw"],
                                cylinders=cls.parse_int(item.get("aantal_cilinders")),
                                engine_code=tech["engine_code"],
                                euro_classification=tech["euro_class"],
                                year_from=year_from,
                                priority_score=task.priority_score,
                                status="unverified",
                                source="MEGA-HUNTER-v2.1.2",
                                # raw_search_context is an empty STRING, as the model expects.
                                raw_search_context='',
                                research_metadata={},
                                specifications={},
                                marketing_name_aliases=[],
                            ).on_conflict_do_nothing(
                                index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
                            )
                            await db.execute(stmt)
                    except Exception as e:
                        logger.warning(f"⚠️ Sor hiba ({plate}): {e}")

                # Commit the rows of this page that made it through.
                await db.commit()
                offset += len(batch)
                if offset >= cls.MAX_ROWS_PER_TASK:
                    break
                await asyncio.sleep(0.5)

        # Close the discovery task.
        await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
        await db.commit()

    @classmethod
    async def run(cls):
        """Worker main loop: claim the highest-priority pending task and process it.

        Sleeps 30 seconds when the queue is empty and 10 seconds after an
        error; never returns.
        """
        logger.info("🤖 Mega-Hunter v2.1.2 (Adattípus Fix) ONLINE")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select, lock and flip the status in one statement.
                    query = text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;")
                    res = await db.execute(query)
                    task = res.fetchone()
                    await db.commit()
                    if task:
                        await cls.process_task(db, task)
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                # NOTE(review): a task that fails here stays in 'processing'; confirm
                # whether something else re-queues such rows.
                logger.exception(f"💀 Főciklus hiba: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -0,0 +1,205 @@
|
||||
# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Robot-1")
|
||||
|
||||
class CatalogHunter:
    """
    Vehicle Robot 2.2.0: Fast-Track to Gold edition.

    When RDW already supplies every key figure (kW, ccm, fuel), the vehicle is
    staged as ``gold_enriched`` right away and is also written into the
    ``vehicle.vehicle_catalog`` master table.
    """
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"

    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50
    # Safety cap on how many RDW rows are pulled for a single discovery task.
    MAX_ROWS_PER_TASK = 500

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return ``text_val`` lower-cased with everything except ASCII letters
        and digits removed; falsy input yields ``"UNKNOWN"``."""
        if not text_val:
            return "UNKNOWN"
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert ``value`` to ``int`` (truncating any fraction).

        Returns 0 for ``None``, blank strings, non-numeric input and for
        values that overflow to infinity (e.g. ``"1e999"``).
        """
        try:
            if value is None or str(value).strip() == "":
                return 0
            return int(float(value))
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) -- must not abort the whole row.
            return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Convert ``value`` to ``float``; ``None``, blank or non-numeric input yields 0.0."""
        try:
            if value is None or str(value).strip() == "":
                return 0.0
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    @classmethod
    def _build_query(cls, make: str, model: str, offset: int) -> str:
        """Build the query string for one page of the main RDW dataset.

        ``make`` and ``model`` are percent-encoded so that characters such as
        ``&``, ``#`` or ``+`` inside a name cannot corrupt the query. The
        ``ALL_VARIANTS`` pseudo-model drops the model filter entirely.
        """
        from urllib.parse import quote

        parts = [f"merk={quote(make, safe='')}"]
        if model != 'ALL_VARIANTS':
            parts.append(f"handelsbenaming={quote(model, safe='')}")
        parts.append(f"$limit={cls.BATCH_SIZE}")
        parts.append(f"$offset={offset}")
        parts.append("$order=kenteken DESC")
        return "&".join(parts)

    @classmethod
    def _is_gold(cls, power_kw: int, engine_ccm: int, fuel_type: str) -> bool:
        """Fast-track rule: power must be known, plus either a displacement or
        an electric fuel description (electric vehicles may report 0 ccm)."""
        return power_kw > 0 and (engine_ccm > 0 or "elektri" in fuel_type.lower())

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Look up fuel/engine details for one licence plate (best effort).

        Queries the RDW fuel and engine datasets by ``kenteken`` and uses the
        first row of each answer. Any failure is logged and the defaults
        collected so far are returned, so a lookup problem never drops a row.

        Returns:
            dict with keys ``power_kw``, ``engine_code``, ``euro_class``,
            ``fuel_desc``, ``co2`` and ``consumption``.
        """
        res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
        try:
            f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
            fuel_rows = f_resp.json() if f_resp.status_code == 200 else None
            if fuel_rows:
                f = fuel_rows[0]
                # The larger of the two reported power figures wins.
                p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
                p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
                res.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd")),
                })
            e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
            engine_rows = e_resp.json() if e_resp.status_code == 200 else None
            if engine_rows:
                res["engine_code"] = engine_rows[0].get("motorcode")
        except Exception as exc:
            # Still best effort, but no longer silent.
            logger.warning("⚠️ Műszaki adatok lekérése sikertelen (%s): %s", plate, exc)
        return res

    @classmethod
    async def process_task(cls, db, task):
        """Harvest RDW rows for one claimed discovery task and stage them.

        Pages through the main RDW dataset (``BATCH_SIZE`` rows per request, at
        most ``MAX_ROWS_PER_TASK`` rows in total). Each vehicle is inserted
        inside its own savepoint; rows that satisfy the fast-track rule are
        staged as ``gold_enriched`` and, when the staging insert actually
        created a row, also published to ``vehicle.vehicle_catalog``. Commits
        after every page and finally marks the discovery row ``processed``.

        Args:
            db: async SQLAlchemy session used for all writes.
            task: claimed row exposing ``id``, ``make``, ``model``,
                ``vehicle_class`` and ``priority_score``.
        """
        clean_make = task.make.strip().upper()
        clean_model = task.model.strip().upper()
        logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")

        async with httpx.AsyncClient(timeout=30.0) as client:
            offset = 0
            while True:
                params = cls._build_query(clean_make, clean_model, offset)
                try:
                    r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
                    batch = r.json() if r.status_code == 200 else []
                except Exception as exc:
                    logger.warning(f"⚠️ RDW lekérdezés sikertelen ({clean_make} {clean_model}, offset={offset}): {exc}")
                    break
                if not batch:
                    break

                for item in batch:
                    plate = item.get("kenteken", "UNKNOWN")
                    try:
                        # Savepoint per row: a failing insert rolls back only itself.
                        async with db.begin_nested():
                            tech = await cls.fetch_tech_details(client, plate)
                            actual_model = (item.get("handelsbenaming") or clean_model).upper()
                            # Strip the make from the trade name unless that leaves nothing.
                            norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)

                            # First four characters of the first-admission date are used as the year.
                            datum_eerste_toelating = str(item.get("datum_eerste_toelating", ""))
                            year_from = cls.parse_int(datum_eerste_toelating[:4]) if len(datum_eerste_toelating) >= 4 else 0

                            engine_ccm = cls.parse_int(item.get("cilinderinhoud"))
                            power_kw = tech["power_kw"]
                            fuel_type = tech["fuel_desc"]

                            # Fast-track: with the mandatory technical data present the row is gold at once.
                            is_gold = cls._is_gold(power_kw, engine_ccm, fuel_type)
                            final_status = "gold_enriched" if is_gold else "unverified"

                            # 1. Write into the VMD staging table.
                            stmt = insert(VehicleModelDefinition).values(
                                market='EU',
                                make=clean_make,
                                marketing_name=actual_model,
                                normalized_name=norm_name,
                                variant_code=item.get("variant", "UNKNOWN"),
                                version_code=item.get("uitvoering", "UNKNOWN"),
                                technical_code=plate,
                                type_approval_number=item.get("typegoedkeuringsnummer"),
                                seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                                doors=cls.parse_int(item.get("aantal_deuren")),
                                width=cls.parse_int(item.get("breedte")),
                                wheelbase=cls.parse_int(item.get("wielbasis")),
                                list_price=cls.parse_int(item.get("catalogusprijs")),
                                max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                                curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                                max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                                fuel_consumption_combined=tech["consumption"],
                                co2_emissions_combined=tech["co2"],
                                vehicle_class=task.vehicle_class,
                                body_type=item.get("inrichting"),
                                fuel_type=fuel_type,
                                engine_capacity=engine_ccm,
                                power_kw=power_kw,
                                cylinders=cls.parse_int(item.get("aantal_cilinders")),
                                engine_code=tech["engine_code"],
                                euro_classification=tech["euro_class"],
                                year_from=year_from,
                                priority_score=task.priority_score,
                                status=final_status,  # dynamic status
                                source="MEGA-HUNTER-v2.2.0-FAST",
                                raw_search_context='',
                                research_metadata={},
                                specifications={"fast_track": True},  # marks rows that came straight from RDW
                                marketing_name_aliases=[],
                            ).on_conflict_do_nothing(
                                index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
                            ).returning(VehicleModelDefinition.id)

                            res = await db.execute(stmt)
                            # No id comes back when the insert was skipped by the conflict clause.
                            vmd_id = res.scalar()

                            # 2. Gold rows go straight into the final catalog.
                            if is_gold and vmd_id:
                                cat_stmt = text("""
                                    INSERT INTO vehicle.vehicle_catalog
                                    (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
                                    VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
                                    ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING;
                                """)
                                await db.execute(cat_stmt, {
                                    "m_id": vmd_id,
                                    "make": clean_make,
                                    "model": actual_model[:50],
                                    "kw": power_kw,
                                    "ccm": engine_ccm,
                                    "fuel": fuel_type,
                                    "factory": json.dumps({"source": "RDW API Direct", "verified": True}),
                                })
                                logger.info(f"✨ FAST-TRACK ARANY: {clean_make} {actual_model} (KW: {power_kw}, CCM: {engine_ccm})")
                    except Exception as e:
                        logger.warning(f"⚠️ Sor hiba ({plate}): {e}")

                # Commit the rows of this page that made it through.
                await db.commit()
                offset += len(batch)
                if offset >= cls.MAX_ROWS_PER_TASK:
                    break
                await asyncio.sleep(0.5)

        # Close the discovery task.
        await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
        await db.commit()

    @classmethod
    async def run(cls):
        """Worker main loop: claim the highest-priority pending task and process it.

        Sleeps 30 seconds when the queue is empty and 10 seconds after an
        error; never returns.
        """
        logger.info("🤖 Mega-Hunter v2.2.0 (Fast-Track Edition) ONLINE")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select, lock and flip the status in one statement.
                    query = text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;")
                    res = await db.execute(query)
                    task = res.fetchone()
                    await db.commit()
                    if task:
                        await cls.process_task(db, task)
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                # NOTE(review): a task that fails here stays in 'processing'; confirm
                # whether something else re-queues such rows.
                logger.exception(f"💀 Főciklus hiba: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -0,0 +1,140 @@
|
||||
import asyncio, httpx, logging, os, re, sys, json
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Robot-1")
|
||||
|
||||
class CatalogHunter:
    """
    Mega-Hunter v2.2.0 (Fast-Track), compact variant.

    Claims queued rows from ``vehicle.catalog_discovery``, pages through the
    RDW open-data endpoints and stages each vehicle as a
    ``VehicleModelDefinition`` row. Rows with known power plus either a
    displacement or an electric fuel description are staged as
    ``gold_enriched`` and published to ``vehicle.vehicle_catalog``.
    """
    RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
    RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
    RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
    RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
    HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
    BATCH_SIZE = 50
    # Safety cap on how many RDW rows are pulled for a single discovery task.
    MAX_ROWS_PER_TASK = 500

    @classmethod
    def normalize(cls, text_val: str) -> str:
        """Return ``text_val`` lower-cased with everything except ASCII letters
        and digits removed; falsy input yields ``"UNKNOWN"``."""
        return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower() if text_val else "UNKNOWN"

    @classmethod
    def parse_int(cls, value) -> int:
        """Convert ``value`` to ``int`` (truncating any fraction).

        Returns 0 for falsy or blank input, non-numeric input and for values
        that overflow to infinity (e.g. ``"1e999"``).
        """
        try:
            return int(float(value)) if value and str(value).strip() else 0
        except (ValueError, TypeError, OverflowError):
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
            return 0

    @classmethod
    def parse_float(cls, value) -> float:
        """Convert ``value`` to ``float``; falsy, blank or non-numeric input yields 0.0."""
        try:
            return float(value) if value and str(value).strip() else 0.0
        except (ValueError, TypeError):
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
            return 0.0

    @classmethod
    def _build_query(cls, make: str, model: str, offset: int) -> str:
        """Build the query string for one page of the main RDW dataset.

        ``make`` and ``model`` are percent-encoded so that characters such as
        ``&``, ``#`` or ``+`` inside a name cannot corrupt the query. The
        ``ALL_VARIANTS`` pseudo-model drops the model filter entirely.
        """
        from urllib.parse import quote

        parts = [f"merk={quote(make, safe='')}"]
        if model != 'ALL_VARIANTS':
            parts.append(f"handelsbenaming={quote(model, safe='')}")
        parts.append(f"$limit={cls.BATCH_SIZE}")
        parts.append(f"$offset={offset}")
        parts.append("$order=kenteken DESC")
        return "&".join(parts)

    @classmethod
    async def fetch_tech_details(cls, client, plate):
        """Look up fuel/engine details for one licence plate (best effort).

        Queries the RDW fuel and engine datasets by ``kenteken`` and uses the
        first row of each answer. Any failure is logged and the defaults
        collected so far are returned, so a lookup problem never drops a row.

        Returns:
            dict with keys ``power_kw``, ``engine_code``, ``euro_class``,
            ``fuel_desc``, ``co2`` and ``consumption``.
        """
        res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
        try:
            f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
            fuel_rows = f_resp.json() if f_resp.status_code == 200 else None
            if fuel_rows:
                f = fuel_rows[0]
                # The larger of the two reported power figures wins.
                p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
                p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
                res.update({
                    "power_kw": max(p1, p2),
                    "fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
                    "euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
                    "co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
                    "consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd")),
                })
            e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
            engine_rows = e_resp.json() if e_resp.status_code == 200 else None
            if engine_rows:
                res["engine_code"] = engine_rows[0].get("motorcode")
        except Exception as exc:
            # Still best effort, but no longer silent.
            logger.warning("⚠️ Műszaki adatok lekérése sikertelen (%s): %s", plate, exc)
        return res

    @classmethod
    async def process_task(cls, db, task):
        """Harvest RDW rows for one claimed discovery task and stage them.

        Pages through the main RDW dataset (``BATCH_SIZE`` rows per request, at
        most ``MAX_ROWS_PER_TASK`` rows in total). Each vehicle is inserted
        inside its own savepoint; gold rows whose staging insert actually
        created a row are also published to ``vehicle.vehicle_catalog``.
        Commits after every page and finally marks the discovery row
        ``processed``.

        Args:
            db: async SQLAlchemy session used for all writes.
            task: claimed row exposing ``id``, ``make``, ``model``,
                ``vehicle_class`` and ``priority_score``.
        """
        clean_make, clean_model = task.make.strip().upper(), task.model.strip().upper()
        logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")

        async with httpx.AsyncClient(timeout=30.0) as client:
            offset = 0
            while True:
                params = cls._build_query(clean_make, clean_model, offset)
                try:
                    r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
                    batch = r.json() if r.status_code == 200 else []
                except Exception as exc:
                    logger.warning(f"⚠️ RDW lekérdezés sikertelen ({clean_make} {clean_model}, offset={offset}): {exc}")
                    break
                if not batch:
                    break

                for item in batch:
                    plate = item.get("kenteken", "UNKNOWN")
                    try:
                        # Savepoint per row: a failing insert rolls back only itself.
                        async with db.begin_nested():
                            tech = await cls.fetch_tech_details(client, plate)
                            actual_model = (item.get("handelsbenaming") or clean_model).upper()
                            # Strip the make from the trade name unless that leaves nothing.
                            norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)

                            # First four characters of the first-admission date are used as the year.
                            datum = str(item.get("datum_eerste_toelating", ""))
                            year_from = cls.parse_int(datum[:4]) if len(datum) >= 4 else 0

                            engine_ccm = cls.parse_int(item.get("cilinderinhoud"))
                            power_kw = tech["power_kw"]
                            fuel_type = tech["fuel_desc"]

                            # Fast-track: known kW plus ccm (or an electric fuel) is gold at once.
                            is_gold = power_kw > 0 and (engine_ccm > 0 or "elektri" in fuel_type.lower())
                            final_status = "gold_enriched" if is_gold else "unverified"

                            stmt = insert(VehicleModelDefinition).values(
                                market='EU',
                                make=clean_make,
                                marketing_name=actual_model,
                                normalized_name=norm_name,
                                variant_code=item.get("variant", "UNKNOWN"),
                                version_code=item.get("uitvoering", "UNKNOWN"),
                                technical_code=plate,
                                type_approval_number=item.get("typegoedkeuringsnummer"),
                                seats=cls.parse_int(item.get("aantal_zitplaatsen")),
                                doors=cls.parse_int(item.get("aantal_deuren")),
                                width=cls.parse_int(item.get("breedte")),
                                wheelbase=cls.parse_int(item.get("wielbasis")),
                                list_price=cls.parse_int(item.get("catalogusprijs")),
                                max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
                                curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
                                max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
                                fuel_consumption_combined=tech["consumption"],
                                co2_emissions_combined=tech["co2"],
                                vehicle_class=task.vehicle_class,
                                body_type=item.get("inrichting"),
                                fuel_type=fuel_type,
                                engine_capacity=engine_ccm,
                                power_kw=power_kw,
                                cylinders=cls.parse_int(item.get("aantal_cilinders")),
                                engine_code=tech["engine_code"],
                                euro_classification=tech["euro_class"],
                                year_from=year_from,
                                priority_score=task.priority_score,
                                status=final_status,
                                source="MEGA-HUNTER-v2.2.0-FAST",
                                raw_search_context='',
                                research_metadata={},
                                specifications={"fast_track": True} if is_gold else {},
                                marketing_name_aliases=[],
                            ).on_conflict_do_nothing(
                                index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
                            ).returning(VehicleModelDefinition.id)

                            res = await db.execute(stmt)
                            # No id comes back when the insert was skipped by the conflict clause.
                            vmd_id = res.scalar()

                            # Automatic publication for gold rows.
                            if is_gold and vmd_id:
                                cat_stmt = text("""
                                    INSERT INTO vehicle.vehicle_catalog (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
                                    VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
                                    ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING;
                                """)
                                await db.execute(cat_stmt, {
                                    "m_id": vmd_id,
                                    "make": clean_make,
                                    "model": actual_model[:50],
                                    "kw": power_kw,
                                    "ccm": engine_ccm,
                                    "fuel": fuel_type,
                                    "factory": '{"source": "RDW Fast-Track"}',
                                })
                                logger.info(f"✨ FAST-TRACK ARANY: {clean_make} {actual_model}")
                    except Exception as e:
                        logger.warning(f"⚠️ Sor hiba ({plate}): {e}")

                # Commit the rows of this page that made it through.
                await db.commit()
                offset += len(batch)
                if offset >= cls.MAX_ROWS_PER_TASK:
                    break
                await asyncio.sleep(0.5)

        # Close the discovery task.
        await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
        await db.commit()

    @classmethod
    async def run(cls):
        """Worker main loop: claim the highest-priority pending task and process it.

        Sleeps 30 seconds when the queue is empty and 10 seconds after an
        error; never returns.
        """
        logger.info("🤖 Mega-Hunter v2.2.0 (Fast-Track) ONLINE")
        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim: select, lock and flip the status in one statement.
                    res = await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;"))
                    task = res.fetchone()
                    await db.commit()
                    if task:
                        await cls.process_task(db, task)
                    else:
                        await asyncio.sleep(30)
            except Exception as e:
                # Previously swallowed without a trace; log before backing off.
                # NOTE(review): a task that fails here stays in 'processing'; confirm
                # whether something else re-queues such rows.
                logger.exception(f"💀 Főciklus hiba: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(CatalogHunter.run())
|
||||
@@ -0,0 +1,239 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_2_researcher.py
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime
|
||||
from sqlalchemy import text, update, func
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
|
||||
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
# MB 2.0 Szabvány naplózás
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-2-Researcher: %(message)s')
|
||||
logger = logging.getLogger("Vehicle-Robot-2-Researcher")
|
||||
|
||||
class QuotaManager:
    """Strict daily request counter for paid / government APIs.

    The counter is persisted as ``{"date": "YYYY-MM-DD", "count": N}`` in a
    JSON file named ``.quota_<service_name>.json`` inside ``state_dir``.

    Args:
        service_name: Identifier used in the state file name.
        daily_limit: Maximum number of granted requests per calendar day.
        state_dir: Directory holding the state file (default ``/app/temp``).
    """

    def __init__(self, service_name: str, daily_limit: int, state_dir: str = "/app/temp"):
        self.service_name = service_name
        self.daily_limit = daily_limit
        self.state_file = os.path.join(state_dir, f".quota_{service_name}.json")
        self._ensure_file()

    @staticmethod
    def _today() -> str:
        """Return today's local date as ``YYYY-MM-DD``."""
        return datetime.now().strftime("%Y-%m-%d")

    def _save_state(self, state: dict) -> None:
        """Write the state via a temp file + rename so a crash mid-write
        cannot leave a half-written JSON file behind."""
        tmp_path = f"{self.state_file}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(state, f)
        os.replace(tmp_path, self.state_file)

    def _load_state(self) -> dict:
        """Return today's state; start from zero on a new day or when the
        file is missing, unreadable or malformed."""
        today = self._today()
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                stored = json.load(f)
            if stored["date"] == today:
                return {"date": today, "count": int(stored["count"])}
        except (OSError, ValueError, KeyError, TypeError):
            # Unusable state file: fall through to a fresh counter instead
            # of raising on every quota check.
            pass
        return {"date": today, "count": 0}

    def _ensure_file(self):
        """Create the state directory and an initial zero-count file if absent."""
        directory = os.path.dirname(self.state_file)
        if directory:
            os.makedirs(directory, exist_ok=True)
        if not os.path.exists(self.state_file):
            self._save_state({"date": self._today(), "count": 0})

    def can_make_request(self) -> bool:
        """Reserve one request from today's quota.

        Note that a granted request is counted immediately: the method
        increments and persists the counter before returning True.

        Returns:
            True if the request fits in the daily limit, False otherwise.
        """
        state = self._load_state()

        if state["count"] >= self.daily_limit:
            return False

        # Count the request we are about to allow.
        state["count"] += 1
        self._save_state(state)
        return True
|
||||
|
||||
class VehicleResearcher:
    """Vehicle Robot 2.5: Sniper Researcher.

    Runs a few targeted web searches per vehicle, stores the combined and
    length-capped text (the "dossier") together with regex-extracted specs,
    and moves the record on to the AI synthesis stage.
    """

    def __init__(self):
        self.max_attempts = 5
        self.search_timeout = 15.0

        # Quota managers (limits are read from the environment / .env)
        dvla_limit = int(os.getenv("DVLA_DAILY_LIMIT", "1000"))
        self.dvla_quota = QuotaManager("dvla", dvla_limit)
        self.dvla_token = os.getenv("DVLA_API_KEY")

    async def fetch_ddg_targeted(self, label: str, query: str) -> str:
        """Run one DuckDuckGo text search in a worker thread.

        Args:
            label: Source tag written into the returned text block.
            query: Search query string.

        Returns:
            A text block starting with a ``[SOURCE: ...]`` header. On empty
            results or any error a short placeholder block is returned
            instead of raising.
        """
        try:
            def search():
                with DDGS() as ddgs:
                    # max_results=2: only the two most relevant hits, to keep noise low
                    results = ddgs.text(query, max_results=2)
                    return [f"- {r.get('body', '')}" for r in results] if results else []

            results = await asyncio.wait_for(asyncio.to_thread(search), timeout=self.search_timeout)

            if not results:
                return f"[SOURCE: {label}]\nNincs érdemi találat.\n"

            content = f"[SOURCE: {label} | KERESÉS: {query}]\n"
            content += "\n".join(results) + "\n"
            return content

        except Exception as e:
            logger.debug(f"Keresési hiba ({label}): {e}")
            return f"[SOURCE: {label}]\nKERESÉSI HIBA.\n"

    def extract_specs_from_text(self, text: str) -> dict:
        """Extract displacement, power and engine code from raw text via regex.

        Unit tokens must end at a word boundary, so "150 LE" is not read as
        150 litres, "2025 Limited" is not a litre value and "75 kWh" is not
        a power figure.

        Args:
            text: Raw search text.

        Returns:
            Dict with any of the keys ``ccm`` (int), ``kw`` (int) and
            ``engine_code`` (str); keys without a match are omitted.
        """
        import re

        specs = {}
        if not text:
            return specs

        # Displacement in cubic centimetres: 1998 cc, 2000 cm³, 1598ccm.
        # The look-behind stops us from matching the tail of a longer number.
        ccm_pattern = r'(?<![\d.,])(\d{3,4})\s*(?:(?:ccm|cc|cm3)\b|cm³)'
        match = re.search(ccm_pattern, text, re.IGNORECASE)
        if match:
            specs['ccm'] = int(match.group(1))
        else:
            # Fallback: litres -> cc (2.0 L -> 2000)
            liter_pattern = r'(\d+(?:\.\d+)?)\s*(?:(?:liters?|litres?|l)\b|ℓ)'
            match = re.search(liter_pattern, text, re.IGNORECASE)
            if match:
                liters = float(match.group(1))
                # round() rather than int(): avoids losing 1 cc to float error
                specs['ccm'] = round(liters * 1000)

        # Power in kilowatts: 150 kW, 150kW, 110.5 kW (fraction is dropped)
        kw_pattern = r'(?<![\d.])(\d{2,4})(?:[.,]\d+)?\s*kw\b'
        match = re.search(kw_pattern, text, re.IGNORECASE)
        if match:
            specs['kw'] = int(match.group(1))
        else:
            # Horsepower fallback (HP / bhp / LE / PS), converted to kW
            hp_pattern = r'(?<![\d.])(\d{2,4})(?:[.,]\d+)?\s*(?:bhp|hp|le|ps)\b'
            match = re.search(hp_pattern, text, re.IGNORECASE)
            if match:
                hp = int(match.group(1))
                specs['kw'] = round(hp * 0.7355)  # approximate conversion

        # Engine code: "motor kód: 1.8 TSI", "engine code: N47"
        engine_pattern = r'(?:motor\s*kód|engine\s*code|motor\s*code)[:\s]+([A-Z0-9\.\- ]+)'
        match = re.search(engine_pattern, text, re.IGNORECASE)
        if match:
            specs['engine_code'] = match.group(1).strip()

        return specs

    async def research_vehicle(self, db, vehicle_id: int, make: str, model: str, engine: str, year: str, current_attempts: int):
        """Research one vehicle and store the structured dossier.

        With enough text (> 150 characters) the record is set to
        'awaiting_ai_synthesis'; otherwise it goes back to 'unverified', or
        to 'suspended_research' once ``max_attempts`` is reached. Database
        errors are rolled back and logged, not raised.

        Args:
            db: Async database session used for the update.
            vehicle_id: Primary key of the VehicleModelDefinition row.
            make: Vehicle make.
            model: Model / marketing name.
            engine: Engine code, may be empty or None.
            year: First production year, may be empty or None.
            current_attempts: Attempts recorded so far for this row.
        """
        engine_safe = engine or ""
        year_safe = str(year) if year else ""

        logger.info(f"🔎 Mesterlövész Kutatás: {make} {model} (Motor: {engine_safe})")

        # TIER 1: free, targeted searches
        queries = [
            ("ULTIMATE_SPECS", f"{make} {model} {engine_safe} {year_safe} site:ultimatespecs.com"),
            ("AUTO_DATA", f"{make} {model} {engine_safe} {year_safe} site:auto-data.net"),
            ("COMMON_ISSUES", f"{make} {model} {engine_safe} reliability common problems")
        ]

        tasks = [self.fetch_ddg_targeted(label, q) for label, q in queries]
        search_results = await asyncio.gather(*tasks)

        # TIER 2: paid / quota-limited APIs (placeholder for DVLA)
        # If UK plates arrive in the future, DVLA would be called here:
        # if has_uk_plate and self.dvla_quota.can_make_request():
        #     uk_data = await self.fetch_dvla_data(plate)
        #     search_results.append(uk_data)

        # TIER 3: build the dossier, capped in length to spare the AI stage
        full_context = "\n".join(search_results)
        if len(full_context) > 2500:
            full_context = full_context[:2500] + "\n...[TRUNCATED TO SAVE GPU TOKENS]"

        # Regex-based spec extraction
        extracted_specs = self.extract_specs_from_text(full_context)

        try:
            if len(full_context.strip()) > 150:  # low threshold: targeted search output is compact
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        raw_search_context=full_context,
                        research_metadata=extracted_specs,
                        status='awaiting_ai_synthesis',  # dossier ready for the AI stage
                        last_research_at=func.now(),
                        attempts=current_attempts + 1
                    )
                )
                logger.info(f"✅ Akta rögzítve ({len(full_context)} karakter): {make} {model}")
            else:
                new_status = 'suspended_research' if current_attempts + 1 >= self.max_attempts else 'unverified'
                await db.execute(
                    update(VehicleModelDefinition)
                    .where(VehicleModelDefinition.id == vehicle_id)
                    .values(
                        status=new_status,
                        attempts=current_attempts + 1,
                        last_research_at=func.now()
                    )
                )
                if new_status == 'suspended_research':
                    logger.warning(f"🛑 Felfüggesztve (Nincs nyom a weben): {make} {model}")
                else:
                    logger.warning(f"⚠️ Kevés adat: {make} {model}, visszatéve a sorba.")

            await db.commit()
        except Exception as e:
            await db.rollback()
            logger.error(f"🚨 Adatbázis hiba az eredmény mentésénél ({vehicle_id}): {e}")

    @classmethod
    async def run(cls):
        """Run the researcher worker loop forever.

        Each iteration claims one eligible row (marking it
        'research_in_progress'), commits the claim, then researches it in a
        separate session. Sleeps 2 s after a task, 30 s when idle and 10 s
        after an error.
        """
        self_instance = cls()
        logger.info("🚀 Vehicle Researcher 2.5 ONLINE (Sniper & Quota Manager)")

        while True:
            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim of one row
                    query = text("""
                        UPDATE vehicle.vehicle_model_definitions
                        SET status = 'research_in_progress'
                        WHERE id = (
                            SELECT id FROM vehicle.vehicle_model_definitions
                            WHERE status IN ('unverified', 'awaiting_research', 'ACTIVE')
                            AND attempts < :max_attempts
                            AND is_manual = FALSE
                            ORDER BY
                                CASE WHEN make = 'TOYOTA' THEN 1 ELSE 2 END,
                                attempts ASC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, marketing_name, engine_code, year_from, attempts;
                    """)

                    result = await db.execute(query, {"max_attempts": self_instance.max_attempts})
                    task = result.fetchone()
                    await db.commit()

                if task:
                    v_id, v_make, v_model, v_engine, v_year, v_attempts = task
                    async with AsyncSessionLocal() as process_db:
                        await self_instance.research_vehicle(process_db, v_id, v_make, v_model, v_engine, v_year, v_attempts)

                    await asyncio.sleep(2)  # rate-limit protection towards DDG
                else:
                    await asyncio.sleep(30)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
asyncio.run(VehicleResearcher.run())
|
||||
except KeyboardInterrupt:
|
||||
logger.info("🛑 Kutató robot leállítva.")
|
||||
@@ -0,0 +1,225 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
|
||||
import asyncio
|
||||
import logging
|
||||
import datetime
|
||||
import random
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
from sqlalchemy import text, func, update, case
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.models.asset import AssetCatalog
|
||||
from app.services.ai_service import AIService
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
|
||||
|
||||
class TechEnricher:
    """Vehicle Robot 3: Alchemist Pro (atomic locking + manual-moderation patch).

    Focuses on AI analysis and data merging only: no web searching here.
    Authority (RDW) values take precedence over AI values, and the merged
    result is sanity-checked before it is written to the catalog.
    """

    def __init__(self):
        self.max_attempts = 5
        self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()

    def check_budget(self) -> bool:
        """Return True while today's AI call count is below the daily limit.

        The counter is reset on the first check of a new calendar day.
        """
        today = datetime.date.today()
        if today > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = today
        return self.ai_calls_today < self.daily_ai_limit

    def validate_merged_data(self, merged_kw: int, merged_ccm: int, v_class: str, fuel: str, current_attempts: int) -> tuple[bool, str]:
        """Sanity-check the merged values.

        Args:
            merged_kw: Final power in kW (0 means unknown).
            merged_ccm: Final displacement in ccm (0 means unknown).
            v_class: Vehicle class, e.g. "car", "truck", "trailer".
            fuel: Fuel type; compared case-insensitively, None is tolerated.
            current_attempts: Attempts recorded so far; from the third
                attempt on, missing values are let through as partial data.

        Returns:
            ``(is_valid, reason)``; reason is "OK" when valid.
        """
        fuel = (fuel or "").lower()

        if merged_kw < 0 or merged_ccm < 0:
            return False, f"Irreális negatív érték (KW: {merged_kw}, CCM: {merged_ccm})"
        if merged_ccm > 18000:
            return False, f"Irreális CCM érték ({merged_ccm})"
        if merged_kw > 1500 and v_class != "truck":
            return False, f"Irreális KW érték ({merged_kw})"

        # Missing kW
        if merged_kw == 0:
            if current_attempts < 3:
                return False, "Hiányzó KW adat. Újrakutatás javasolt."
            logger.warning("Sane-check: Többszöri próbálkozás után sincs KW, de átengedjük részlegesként.")

        # Missing ccm on a combustion engine
        is_electric = "electric" in fuel or "elektric" in fuel
        if merged_ccm == 0 and not is_electric and v_class != "trailer":
            if current_attempts < 3:
                return False, "Hiányzó CCM (belsőégésű motornál). Újrakutatás javasolt."
            logger.warning("Sane-check: Többszöri próbálkozás után sincs CCM, átengedjük részlegesként.")

        return True, "OK"

    async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
        """Enrich one staging record with AI data and publish it.

        On success a row is inserted into vehicle.vehicle_catalog and the
        staging record is set to 'gold_enriched'. On any failure the record
        goes back to 'unverified', or to 'manual_review_needed' once
        ``max_attempts`` is reached, with the AI output stored for review.

        Args:
            db: Async database session.
            record_id: Primary key of the VehicleModelDefinition row.
            base_info: Dict built by ``run`` (make, m_name, v_type, rdw_*
                values and web_context).
            current_attempts: Attempts recorded so far for this row.
        """
        # Precise identifier for the logs (make, model, ID, RDW values)
        v_ident = f"{base_info['make'].upper()} {base_info['m_name']} (ID: {record_id}, RDW: {base_info['rdw_ccm']}ccm, KW: {base_info['rdw_kw']})"
        attempt_str = f"[Próba: {current_attempts + 1}/{self.max_attempts}]"

        ai_data = {}  # stays empty if the AI call itself fails

        try:
            logger.info(f"🧠 AI dúsítás indul: {v_ident} {attempt_str}")

            # STEP 1: AI call
            try:
                ai_data = await AIService.get_clean_vehicle_data(
                    base_info['make'],
                    base_info['m_name'],
                    base_info
                )
            finally:
                # Every attempted call counts against the daily budget,
                # including calls that fail or return unusable data.
                self.ai_calls_today += 1

            if not ai_data:
                raise ValueError("Teljesen üres AI válasz (API hiba vagy extrém hallucináció).")

            # STEP 2: hybrid merge (before validation).
            # RDW values override the AI for the official parameters.
            final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else int(ai_data.get("kw", 0) or 0)
            final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else int(ai_data.get("ccm", 0) or 0)

            # Fuel clean-up; guard against an explicit null from the AI
            fuel_rdw = base_info.get('rdw_fuel', '')
            if fuel_rdw and fuel_rdw != "Unknown":
                final_fuel = fuel_rdw
            else:
                final_fuel = ai_data.get("fuel_type") or "petrol"

            final_engine = base_info['rdw_engine'] if base_info['rdw_engine'] else ai_data.get("engine_code", "Unknown")
            final_euro = base_info['rdw_euro'] or ai_data.get("euro_classification")
            final_cylinders = base_info['rdw_cylinders'] or ai_data.get("cylinders")

            # STEP 3: validation
            is_valid, error_msg = self.validate_merged_data(final_kw, final_ccm, base_info['v_type'], str(final_fuel).lower(), current_attempts)
            if not is_valid:
                raise ValueError(f"Validációs hiba: {error_msg}")

            # STEP 4: save into the gold catalog
            clean_model = str(ai_data.get("marketing_name", base_info['m_name']))[:50].upper()

            cat_stmt = text("""
                INSERT INTO vehicle.vehicle_catalog
                (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
                VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
                ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING
                RETURNING id;
            """)

            await db.execute(cat_stmt, {
                "m_id": record_id,
                "make": base_info['make'].upper(),
                "model": clean_model,
                "kw": final_kw,
                "ccm": final_ccm,
                "fuel": final_fuel,
                "factory": json.dumps(ai_data)
            })

            # STEP 5: close the staging (VMD) record
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    status="gold_enriched",
                    engine_capacity=final_ccm,
                    power_kw=final_kw,
                    fuel_type=final_fuel,
                    engine_code=final_engine,
                    euro_classification=final_euro,
                    cylinders=final_cylinders,
                    specifications=ai_data,  # keep the full AI output on the master row too
                    updated_at=func.now()
                )
            )
            await db.commit()
            logger.info(f"✨ ARANY REKORD KÉSZ: {v_ident}")

        except Exception as e:
            await db.rollback()
            logger.warning(f"⚠️ Alkimista hiba - {v_ident}: {e}")

            # At the attempt limit send to manual moderation, otherwise back to the researcher
            new_status = 'manual_review_needed' if current_attempts + 1 >= self.max_attempts else 'unverified'

            # Store the partial AI answer (or an error stub) so an admin can see what went wrong
            review_data = ai_data if ai_data else {"error": "Nincs értékelhető JSON adat az AI-tól", "raw_context": base_info['web_context']}

            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    attempts=current_attempts + 1,
                    last_error=str(e)[:200],
                    status=new_status,
                    specifications=review_data,  # broken data kept for manual review
                    updated_at=func.now()
                )
            )
            await db.commit()

            if new_status == 'unverified':
                logger.info(f"♻️ Akta visszaküldve a Robot-2-nek (Kutató). {attempt_str}")
            else:
                logger.error(f"🛑 Max próbálkozás elérve! Kézi moderációra küldve: {v_ident}")

    async def run(self):
        """Run the worker loop forever.

        Waits an hour when the daily AI budget is used up. Otherwise claims
        one eligible row (marking it 'ai_synthesis_in_progress'), commits
        the claim and processes the row in a separate session.
        """
        logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Atomi Zárolás + Moderáció Patch)")
        while True:
            if not self.check_budget():
                logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
                await asyncio.sleep(3600)
                continue

            try:
                async with AsyncSessionLocal() as db:
                    # Atomic claim, guards against two workers taking the same row
                    query = text("""
                        UPDATE vehicle.vehicle_model_definitions
                        SET status = 'ai_synthesis_in_progress'
                        WHERE id = (
                            SELECT id FROM vehicle.vehicle_model_definitions
                            WHERE status IN ('awaiting_ai_synthesis', 'ACTIVE')
                            AND attempts < :max_attempts
                            AND is_manual = FALSE
                            ORDER BY
                                CASE WHEN status = 'awaiting_ai_synthesis' THEN 1 ELSE 2 END,
                                priority_score DESC
                            FOR UPDATE SKIP LOCKED
                            LIMIT 1
                        )
                        RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity,
                                  fuel_type, engine_code, euro_classification, cylinders, raw_search_context, attempts;
                    """)

                    result = await db.execute(query, {"max_attempts": self.max_attempts})
                    task = result.fetchone()
                    await db.commit()

                if task:
                    # Unpack the returned row into the base_info dict
                    r_id = task[0]
                    base_info = {
                        "make": task[1], "m_name": task[2], "v_type": task[3] or "car",
                        "rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
                        "rdw_fuel": task[6] or "petrol", "rdw_engine": task[7] or "",
                        "rdw_euro": task[8], "rdw_cylinders": task[9],
                        "web_context": task[10] or ""
                    }
                    attempts = task[11]

                    # Separate session for processing (the AI call can take long)
                    async with AsyncSessionLocal() as process_db:
                        await self.process_single_record(process_db, r_id, base_info, attempts)

                    # GPU cool-down / Ollama rate limit
                    await asyncio.sleep(random.uniform(1.5, 3.5))
                else:
                    logger.info("😴 Nincs feldolgozandó akta, az Alkimista pihen...")
                    await asyncio.sleep(15)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(TechEnricher().run())
|
||||
@@ -0,0 +1,168 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import datetime
|
||||
import random
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
from sqlalchemy import text, func, update
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.services.ai_service import AIService
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] R3-Alchemist: %(message)s', stream=sys.stdout)
|
||||
logger = logging.getLogger("Robot-3-Alchemist")
|
||||
|
||||
class TechEnricher:
    """Vehicle Robot 3: Alchemist Pro (Sentinel Gateway Edition).

    Sends a strict JSON prompt through the AIService gateway, extracts the
    trim level and fills in missing kW / ccm values, then marks the staging
    record as 'gold_enriched'.
    """

    def __init__(self):
        self.max_attempts = 5
        self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
        self.ai_calls_today = 0
        self.last_reset_date = datetime.date.today()

    def check_budget(self) -> bool:
        """Return True while today's AI call count is below the daily limit.

        The counter is reset on the first check of a new calendar day.
        """
        today = datetime.date.today()
        if today > self.last_reset_date:
            self.ai_calls_today = 0
            self.last_reset_date = today
        return self.ai_calls_today < self.daily_ai_limit

    def validate_merged_data(self, merged_kw: int, merged_ccm: int, v_class: str, fuel: str, current_attempts: int) -> tuple[bool, str]:
        """Sanity-check the merged values.

        Args:
            merged_kw: Final power in kW (0 means unknown).
            merged_ccm: Final displacement in ccm (0 means unknown).
            v_class: Vehicle class, e.g. "car", "truck", "other", "trailer".
            fuel: Fuel type; compared case-insensitively, None is tolerated.
            current_attempts: Attempts recorded so far; from the third
                attempt on, missing values are let through.

        Returns:
            ``(is_valid, reason)``; reason is "OK" when valid.
        """
        fuel_lc = (fuel or "").lower()

        if merged_kw < 0 or merged_ccm < 0:
            return False, f"Irreális negatív érték (KW: {merged_kw}, CCM: {merged_ccm})"
        if merged_ccm > 18000:
            return False, f"Irreális CCM érték ({merged_ccm})"
        if merged_kw > 1500 and v_class not in ["truck", "other"]:
            return False, f"Irreális KW érték ({merged_kw})"

        if merged_kw == 0 and current_attempts < 3:
            return False, "Hiányzó KW adat. Újrakutatás javasolt."

        # Accept both the English ("electric") and Dutch/Hungarian ("elektr...") spelling
        is_electric = "elektr" in fuel_lc or "electr" in fuel_lc
        if merged_ccm == 0 and not is_electric and v_class != "trailer" and current_attempts < 3:
            return False, "Hiányzó CCM (belsőégésű motornál)."

        return True, "OK"

    async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
        """Enrich one staging record through the AI gateway.

        On success the record is set to 'gold_enriched' with merged kW/ccm,
        trim level and the raw AI output. On any failure it goes back to
        'unverified', or to 'manual_review_needed' once ``max_attempts`` is
        reached.

        Args:
            db: Async database session (also handed to the AI gateway).
            record_id: Primary key of the VehicleModelDefinition row.
            base_info: Dict built by ``run`` (make, m_name, v_type, rdw_kw,
                rdw_ccm, rdw_fuel, web_context).
            current_attempts: Attempts recorded so far for this row.
        """
        v_ident = f"{base_info['make'].upper()} {base_info['m_name']} (ID: {record_id})"
        attempt_str = f"[Próba: {current_attempts + 1}/{self.max_attempts}]"

        try:
            logger.info(f"🧠 AI dúsítás indul: {v_ident} {attempt_str}")

            # Strict prompt for the AI service
            prompt = f"""
                Elemezd az alábbi járműadatokat és a webes kutatást! Készíts belőle egy JSON objektumot.
                Jármű: {base_info['make']} {base_info['m_name']}
                Hatósági adatok: {base_info['rdw_ccm']} ccm, {base_info['rdw_kw']} kW, Üzemanyag: {base_info['rdw_fuel']}
                Webes szöveg: {base_info['web_context'][:2000]}

                FELADATOK:
                1. Keresd meg a felszereltségi szintet (trim_level) a modell nevéből vagy a szövegből (pl. AMG, Highline, Titanium, M-Sport, Elegance, ST-Line). Ha nincs, legyen üres string.
                2. Ha az RDW adatokban a kW vagy a ccm 0, pótold a szövegből a helyes értéket!

                KIZÁRÓLAG EGY ÉRVÉNYES JSON-T ADJ VISSZA! (A Groq/Gemini miatt kötelező a JSON szó használata).
                Várt kulcsok: "kw" (int), "ccm" (int), "trim_level" (string), "transmission" (string), "drive_type" (string).
                """

            # Gateway call; the db session is passed through to it
            try:
                ai_data = await AIService._execute_ai_call(db, prompt, model_key="text")
            finally:
                # Every attempted call counts against the daily budget,
                # including calls that fail or return unusable data.
                self.ai_calls_today += 1

            if not ai_data:
                raise ValueError("Üres AI válasz (Minden fallback elbukott).")

            # Hybrid merge: RDW values win when present
            final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else int(ai_data.get("kw", 0) or 0)
            final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else int(ai_data.get("ccm", 0) or 0)
            trim_level = str(ai_data.get("trim_level", ""))[:100]

            # Sanity check
            is_valid, error_msg = self.validate_merged_data(final_kw, final_ccm, base_info['v_type'], base_info['rdw_fuel'], current_attempts)
            if not is_valid:
                raise ValueError(f"Validációs hiba: {error_msg}")

            # Update the staging table (gold status)
            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    status="gold_enriched",
                    engine_capacity=final_ccm,
                    power_kw=final_kw,
                    trim_level=trim_level if trim_level.lower() not in ["null", "none"] else "",
                    specifications=ai_data,
                    updated_at=func.now()
                )
            )
            await db.commit()
            logger.info(f"✨ ARANY REKORD KÉSZ: {v_ident} | Trim: {trim_level}")

        except Exception as e:
            await db.rollback()
            logger.warning(f"⚠️ Alkimista hiba - {v_ident}: {e}")

            new_status = 'manual_review_needed' if current_attempts + 1 >= self.max_attempts else 'unverified'

            await db.execute(
                update(VehicleModelDefinition)
                .where(VehicleModelDefinition.id == record_id)
                .values(
                    attempts=current_attempts + 1,
                    last_error=str(e)[:200],
                    status=new_status,
                    updated_at=func.now()
                )
            )
            await db.commit()

            if new_status == 'unverified':
                logger.info(f"♻️ Akta visszaküldve a Kutatónak (R2). {attempt_str}")

    async def run(self):
        """Run the worker loop forever.

        Waits an hour when the daily AI budget is used up. Otherwise claims
        one 'awaiting_ai_synthesis' row (marking it
        'ai_synthesis_in_progress'), commits the claim and processes the row
        in a separate session. Sleeps 10 s when idle or after an error.
        """
        logger.info(f"🚀 R3 Alchemist Pro ONLINE (Sentinel Gateway Integráció)")
        while True:
            if not self.check_budget():
                logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
                await asyncio.sleep(3600)
                continue

            try:
                async with AsyncSessionLocal() as db:
                    query = text("""
                        UPDATE vehicle.vehicle_model_definitions
                        SET status = 'ai_synthesis_in_progress'
                        WHERE id = (
                            SELECT id FROM vehicle.vehicle_model_definitions
                            WHERE status = 'awaiting_ai_synthesis'
                            AND attempts < :max_attempts
                            AND is_manual = FALSE
                            ORDER BY priority_score DESC
                            FOR UPDATE SKIP LOCKED LIMIT 1
                        )
                        RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity, fuel_type, raw_search_context, attempts;
                    """)

                    result = await db.execute(query, {"max_attempts": self.max_attempts})
                    task = result.fetchone()
                    await db.commit()

                if task:
                    base_info = {
                        "make": task[1], "m_name": task[2], "v_type": task[3] or "car",
                        "rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
                        "rdw_fuel": task[6] or "petrol", "web_context": task[7] or ""
                    }
                    async with AsyncSessionLocal() as process_db:
                        await self.process_single_record(process_db, task[0], base_info, task[8])

                else:
                    await asyncio.sleep(10)

            except Exception as e:
                logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
                await asyncio.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(TechEnricher().run())
|
||||
@@ -0,0 +1,113 @@
|
||||
import asyncio
|
||||
import json
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
async def test_scraper():
    """Manually exercise the autoevolution.com DOM parser on two sample pages.

    Opens each URL in headless Chromium, runs the in-page extraction script
    and prints the technical parameters found. A page that fails to load is
    reported and skipped, and the browser is closed even when an error occurs.
    """
    # Two problem-focused URLs: a modern Aprilia page and an old BMW page with broken HTML
    test_urls = [
        "https://www.autoevolution.com/moto/aprilia-rs-660-factory-2025.html",
        "https://www.autoevolution.com/moto/bmw-f-650-gs-2011.html"
    ]

    # In-page parser; identical for every URL, so it is built once
    script = """
    () => {
        let results = {};

        // 1. MÓDSZER: Régi motorok (pl. BMW F650GS) -> td.left és td.right
        let leftCells = document.querySelectorAll('td.left');
        leftCells.forEach(cell => {
            let key = cell.innerText.replace(/:$/, '').trim();
            let rightCell = cell.nextElementSibling;
            if(rightCell && rightCell.classList.contains('right')) {
                results[key] = rightCell.innerText.trim();
            }
        });

        // 2. MÓDSZER: Modern motorok (pl. Aprilia) -> dt és dd
        let dts = document.querySelectorAll('dt');
        dts.forEach(dt => {
            let key = dt.innerText.replace(/:$/, '').trim();
            let dd = dt.nextElementSibling;
            if(dd && dd.tagName.toLowerCase() === 'dd') {
                results[key] = dd.innerText.trim();
            }
        });

        // 3. MÓDSZER: Alternatív modern layout -> span.label és span.value
        let specRows = document.querySelectorAll('.spec-row');
        specRows.forEach(row => {
            let label = row.querySelector('.label');
            let value = row.querySelector('.value');
            if(label && value) {
                let key = label.innerText.replace(/:$/, '').trim();
                if (!results[key]) {
                    results[key] = value.innerText.trim();
                }
            }
        });

        // 4. MÓDSZER: "Adler" típusú elavult leírások fallbackje -> Vastagított szöveg
        if (Object.keys(results).length === 0) {
            document.querySelectorAll('b, strong').forEach(b => {
                let key = b.innerText.replace(/:$/, '').trim();
                if(key.length > 2 && key.length < 30) {
                    let val = "";
                    // Ha a szöveg közvetlenül a tag után van (Text Node)
                    if(b.nextSibling && b.nextSibling.nodeType === 3) {
                        val = b.nextSibling.textContent.trim();
                    }
                    // Ha egy másik elemben van
                    else if (b.nextElementSibling && b.nextElementSibling.tagName !== 'B') {
                        val = b.nextElementSibling.innerText.trim();
                    }
                    if(val && !results[key]) {
                        results[key] = val;
                    }
                }
            });
        }

        return results;
    }
    """

    # Keys considered relevant technical data; everything else is treated as noise
    relevant_keys = ["Type", "Displacement", "Bore X Stroke", "Compression Ratio",
                     "Horsepower", "Torque", "Fuel System", "Gearbox", "Clutch",
                     "Final Drive", "Frame", "Front Suspension", "Rear Suspension",
                     "Front Brake", "Rear Brake", "Overall Length", "Overall Width",
                     "Seat Height", "Wheelbase", "Fuel Capacity", "Weight", "Dry Weight",
                     "Wet Weight", "Front", "Rear"]
    relevant_lc = [rk.lower() for rk in relevant_keys]

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
            )
            page = await context.new_page()

            for url in test_urls:
                print(f"\n{'='*60}")
                print(f"🌍 MEGNYITÁS: {url}")
                print(f"{'='*60}")

                try:
                    # Wait for the DOM to load
                    await page.goto(url, wait_until="domcontentloaded", timeout=60000)
                    await asyncio.sleep(2)  # give in-page JS a moment to run
                    data = await page.evaluate(script)
                except Exception as exc:
                    # One broken page must not abort the remaining URLs
                    print(f"\n🔴 HIBA az oldal feldolgozása közben: {exc}")
                    continue

                if data and len(data) > 0:
                    # Keep only the relevant technical keys; fall back to everything if none match
                    filtered_data = {k: v for k, v in data.items() if any(rk in k.lower() for rk in relevant_lc)}
                    shown = filtered_data if filtered_data else data

                    print("\n🟢 KINYERT ADATOK (DOM PARSZOLÓ):")
                    print(json.dumps(shown, indent=2, ensure_ascii=False))
                    print(f"\n✅ Összesen {len(shown)} műszaki paramétert találtam.")
                else:
                    print("\n🔴 NULLA ADAT - A DOM parszoló nem talált egyezést.")
        finally:
            await browser.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_scraper())
|
||||
113
archive/old_files/backend/app/workers/vehicle/r5_test.py.old
Normal file
113
archive/old_files/backend/app/workers/vehicle/r5_test.py.old
Normal file
@@ -0,0 +1,113 @@
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
# --- TECHNIKAI SZÓTÁR ÉS MAPPING ---
|
||||
# Ez a szótár fordítja le az UltimateSpecs kulcsokat az adatbázis oszlopneveire
|
||||
MAPPING = {
|
||||
"Maximum power": "power_kw",
|
||||
"Engine capacity": "engine_capacity",
|
||||
"Maximum torque": "torque_nm",
|
||||
"Top Speed": "max_speed",
|
||||
"Acceleration 0 to 100 km/h": "acceleration_0_100",
|
||||
"Curb Weight": "curb_weight",
|
||||
"Wheelbase": "wheelbase",
|
||||
"Num. of Seats": "seats",
|
||||
"Drive wheels - Traction - Layout": "drive_type",
|
||||
"Body": "body_type"
|
||||
}
|
||||
|
||||
async def r5_test_run():
    """Dry-run the R5 hybrid enrichment for one vehicle and print the result.

    Selects the highest-priority row of ``vehicle.vehicle_model_definitions``
    that still lacks power or engine capacity, optionally looks its technical
    code up in the Dutch RDW open-data API, merges that with a hard-coded
    sample of UltimateSpecs values and prints the merged record as JSON.
    Nothing is written back to the database.
    """
    print("🚀 R5 Hibrid Robot indítása (Teszt üzemmód)...")

    async with AsyncSessionLocal() as db:
        # 1. SELECTION: take one vehicle that has not been enriched yet.
        query = text("""
            SELECT id, make, marketing_name, year_from, technical_code, fuel_type
            FROM vehicle.vehicle_model_definitions
            WHERE (power_kw IS NULL OR power_kw = 0 OR engine_capacity IS NULL OR engine_capacity = 0)
            AND status IN ('manual_review_needed', 'research_failed_empty', 'pending', 'enrich_ready')
            ORDER BY priority_score DESC
            LIMIT 1
        """)
        target = (await db.execute(query)).fetchone()

        if not target:
            print("✨ Nincs feldolgozatlan autó az adatbázisban.")
            return

        t_id, make, model, year, tech_code, fuel = target
        print(f"🎯 Célpont: {make} {model} ({year})")
        print(f"📌 Technical Code: {tech_code or 'Nincs megadva'}")

        # 2. RDW DATA (Dutch vehicle authority), only when a technical code exists.
        rdw_data = {}
        if tech_code:
            print("🇳🇱 RDW adatok lekérése...")
            rdw_url = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
            # Passing the code via ``params`` lets requests URL-encode it.
            rdw_params = {"handelsbenaming": tech_code.upper()}
            try:
                # requests is blocking; run it in the default executor so the
                # event loop is not stalled for the duration of the HTTP call.
                loop = asyncio.get_running_loop()
                response = await loop.run_in_executor(
                    None,
                    lambda: requests.get(rdw_url, params=rdw_params, timeout=5),
                )
                res = response.json()
                if res:
                    row = res[0]
                    # int(float(...)) tolerates values delivered as "1581.0".
                    rdw_data = {
                        "power_kw": int(float(row.get('nettomaximumvermogen', 0))),
                        "engine_capacity": int(float(row.get('cilinderinhoud', 0))),
                        "curb_weight": int(float(row.get('massa_ledig_voertuig', 0))),
                    }
                    print("✅ RDW adatok sikeresen betöltve.")
            except (requests.RequestException, ValueError, TypeError,
                    KeyError, IndexError, AttributeError):
                # Network failure, non-JSON body or an unexpected payload shape.
                print("⚠️ RDW nem elérhető vagy nincs találat.")

        # 3. ULTIMATESPECS DATA (simulated scrape).
        print("🏁 UltimateSpecs adatok gyűjtése...")
        # The Playwright scraper would run here; this is a sample of the raw
        # label/value pairs it extracts from the page.
        raw_web_data = {
            "Maximum power": "103 PS / 76 kW @ 5750 rpm",
            "Engine capacity": "1581 cm3",
            "Maximum torque": "144 Nm @ 4000 rpm",
            "Top Speed": "180 km/h",
            "Acceleration 0 to 100 km/h": "11.5 s",
            "Curb Weight": "1090 kg",
            "Wheelbase": "254 cm",
            "Body": "Hatchback"
        }

        # 4. MERGE AND TRANSLATE
        final_mdm_record = {
            "id": t_id,
            "make": make,
            "marketing_name": model,
            "year_from": year,
            "fuel_type": fuel
        }

        number_re = re.compile(r'\d+(?:\.\d+)?')
        kw_re = re.compile(r'(\d+(?:\.\d+)?)\s*kW')

        # Apply the label -> column mapping and reduce values to plain numbers.
        for web_key, db_key in MAPPING.items():
            val = raw_web_data.get(web_key)
            if not val:
                continue
            raw_text = str(val)
            kw_match = kw_re.search(raw_text)
            first_number = number_re.search(raw_text)
            if kw_match:
                # Prefer the figure labelled kW over PS/hp/rpm figures.
                token = kw_match.group(1)
            elif first_number:
                token = first_number.group(0)
            else:
                # Purely textual value (e.g. body type): keep it verbatim.
                final_mdm_record[db_key] = val
                continue
            # Keep decimals ("11.5 s" -> 11.5) and emit real numbers, not strings.
            final_mdm_record[db_key] = float(token) if "." in token else int(token)

        # RDW values take priority over the scraped ones whenever they are non-zero.
        final_mdm_record.update({k: v for k, v in rdw_data.items() if v})

        # --- TERMINAL OUTPUT ---
        print("\n" + "="*50)
        print("📊 VÉGLEGES MDM REKORD (ELŐNÉZET)")
        print("="*50)
        print(json.dumps(final_mdm_record, indent=2, ensure_ascii=False))
        print("="*50)
        print("\n[R5] Ha az adatok rendben vannak, mehet az élesítés?")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(r5_test_run())
|
||||
@@ -0,0 +1,62 @@
|
||||
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_1_5_heavy_eu1.0.py
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
logger = logging.getLogger("Robot-1-5-Heavy-EU")
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
class HeavyEUHunter:
    """Bulk-loads heavy-vehicle make/model pairs from the RDW open-data API.

    For every RDW vehicle kind in the job list the rows are downloaded,
    normalised and inserted into ``vehicle.catalog_discovery`` in one batched
    statement per kind.
    """

    RDW_URL = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"

    @classmethod
    async def fetch_rdw_heavy(cls, vehicle_type: str):
        """Download up to 10000 make/model rows for one RDW vehicle kind.

        Returns the decoded JSON payload, or an empty list when the request
        fails or the server answers with a non-200 status.
        """
        # Let httpx build and escape the query string.
        params = {
            "voertuigsoort": vehicle_type,
            "$select": "merk,handelsbenaming",
            "$limit": 10000,
        }
        async with httpx.AsyncClient(timeout=30.0) as client:
            try:
                resp = await client.get(cls.RDW_URL, params=params)
                if resp.status_code != 200:
                    logger.warning(f"⚠️ RDW HTTP {resp.status_code} ({vehicle_type})")
                    return []
                return resp.json()
            except Exception as e:
                logger.error(f"❌ RDW Error: {e}")
                return []

    @staticmethod
    def _build_rows(data, internal_class):
        """Normalise raw RDW rows into unique insert-parameter dictionaries."""
        rows = []
        seen = set()
        for item in data:
            # ``or ''`` also covers keys that are present but null.
            make = (item.get('merk') or '').upper().strip()
            model = (item.get('handelsbenaming') or '').upper().strip()
            if not (make and model):
                continue
            pair = (make, model)
            if pair in seen:
                # Repeated make/model pairs would only hit ON CONFLICT anyway.
                continue
            seen.add(pair)
            rows.append({"make": make, "model": model, "v_class": internal_class})
        return rows

    @classmethod
    async def run(cls):
        """Fetch every configured vehicle kind and insert the new pairs in batches."""
        logger.info("🚛 Robot 1.5 (EU Heavy Duty) indítása - Kötegelt mód...")
        # RDW vehicle kind -> internal vehicle class.
        job_list = {
            "Vrachtwagen": "truck",
            "Bus": "bus",
            "Kampeerauto": "rv"
        }

        async with AsyncSessionLocal() as db:
            for rdw_name, internal_class in job_list.items():
                logger.info(f"📥 {rdw_name} adatok letöltése...")
                data = await cls.fetch_rdw_heavy(rdw_name)

                if not data:
                    continue

                insert_data = cls._build_rows(data, internal_class)
                if not insert_data:
                    continue

                query = text("""
                    INSERT INTO vehicle.catalog_discovery
                    (make, model, vehicle_class, status, market, priority_score, source)
                    VALUES (:make, :model, :v_class, 'pending', 'EU', 20, 'RDW-HEAVY')
                    ON CONFLICT ON CONSTRAINT _make_model_market_year_uc DO NOTHING
                """)
                # A list of parameter dicts makes this one batched execute call.
                await db.execute(query, insert_data)
                await db.commit()
                logger.info(f"✅ {rdw_name}: {len(insert_data)} EU-s nagygép beküldve kötegelve.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(HeavyEUHunter.run())
|
||||
@@ -0,0 +1,387 @@
|
||||
#!/usr/bin/env python3
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import urllib.parse
|
||||
import sys
|
||||
import signal
|
||||
import re
|
||||
from playwright.async_api import async_playwright
|
||||
from sqlalchemy import text
|
||||
from app.database import AsyncSessionLocal
|
||||
|
||||
# R2.3 - SENTINEL (Hardened & Obedient Edition)
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [R2.3-SENTINEL] %(message)s')
|
||||
logger = logging.getLogger("R2.3")
|
||||
|
||||
# --- 1. SZŰRÉSEK ÉS TILTÓLISTÁK ---
|
||||
# Csak olyan típusokat keresünk, amik nem utánfutók vagy munkagépek
|
||||
# Makes excluded from the target query (passed there as the :junks parameter).
JUNK_LIST = [
    'SARIS', 'ANSSEMS', 'HAPERT', 'HUMBAUR',
    'EDUARD', 'IFOR WILLIAMS', 'FENDT', 'HOBBY',
    'ADRIA', 'PEECON', 'JAKO', 'KAWECO',
    'POTTINGER', 'BOCKMANN', 'JOHN DEERE', 'CLAAS',
    'IVECO', 'SCANIA', 'MAN', 'DAF',
    'KNAUS', 'PÖSSL', 'HYMER', 'WESTFALIA',
]
|
||||
|
||||
# --- 2. FORDÍTÁSOK (DE/NL -> EN) ---
|
||||
# German/Dutch model-name fragments and their English replacements.
# Insertion order is kept: the specific phrases come before the generic
# "REIHE"/"KLASSE" entries.
TRANSLATIONS = {
    "3ER REIHE": "3 Series",
    "5ER REIHE": "5 Series",
    "1ER REIHE": "1 Series",
    "7ER REIHE": "7 Series",
    "E-KLASSE": "E Class",
    "C-KLASSE": "C Class",
    "S-KLASSE": "S Class",
    "A-KLASSE": "A Class",
    "REIHE": "Series",
    "KLASSE": "Class",
    "BESTELWAGEN": "Van",
}
|
||||
|
||||
class RobotScout:
    """R2.3 scout: finds UltimateSpecs pages for pending vehicles and stores them.

    Each loop iteration takes the highest-priority pending vehicle, searches
    UltimateSpecs for it, scrapes the first hit to enrich the original record
    and stores every hit as a new variant record.
    """

    # Scraped (lower-cased) spec label -> numeric database column.
    COLUMN_MAPPING = {
        "horsepower": "power_kw",
        "engine displacement": "engine_capacity",
        "maximum torque": "torque_nm",
        "top speed": "max_speed",
        "curb weight": "curb_weight",
        "wheelbase": "wheelbase",
        "num. of seats": "seats"
    }

    _BASE_URL = "https://www.ultimatespecs.com/"
    _KW_RE = re.compile(r'(\d+(?:\.\d+)?)\s*kw', re.IGNORECASE)
    _HP_RE = re.compile(r'(\d+(?:\.\d+)?)\s*(bhp|hp|ps|cv)\b', re.IGNORECASE)
    # A number with optional comma thousands separators and optional decimals.
    _NUMBER_RE = re.compile(r'\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?')
    # Conversion factors for power figures that carry no kW value.
    _KW_PER_UNIT = {"hp": 0.7457, "bhp": 0.7457, "ps": 0.7355, "cv": 0.7355}

    def __init__(self):
        self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
        # Cleared from outside to make run() stop after the current iteration.
        self.running = True

    def clean_name(self, make, model):
        """Build the search name: translated model with the duplicated make removed."""
        # A missing marketing name is treated as an empty model.
        m = (model or "").upper()
        for de, en in TRANSLATIONS.items():
            m = m.replace(de, en)
        # Drop a repeated make (e.g. VOLVO VOLVO V60 -> VOLVO V60).
        m = m.replace(make.upper(), "").strip()
        return f"{make} {m}"

    def clean_number(self, val: str, key: str = "") -> int:
        """Extract the leading numeric value of a spec string as a rounded int.

        A kW figure wins when present; a value that only carries hp/bhp/PS/cv
        is converted to kW. Commas are read as thousands separators and dots
        as decimal points. Returns 0 for empty, "-" or non-numeric input.
        ``key`` is accepted for call compatibility and is not used.
        """
        if not val or val == "-":
            return 0
        try:
            raw = str(val)
            kw_match = self._KW_RE.search(raw)
            if kw_match:
                return round(float(kw_match.group(1)))
            hp_match = self._HP_RE.search(raw)
            if hp_match:
                factor = self._KW_PER_UNIT[hp_match.group(2).lower()]
                return round(float(hp_match.group(1)) * factor)
            number = self._NUMBER_RE.search(raw)
            if not number:
                return 0
            return round(float(number.group(0).replace(',', '')))
        except (TypeError, ValueError):
            return 0

    @classmethod
    def _absolute_url(cls, href):
        """Resolve a scraped href against the UltimateSpecs base URL."""
        return urllib.parse.urljoin(cls._BASE_URL, href)

    async def _fetch_car_links(self, page, make, model, year, use_year=True):
        """Search UltimateSpecs and return the spec-page links; raises on failure.

        Falls back to a search without the year when the search with the year
        finds nothing.
        """
        clean_model = self.clean_name(make, model)
        # Without a year there is nothing to append (and nothing to fall back from).
        with_year = use_year and year is not None
        search_query = f"{clean_model} {year}" if with_year else clean_model
        url = f"https://www.ultimatespecs.com/index.php?q={urllib.parse.quote(search_query)}"

        logger.info(f"🔎 KERESÉS: {search_query}")

        async def _fetch_links():
            await page.goto(url, wait_until="domcontentloaded", timeout=25000)

            # 1. The search redirected straight to a spec page.
            if any(x in page.url for x in ['/car-specs/', '/motorcycles-specs/']):
                logger.info("🎯 Direkt találat!")
                return [{"name": await page.title(), "url": page.url}]

            # 2. Give the result list a moment to render, then collect links.
            await asyncio.sleep(2)
            return await page.evaluate("""
                () => {
                    let results = [];
                    document.querySelectorAll('a').forEach(a => {
                        let href = a.getAttribute('href') || '';
                        let text = a.innerText.trim();
                        // Csak technikai adatlapokat gyűjtünk, reklámokat/kategóriákat nem
                        if ((href.includes('/car-specs/') || href.includes('/motorcycles-specs/'))
                            && href.includes('.html') && text.length > 3) {
                            results.push({ name: text, url: href });
                        }
                    });
                    return results;
                }
            """)

        variants = await self._retry_with_backoff(
            _fetch_links,
            max_attempts=3,
            base_delay=2,
            exception_message=f"❌ Hálózati hiba a(z) {url} oldalon"
        )
        variants = variants if variants is not None else []

        # 3. Fallback: nothing found with the year, retry the search without it.
        if not variants and with_year:
            logger.info(" ↳ Nincs találat évszámmal, próbálkozom évszám nélkül...")
            return await self._fetch_car_links(page, make, model, year, use_year=False)

        return variants

    async def get_car_links(self, page, make, model, year, use_year=True):
        """Collect all spec-page links for a vehicle; returns [] on any failure."""
        try:
            return await self._fetch_car_links(page, make, model, year, use_year=use_year)
        except Exception as e:
            logger.error(f"❌ Hálózati hiba (végleges): {str(e)[:50]}")
            return []

    async def _retry_with_backoff(self, func, max_attempts=3, base_delay=2,
                                  exception_message="Retry failed", retry_exceptions=True):
        """Await ``func()`` with exponential backoff plus jitter between attempts.

        Re-raises the last exception once ``max_attempts`` is exhausted and
        returns None only when ``max_attempts`` is not positive.
        ``retry_exceptions`` is accepted but currently unused.
        """
        for attempt in range(max_attempts):
            try:
                return await func()
            except Exception as e:
                if attempt == max_attempts - 1:
                    logger.error(f"{exception_message} after {max_attempts} attempts: {str(e)[:100]}")
                    raise
                delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
                logger.warning(f"⚠️ Attempt {attempt + 1} failed: {str(e)[:50]}. Retrying in {delay:.1f}s...")
                await asyncio.sleep(delay)
        return None

    async def scrape_car_details(self, page, url):
        """Scrape all specification data of one UltimateSpecs page.

        Returns a dict of lower-cased labels to raw values (plus a nested
        ``_sections`` entry), or None when every retry failed.
        """
        async def _scrape():
            await page.goto(url, wait_until="networkidle", timeout=30000)

            # Raw string: the JavaScript regexes contain backslashes.
            return await page.evaluate(r"""
                () => {
                    let results = {};

                    // 1. Collect all specification tables (existing logic)
                    document.querySelectorAll('table.table_specs, table.responsive').forEach(table => {
                        table.querySelectorAll('tr').forEach(row => {
                            let t = row.querySelector('.table_specs_title, .td_title, td:first-child');
                            let v = row.querySelector('.table_specs_value, .td_value, td:last-child');
                            if(t && v) {
                                let k = t.innerText.replace(':','').trim().toLowerCase();
                                let val = v.innerText.trim();
                                if(k && val && val !== "-") results[k] = val;
                            }
                        });
                    });

                    // 2. Collect section headers and their content for additional technical data
                    // Look for h2, h3, h4 elements that might contain section titles
                    const sections = {};
                    const headers = document.querySelectorAll('h2, h3, h4, .section-title, .specs-header');

                    headers.forEach(header => {
                        const title = header.innerText.trim();
                        if (title && title.length > 0) {
                            // Find the next table or div with specs after this header
                            let nextElement = header.nextElementSibling;
                            let sectionData = {};

                            // Look for tables or lists in the next few siblings
                            for (let i = 0; i < 5 && nextElement; i++) {
                                if (nextElement.tagName === 'TABLE') {
                                    nextElement.querySelectorAll('tr').forEach(row => {
                                        let t = row.querySelector('td:first-child');
                                        let v = row.querySelector('td:last-child');
                                        if(t && v) {
                                            let k = t.innerText.replace(':','').trim().toLowerCase();
                                            let val = v.innerText.trim();
                                            if(k && val && val !== "-") {
                                                sectionData[k] = val;
                                                // Also add to main results with section prefix
                                                results[`${title.toLowerCase().replace(/ /g, '_')}_${k}`] = val;
                                            }
                                        }
                                    });
                                }
                                nextElement = nextElement.nextElementSibling;
                            }

                            sections[title.toLowerCase().replace(/ /g, '_')] = sectionData;
                        }
                    });

                    // 3. Extract specific known sections by looking for text patterns
                    const pageText = document.body.innerText.toLowerCase();

                    // Check for electric/hybrid sections
                    if (pageText.includes('electric engine') || pageText.includes('battery')) {
                        // Try to find battery voltage, capacity, etc.
                        const batteryRegex = /battery\s*voltage[:\s]*([\d\.]+)\s*v/gi;
                        const match = batteryRegex.exec(document.body.innerText);
                        if (match) results['battery_voltage_v'] = match[1];
                    }

                    // 4. Extract dimensions data
                    const dimensionPatterns = {
                        'wheelbase': /wheelbase[:\s]*([\d\.]+)\s*cm/gi,
                        'length': /length[:\s]*([\d\.]+)\s*cm/gi,
                        'width': /width[:\s]*([\d\.]+)\s*cm/gi,
                        'height': /height[:\s]*([\d\.]+)\s*cm/gi,
                        'curb_weight': /curb\s*weight[:\s]*([\d\.]+)\s*kg/gi,
                        'towing_capacity': /towing\s*capacity[:\s]*([\d\.]+)\s*kg/gi
                    };

                    for (const [key, regex] of Object.entries(dimensionPatterns)) {
                        const match = regex.exec(document.body.innerText);
                        if (match) results[key] = match[1];
                    }

                    // 5. Add sections data as a nested object
                    results['_sections'] = sections;

                    return results;
                }
            """)

        try:
            logger.info(f"🌐 Scraping: {url}")
            return await self._retry_with_backoff(
                _scrape,
                max_attempts=3,
                base_delay=2,
                exception_message=f"❌ Scrape hiba a(z) {url} oldalon"
            )
        except Exception as e:
            logger.error(f"❌ Scrape hiba (végleges): {str(e)[:100]}...")
            return None

    async def _publish_specs(self, db, t_id, make, model, web_data):
        """Write the scraped values onto the original record (not committed here)."""
        updates = {col: self.clean_number(web_data.get(k)) for k, col in self.COLUMN_MAPPING.items()}
        await db.execute(text("""
            UPDATE vehicle.vehicle_model_definitions
            SET power_kw = :power_kw, engine_capacity = :engine_capacity,
                torque_nm = :torque_nm, max_speed = :max_speed,
                curb_weight = :curb_weight,
                wheelbase = :wheelbase, seats = :seats,
                fuel_type = :fuel_type, transmission_type = :transmission_type,
                drive_type = :drive_type, body_type = :body_type,
                specifications = COALESCE(specifications, '{}'::jsonb) || :full_json,
                status = 'awaiting_ai_synthesis', updated_at = NOW()
            WHERE id = :id
        """), {
            **updates,
            "id": t_id,
            "fuel_type": web_data.get('fuel type', 'Unknown'),
            "transmission_type": web_data.get('transmission', 'Unknown'),
            "drive_type": web_data.get('drive type', 'Unknown'),
            "body_type": web_data.get('body type', 'Unknown'),
            "full_json": json.dumps(web_data)
        })
        logger.info(f"✅ AZONNALI PUBLIKÁLÁS: {make} {model} ({updates.get('power_kw', 0)} kW)")

    async def _save_variants(self, db, make, year, links):
        """Insert every link not yet known as a variant record; return the count added."""
        added = 0
        for link in links:
            full_url = self._absolute_url(link['url'])

            # Known variants are recognised by the URL stored in raw_api_data.
            check_query = text("SELECT id FROM vehicle.vehicle_model_definitions WHERE raw_api_data->>'url' = :u")
            exists = (await db.execute(check_query, {"u": full_url})).fetchone()
            if exists:
                continue

            # Normalised name derived from the link text.
            normalized = link['name'].lower().replace(' ', '_').replace('-', '_').replace('.', '').replace(',', '')[:200]

            await db.execute(text("""
                INSERT INTO vehicle.vehicle_model_definitions
                (make, marketing_name, normalized_name, year_from, status,
                 raw_api_data, priority_score, source, market,
                 technical_code, variant_code, version_code,
                 specifications, marketing_name_aliases, raw_search_context)
                VALUES (:make, :name, :normalized, :year, 'awaiting_ai_synthesis',
                        :raw, 30, 'ultimatespecs', 'EU',
                        'UNKNOWN', 'UNKNOWN', 'UNKNOWN',
                        '{}'::jsonb, '[]'::jsonb, '')
            """), {
                "make": make, "name": link['name'], "normalized": normalized,
                "year": year, "raw": json.dumps({"url": full_url})
            })
            added += 1
        return added

    async def _process_next_target(self, db, page):
        """Handle one pending vehicle; return False when nothing is left to process."""
        # Next unprocessed vehicle, excluding the makes in JUNK_LIST.
        target = (await db.execute(text("""
            SELECT id, make, marketing_name, year_from FROM vehicle.vehicle_model_definitions
            WHERE status IN ('pending', 'manual_review_needed')
            AND NOT (make = ANY(:junks))
            ORDER BY priority_score DESC LIMIT 1
        """), {"junks": JUNK_LIST})).fetchone()

        if not target:
            logger.info("✨ Minden tétel feldolgozva.")
            return False

        t_id, make, model, year = target
        logger.info(f"🚀 CÉLPONT: {make} {model} ({year}) [ID: {t_id}]")

        try:
            # The raising variant keeps network failures apart from empty results.
            links = await self._fetch_car_links(page, make, model, year)
        except Exception as e:
            logger.error(f"❌ Hálózati hiba linkek lekérésekor: {str(e)[:100]}")
            await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='research_failed_network' WHERE id=:id"), {"id": t_id})
            await db.commit()
            return True

        if not links:
            logger.warning("❌ Nem található adatlap. research_failed_empty rögzítése.")
            await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='research_failed_empty' WHERE id=:id"), {"id": t_id})
            await db.commit()
            return True

        # --- 1. SCRAPE THE FIRST LINK FOR IMMEDIATE ENRICHMENT ---
        full_url = self._absolute_url(links[0]['url'])
        logger.info(f"⚡ Azonnali adatgyűjtés: {full_url}")
        web_data = await self.scrape_car_details(page, full_url)

        enriched = False
        if web_data is None:
            # Scraping failed after all retries; the links are still saved below.
            logger.error("❌ Scraping sikertelen minden próbálkozás után. research_failed_parsing rögzítése.")
            await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='research_failed_parsing' WHERE id=:id"), {"id": t_id})
            await db.commit()
        elif len(web_data) >= 5:
            await self._publish_specs(db, t_id, make, model, web_data)
            enriched = True
        else:
            logger.warning("⚠️ Scraping kevés adatot talált, csak linkek mentve.")

        # --- 2. SAVE ALL LINKS AS NEW VARIANT RECORDS ---
        added = await self._save_variants(db, make, year, links)

        # Archive the original record unless it was published above. Keyed on
        # ``enriched`` because scraped data always holds '_sections' and is
        # therefore never empty; otherwise the record would stay selectable.
        if not enriched:
            await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='expanded_to_variants', updated_at=NOW() WHERE id=:id"), {"id": t_id})

        await db.commit()
        logger.info(f"✅ SIKER: {added} új variáció mentve. R4-R5 robotok értesítve.")
        return True

    async def run(self):
        """Process pending vehicles one at a time until none are left or ``running`` is cleared."""
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context(user_agent=self.user_agent)
                page = await context.new_page()

                while self.running:
                    # Throttle: a 3-6 second pause at the start of every round.
                    wait = random.uniform(3, 6)
                    logger.info(f"💤 Várakozás {wait:.1f} mp...")
                    await asyncio.sleep(wait)

                    async with AsyncSessionLocal() as db:
                        if not await self._process_next_target(db, page):
                            break
            finally:
                # Close the browser even when an iteration raised.
                await browser.close()
|
||||
|
||||
if __name__ == "__main__":
    scout = RobotScout()

    def stop_signal(signum, _frame):
        """SIGINT handler: log the stop request, clear the run flag and exit with status 0."""
        logger.info("🛑 LEÁLLÍTÁS (Kérés érzékelve)...")
        scout.running = False
        sys.exit(0)

    # Route Ctrl+C through stop_signal.
    signal.signal(signal.SIGINT, stop_signal)

    try:
        asyncio.run(scout.run())
    except KeyboardInterrupt:
        # An interrupt that still surfaces here ends the script quietly.
        pass
|
||||
111
archive/old_files/backend/archive_v1_scripts/discovery_bot.py.old
Executable file
111
archive/old_files/backend/archive_v1_scripts/discovery_bot.py.old
Executable file
@@ -0,0 +1,111 @@
|
||||
# /opt/docker/dev/service_finder/backend/discovery_bot.py
|
||||
import asyncio
|
||||
import json
|
||||
import httpx
|
||||
import os
|
||||
import hashlib
|
||||
import logging
|
||||
from urllib.parse import quote
|
||||
from sqlalchemy import select
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.staged_data import ServiceStaging
|
||||
|
||||
# Logolás beállítása
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
|
||||
logger = logging.getLogger("OSM-Discovery")
|
||||
|
||||
# Konfiguráció
|
||||
HUNGARY_BBOX = "45.7,16.1,48.6,22.9"
|
||||
OVERPASS_URL = "http://overpass-api.de/api/interpreter?data="
|
||||
|
||||
class OSMDiscoveryBot:
    """Imports car-service points from OpenStreetMap (Overpass API) into the staging table."""

    @staticmethod
    def generate_fingerprint(name: str, city: str) -> str:
        """Return the deduplication fingerprint of a service.

        Only the lower-cased name and city are hashed; the street address is
        left out because OSM address data is often incomplete.
        """
        raw = f"{str(name).lower()}|{str(city).lower()}"
        return hashlib.md5(raw.encode()).hexdigest()

    @staticmethod
    def get_service_type(tags: dict, name: str) -> str:
        """Map OSM tags and the (Hungarian) name onto an internal service category."""
        name = (name or '').lower()
        shop = tags.get('shop', '')
        amenity = tags.get('amenity', '')

        if shop == 'tyres' or 'gumi' in name:
            return 'tire_shop'
        if amenity == 'car_wash' or 'mosó' in name:
            return 'car_wash'
        if any(x in name for x in ['villamos', 'autóvill', 'elektro']):
            return 'electrician'
        if any(x in name for x in ['fényez', 'lakatos', 'karosszéria']):
            return 'body_shop'
        return 'mechanic'

    async def fetch_osm_data(self, query_part: str):
        """Run one Overpass query over the Hungary bounding box.

        Returns the list of matching elements (nodes and way centres), or an
        empty list on a request error or a non-200 response.
        """
        query = f'[out:json][timeout:120];(node{query_part}({HUNGARY_BBOX});way{query_part}({HUNGARY_BBOX}););out center;'
        async with httpx.AsyncClient(timeout=150) as client:
            try:
                resp = await client.get(OVERPASS_URL + quote(query))
                if resp.status_code != 200:
                    logger.warning(f"⚠️ Overpass HTTP {resp.status_code}: {query_part}")
                    return []
                return resp.json().get('elements', [])
            except Exception as e:
                logger.error(f"❌ Overpass hiba: {e}")
                return []

    async def sync(self):
        """Fetch all configured OSM categories and stage every service not seen before."""
        logger.info("🛰️ OSM Országos szinkronizáció indítása...")

        # 1. Overpass tag filters to run.
        queries = [
            '["shop"~"car_repair|tyres"]',
            '["amenity"="car_wash"]'
        ]

        all_elements = []
        for q in queries:
            all_elements.extend(await self.fetch_osm_data(q))

        logger.info(f"📊 {len(all_elements)} potenciális szervizpont érkezett.")

        async with AsyncSessionLocal() as db:
            added_count = 0
            # Fingerprints handled in this run; rows added here may not be
            # visible to the SELECT below until they are flushed.
            seen_fingerprints = set()
            for node in all_elements:
                tags = node.get('tags', {})
                name = tags.get('name')
                if not name:
                    continue

                city = tags.get('addr:city', 'Ismeretlen')
                street = tags.get('addr:street', '')
                housenumber = tags.get('addr:housenumber', '')

                f_print = self.generate_fingerprint(name, city)
                if f_print in seen_fingerprints:
                    continue
                seen_fingerprints.add(f_print)

                # Deduplicate against rows already staged; first() tolerates
                # a table that already holds the same fingerprint twice.
                stmt = select(ServiceStaging).where(ServiceStaging.fingerprint == f_print)
                existing = (await db.execute(stmt)).scalars().first()
                if existing:
                    continue

                db.add(ServiceStaging(
                    name=name,
                    source="osm_discovery_v2",
                    fingerprint=f_print,
                    city=city,
                    full_address=f"{city}, {street} {housenumber}".strip(", "),
                    status="pending",
                    trust_score=20,  # OSM data gets a lower trust score than Google data
                    raw_data=tags
                ))
                added_count += 1

            await db.commit()
            logger.info(f"✅ Szinkron kész. {added_count} új elem került a Staging táblába.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
bot = OSMDiscoveryBot()
|
||||
asyncio.run(bot.sync())
|
||||
52
archive/old_files/backup_manager.sh.old
Executable file
52
archive/old_files/backup_manager.sh.old
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# 🛡️ DOCKER INFRA - GFS BACKUP SYSTEM (Ubuntu 24.04 Optimized)
#
# Dumps every database of the shared-postgres container, archives the project
# tree and stores the archive on the NAS with daily / weekly / monthly rotation.
# Old backups are pruned only after the new backup has been stored.

# Fail on unset variables and on failures inside pipelines; individual
# commands are checked explicitly below.
set -uo pipefail

# PATHS
PROJECT_ROOT="/opt/docker"
NAS_ROOT="/mnt/nas/app_data/backups"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
DOM=$(date +%d)
DOW=$(date +%u)
DUMP_FILE="$PROJECT_ROOT/full_db_dump.sql"
BACKUP_FILE="infra_full_$TIMESTAMP.tar.gz"
TMP_ARCHIVE="/tmp/$BACKUP_FILE"

# 5. Remove the temporary SQL dump (and a leftover archive) on every exit path.
cleanup() {
    rm -f "$DUMP_FILE" "$TMP_ARCHIVE"
}
trap cleanup EXIT

# Make sure the target folders exist on the NAS.
if ! mkdir -p "$NAS_ROOT/daily" "$NAS_ROOT/weekly" "$NAS_ROOT/monthly"; then
    echo "❌ A NAS mappák nem hozhatók létre: $NAS_ROOT" >&2
    exit 1
fi

echo "--- 📦 Mentés indítása: $TIMESTAMP ---"

# 1. DATABASE DUMP (consistent SQL dump of all databases)
if ! docker exec shared-postgres pg_dumpall -c -U postgres > "$DUMP_FILE"; then
    echo "❌ Adatbázis mentés sikertelen, megszakítás." >&2
    exit 1
fi

# 2. ARCHIVE PROJECT TREE AND CONFIGURATION
# Raw database files, node_modules folders and logs are excluded.
tar -czf "$TMP_ARCHIVE" -C "$PROJECT_ROOT" \
    --exclude='infra/postgres/data' \
    --exclude='node_modules' \
    --exclude='*.log' \
    .
tar_rc=$?
# GNU tar exits with 1 when a file changed while being read; the archive is
# still usable, so only higher exit codes are treated as fatal.
if [ "$tar_rc" -gt 1 ]; then
    echo "❌ Tömörítés sikertelen (tar kilépési kód: $tar_rc), megszakítás." >&2
    exit 1
fi

# 3. GFS ROTATION COPIES
if [ "$DOM" == "01" ]; then
    echo "Havi mentés rögzítése..."
    cp "$TMP_ARCHIVE" "$NAS_ROOT/monthly/" || { echo "❌ Havi mentés másolása sikertelen." >&2; exit 1; }
fi

if [ "$DOW" == "7" ]; then
    echo "Heti mentés rögzítése..."
    cp "$TMP_ARCHIVE" "$NAS_ROOT/weekly/" || { echo "❌ Heti mentés másolása sikertelen." >&2; exit 1; }
fi

# Daily backup
if ! mv "$TMP_ARCHIVE" "$NAS_ROOT/daily/"; then
    echo "❌ Napi mentés áthelyezése sikertelen." >&2
    exit 1
fi

# 4. AUTOMATIC CLEAN-UP (daily: 7 days, weekly: 30 days, monthly: 1 year)
find "$NAS_ROOT/daily" -type f -mtime +7 -delete
find "$NAS_ROOT/weekly" -type f -mtime +30 -delete
find "$NAS_ROOT/monthly" -type f -mtime +365 -delete

echo "✅ Mentés sikeresen lezárva: $NAS_ROOT/daily/$BACKUP_FILE"
|
||||
239
archive/old_files/docker-compose_1.9.9.yml.old
Executable file
239
archive/old_files/docker-compose_1.9.9.yml.old
Executable file
@@ -0,0 +1,239 @@
|
||||
services:
|
||||
# 1. ADATBÁZIS MIGRÁCIÓ (Alembic)
|
||||
migrate:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
container_name: service_finder_migrate
|
||||
env_file: .env
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
environment:
|
||||
- PYTHONPATH=/app
|
||||
command: >
|
||||
bash -c "alembic upgrade head"
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
restart: "no"
|
||||
|
||||
# 2. BACKEND API (FastAPI)
|
||||
service_finder_api:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
container_name: service_finder_api
|
||||
env_file: .env
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- /mnt/nas/app_data:/mnt/nas/app_data
|
||||
- ./static_previews:/app/static/previews
|
||||
environment:
|
||||
- PYTHONPATH=/app
|
||||
depends_on:
|
||||
migrate:
|
||||
condition: service_completed_successfully
|
||||
minio:
|
||||
condition: service_started
|
||||
redis:
|
||||
condition: service_started
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
restart: unless-stopped
|
||||
|
||||
# 3. MINIO (Object Storage)
|
||||
minio:
|
||||
image: minio/minio
|
||||
container_name: service_finder_minio
|
||||
env_file: .env
|
||||
command: server /data --console-address ":9001"
|
||||
volumes:
|
||||
- /mnt/nas/app_data/minio_data:/data
|
||||
networks:
|
||||
- default
|
||||
restart: unless-stopped
|
||||
|
||||
# 4. REDIS (Cache & Queue)
|
||||
redis:
|
||||
image: redis:alpine
|
||||
container_name: service_finder_redis
|
||||
volumes:
|
||||
- /mnt/nas/app_data/redis_data:/data
|
||||
networks:
|
||||
- default
|
||||
restart: unless-stopped
|
||||
|
||||
# 5. FRONTEND
|
||||
service_frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
container_name: service_finder_frontend
|
||||
env_file: .env
|
||||
ports:
|
||||
- "3001:80"
|
||||
networks:
|
||||
- default
|
||||
depends_on:
|
||||
service_finder_api:
|
||||
condition: service_started
|
||||
restart: unless-stopped
|
||||
|
||||
# 6. KATALÓGUS ROBOT (Discovery)
|
||||
catalog_robot:
|
||||
build: ./backend
|
||||
command: python -u -m app.workers.catalog_robot
|
||||
deploy:
|
||||
replicas: 1
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
env_file: .env
|
||||
depends_on:
|
||||
migrate:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
restart: always
|
||||
|
||||
# 7. SERVICE HUNTER (Web Scraping)
|
||||
service_hunter:
|
||||
build: ./backend
|
||||
container_name: service_finder_robot_hunter
|
||||
command: python -u -m app.workers.service_hunter
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
env_file: .env
|
||||
depends_on:
|
||||
migrate:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
restart: always
|
||||
|
||||
# 8. n8n AUTOMATIZÁCIÓ
|
||||
n8n:
|
||||
image: n8nio/n8n:latest
|
||||
container_name: service_finder_n8n
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "5678:5678"
|
||||
env_file: .env
|
||||
volumes:
|
||||
- ./n8n/data:/home/node/.n8n
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
depends_on:
|
||||
- n8n_db
|
||||
|
||||
n8n_db:
|
||||
image: postgres:15-alpine
|
||||
container_name: service_finder_n8n_db
|
||||
restart: unless-stopped
|
||||
env_file: .env
|
||||
volumes:
|
||||
- ./n8n/db_data:/var/lib/postgresql/data
|
||||
networks:
|
||||
- default
|
||||
|
||||
# 9. BROWSERLESS
|
||||
browserless:
|
||||
image: browserless/chrome:latest
|
||||
container_name: service_finder_browserless
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "3005:3000"
|
||||
networks:
|
||||
- default
|
||||
|
||||
# 10. ROBOT 2.1 - RESEARCHER (Porszívó - Hálózati kutató)
|
||||
# Mivel I/O bound (netre vár), futtathatjuk több példányban (pl. 3 szálon)
|
||||
robot_researcher:
|
||||
build: ./backend
|
||||
command: python -u -m app.workers.researcher_v2_1
|
||||
deploy:
|
||||
replicas: 3
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
env_file: .env
|
||||
depends_on:
|
||||
migrate:
|
||||
condition: service_completed_successfully
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
restart: always
|
||||
|
||||
# 11. ROBOT 2.2 - ALCHEMIST (Vegyész - GPU AI dúsító)
|
||||
# Ez használja a GPU-t, ebből általában 1 példány elég a VRAM miatt
|
||||
robot_alchemist:
|
||||
build: ./backend
|
||||
command: python -u -m app.workers.alchemist_v2_2
|
||||
deploy:
|
||||
replicas: 1
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
env_file: .env
|
||||
depends_on:
|
||||
migrate:
|
||||
condition: service_completed_successfully
|
||||
ollama:
|
||||
condition: service_started
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
restart: always
|
||||
|
||||
# 12. AI a szerveren :)
|
||||
ollama:
  image: ollama/ollama:latest
  container_name: service_finder_ollama
  restart: always
  volumes:
    - ./ollama_data:/root/.ollama
  ports:
    - "11434:11434"
  environment:
    - OLLAMA_KEEP_ALIVE=24h
    # In list form everything after '=' is passed verbatim, so quotes around
    # the value would become part of it; the wildcard is written bare.
    - OLLAMA_ORIGINS=*
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: 1
            capabilities: [gpu]
  networks:
    - default
    - shared_db_net
|
||||
|
||||
# 13. VIN AUDITOR
|
||||
vin_auditor:
|
||||
build: ./backend
|
||||
container_name: service_finder_vin_auditor
|
||||
command: python -u -m app.workers.vin_auditor
|
||||
restart: always
|
||||
env_file: .env
|
||||
depends_on:
|
||||
ollama:
|
||||
condition: service_started
|
||||
networks:
|
||||
- default
|
||||
- shared_db_net
|
||||
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
shared_db_net:
|
||||
external: true
|
||||
144
archive/old_files/docker-compose_sentinel.yml.old
Executable file
144
archive/old_files/docker-compose_sentinel.yml.old
Executable file
@@ -0,0 +1,144 @@
|
||||
# /opt/docker/dev/service_finder/docker-compose.yml
|
||||
services:
|
||||
# --- DATABASE MANAGEMENT ---
# One-shot job: waits 5 seconds, applies the Alembic migrations, then runs app.final_admin_fix.
migrate:
  container_name: sentinel_migrate
  build: ./backend
  env_file: .env
  # Never restart: other services wait for this job to complete successfully.
  restart: "no"
  volumes: ["./backend:/app"]
  networks: [sentinel_net, shared_db_net]
  command: >
    bash -c "sleep 5 && alembic upgrade head && python -m app.final_admin_fix"
|
||||
|
||||
# --- CENTRAL API ---
api:
  container_name: sentinel_api
  build: ./backend
  env_file: .env
  restart: unless-stopped
  ports: ["8000:8000"]
  networks: [sentinel_net, shared_db_net]
  volumes:
    - ./backend:/app
    # Host path mounted at the identical path inside the container.
    - /mnt/nas/app_data:/mnt/nas/app_data
    - ./static_previews:/app/static/previews
  # Wait for the migration job to finish and for redis to be started.
  depends_on:
    migrate:
      condition: service_completed_successfully
    redis:
      condition: service_started
|
||||
|
||||
# --- SERVICE DIVISION (Service Robots) ---

# Robot 1: Scout (OSM & Hunt)
service_scout:
  container_name: sentinel_service_scout
  build: ./backend
  command: python -u -m app.workers.service.service_robot_1_scout_osm
  env_file: .env
  networks: [sentinel_net, shared_db_net]
  # Started once the api container has been started.
  depends_on:
    api:
      condition: service_started
|
||||
|
||||
# Robot 2: Researcher (data refiner - runs as multiple instances)
service_researcher:
  build: ./backend
  # No container_name on purpose: container names must be unique, so Compose
  # refuses to run more than one replica of a service that sets a fixed name.
  # Compose generates the per-replica names instead.
  command: python -u -m app.workers.service.service_robot_2_researcher
  deploy:
    replicas: 2
  env_file: .env
  networks:
    - sentinel_net
    - shared_db_net
|
||||
|
||||
# Robot 3: Expert (AI enricher - ExpertiseTags)
service_enricher:
  container_name: sentinel_service_enricher
  build: ./backend
  env_file: .env
  command: python -u -m app.workers.service.service_robot_3_enricher
  networks: [sentinel_net, shared_db_net]
|
||||
|
||||
# --- VEHICLE DIVISION (Vehicle Robots) ---

# Robot 2: Alchemist (technical spec refinement - GPU-intensive)
vehicle_alchemist:
  container_name: sentinel_vehicle_alchemist
  build: ./backend
  command: python -u -m app.workers.vehicle.vehicle_robot_2_spec_fix
  env_file: .env
  networks: [sentinel_net, shared_db_net]
  depends_on:
    ollama:
      condition: service_started
  deploy:
    resources:
      reservations:
        devices:
          # Reserve one NVIDIA GPU device for this container.
          - { driver: nvidia, count: 1, capabilities: [gpu] }
|
||||
|
||||
# --- SYSTEM DIVISION (System Robots) ---

# Robot 1: Document processor (OCR)
system_ocr:
  container_name: sentinel_system_ocr
  build: ./backend
  command: python -u -m app.workers.system.robot_1_ocr_processor
  env_file: .env
  # NOTE(review): unlike the other worker services in this file, this one is not
  # attached to shared_db_net - confirm the OCR worker needs no database access.
  networks: [sentinel_net]
  volumes: ["/mnt/nas/app_data:/mnt/nas/app_data"]
|
||||
|
||||
# --- AI CORE & INFRA ---
ollama:
  container_name: sentinel_ollama
  image: ollama/ollama:latest
  ports: ["11434:11434"]
  networks: [sentinel_net]
  # Bind-mount ./ollama_data so the contents of /root/.ollama survive container re-creation.
  volumes: ["./ollama_data:/root/.ollama"]
  deploy:
    resources:
      reservations:
        devices:
          # Reserve one NVIDIA GPU device for this container.
          - { driver: nvidia, count: 1, capabilities: [gpu] }
|
||||
|
||||
# Redis instance, reachable only on the project-internal network (no published ports).
redis:
  container_name: sentinel_redis
  image: redis:alpine
  networks: [sentinel_net]
|
||||
|
||||
# MinIO server; /data is bind-mounted from /mnt/nas/app_data/minio_data on the host.
minio:
  container_name: sentinel_minio
  image: minio/minio
  env_file: .env
  command: server /data --console-address ":9001"
  networks: [sentinel_net]
  volumes: ["/mnt/nas/app_data/minio_data:/data"]
|
||||
|
||||
# Networks referenced by the services above.
networks:
  # Project-internal bridge network.
  sentinel_net: { driver: bridge }
  # Marked external: Compose expects this network to exist already and does not create it.
  shared_db_net: { external: true }
|
||||
Reference in New Issue
Block a user