2026.03.29 20:00 Gitea_manager javítás előtt

This commit is contained in:
Roo
2026-03-29 17:59:06 +00:00
parent 03258db091
commit ba8b6579ef
148 changed files with 7951 additions and 591 deletions

View File

@@ -0,0 +1,175 @@
# /opt/docker/dev/service_finder/backend/app/models/marketplace/service.py
import uuid
from datetime import datetime
from typing import Any, List, Optional
from sqlalchemy import Integer, String, Boolean, DateTime, ForeignKey, text, Text, Float, Index, Numeric, BigInteger
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy.dialects.postgresql import UUID as PG_UUID, JSONB
from geoalchemy2 import Geometry
from sqlalchemy.sql import func
# MB 2.0: Központi aszinkron adatbázis motorból húzzuk be a Base-t
from app.database import Base
class ServiceProfile(Base):
""" Szerviz szolgáltató adatai (v1.3.1). """
__tablename__ = "service_profiles"
__table_args__ = (
Index('idx_service_fingerprint', 'fingerprint', unique=True),
{"schema": "marketplace"}
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
organization_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("fleet.organizations.id"), unique=True)
parent_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("marketplace.service_profiles.id"))
fingerprint: Mapped[str] = mapped_column(String(255), index=True, nullable=False)
location: Mapped[Any] = mapped_column(Geometry(geometry_type='POINT', srid=4326, spatial_index=False), index=True)
status: Mapped[str] = mapped_column(String(20), server_default=text("'ghost'"), index=True)
last_audit_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
google_place_id: Mapped[Optional[str]] = mapped_column(String(100), unique=True)
rating: Mapped[Optional[float]] = mapped_column(Float)
user_ratings_total: Mapped[Optional[int]] = mapped_column(Integer)
# Aggregated verified review ratings (Social 3)
rating_verified_count: Mapped[Optional[int]] = mapped_column(Integer, server_default=text("0"))
rating_price_avg: Mapped[Optional[float]] = mapped_column(Float)
rating_quality_avg: Mapped[Optional[float]] = mapped_column(Float)
rating_time_avg: Mapped[Optional[float]] = mapped_column(Float)
rating_communication_avg: Mapped[Optional[float]] = mapped_column(Float)
rating_overall: Mapped[Optional[float]] = mapped_column(Float)
last_review_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
vibe_analysis: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
social_links: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
specialization_tags: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
trust_score: Mapped[int] = mapped_column(Integer, default=30)
is_verified: Mapped[bool] = mapped_column(Boolean, default=False)
verification_log: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
opening_hours: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
contact_phone: Mapped[Optional[str]] = mapped_column(String)
contact_email: Mapped[Optional[str]] = mapped_column(String)
website: Mapped[Optional[str]] = mapped_column(String)
bio: Mapped[Optional[str]] = mapped_column(Text)
# Kapcsolatok
organization: Mapped["Organization"] = relationship("Organization", back_populates="service_profile")
expertises: Mapped[List["ServiceExpertise"]] = relationship("ServiceExpertise", back_populates="service")
reviews: Mapped[List["ServiceReview"]] = relationship("ServiceReview", back_populates="service")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), onupdate=func.now())
class ExpertiseTag(Base):
"""
Szakmai címkék mesterlistája (MB 2.0).
Ez a tábla vezérli a robotok keresését és a Gamification pontozást is.
"""
__tablename__ = "expertise_tags"
__table_args__ = {"schema": "marketplace"}
id: Mapped[int] = mapped_column(Integer, primary_key=True)
# Egyedi azonosító kulcs (pl. 'ENGINE_REBUILD')
key: Mapped[str] = mapped_column(String(50), unique=True, index=True)
# Megjelenítendő nevek
name_hu: Mapped[Optional[str]] = mapped_column(String(100))
name_en: Mapped[Optional[str]] = mapped_column(String(100))
# Főcsoport (pl. 'MECHANICS', 'ELECTRICAL', 'EMERGENCY')
category: Mapped[Optional[str]] = mapped_column(String(30), index=True)
# --- 🎮 GAMIFICATION ÉS DISCOVERY ---
# Hivatalos címke (True) vagy júzer/robot által javasolt (False)
is_official: Mapped[bool] = mapped_column(Boolean, default=True, server_default=text("true"))
# Ha júzer javasolta, itt tároljuk, ki volt az (XP jóváíráshoz)
suggested_by_id: Mapped[Optional[int]] = mapped_column(BigInteger, ForeignKey("identity.persons.id"))
# ÁLLÍTHATÓ PONTÉRTÉK: Az adatbázisból jön, így bármikor módosítható.
# Ritka szakmáknál magasabb, gyakoriaknál alacsonyabb érték állítható be.
discovery_points: Mapped[int] = mapped_column(Integer, default=10, server_default=text("10"))
# Robot kulcsszavak (JSONB): ["fék", "betét", "tárcsa", "fékfolyadék"]
# A Scout robot ez alapján azonosítja be a szervizt a weboldala alapján.
search_keywords: Mapped[Any] = mapped_column(JSONB, server_default=text("'[]'::jsonb"))
# Népszerűségi mutató (hányszor lett felhasználva a rendszerben)
usage_count: Mapped[int] = mapped_column(Integer, default=0, server_default=text("0"))
# UI ikon azonosító (pl. 'wrench', 'tire-flat', 'car-electric')
icon: Mapped[Optional[str]] = mapped_column(String(50))
# Leírás a szakmáról (Adminisztratív célokra)
description: Mapped[Optional[str]] = mapped_column(Text)
# Időbélyegek
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), onupdate=func.now())
# --- KAPCSOLATOK ---
services: Mapped[List["ServiceExpertise"]] = relationship("ServiceExpertise", back_populates="tag")
# Visszamutatás a beküldőre (ha van)
suggested_by: Mapped[Optional["Person"]] = relationship("Person")
class ServiceExpertise(Base):
"""
KAPCSOLÓTÁBLA: Ez köti össze a szervizt a szakmáival.
Itt tároljuk, hogy az adott szerviznél mennyire validált egy szakma.
"""
__tablename__ = "service_expertises"
__table_args__ = {"schema": "marketplace"}
id: Mapped[int] = mapped_column(Integer, primary_key=True)
service_id: Mapped[int] = mapped_column(Integer, ForeignKey("marketplace.service_profiles.id", ondelete="CASCADE"))
expertise_id: Mapped[int] = mapped_column(Integer, ForeignKey("marketplace.expertise_tags.id", ondelete="CASCADE"))
# Mennyire biztos ez a tudás? (0: robot találta, 1: júzer mondta, 2: igazolt szakma)
confidence_level: Mapped[int] = mapped_column(Integer, default=0, server_default=text("0"))
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=text("now()"))
# Kapcsolatok visszafelé
service = relationship("ServiceProfile", back_populates="expertises")
tag = relationship("ExpertiseTag", back_populates="services")
class ServiceStaging(Base):
""" Hunter (robot) adatok tárolója. """
__tablename__ = "service_staging"
__table_args__ = (
Index('idx_staging_fingerprint', 'fingerprint', unique=True),
{"schema": "marketplace"}
)
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
name: Mapped[str] = mapped_column(String, index=True, nullable=False)
postal_code: Mapped[Optional[str]] = mapped_column(String(10), index=True)
city: Mapped[Optional[str]] = mapped_column(String(100), index=True)
full_address: Mapped[Optional[str]] = mapped_column(String)
fingerprint: Mapped[str] = mapped_column(String(255), nullable=False)
raw_data: Mapped[Any] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
# Additional contact and identification fields
contact_phone: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
website: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
external_id: Mapped[Optional[str]] = mapped_column(String(100), nullable=True, index=True)
status: Mapped[str] = mapped_column(String(20), server_default=text("'pending'"), index=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
class DiscoveryParameter(Base):
""" Robot vezérlési paraméterek adminból. """
__tablename__ = "discovery_parameters"
__table_args__ = {"schema": "marketplace"}
id: Mapped[int] = mapped_column(Integer, primary_key=True)
city: Mapped[str] = mapped_column(String(100))
keyword: Mapped[str] = mapped_column(String(100))
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
last_run_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))

View File

@@ -0,0 +1,73 @@
# /opt/docker/dev/service_finder/backend/app/models/marketplace/staged_data.py
from datetime import datetime
from typing import Optional, Any
from sqlalchemy import String, Integer, DateTime, text, Boolean, Float
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.sql import func
from app.db.base_class import Base
class StagedVehicleData(Base):
""" Robot 2.1 (Researcher) nyers adatgyűjtője. """
__tablename__ = "staged_vehicle_data"
__table_args__ = {"schema": "system"}
id: Mapped[int] = mapped_column(Integer, primary_key=True)
source_url: Mapped[Optional[str]] = mapped_column(String)
raw_data: Mapped[dict] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
status: Mapped[str] = mapped_column(String(20), default="PENDING", index=True)
error_log: Mapped[Optional[str]] = mapped_column(String)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
class ServiceStaging(Base):
""" Robot 1.3 (Scout) által talált nyers szerviz adatok és a Robot 5 (Auditor) naplója. """
__tablename__ = "service_staging"
__table_args__ = {"schema": "marketplace"}
id: Mapped[int] = mapped_column(Integer, primary_key=True)
name: Mapped[str] = mapped_column(String(255), index=True)
source: Mapped[Optional[str]] = mapped_column(String(50))
external_id: Mapped[Optional[str]] = mapped_column(String(100), index=True)
fingerprint: Mapped[str] = mapped_column(String(64), unique=True, index=True)
# Elérhetőségek
city: Mapped[str] = mapped_column(String(100), index=True)
postal_code: Mapped[Optional[str]] = mapped_column(String(10))
full_address: Mapped[Optional[str]] = mapped_column(String(500))
contact_phone: Mapped[Optional[str]] = mapped_column(String(50))
website: Mapped[Optional[str]] = mapped_column(String(255))
contact_email: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
# Beküldés és Bizalom
description: Mapped[Optional[str]] = mapped_column(Text)
submitted_by: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("identity.users.id"))
trust_score: Mapped[int] = mapped_column(Integer, default=0, server_default=text("0"))
# Nyers adatok és Státusz
raw_data: Mapped[dict] = mapped_column(JSONB, server_default=text("'{}'::jsonb"))
status: Mapped[str] = mapped_column(String(20), default="pending", index=True)
# --- Robot 5 (Auditor) technikai mezők ---
# Ezek kellenek a munka naplózásához
rejection_reason: Mapped[Optional[str]] = mapped_column(String(500))
published_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
service_profile_id: Mapped[Optional[int]] = mapped_column(Integer)
organization_id: Mapped[Optional[int]] = mapped_column(Integer)
audit_trail: Mapped[Optional[dict]] = mapped_column(JSONB)
# Időbélyegek
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
updated_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), onupdate=func.now())
class DiscoveryParameter(Base):
""" Felderítési paraméterek (Városok, ahol a Scout keres). """
__tablename__ = "discovery_parameters"
__table_args__ = {"schema": "marketplace"}
id: Mapped[int] = mapped_column(Integer, primary_key=True)
city: Mapped[str] = mapped_column(String(100), unique=True, index=True)
keyword: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
last_run_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python3
"""
Move tables from system schema to gamification schema.
"""
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import text
async def move_tables():
# Use the same DATABASE_URL as sync_engine
from app.core.config import settings
engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))
async with engine.begin() as conn:
# Check if tables exist in system schema
result = await conn.execute(text("""
SELECT table_schema, table_name
FROM information_schema.tables
WHERE table_name IN ('competitions', 'user_scores')
ORDER BY table_schema;
"""))
rows = result.fetchall()
print("Current tables:")
for row in rows:
print(f" {row.table_schema}.{row.table_name}")
# Move competitions
print("\nMoving system.competitions to gamification.competitions...")
try:
await conn.execute(text('ALTER TABLE system.competitions SET SCHEMA gamification;'))
print(" OK")
except Exception as e:
print(f" Error: {e}")
# Move user_scores
print("Moving system.user_scores to gamification.user_scores...")
try:
await conn.execute(text('ALTER TABLE system.user_scores SET SCHEMA gamification;'))
print(" OK")
except Exception as e:
print(f" Error: {e}")
# Verify
result = await conn.execute(text("""
SELECT table_schema, table_name
FROM information_schema.tables
WHERE table_name IN ('competitions', 'user_scores')
ORDER BY table_schema;
"""))
rows = result.fetchall()
print("\nAfter moving:")
for row in rows:
print(f" {row.table_schema}.{row.table_name}")
await engine.dispose()
if __name__ == "__main__":
asyncio.run(move_tables())

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""
Rename tables in system schema to deprecated to avoid extra detection.
"""
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import text
async def rename():
from app.core.config import settings
engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))
async with engine.begin() as conn:
# Check if tables exist
result = await conn.execute(text("""
SELECT table_schema, table_name
FROM information_schema.tables
WHERE table_schema = 'system' AND table_name IN ('competitions', 'user_scores');
"""))
rows = result.fetchall()
print("Tables to rename:")
for row in rows:
print(f" {row.table_schema}.{row.table_name}")
# Rename competitions
try:
await conn.execute(text('ALTER TABLE system.competitions RENAME TO competitions_deprecated;'))
print("Renamed system.competitions -> system.competitions_deprecated")
except Exception as e:
print(f"Error renaming competitions: {e}")
# Rename user_scores
try:
await conn.execute(text('ALTER TABLE system.user_scores RENAME TO user_scores_deprecated;'))
print("Renamed system.user_scores -> system.user_scores_deprecated")
except Exception as e:
print(f"Error renaming user_scores: {e}")
# Verify
result = await conn.execute(text("""
SELECT table_schema, table_name
FROM information_schema.tables
WHERE table_schema = 'system' AND table_name LIKE '%deprecated';
"""))
rows = result.fetchall()
print("\nAfter rename:")
for row in rows:
print(f" {row.table_schema}.{row.table_name}")
await engine.dispose()
if __name__ == "__main__":
asyncio.run(rename())

View File

@@ -0,0 +1,170 @@
# /opt/docker/dev/service_finder/backend/app/scripts/sync_engine.py
#!/usr/bin/env python3
"""
Universal Schema Synchronizer
Dynamically imports all SQLAlchemy models from app.models, compares them with the live database,
and creates missing tables/columns without dropping anything.
Safety First:
- NEVER drops tables or columns.
- Prints planned SQL before execution.
- Requires confirmation for destructive operations (none in this script).
"""
import asyncio
import importlib
import os
import sys
from pathlib import Path
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import inspect, text
from sqlalchemy.schema import CreateTable, AddConstraint
from sqlalchemy.sql.ddl import CreateColumn
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from app.database import Base
from app.core.config import settings
def dynamic_import_models():
"""
Dynamically import all .py files in app.models directory to ensure Base.metadata is populated.
"""
models_dir = Path(__file__).parent.parent / "models"
imported = []
for py_file in models_dir.glob("*.py"):
if py_file.name == "__init__.py":
continue
module_name = f"app.models.{py_file.stem}"
try:
module = importlib.import_module(module_name)
imported.append(module_name)
print(f"✅ Imported {module_name}")
except Exception as e:
print(f"⚠️ Could not import {module_name}: {e}")
# Also ensure the __init__ is loaded (it imports many models manually)
import app.models
print(f"📦 Total tables in Base.metadata: {len(Base.metadata.tables)}")
return imported
async def compare_and_repair():
"""
Compare SQLAlchemy metadata with live database and create missing tables/columns.
"""
print("🔗 Connecting to database...")
engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))
def get_diff_and_repair(connection):
inspector = inspect(connection)
# Get all schemas from models
expected_schemas = sorted({t.schema for t in Base.metadata.sorted_tables if t.schema})
print(f"📋 Expected schemas: {expected_schemas}")
# Ensure enum types exist in marketplace schema
if 'marketplace' in expected_schemas:
print("\n🔧 Ensuring enum types in marketplace schema...")
# moderation_status enum
connection.execute(text("""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'moderation_status' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'marketplace')) THEN
CREATE TYPE marketplace.moderation_status AS ENUM ('pending', 'approved', 'rejected');
END IF;
END $$;
"""))
# source_type enum
connection.execute(text("""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'source_type' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'marketplace')) THEN
CREATE TYPE marketplace.source_type AS ENUM ('manual', 'ocr', 'import');
END IF;
END $$;
"""))
print("✅ Enum types ensured.")
for schema in expected_schemas:
print(f"\n--- 🔍 Checking schema '{schema}' ---")
# Check if schema exists
db_schemas = inspector.get_schema_names()
if schema not in db_schemas:
print(f"❌ Schema '{schema}' missing. Creating...")
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
print(f"✅ Schema '{schema}' created.")
# Get tables in this schema from models
model_tables = [t for t in Base.metadata.sorted_tables if t.schema == schema]
db_tables = inspector.get_table_names(schema=schema)
for table in model_tables:
if table.name not in db_tables:
print(f"❌ Missing table: {schema}.{table.name}")
# Generate CREATE TABLE statement
create_stmt = CreateTable(table)
# Print SQL for debugging
sql_str = str(create_stmt.compile(bind=engine))
print(f" SQL: {sql_str}")
connection.execute(create_stmt)
print(f"✅ Table {schema}.{table.name} created.")
else:
# Check columns
db_columns = {c['name']: c for c in inspector.get_columns(table.name, schema=schema)}
model_columns = table.columns
missing_cols = []
for col in model_columns:
if col.name not in db_columns:
missing_cols.append(col)
if missing_cols:
print(f"⚠️ Table {schema}.{table.name} missing columns: {[c.name for c in missing_cols]}")
for col in missing_cols:
# Generate ADD COLUMN statement
col_type = col.type.compile(dialect=engine.dialect)
sql = f'ALTER TABLE "{schema}"."{table.name}" ADD COLUMN "{col.name}" {col_type}'
if col.nullable is False:
sql += " NOT NULL"
if col.default is not None:
# Handle default values (simplistic)
sql += f" DEFAULT {col.default.arg}"
print(f" SQL: {sql}")
connection.execute(text(sql))
print(f"✅ Column {col.name} added.")
else:
print(f"✅ Table {schema}.{table.name} is uptodate.")
print("\n--- ✅ Schema synchronization complete. ---")
async with engine.begin() as conn:
await conn.run_sync(get_diff_and_repair)
await engine.dispose()
async def main():
print("🚀 Universal Schema Synchronizer")
print("=" * 50)
# Step 1: Dynamic import
print("\n📥 Step 1: Dynamically importing all models...")
dynamic_import_models()
# Step 2: Compare and repair
print("\n🔧 Step 2: Comparing with database and repairing...")
await compare_and_repair()
# Step 3: Final verification
print("\n📊 Step 3: Final verification...")
# Run compare_schema.py logic to confirm everything is green
from app.tests_internal.diagnostics.compare_schema import compare
await compare()
print("\n✨ Synchronization finished successfully!")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Unified Database Synchronizer with Deep Constraint & Index Support
Dynamically imports all SQLAlchemy models, compares metadata with live database,
and creates missing tables, columns, unique constraints, and indexes.
Safety First:
- NEVER drops tables, columns, constraints, or indexes.
- Prints planned SQL before execution.
- Requires confirmation for destructive operations (none in this script).
"""
import asyncio
import importlib
import os
import sys
from pathlib import Path
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import inspect, text, UniqueConstraint, Index
from sqlalchemy.schema import CreateTable, AddConstraint, CreateIndex
from sqlalchemy.sql.ddl import CreateColumn
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from app.database import Base
from app.core.config import settings
def dynamic_import_models():
"""
Dynamically import all .py files in app.models directory to ensure Base.metadata is populated.
"""
models_dir = Path(__file__).parent.parent / "models"
imported = []
for py_file in models_dir.glob("*.py"):
if py_file.name == "__init__.py":
continue
module_name = f"app.models.{py_file.stem}"
try:
module = importlib.import_module(module_name)
imported.append(module_name)
print(f"✅ Imported {module_name}")
except Exception as e:
print(f"⚠️ Could not import {module_name}: {e}")
# Also ensure the __init__ is loaded (it imports many models manually)
import app.models
print(f"📦 Total tables in Base.metadata: {len(Base.metadata.tables)}")
return imported
async def compare_and_repair(apply: bool = False):
"""
Compare SQLAlchemy metadata with live database and create missing
tables, columns, unique constraints, and indexes.
If apply is False, only prints SQL statements without executing.
"""
print("🔗 Connecting to database...")
engine = create_async_engine(str(settings.SQLALCHEMY_DATABASE_URI))
def get_diff_and_repair(connection):
inspector = inspect(connection)
# Get all schemas from models
expected_schemas = sorted({t.schema for t in Base.metadata.sorted_tables if t.schema})
print(f"📋 Expected schemas: {expected_schemas}")
# Ensure enum types exist in marketplace schema
if 'marketplace' in expected_schemas:
print("\n🔧 Ensuring enum types in marketplace schema...")
# moderation_status enum
connection.execute(text("""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'moderation_status' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'marketplace')) THEN
CREATE TYPE marketplace.moderation_status AS ENUM ('pending', 'approved', 'rejected');
END IF;
END $$;
"""))
# source_type enum
connection.execute(text("""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'source_type' AND typnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'marketplace')) THEN
CREATE TYPE marketplace.source_type AS ENUM ('manual', 'ocr', 'import');
END IF;
END $$;
"""))
print("✅ Enum types ensured.")
for schema in expected_schemas:
print(f"\n--- 🔍 Checking schema '{schema}' ---")
# Check if schema exists
db_schemas = inspector.get_schema_names()
if schema not in db_schemas:
print(f"❌ Schema '{schema}' missing. Creating...")
if apply:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
print(f"✅ Schema '{schema}' created.")
else:
print(f" SQL: CREATE SCHEMA IF NOT EXISTS \"{schema}\"")
# Get tables in this schema from models
model_tables = [t for t in Base.metadata.sorted_tables if t.schema == schema]
db_tables = inspector.get_table_names(schema=schema)
for table in model_tables:
if table.name not in db_tables:
print(f"❌ Missing table: {schema}.{table.name}")
# Generate CREATE TABLE statement
create_stmt = CreateTable(table)
sql_str = str(create_stmt.compile(bind=engine))
print(f" SQL: {sql_str}")
if apply:
connection.execute(create_stmt)
print(f"✅ Table {schema}.{table.name} created.")
continue
# Check columns
db_columns = {c['name']: c for c in inspector.get_columns(table.name, schema=schema)}
model_columns = table.columns
missing_cols = []
for col in model_columns:
if col.name not in db_columns:
missing_cols.append(col)
if missing_cols:
print(f"⚠️ Table {schema}.{table.name} missing columns: {[c.name for c in missing_cols]}")
for col in missing_cols:
col_type = col.type.compile(dialect=engine.dialect)
sql = f'ALTER TABLE "{schema}"."{table.name}" ADD COLUMN "{col.name}" {col_type}'
if col.nullable is False:
sql += " NOT NULL"
if col.default is not None:
sql += f" DEFAULT {col.default.arg}"
print(f" SQL: {sql}")
if apply:
connection.execute(text(sql))
print(f"✅ Column {col.name} added.")
else:
print(f"✅ Table {schema}.{table.name} columns are uptodate.")
# Check Unique Constraints
db_unique_constraints = inspector.get_unique_constraints(table.name, schema=schema)
# Map by column names (since constraint names may differ)
db_unique_map = {}
for uc in db_unique_constraints:
key = tuple(sorted(uc['column_names']))
db_unique_map[key] = uc['name']
# Find unique constraints defined in model
model_unique_constraints = [c for c in table.constraints if isinstance(c, UniqueConstraint)]
for uc in model_unique_constraints:
uc_columns = tuple(sorted([col.name for col in uc.columns]))
if uc_columns not in db_unique_map:
# Constraint missing
constraint_name = uc.name or f"uq_{table.name}_{'_'.join(uc_columns)}"
columns_sql = ', '.join([f'"{col}"' for col in uc_columns])
sql = f'ALTER TABLE "{schema}"."{table.name}" ADD CONSTRAINT "{constraint_name}" UNIQUE ({columns_sql})'
print(f"⚠️ Missing unique constraint on {schema}.{table.name} columns {uc_columns}")
print(f" SQL: {sql}")
if apply:
connection.execute(text(sql))
print(f"✅ Unique constraint {constraint_name} added.")
else:
print(f"✅ Unique constraint on {uc_columns} exists.")
# Check Indexes
db_indexes = inspector.get_indexes(table.name, schema=schema)
db_index_map = {}
for idx in db_indexes:
key = tuple(sorted(idx['column_names']))
db_index_map[key] = idx['name']
# Find indexes defined in model (Index objects)
model_indexes = [idx for idx in table.indexes]
for idx in model_indexes:
idx_columns = tuple(sorted([col.name for col in idx.columns]))
if idx_columns not in db_index_map:
# Index missing
index_name = idx.name or f"idx_{table.name}_{'_'.join(idx_columns)}"
columns_sql = ', '.join([f'"{col}"' for col in idx_columns])
unique_sql = "UNIQUE " if idx.unique else ""
sql = f'CREATE {unique_sql}INDEX "{index_name}" ON "{schema}"."{table.name}" ({columns_sql})'
print(f"⚠️ Missing index on {schema}.{table.name} columns {idx_columns}")
print(f" SQL: {sql}")
if apply:
connection.execute(text(sql))
print(f"✅ Index {index_name} added.")
else:
print(f"✅ Index on {idx_columns} exists.")
print("\n--- ✅ Schema synchronization complete. ---")
async with engine.begin() as conn:
await conn.run_sync(get_diff_and_repair)
await engine.dispose()
async def main():
import argparse
parser = argparse.ArgumentParser(description="Unified Database Synchronizer")
parser.add_argument('--apply', action='store_true', help='Apply changes to database (otherwise dryrun)')
args = parser.parse_args()
print("🚀 Unified Database Synchronizer")
print("=" * 50)
# Step 1: Dynamic import
print("\n📥 Step 1: Dynamically importing all models...")
dynamic_import_models()
# Step 2: Compare and repair
print("\n🔧 Step 2: Comparing with database and repairing...")
await compare_and_repair(apply=args.apply)
# Step 3: Final verification
print("\n📊 Step 3: Final verification...")
try:
from app.tests_internal.diagnostics.compare_schema import compare
await compare()
except ImportError:
print("⚠️ compare_schema module not found, skipping verification.")
print("\n✨ Synchronization finished successfully!")
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,111 @@
import os
import json
import logging
import asyncio
import re
from typing import Dict, Any, Optional
from google import genai
from google.genai import types
from sqlalchemy import select
from app.db.session import SessionLocal
from app.models import SystemParameter
logger = logging.getLogger("AI-Service")
class AIService:
"""
AI Service v1.2.5 - Final Integrated Edition
- Robot 2: Technikai dúsítás (Search + Regex JSON parsing)
- Robot 3: OCR (Controlled JSON generation)
"""
api_key = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=api_key) if api_key else None
PRIMARY_MODEL = "gemini-2.0-flash"
@classmethod
async def get_config_delay(cls) -> float:
try:
async with SessionLocal() as db:
stmt = select(SystemParameter).where(SystemParameter.key == "AI_REQUEST_DELAY")
res = await db.execute(stmt)
param = res.scalar_one_or_none()
return float(param.value) if param else 1.0
except Exception: return 1.0
@classmethod
async def get_clean_vehicle_data(cls, make: str, raw_model: str, v_type: str) -> Optional[Dict[str, Any]]:
"""Robot 2: Adatbányászat Google Search segítségével."""
if not cls.client: return None
await asyncio.sleep(await cls.get_config_delay())
search_tool = types.Tool(google_search=types.GoogleSearch())
prompt = f"""
KERESS RÁ az interneten: {make} {raw_model} ({v_type}) pontos gyári modellkódja és technikai adatai.
Adj választ szigorúan csak egy JSON blokkban:
{{
"marketing_name": "tiszta név",
"synonyms": ["név1", "név2"],
"technical_code": "gyári kód",
"year_from": int,
"year_to": int_vagy_null,
"ccm": int,
"kw": int,
"maintenance": {{ "oil_type": "string", "oil_qty": float, "spark_plug": "string", "coolant": "string" }}
}}
FONTOS: A 'technical_code' NEM lehet üres. Ha nem találod, adj 'N/A' értéket!
"""
# Search tool használata esetén a response_mime_type tilos!
config = types.GenerateContentConfig(
system_instruction="Profi járműtechnikai adatbányász vagy. Csak tiszta JSON-t válaszolsz markdown kódblokk nélkül.",
tools=[search_tool],
temperature=0.1
)
try:
response = cls.client.models.generate_content(model=cls.PRIMARY_MODEL, contents=prompt, config=config)
text = response.text
# Tisztítás: ha az AI mégis tenne bele markdown jeleket
clean_json = re.sub(r'```json\s*|```', '', text).strip()
res_json = json.loads(clean_json)
if isinstance(res_json, list) and len(res_json) > 0: res_json = res_json[0]
return res_json if isinstance(res_json, dict) else None
except Exception as e:
logger.error(f"❌ AI hiba ({make} {raw_model}): {e}")
return None
@classmethod
async def analyze_document_image(cls, image_data: bytes, doc_type: str) -> Optional[Dict[str, Any]]:
"""Robot 3: OCR funkció - Forgalmi, Személyi, Számla, Odometer."""
if not cls.client: return None
await asyncio.sleep(await cls.get_config_delay())
prompts = {
"identity": "Személyes okmány adatok (név, szám, lejárat).",
"vehicle_reg": "Forgalmi adatok (rendszám, alvázszám, kW, ccm).",
"invoice": "Számla adatok (partner, végösszeg, dátum).",
"odometer": "Csak a kilométeróra állása számként."
}
# Itt maradhat a response_mime_type, mert nem használunk Search-öt
config = types.GenerateContentConfig(
system_instruction="Profi OCR dokumentum-elemző vagy. Csak tiszta JSON-t válaszolsz.",
response_mime_type="application/json"
)
try:
response = cls.client.models.generate_content(
model=cls.PRIMARY_MODEL,
contents=[
f"Elemezd ezt a képet ({doc_type}): {prompts.get(doc_type, 'OCR')}",
types.Part.from_bytes(data=image_data, mime_type="image/jpeg")
],
config=config
)
res_json = json.loads(response.text)
if isinstance(res_json, list) and len(res_json) > 0: res_json = res_json[0]
return res_json if isinstance(res_json, dict) else None
except Exception as e:
logger.error(f"❌ OCR hiba: {e}")
return None

View File

@@ -0,0 +1,208 @@
import asyncio
import httpx
import logging
import os
import sys
from datetime import datetime, timedelta
from sqlalchemy import text, select
from app.database import AsyncSessionLocal
from app.models.asset import AssetCatalog
# MB 2.0 Szigorú naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-0-Discovery: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Vehicle-Robot-0-Discovery")
class DiscoveryEngine:
"""
THOUGHT PROCESS (IPARI ÜZEMMÓD 2.0):
1. Őrkutya (Watchdog): Megkeresi és kiszabadítja a beragadt feladatokat óránként.
2. Differential Sync (Különbözeti Szinkron): Csak a hiányzó vagy új modelleket rögzíti, a gold_enriched-eket kihagyja.
3. Monthly Scheduler: Havonta egyszer tölti le a teljes RDW adatbázist lapozva.
"""
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
SYNC_STATE_FILE = "/app/temp/.last_rdw_sync" # Állapotfájl, hogy Docker újrainduláskor se kezdje elölről azonnal
@staticmethod
async def run_watchdog():
""" 1. FÁZIS: Az Őrkutya (Dead-Letter Queue Manager) """
logger.info("🐕 Őrkutya: Beragadt feladatok keresése a rendszerben...")
try:
async with AsyncSessionLocal() as db:
# A) Hunter takarítás (visszaállítás pending-re, ha a Hunter lefagyott)
res1 = await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'pending' WHERE status = 'processing' RETURNING id;"))
hunter_resets = len(res1.fetchall())
if hunter_resets > 0:
logger.warning(f"🔄 {hunter_resets} db beragadt Hunter feladat (processing) visszaállítva 'pending'-re.")
# B) AI Robotok takarítása (2 órás timeout)
query2 = text("""
UPDATE vehicle.vehicle_model_definitions
SET status = CASE
WHEN status = 'research_in_progress' THEN 'unverified'
WHEN status = 'ai_synthesis_in_progress' THEN 'awaiting_ai_synthesis'
END
WHERE status IN ('research_in_progress', 'ai_synthesis_in_progress')
AND updated_at < NOW() - INTERVAL '2 hours'
RETURNING id;
""")
res2 = await db.execute(query2)
ai_resets = len(res2.fetchall())
if ai_resets > 0:
logger.warning(f"🔄 {ai_resets} db beragadt AI feladat visszaállítva.")
await db.commit()
except Exception as e:
logger.error(f"❌ Őrkutya hiba: {e}")
@staticmethod
async def seed_manual_bootstrap():
""" 2. FÁZIS: Alapozó adatok rögzítése """
initial_data = [
{"make": "AUDI", "model": "A4", "generation": "B8 (2008-2015)"}, # vehicle_class törölve
{"make": "BMW", "model": "3 SERIES", "generation": "F30 (2012-2019)"}
]
try:
async with AsyncSessionLocal() as db:
for item in initial_data:
stmt = select(AssetCatalog).where(AssetCatalog.make == item["make"], AssetCatalog.model == item["model"])
if not (await db.execute(stmt)).scalar_one_or_none():
db.add(AssetCatalog(**item))
await db.commit()
except Exception as e:
logger.warning(f"Manual bootstrap hiba (Ignorálható, ha az adatbázis már tele van): {e}")
@classmethod
async def fetch_with_retry(cls, client: httpx.AsyncClient, url: str, params: dict, retries: int = 3):
""" Hibatűrő HTTP kérés API leállások ellen. """
for attempt in range(retries):
try:
resp = await client.get(url, params=params, headers=cls.HEADERS)
if resp.status_code == 200:
return resp
elif resp.status_code == 429:
await asyncio.sleep(2 ** attempt)
else:
return None
except httpx.RequestError:
if attempt == retries - 1:
return None
await asyncio.sleep(2 ** attempt)
return None
@classmethod
async def seed_from_rdw(cls):
""" 3. FÁZIS: Távoli felfedezés - KÜLÖNBÖZETI SZINKRONIZÁCIÓ (Differential Sync) """
logger.info("📥 RDW TÖMEGES LETÖLTÉS: Új modellek keresése (Differential Sync)...")
limit = 10000
offset = 0
inserted_count = 0
updated_count = 0
async with httpx.AsyncClient(timeout=60.0) as client:
while True:
params = {
"$select": "merk,handelsbenaming,voertuigsoort,count(*) as total",
"$group": "merk,handelsbenaming,voertuigsoort",
"$order": "total DESC",
"$limit": limit,
"$offset": offset
}
resp = await cls.fetch_with_retry(client, "https://opendata.rdw.nl/resource/m9d7-ebf2.json", params)
if not resp: break
raw_data = resp.json()
if not raw_data: break
logger.info(f"📊 Lapozás: {offset} - {offset + len(raw_data)} tételek analízise...")
async with AsyncSessionLocal() as db:
for entry in raw_data:
make = str(entry.get("merk", "")).upper().strip()
model = str(entry.get("handelsbenaming", "")).upper().strip()
v_kind = entry.get("voertuigsoort", "")
total_count = int(entry.get("total", 0))
if not make or not model: continue
if "Personenauto" in v_kind: v_class = 'car'
elif "Motorfiets" in v_kind: v_class = 'motorcycle'
else: v_class = 'truck'
# A MÁGIA: Különbözeti Szinkronizáció SQL + Explicit Type Casting
query = text("""
INSERT INTO vehicle.catalog_discovery (make, model, vehicle_class, status, priority_score)
SELECT
CAST(:make AS VARCHAR),
CAST(:model AS VARCHAR),
CAST(:v_class AS VARCHAR),
'pending',
:priority
WHERE NOT EXISTS (
SELECT 1 FROM vehicle.vehicle_model_definitions
WHERE make = CAST(:make AS VARCHAR)
AND marketing_name = CAST(:model AS VARCHAR)
AND status = 'gold_enriched'
)
ON CONFLICT (make, model)
DO UPDATE SET priority_score = EXCLUDED.priority_score
WHERE vehicle.catalog_discovery.status != 'processed'
RETURNING xmax;
""")
result = await db.execute(query, {
"make": make, "model": model, "v_class": v_class, "priority": total_count
})
row = result.fetchone()
if row:
if row[0] == 0: inserted_count += 1 # Új beszúrás
else: updated_count += 1 # Meglévő frissítése
await db.commit()
offset += limit
await asyncio.sleep(1)
logger.info(f"✅ RDW Szinkron kész! Új modellek a listán: {inserted_count} | Frissített prioritások: {updated_count}")
# Sikeres futás regisztrálása a fájlrendszeren
os.makedirs(os.path.dirname(cls.SYNC_STATE_FILE), exist_ok=True)
with open(cls.SYNC_STATE_FILE, 'w') as f:
f.write(datetime.now().isoformat())
@classmethod
def should_run_rdw_sync(cls) -> bool:
""" Ellenőrzi, hogy eltelt-e 30 nap a legutóbbi sikeres RDW szinkronizáció óta. """
if not os.path.exists(cls.SYNC_STATE_FILE):
return True
try:
with open(cls.SYNC_STATE_FILE, 'r') as f:
last_sync = datetime.fromisoformat(f.read().strip())
return datetime.now() - last_sync > timedelta(days=30)
except Exception:
return True
@classmethod
async def run(cls):
""" FŐ CIKLUS: Havi ütemező és Óránkénti Őrkutya """
logger.info("🚀 ÉLES ÜZEM: Discovery Engine (Differential Sync) & Watchdog indítása...")
await cls.seed_manual_bootstrap()
while True:
# 1. Óránkénti takarítás
await cls.run_watchdog()
# 2. Havi szinkronizáció ellenőrzése
if cls.should_run_rdw_sync():
await cls.seed_from_rdw()
else:
logger.info("🛌 Az RDW szinkronizáció már lefutott az elmúlt 30 napban. Ugrás...")
# 3. Alvás 1 órát (Heartbeat)
logger.info("⏱️ A Discovery Engine most 1 órát pihen a következő Őrkutya futásig.")
await asyncio.sleep(3600)
if __name__ == "__main__":
asyncio.run(DiscoveryEngine.run())

View File

@@ -0,0 +1,108 @@
# /app/app/workers/vehicle/vehicle_robot_0_strategist.py
import asyncio
import httpx
import logging
import os
from sqlalchemy import text
from app.database import AsyncSessionLocal # MB 2.0 Standard import
# Sentinel rendszerhez illesztett logolás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s]: %(message)s')
logger = logging.getLogger("Vehicle-Robot-0-Strategist")
class Robot0Strategist:
"""
THOUGHT PROCESS:
1. A robot célja a 'priority_score' meghatározása valós piaci adatok (RDW) alapján.
2. Első lépésben ellenőrizzük a sémát (Self-healing), hogy létezik-e az oszlop.
3. A kategóriákat (autó, motor, teher) szétválasztjuk, hogy célzott prioritásokat kapjunk.
4. Az 'ON CONFLICT' logika garantálja, hogy ne rontsuk el a már feldolgozott (processed) sorokat.
5. A prioritás alapja a darabszám: minél több van egy típusból, annál előrébb kerül a listán.
"""
RDW_API = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
# Holland típusok leképezése belső kategóriákra
CATEGORIES = [
{"name": "car", "rdw_types": ["'Personenauto'"]},
{"name": "motorcycle", "rdw_types": ["'Motorfiets'"]},
{"name": "truck", "rdw_types": ["'Bedrijfsauto'", "'Vrachtwagen'", "'Opleggertrekker'"]},
{"name": "other", "rdw_types": ["NOT IN ('Personenauto', 'Motorfiets', 'Bedrijfsauto', 'Vrachtwagen', 'Opleggertrekker')"]}
]
async def get_popular_makes(self, vehicle_class: str, rdw_types: list):
""" Piaci adatok lekérése darabszám szerinti sorrendben. """
if "NOT IN" in rdw_types[0]:
type_filter = f"voertuigsoort {rdw_types[0]}"
else:
type_filter = " OR ".join([f"voertuigsoort = {t}" for t in rdw_types])
params = {
"$select": "merk, count(*) AS darabszam",
"$where": type_filter,
"$group": "merk",
"$order": "darabszam DESC",
"$limit": 500
}
async with httpx.AsyncClient(timeout=45.0) as client:
try:
resp = await client.get(self.RDW_API, params=params, headers=self.HEADERS)
if resp.status_code == 200:
return resp.json()
logger.error(f"⚠️ RDW API Hiba: {resp.status_code}")
return []
except Exception as e:
logger.error(f"❌ Kapcsolati hiba az RDW felé: {e}")
return []
async def run(self):
logger.info("🚀 Robot 0 (Strategist) ONLINE - Piaci elemzés indítása...")
# --- SÉMA ELLENŐRZÉS (Golyóálló megoldás) ---
async with AsyncSessionLocal() as db:
try:
await db.execute(text("ALTER TABLE vehicle.catalog_discovery ADD COLUMN IF NOT EXISTS priority_score INTEGER DEFAULT 0;"))
await db.commit()
logger.info("✅ Adatbázis séma rendben (priority_score aktív).")
except Exception as e:
await db.rollback()
logger.error(f"⚠️ Séma hiba: {e}")
for category in self.CATEGORIES:
v_class = category["name"]
logger.info(f"📊 {v_class.upper()} hadosztály prioritásainak számítása...")
makes = await self.get_popular_makes(v_class, category["rdw_types"])
if not makes: continue
added_count = 0
for item in makes:
make_name = str(item.get("merk", "")).upper().strip()
if not make_name: continue
count = int(item.get("darabszam", 0))
async with AsyncSessionLocal() as db:
try:
# UPSERT: Beállítjuk a prioritást, de nem bántjuk a már kész rekordokat
query = text("""
INSERT INTO vehicle.catalog_discovery (make, model, vehicle_class, status, source, attempts, priority_score)
VALUES (:make, 'ALL_VARIANTS', :class, 'pending', 'STRATEGIST-V2', 0, :score)
ON CONFLICT (make, model, vehicle_class)
DO UPDATE SET priority_score = :score
WHERE vehicle.catalog_discovery.status NOT IN ('processed', 'in_progress');
""")
await db.execute(query, {"make": make_name, "class": v_class, "score": count})
await db.commit()
added_count += 1
except Exception as e:
await db.rollback()
logger.warning(f"❌ Hiba a márka rögzítésekor ({make_name}): {e}")
logger.info(f"{v_class.upper()} kategória kész: {added_count} márka rangsorolva.")
if __name__ == "__main__":
asyncio.run(Robot0Strategist().run())

View File

@@ -0,0 +1,224 @@
import asyncio
import httpx
import logging
import os
import re
import sys
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# Naplózás beállítása a standard kimenetre
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
stream=sys.stdout
)
logger = logging.getLogger("Robot-1-Hunter")
class CatalogHunter:
"""
Vehicle Robot 1.9.3: The Truly Invincible Hunter (SAVEPOINT PATCH)
Kezeli az ALL_VARIANTS utasítást és row-level tranzakcióvédelmet használ.
"""
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
if not text_val: return ""
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
@classmethod
def parse_int(cls, value) -> int:
try:
if value is None or str(value).strip() == "": return 0
return int(float(value))
except (ValueError, TypeError): return 0
@classmethod
def parse_float(cls, value) -> float:
try:
if value is None or str(value).strip() == "": return 0.0
return float(value)
except (ValueError, TypeError): return 0.0
@classmethod
async def fetch_with_retry(cls, client: httpx.AsyncClient, url: str, retries: int = 3):
""" Hibatűrő HTTP lekérdezés exponenciális várakozással. """
for attempt in range(retries):
try:
resp = await client.get(url, headers=cls.HEADERS)
if resp.status_code == 200:
return resp
elif resp.status_code == 429: # Rate limit
await asyncio.sleep(2 ** attempt)
else:
return resp
except httpx.RequestError as e:
if attempt == retries - 1:
logger.debug(f"Hálózati hiba: {e}")
raise
await asyncio.sleep(2 ** attempt)
return None
@classmethod
async def fetch_tech_details(cls, client, plate):
""" Technikai adatok (üzemanyag, teljesítmény, motorkód) begyűjtése. """
results = {
"power_kw": 0, "engine_code": None, "euro_class": None,
"fuel_desc": "Unknown", "co2": 0, "consumption": 0.0
}
try:
# Üzemanyag adatok
f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
if f_resp and f_resp.status_code == 200 and f_resp.json():
f = f_resp.json()[0]
p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
results.update({
"power_kw": max(p1, p2),
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
})
# Motorkód adatok
e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
if e_resp and e_resp.status_code == 200 and e_resp.json():
results["engine_code"] = e_resp.json()[0].get("motorcode")
except Exception:
pass
return results
@classmethod
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
""" Egy adott márka/modell (vagy wildcard) feldolgozása. """
clean_make = make_name.strip().upper()
clean_model = model_name.strip().upper()
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
offset = 0
async with httpx.AsyncClient(timeout=30.0) as client:
while True:
# Dinamikus paraméterezés: ALL_VARIANTS esetén nem szűrünk modellre
if clean_model == 'ALL_VARIANTS':
params = f"merk={clean_make}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
else:
params = f"merk={clean_make}&handelsbenaming={clean_model}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
try:
r = await cls.fetch_with_retry(client, f"{cls.RDW_MAIN}?{params}")
batch = r.json() if r and r.status_code == 200 else []
except Exception as e:
logger.error(f"❌ API hiba: {e}")
break
if not batch:
break
for item in batch:
plate = item.get("kenteken", "UNKNOWN")
try:
# SAVEPOINT: Ha egy rekord mentése hibás, a tranzakció blokk nem sérül
async with db.begin_nested():
tech = await cls.fetch_tech_details(client, plate)
# Valódi modellnév kinyerése (Wildcard esetén fontos)
actual_model = (item.get("handelsbenaming") or clean_model).upper()
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
stmt = insert(VehicleModelDefinition).values(
make=clean_make,
marketing_name=actual_model,
normalized_name=norm_name,
variant_code=item.get("variant", "UNKNOWN"),
version_code=item.get("uitvoering", "UNKNOWN"),
type_approval_number=item.get("typegoedkeuringsnummer"),
technical_code=plate,
engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
power_kw=tech["power_kw"],
fuel_type=tech["fuel_desc"],
engine_code=tech["engine_code"],
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
doors=cls.parse_int(item.get("aantal_deuren")),
width=cls.parse_int(item.get("breedte")),
wheelbase=cls.parse_int(item.get("wielbasis")),
list_price=cls.parse_int(item.get("catalogusprijs")),
max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
body_type=item.get("inrichting"),
co2_emissions_combined=tech["co2"],
fuel_consumption_combined=tech["consumption"],
euro_classification=tech["euro_class"],
cylinders=cls.parse_int(item.get("aantal_cilinders")),
vehicle_class=v_class,
priority_score=priority,
status="unverified", # R2 Researcher számára előkészítve
source="MEGA-HUNTER-v1.9.3"
).on_conflict_do_nothing(
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
)
await db.execute(stmt)
except Exception as e:
logger.warning(f"⚠️ Sor eldobva ({plate}): {e}")
# Batch commit a sikeres sorok után
await db.commit()
offset += len(batch)
if offset >= 500: # Biztonsági korlát egy-egy márkánál
break
await asyncio.sleep(0.5)
# Discovery feladat lezárása
await db.execute(
text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"),
{"id": task_id}
)
await db.commit()
@classmethod
async def run(cls):
logger.info("🤖 Mega-Hunter v1.9.3 ONLINE (SAVEPOINT ENABLED)")
while True:
try:
async with AsyncSessionLocal() as db:
# ATOMI ZÁROLÁS: Keresés, Zárolás és Állapotváltás egy lépésben
query = text("""
UPDATE vehicle.catalog_discovery
SET status = 'processing'
WHERE id = (
SELECT id FROM vehicle.catalog_discovery
WHERE status = 'pending'
ORDER BY priority_score DESC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
RETURNING id, make, model, vehicle_class, priority_score;
""")
result = await db.execute(query)
task = result.fetchone()
await db.commit()
if task:
await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
else:
# Ha nincs munka, 30 másodperc pihenő
await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,179 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
# version: 1.9.6
import asyncio
import httpx
import logging
import os
import re
import sys
from datetime import datetime
from sqlalchemy import text, func
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
# MB 2.0 Standard Naplózás
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s',
stream=sys.stdout
)
logger = logging.getLogger("Robot-1-Hunter")
class CatalogHunter:
"""
Vehicle Robot 1.9.6: Mega-Hunter (TIMESTAMP & INTEGRITY PATCH)
Kezeli az ALL_VARIANTS-t, a Savepoint-okat és az összes kötelező mezőt.
"""
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
if not text_val: return ""
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
@classmethod
def parse_int(cls, value) -> int:
try:
if value is None or str(value).strip() == "": return 0
return int(float(value))
except (ValueError, TypeError): return 0
@classmethod
def parse_float(cls, value) -> float:
try:
if value is None or str(value).strip() == "": return 0.0
return float(value)
except (ValueError, TypeError): return 0.0
@classmethod
async def fetch_with_retry(cls, client: httpx.AsyncClient, url: str, retries: int = 3):
for attempt in range(retries):
try:
resp = await client.get(url, headers=cls.HEADERS)
if resp.status_code == 200: return resp
elif resp.status_code == 429: await asyncio.sleep(2 ** attempt)
else: return resp
except httpx.RequestError:
if attempt == retries - 1: raise
await asyncio.sleep(2 ** attempt)
return None
@classmethod
async def fetch_tech_details(cls, client, plate):
results = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
try:
f_resp = await cls.fetch_with_retry(client, f"{cls.RDW_FUEL}?kenteken={plate}")
if f_resp and f_resp.status_code == 200 and f_resp.json():
f = f_resp.json()[0]
p1 = cls.parse_int(f.get("netto_maximum_vermogen") or f.get("nettomaximumvermogen"))
p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen") or f.get("nominaalcontinuvermogen"))
results.update({
"power_kw": max(p1, p2),
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
})
e_resp = await cls.fetch_with_retry(client, f"{cls.RDW_ENGINE}?kenteken={plate}")
if e_resp and e_resp.status_code == 200 and e_resp.json():
results["engine_code"] = e_resp.json()[0].get("motorcode")
except Exception: pass
return results
@classmethod
async def process_make_model(cls, db, task_id, make_name, model_name, v_class, priority):
clean_make = make_name.strip().upper()
clean_model = model_name.strip().upper()
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
offset = 0
async with httpx.AsyncClient(timeout=30.0) as client:
while True:
if clean_model == 'ALL_VARIANTS':
params = f"merk={clean_make}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
else:
params = f"merk={clean_make}&handelsbenaming={clean_model}&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
try:
r = await cls.fetch_with_retry(client, f"{cls.RDW_MAIN}?{params}")
batch = r.json() if r and r.status_code == 200 else []
except Exception: break
if not batch: break
for item in batch:
plate = item.get("kenteken", "UNKNOWN")
try:
async with db.begin_nested():
tech = await cls.fetch_tech_details(client, plate)
actual_model = (item.get("handelsbenaming") or clean_model).upper()
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
stmt = insert(VehicleModelDefinition).values(
make=clean_make,
marketing_name=actual_model,
normalized_name=norm_name,
variant_code=item.get("variant", "UNKNOWN"),
version_code=item.get("uitvoering", "UNKNOWN"),
technical_code=plate,
engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
power_kw=tech["power_kw"],
fuel_type=tech["fuel_desc"],
engine_code=tech["engine_code"],
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
doors=cls.parse_int(item.get("aantal_deuren")),
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
vehicle_class=v_class,
priority_score=priority,
market='EU', # KÖTELEZŐ
status="unverified",
is_manual=False,
created_at=func.now(), # KÖTELEZŐ DÁTUMOK
updated_at=func.now(),
source="MEGA-HUNTER-v1.9.6"
).on_conflict_do_nothing(
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type']
)
await db.execute(stmt)
except Exception as e:
logger.warning(f"⚠️ Sor eldobva ({plate}): {e}")
await db.commit()
offset += len(batch)
if offset >= 500: break
await asyncio.sleep(0.5)
await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task_id})
await db.commit()
@classmethod
async def run(cls):
logger.info("🤖 Mega-Hunter v1.9.6 ONLINE (TIMESTAMP PATCH)")
while True:
try:
async with AsyncSessionLocal() as db:
query = text("""
UPDATE vehicle.catalog_discovery SET status = 'processing'
WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending'
ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1)
RETURNING id, make, model, vehicle_class, priority_score;
""")
result = await db.execute(query)
task = result.fetchone()
await db.commit()
if task: await cls.process_make_model(db, task[0], task[1], task[2], task[3], task[4])
else: await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,168 @@
import asyncio
import httpx
import logging
import os
import re
import sys
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
"""
Vehicle Robot 2.1.2: A Végleges Vadász
Tökéletes adattípus szinkron. raw_search_context -> string.
"""
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
if not text_val: return "UNKNOWN"
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
@classmethod
def parse_int(cls, value) -> int:
try:
if value is None or str(value).strip() == "": return 0
return int(float(value))
except (ValueError, TypeError): return 0
@classmethod
def parse_float(cls, value) -> float:
try:
if value is None or str(value).strip() == "": return 0.0
return float(value)
except (ValueError, TypeError): return 0.0
@classmethod
async def fetch_tech_details(cls, client, plate):
res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
try:
f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
if f_resp.status_code == 200 and f_resp.json():
f = f_resp.json()[0]
p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
res.update({
"power_kw": max(p1, p2),
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
})
e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
if e_resp.status_code == 200 and e_resp.json():
res["engine_code"] = e_resp.json()[0].get("motorcode")
except Exception: pass
return res
@classmethod
async def process_task(cls, db, task):
clean_make = task.make.strip().upper()
clean_model = task.model.strip().upper()
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
async with httpx.AsyncClient(timeout=30.0) as client:
offset = 0
while True:
params = f"merk={clean_make}"
if clean_model != 'ALL_VARIANTS':
params += f"&handelsbenaming={clean_model}"
params += f"&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
try:
r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
batch = r.json() if r.status_code == 200 else []
except Exception: break
if not batch: break
for item in batch:
plate = item.get("kenteken", "UNKNOWN")
try:
async with db.begin_nested():
tech = await cls.fetch_tech_details(client, plate)
actual_model = (item.get("handelsbenaming") or clean_model).upper()
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
datum_eerste_toelating = str(item.get("datum_eerste_toelating", ""))
year_from = cls.parse_int(datum_eerste_toelating[:4]) if len(datum_eerste_toelating) >= 4 else 0
stmt = insert(VehicleModelDefinition).values(
market='EU',
make=clean_make,
marketing_name=actual_model,
normalized_name=norm_name,
variant_code=item.get("variant", "UNKNOWN"),
version_code=item.get("uitvoering", "UNKNOWN"),
technical_code=plate,
type_approval_number=item.get("typegoedkeuringsnummer"),
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
doors=cls.parse_int(item.get("aantal_deuren")),
width=cls.parse_int(item.get("breedte")),
wheelbase=cls.parse_int(item.get("wielbasis")),
list_price=cls.parse_int(item.get("catalogusprijs")),
max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
fuel_consumption_combined=tech["consumption"],
co2_emissions_combined=tech["co2"],
vehicle_class=task.vehicle_class,
body_type=item.get("inrichting"),
fuel_type=tech["fuel_desc"],
engine_capacity=cls.parse_int(item.get("cilinderinhoud")),
power_kw=tech["power_kw"],
cylinders=cls.parse_int(item.get("aantal_cilinders")),
engine_code=tech["engine_code"],
euro_classification=tech["euro_class"],
year_from=year_from,
priority_score=task.priority_score,
status="unverified",
source="MEGA-HUNTER-v2.1.2",
# JAVÍTÁS: A raw_search_context most már üres STRING (''), ahogy a modell elvárja!
raw_search_context='',
research_metadata={},
specifications={},
marketing_name_aliases=[]
).on_conflict_do_nothing(
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
)
await db.execute(stmt)
except Exception as e:
logger.warning(f"⚠️ Sor hiba ({plate}): {e}")
await db.commit()
offset += len(batch)
if offset >= 500: break
await asyncio.sleep(0.5)
await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
await db.commit()
@classmethod
async def run(cls):
logger.info("🤖 Mega-Hunter v2.1.2 (Adattípus Fix) ONLINE")
while True:
try:
async with AsyncSessionLocal() as db:
query = text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;")
res = await db.execute(query)
task = res.fetchone()
await db.commit()
if task: await cls.process_task(db, task)
else: await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,205 @@
# /app/app/workers/vehicle/vehicle_robot_1_catalog_hunter.py
import asyncio
import httpx
import logging
import os
import re
import sys
import json
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
"""
Vehicle Robot 2.2.0: Fast-Track to Gold Edition
Ha az RDW-ből megvan minden kulcsadat (kw, ccm, fuel), azonnal 'gold_enriched'-re teszi a járművet
és beírja a vehicle_catalog mestertáblába!
"""
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
if not text_val: return "UNKNOWN"
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower()
@classmethod
def parse_int(cls, value) -> int:
try:
if value is None or str(value).strip() == "": return 0
return int(float(value))
except (ValueError, TypeError): return 0
@classmethod
def parse_float(cls, value) -> float:
try:
if value is None or str(value).strip() == "": return 0.0
return float(value)
except (ValueError, TypeError): return 0.0
@classmethod
async def fetch_tech_details(cls, client, plate):
res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
try:
f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
if f_resp.status_code == 200 and f_resp.json():
f = f_resp.json()[0]
p1 = cls.parse_int(f.get("netto_maximum_vermogen"))
p2 = cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
res.update({
"power_kw": max(p1, p2),
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
})
e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
if e_resp.status_code == 200 and e_resp.json():
res["engine_code"] = e_resp.json()[0].get("motorcode")
except Exception: pass
return res
@classmethod
async def process_task(cls, db, task):
clean_make = task.make.strip().upper()
clean_model = task.model.strip().upper()
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
async with httpx.AsyncClient(timeout=30.0) as client:
offset = 0
while True:
params = f"merk={clean_make}"
if clean_model != 'ALL_VARIANTS':
params += f"&handelsbenaming={clean_model}"
params += f"&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
try:
r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
batch = r.json() if r.status_code == 200 else []
except Exception: break
if not batch: break
for item in batch:
plate = item.get("kenteken", "UNKNOWN")
try:
async with db.begin_nested():
tech = await cls.fetch_tech_details(client, plate)
actual_model = (item.get("handelsbenaming") or clean_model).upper()
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
datum_eerste_toelating = str(item.get("datum_eerste_toelating", ""))
year_from = cls.parse_int(datum_eerste_toelating[:4]) if len(datum_eerste_toelating) >= 4 else 0
engine_ccm = cls.parse_int(item.get("cilinderinhoud"))
power_kw = tech["power_kw"]
fuel_type = tech["fuel_desc"]
# FAST-TRACK LOGIKA: Ha a kötelező műszaki adatok megvannak, azonnal ARANY minősítést kap!
# Villanyautóknál a CCM lehet 0, ezt is kezeljük.
is_gold = False
if (power_kw > 0 and engine_ccm > 0) or (power_kw > 0 and "elektri" in fuel_type.lower()):
is_gold = True
final_status = "gold_enriched" if is_gold else "unverified"
# 1. Beírjuk a VMD-be (Staging tábla)
stmt = insert(VehicleModelDefinition).values(
market='EU',
make=clean_make,
marketing_name=actual_model,
normalized_name=norm_name,
variant_code=item.get("variant", "UNKNOWN"),
version_code=item.get("uitvoering", "UNKNOWN"),
technical_code=plate,
type_approval_number=item.get("typegoedkeuringsnummer"),
seats=cls.parse_int(item.get("aantal_zitplaatsen")),
doors=cls.parse_int(item.get("aantal_deuren")),
width=cls.parse_int(item.get("breedte")),
wheelbase=cls.parse_int(item.get("wielbasis")),
list_price=cls.parse_int(item.get("catalogusprijs")),
max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")),
max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
fuel_consumption_combined=tech["consumption"],
co2_emissions_combined=tech["co2"],
vehicle_class=task.vehicle_class,
body_type=item.get("inrichting"),
fuel_type=fuel_type,
engine_capacity=engine_ccm,
power_kw=power_kw,
cylinders=cls.parse_int(item.get("aantal_cilinders")),
engine_code=tech["engine_code"],
euro_classification=tech["euro_class"],
year_from=year_from,
priority_score=task.priority_score,
status=final_status, # Dinamikus státusz
source="MEGA-HUNTER-v2.2.0-FAST",
raw_search_context='',
research_metadata={},
specifications={"fast_track": True}, # Jelezzük, hogy ez RDW-ből jött közvetlenül
marketing_name_aliases=[]
).on_conflict_do_nothing(
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
).returning(VehicleModelDefinition.id)
res = await db.execute(stmt)
vmd_id = res.scalar()
# 2. HA ARANY, AZONNAL LÉPÜNK A VÉGSŐ KATALÓGUSBA (Ahogy az Alchemist is tenné)
if is_gold and vmd_id:
cat_stmt = text("""
INSERT INTO vehicle.vehicle_catalog
(master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING;
""")
await db.execute(cat_stmt, {
"m_id": vmd_id,
"make": clean_make,
"model": actual_model[:50],
"kw": power_kw,
"ccm": engine_ccm,
"fuel": fuel_type,
"factory": json.dumps({"source": "RDW API Direct", "verified": True})
})
logger.info(f"✨ FAST-TRACK ARANY: {clean_make} {actual_model} (KW: {power_kw}, CCM: {engine_ccm})")
except Exception as e:
logger.warning(f"⚠️ Sor hiba ({plate}): {e}")
await db.commit()
offset += len(batch)
if offset >= 500: break
await asyncio.sleep(0.5)
await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
await db.commit()
@classmethod
async def run(cls):
logger.info("🤖 Mega-Hunter v2.2.0 (Fast-Track Edition) ONLINE")
while True:
try:
async with AsyncSessionLocal() as db:
query = text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;")
res = await db.execute(query)
task = res.fetchone()
await db.commit()
if task: await cls.process_task(db, task)
else: await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Főciklus hiba: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,140 @@
import asyncio, httpx, logging, os, re, sys, json
from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-1-Hunter: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-1")
class CatalogHunter:
RDW_MAIN = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
RDW_FUEL = "https://opendata.rdw.nl/resource/8ys7-d773.json"
RDW_ENGINE = "https://opendata.rdw.nl/resource/jh96-v4pq.json"
RDW_TOKEN = os.getenv("RDW_APP_TOKEN")
HEADERS = {"X-App-Token": RDW_TOKEN} if RDW_TOKEN else {}
BATCH_SIZE = 50
@classmethod
def normalize(cls, text_val: str) -> str:
return re.sub(r'[^a-zA-Z0-9]', '', text_val).lower() if text_val else "UNKNOWN"
@classmethod
def parse_int(cls, value) -> int:
try: return int(float(value)) if value and str(value).strip() else 0
except: return 0
@classmethod
def parse_float(cls, value) -> float:
try: return float(value) if value and str(value).strip() else 0.0
except: return 0.0
@classmethod
async def fetch_tech_details(cls, client, plate):
res = {"power_kw": 0, "engine_code": None, "euro_class": None, "fuel_desc": "Unknown", "co2": 0, "consumption": 0.0}
try:
f_resp = await client.get(f"{cls.RDW_FUEL}?kenteken={plate}", headers=cls.HEADERS)
if f_resp.status_code == 200 and f_resp.json():
f = f_resp.json()[0]
p1, p2 = cls.parse_int(f.get("netto_maximum_vermogen")), cls.parse_int(f.get("nominaal_continu_maximum_vermogen"))
res.update({
"power_kw": max(p1, p2),
"fuel_desc": f.get("brandstof_omschrijving") or "Unknown",
"euro_class": f.get("euro_klasse") or f.get("uitlaatemissieniveau"),
"co2": cls.parse_int(f.get("co2_uitstoot_gecombineerd")),
"consumption": cls.parse_float(f.get("brandstofverbruik_gecombineerd"))
})
e_resp = await client.get(f"{cls.RDW_ENGINE}?kenteken={plate}", headers=cls.HEADERS)
if e_resp.status_code == 200 and e_resp.json():
res["engine_code"] = e_resp.json()[0].get("motorcode")
except Exception: pass
return res
@classmethod
async def process_task(cls, db, task):
clean_make, clean_model = task.make.strip().upper(), task.model.strip().upper()
logger.info(f"🎯 ADATGYŰJTÉS INDUL: {clean_make} {clean_model}")
async with httpx.AsyncClient(timeout=30.0) as client:
offset = 0
while True:
params = f"merk={clean_make}" + (f"&handelsbenaming={clean_model}" if clean_model != 'ALL_VARIANTS' else "") + f"&$limit={cls.BATCH_SIZE}&$offset={offset}&$order=kenteken DESC"
try:
r = await client.get(f"{cls.RDW_MAIN}?{params}", headers=cls.HEADERS)
batch = r.json() if r.status_code == 200 else []
except Exception: break
if not batch: break
for item in batch:
plate = item.get("kenteken", "UNKNOWN")
try:
async with db.begin_nested():
tech = await cls.fetch_tech_details(client, plate)
actual_model = (item.get("handelsbenaming") or clean_model).upper()
norm_name = cls.normalize(actual_model.replace(clean_make, "").strip() or actual_model)
datum = str(item.get("datum_eerste_toelating", ""))
year_from = cls.parse_int(datum[:4]) if len(datum) >= 4 else 0
engine_ccm, power_kw, fuel_type = cls.parse_int(item.get("cilinderinhoud")), tech["power_kw"], tech["fuel_desc"]
# FAST-TRACK LOGIKA: Ha van KW és CCM, egyből ARANY!
is_gold = (power_kw > 0 and engine_ccm > 0) or (power_kw > 0 and "elektri" in fuel_type.lower())
final_status = "gold_enriched" if is_gold else "unverified"
stmt = insert(VehicleModelDefinition).values(
market='EU', make=clean_make, marketing_name=actual_model, normalized_name=norm_name,
variant_code=item.get("variant", "UNKNOWN"), version_code=item.get("uitvoering", "UNKNOWN"),
technical_code=plate, type_approval_number=item.get("typegoedkeuringsnummer"),
seats=cls.parse_int(item.get("aantal_zitplaatsen")), doors=cls.parse_int(item.get("aantal_deuren")),
width=cls.parse_int(item.get("breedte")), wheelbase=cls.parse_int(item.get("wielbasis")),
list_price=cls.parse_int(item.get("catalogusprijs")), max_speed=cls.parse_int(item.get("maximale_constructiesnelheid")),
curb_weight=cls.parse_int(item.get("massa_ledig_voertuig")), max_weight=cls.parse_int(item.get("technische_max_massa_voertuig")),
fuel_consumption_combined=tech["consumption"], co2_emissions_combined=tech["co2"],
vehicle_class=task.vehicle_class, body_type=item.get("inrichting"), fuel_type=fuel_type,
engine_capacity=engine_ccm, power_kw=power_kw, cylinders=cls.parse_int(item.get("aantal_cilinders")),
engine_code=tech["engine_code"], euro_classification=tech["euro_class"], year_from=year_from,
priority_score=task.priority_score, status=final_status, source="MEGA-HUNTER-v2.2.0-FAST",
raw_search_context='', research_metadata={}, specifications={"fast_track": True} if is_gold else {}, marketing_name_aliases=[]
).on_conflict_do_nothing(
index_elements=['make', 'normalized_name', 'variant_code', 'version_code', 'fuel_type', 'market', 'year_from']
).returning(VehicleModelDefinition.id)
res = await db.execute(stmt)
vmd_id = res.scalar()
# Automatikus Publikálás (Ha Arany)
if is_gold and vmd_id:
cat_stmt = text("""
INSERT INTO vehicle.vehicle_catalog (master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING;
""")
await db.execute(cat_stmt, {"m_id": vmd_id, "make": clean_make, "model": actual_model[:50], "kw": power_kw, "ccm": engine_ccm, "fuel": fuel_type, "factory": '{"source": "RDW Fast-Track"}'})
logger.info(f"✨ FAST-TRACK ARANY: {clean_make} {actual_model}")
except Exception as e: logger.warning(f"⚠️ Sor hiba ({plate}): {e}")
await db.commit()
offset += len(batch)
if offset >= 500: break
await asyncio.sleep(0.5)
await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processed' WHERE id = :id"), {"id": task.id})
await db.commit()
@classmethod
async def run(cls):
logger.info("🤖 Mega-Hunter v2.2.0 (Fast-Track) ONLINE")
while True:
try:
async with AsyncSessionLocal() as db:
res = await db.execute(text("UPDATE vehicle.catalog_discovery SET status = 'processing' WHERE id = (SELECT id FROM vehicle.catalog_discovery WHERE status = 'pending' ORDER BY priority_score DESC FOR UPDATE SKIP LOCKED LIMIT 1) RETURNING id, make, model, vehicle_class, priority_score;"))
task = res.fetchone()
await db.commit()
if task: await cls.process_task(db, task)
else: await asyncio.sleep(30)
except Exception: await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(CatalogHunter.run())

View File

@@ -0,0 +1,239 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_2_researcher.py
import asyncio
import logging
import warnings
import os
import json
from datetime import datetime
from sqlalchemy import text, update, func
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
warnings.filterwarnings("ignore", category=RuntimeWarning, module='duckduckgo_search')
from duckduckgo_search import DDGS
# MB 2.0 Szabvány naplózás
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Robot-2-Researcher: %(message)s')
logger = logging.getLogger("Vehicle-Robot-2-Researcher")
class QuotaManager:
""" Szigorú napi limit figyelő a fizetős/hatósági API-khoz """
def __init__(self, service_name: str, daily_limit: int):
self.service_name = service_name
self.daily_limit = daily_limit
self.state_file = f"/app/temp/.quota_{service_name}.json"
self._ensure_file()
def _ensure_file(self):
os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
if not os.path.exists(self.state_file):
with open(self.state_file, 'w') as f:
json.dump({"date": datetime.now().strftime("%Y-%m-%d"), "count": 0}, f)
def can_make_request(self) -> bool:
with open(self.state_file, 'r') as f:
data = json.load(f)
today = datetime.now().strftime("%Y-%m-%d")
if data["date"] != today:
data = {"date": today, "count": 0} # Új nap, kvóta nullázása
if data["count"] >= self.daily_limit:
return False
# Növeljük a számlálót
data["count"] += 1
with open(self.state_file, 'w') as f:
json.dump(data, f)
return True
class VehicleResearcher:
"""
Vehicle Robot 2.5: Sniper Researcher (Mesterlövész Adatgyűjtő)
Célzott keresésekkel és strukturált aktakészítéssel dolgozik az AI kímélése érdekében.
"""
def __init__(self):
self.max_attempts = 5
self.search_timeout = 15.0
# Kvóta menedzserek beállítása (.env-ből olvasva)
dvla_limit = int(os.getenv("DVLA_DAILY_LIMIT", "1000"))
self.dvla_quota = QuotaManager("dvla", dvla_limit)
self.dvla_token = os.getenv("DVLA_API_KEY")
async def fetch_ddg_targeted(self, label: str, query: str) -> str:
""" Célzott keresés szálbiztosan a DuckDuckGo-n. """
try:
def search():
with DDGS() as ddgs:
# max_results=2: Nem kell sok zaj, csak a legrelevánsabb 2 találat
results = ddgs.text(query, max_results=2)
return [f"- {r.get('body', '')}" for r in results] if results else []
results = await asyncio.wait_for(asyncio.to_thread(search), timeout=self.search_timeout)
if not results:
return f"[SOURCE: {label}]\nNincs érdemi találat.\n"
content = f"[SOURCE: {label} | KERESÉS: {query}]\n"
content += "\n".join(results) + "\n"
return content
except Exception as e:
logger.debug(f"Keresési hiba ({label}): {e}")
return f"[SOURCE: {label}]\nKERESÉSI HIBA.\n"
def extract_specs_from_text(self, text: str) -> dict:
""" Regex alapú kinyerés a nyers szövegből: ccm, kW, motoradatok. """
import re
specs = {}
# CCM (köbcentiméter) minta: 1998 cc, 2.0 L, 2000 cm³
ccm_pattern = r'(\d{3,4})\s*(?:cc|ccm|cm³|cm3|cc\.)'
match = re.search(ccm_pattern, text, re.IGNORECASE)
if match:
specs['ccm'] = int(match.group(1))
else:
# Alternatív minta: 2.0 liter -> 2000 cc
liter_pattern = r'(\d+\.?\d*)\s*(?:L|liter|)'
match = re.search(liter_pattern, text, re.IGNORECASE)
if match:
liters = float(match.group(1))
specs['ccm'] = int(liters * 1000)
# KW (kilowatt) minta: 150 kW, 150kW, 150 KW
kw_pattern = r'(\d{2,4})\s*(?:kW|kw|KW)'
match = re.search(kw_pattern, text, re.IGNORECASE)
if match:
specs['kw'] = int(match.group(1))
else:
# Le (lóerő) átváltás: 150 LE -> 110 kW (kb)
hp_pattern = r'(\d{2,4})\s*(?:HP|hp|LE|le|Ps)'
match = re.search(hp_pattern, text, re.IGNORECASE)
if match:
hp = int(match.group(1))
specs['kw'] = int(hp * 0.7355) # hozzávetőleges átváltás
# Motor kód minta: motor kód: 1.8 TSI, engine code: N47
engine_pattern = r'(?:motor\s*kód|engine\s*code|motor\s*code)[:\s]+([A-Z0-9\.\- ]+)'
match = re.search(engine_pattern, text, re.IGNORECASE)
if match:
specs['engine_code'] = match.group(1).strip()
return specs
async def research_vehicle(self, db, vehicle_id: int, make: str, model: str, engine: str, year: str, current_attempts: int):
""" Egy jármű átvilágítása és a strukturált 'Akta' elkészítése a GPU számára. """
engine_safe = engine or ""
year_safe = str(year) if year else ""
logger.info(f"🔎 Mesterlövész Kutatás: {make} {model} (Motor: {engine_safe})")
# 1. TIER: Ingyenes, Célzott Keresések (A legmegbízhatóbb források)
queries = [
("ULTIMATE_SPECS", f"{make} {model} {engine_safe} {year_safe} site:ultimatespecs.com"),
("AUTO_DATA", f"{make} {model} {engine_safe} {year_safe} site:auto-data.net"),
("COMMON_ISSUES", f"{make} {model} {engine_safe} reliability common problems")
]
tasks = [self.fetch_ddg_targeted(label, q) for label, q in queries]
search_results = await asyncio.gather(*tasks)
# 2. TIER: Fizetős / Kvótás API-k (Példa a DVLA helyére)
# Ha a jövőben bejön brit rendszám, itt hívjuk meg a DVLA-t:
# if has_uk_plate and self.dvla_quota.can_make_request():
# uk_data = await self.fetch_dvla_data(plate)
# search_results.append(uk_data)
# 3. ÖSSZESÍTÉS (Az Akta összeállítása)
# Maximalizáljuk a szöveg hosszát, hogy az AI GPU ne fulladjon le!
full_context = "\n".join(search_results)
if len(full_context) > 2500:
full_context = full_context[:2500] + "\n...[TRUNCATED TO SAVE GPU TOKENS]"
# Regex alapú specifikáció kinyerés
extracted_specs = self.extract_specs_from_text(full_context)
try:
if len(full_context.strip()) > 150: # Csökkentettük az elvárást, mert a célzott keresés tömörebb
await db.execute(
update(VehicleModelDefinition)
.where(VehicleModelDefinition.id == vehicle_id)
.values(
raw_search_context=full_context,
research_metadata=extracted_specs,
status='awaiting_ai_synthesis', # Kész az Akta, mehet az Alkimistának!
last_research_at=func.now(),
attempts=current_attempts + 1
)
)
logger.info(f"✅ Akta rögzítve ({len(full_context)} karakter): {make} {model}")
else:
new_status = 'suspended_research' if current_attempts + 1 >= self.max_attempts else 'unverified'
await db.execute(
update(VehicleModelDefinition)
.where(VehicleModelDefinition.id == vehicle_id)
.values(
status=new_status,
attempts=current_attempts + 1,
last_research_at=func.now()
)
)
if new_status == 'suspended_research':
logger.warning(f"🛑 Felfüggesztve (Nincs nyom a weben): {make} {model}")
else:
logger.warning(f"⚠️ Kevés adat: {make} {model}, visszatéve a sorba.")
await db.commit()
except Exception as e:
await db.rollback()
logger.error(f"🚨 Adatbázis hiba az eredmény mentésénél ({vehicle_id}): {e}")
@classmethod
async def run(cls):
self_instance = cls()
logger.info("🚀 Vehicle Researcher 2.5 ONLINE (Sniper & Quota Manager)")
while True:
try:
async with AsyncSessionLocal() as db:
# ATOMI ZÁROLÁS
query = text("""
UPDATE vehicle.vehicle_model_definitions
SET status = 'research_in_progress'
WHERE id = (
SELECT id FROM vehicle.vehicle_model_definitions
WHERE status IN ('unverified', 'awaiting_research', 'ACTIVE')
AND attempts < :max_attempts
AND is_manual = FALSE
ORDER BY
CASE WHEN make = 'TOYOTA' THEN 1 ELSE 2 END,
attempts ASC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
RETURNING id, make, marketing_name, engine_code, year_from, attempts;
""")
result = await db.execute(query, {"max_attempts": self_instance.max_attempts})
task = result.fetchone()
await db.commit()
if task:
v_id, v_make, v_model, v_engine, v_year, v_attempts = task
async with AsyncSessionLocal() as process_db:
await self_instance.research_vehicle(process_db, v_id, v_make, v_model, v_engine, v_year, v_attempts)
await asyncio.sleep(2) # Rate limit védelem a DDG felé
else:
await asyncio.sleep(30)
except Exception as e:
logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
try:
asyncio.run(VehicleResearcher.run())
except KeyboardInterrupt:
logger.info("🛑 Kutató robot leállítva.")

View File

@@ -0,0 +1,225 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_3_alchemist_pro.py
import asyncio
import logging
import datetime
import random
import sys
import json
import os
from sqlalchemy import text, func, update, case
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.models.asset import AssetCatalog
from app.services.ai_service import AIService
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] Vehicle-Alchemist-Pro: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Vehicle-Robot-3-Alchemist-Pro")
class TechEnricher:
"""
Vehicle Robot 3: Alchemist Pro (Atomi Zárolás + Kézi Moderáció Patch)
Tiszta GPU fókusz: Csak az AI elemzésre és adategyesítésre koncentrál.
Nincs felesleges webkeresés. Szigorú, de intelligens Sane-Check.
"""
def __init__(self):
self.max_attempts = 5
self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
self.ai_calls_today = 0
self.last_reset_date = datetime.date.today()
def check_budget(self) -> bool:
if datetime.date.today() > self.last_reset_date:
self.ai_calls_today = 0
self.last_reset_date = datetime.date.today()
return self.ai_calls_today < self.daily_ai_limit
def validate_merged_data(self, merged_kw: int, merged_ccm: int, v_class: str, fuel: str, current_attempts: int) -> tuple[bool, str]:
""" Intelligens validáció a MERGE után. Visszaadja a státuszt és a hiba okát. """
if merged_ccm > 18000:
return False, f"Irreális CCM érték ({merged_ccm})"
if merged_kw > 1500 and v_class != "truck":
return False, f"Irreális KW érték ({merged_kw})"
# Ha hiányzik a KW
if merged_kw == 0:
if current_attempts < 3:
return False, "Hiányzó KW adat. Újrakutatás javasolt."
else:
logger.warning("Sane-check: Többszöri próbálkozás után sincs KW, de átengedjük részlegesként.")
# Ha hiányzik a CCM (és belsőégésű)
if merged_ccm == 0 and "electric" not in fuel and "elektric" not in fuel and v_class != "trailer":
if current_attempts < 3:
return False, "Hiányzó CCM (belsőégésű motornál). Újrakutatás javasolt."
else:
logger.warning("Sane-check: Többszöri próbálkozás után sincs CCM, átengedjük részlegesként.")
return True, "OK"
async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
# Pontos azonosító a logokhoz (Márka, Modell, ID, RDW adatok)
v_ident = f"{base_info['make'].upper()} {base_info['m_name']} (ID: {record_id}, RDW: {base_info['rdw_ccm']}ccm, KW: {base_info['rdw_kw']})"
attempt_str = f"[Próba: {current_attempts + 1}/{self.max_attempts}]"
ai_data = {} # Üres dict, ha az AI hívás elszállna
try:
logger.info(f"🧠 AI dúsítás indul: {v_ident} {attempt_str}")
# 1. LÉPÉS: AI Hívás (Rábízzuk az adatokat a modellre)
ai_data = await AIService.get_clean_vehicle_data(
base_info['make'],
base_info['m_name'],
base_info
)
if not ai_data:
raise ValueError("Teljesen üres AI válasz (API hiba vagy extrém hallucináció).")
# 2. LÉPÉS: HIBRID MERGE (Még a validáció előtt!)
# Az RDW adatok felülbírálják az AI-t a hatósági paramétereknél
final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else int(ai_data.get("kw", 0) or 0)
final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else int(ai_data.get("ccm", 0) or 0)
# Üzemanyag tisztítása
fuel_rdw = base_info.get('rdw_fuel', '')
final_fuel = fuel_rdw if fuel_rdw and fuel_rdw != "Unknown" else ai_data.get("fuel_type", "petrol")
final_engine = base_info['rdw_engine'] if base_info['rdw_engine'] else ai_data.get("engine_code", "Unknown")
final_euro = base_info['rdw_euro'] or ai_data.get("euro_classification")
final_cylinders = base_info['rdw_cylinders'] or ai_data.get("cylinders")
# 3. LÉPÉS: Intelligens Validáció
is_valid, error_msg = self.validate_merged_data(final_kw, final_ccm, base_info['v_type'], final_fuel.lower(), current_attempts)
if not is_valid:
raise ValueError(f"Validációs hiba: {error_msg}")
# 4. LÉPÉS: Mentés az Arany Katalógusba
clean_model = str(ai_data.get("marketing_name", base_info['m_name']))[:50].upper()
cat_stmt = text("""
INSERT INTO vehicle.vehicle_catalog
(master_definition_id, make, model, power_kw, engine_capacity, fuel_type, factory_data)
VALUES (:m_id, :make, :model, :kw, :ccm, :fuel, :factory)
ON CONFLICT ON CONSTRAINT uix_vehicle_catalog_full DO NOTHING
RETURNING id;
""")
await db.execute(cat_stmt, {
"m_id": record_id,
"make": base_info['make'].upper(),
"model": clean_model,
"kw": final_kw,
"ccm": final_ccm,
"fuel": final_fuel,
"factory": json.dumps(ai_data)
})
# 5. LÉPÉS: Staging tábla (VMD) lezárása
await db.execute(
update(VehicleModelDefinition)
.where(VehicleModelDefinition.id == record_id)
.values(
status="gold_enriched",
engine_capacity=final_ccm,
power_kw=final_kw,
fuel_type=final_fuel,
engine_code=final_engine,
euro_classification=final_euro,
cylinders=final_cylinders,
specifications=ai_data, # Elmentjük az AI teljes outputját a mestertáblába is
updated_at=func.now()
)
)
await db.commit()
logger.info(f"✨ ARANY REKORD KÉSZ: {v_ident}")
self.ai_calls_today += 1
except Exception as e:
await db.rollback()
logger.warning(f"⚠️ Alkimista hiba - {v_ident}: {e}")
# Ha elértük a limitet, KÉZI MODERÁCIÓRA küldjük, egyébként vissza a Kutatónak
new_status = 'manual_review_needed' if current_attempts + 1 >= self.max_attempts else 'unverified'
# Elmentjük az AI részleges válaszát (vagy a hibát), hogy az admin lássa, mit rontott el a gép
review_data = ai_data if ai_data else {"error": "Nincs értékelhető JSON adat az AI-tól", "raw_context": base_info['web_context']}
await db.execute(
update(VehicleModelDefinition)
.where(VehicleModelDefinition.id == record_id)
.values(
attempts=current_attempts + 1,
last_error=str(e)[:200],
status=new_status,
specifications=review_data, # Kézi ellenőrzéshez beírjuk a törött adatot!
updated_at=func.now()
)
)
await db.commit()
if new_status == 'unverified':
logger.info(f"♻️ Akta visszaküldve a Robot-2-nek (Kutató). {attempt_str}")
else:
logger.error(f"🛑 Max próbálkozás elérve! Kézi moderációra küldve: {v_ident}")
async def run(self):
logger.info(f"🚀 Alchemist Pro HIBRID ONLINE (Atomi Zárolás + Moderáció Patch)")
while True:
if not self.check_budget():
logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
await asyncio.sleep(3600); continue
try:
async with AsyncSessionLocal() as db:
# ATOMI ZÁROLÁS (A "Szent Grál" a race condition ellen)
query = text("""
UPDATE vehicle.vehicle_model_definitions
SET status = 'ai_synthesis_in_progress'
WHERE id = (
SELECT id FROM vehicle.vehicle_model_definitions
WHERE status IN ('awaiting_ai_synthesis', 'ACTIVE')
AND attempts < :max_attempts
AND is_manual = FALSE
ORDER BY
CASE WHEN status = 'awaiting_ai_synthesis' THEN 1 ELSE 2 END,
priority_score DESC
FOR UPDATE SKIP LOCKED
LIMIT 1
)
RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity,
fuel_type, engine_code, euro_classification, cylinders, raw_search_context, attempts;
""")
result = await db.execute(query, {"max_attempts": self.max_attempts})
task = result.fetchone()
await db.commit()
if task:
# Szétbontjuk a lekérdezett rekordot a base_info dict-be
r_id = task[0]
base_info = {
"make": task[1], "m_name": task[2], "v_type": task[3] or "car",
"rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
"rdw_fuel": task[6] or "petrol", "rdw_engine": task[7] or "",
"rdw_euro": task[8], "rdw_cylinders": task[9],
"web_context": task[10] or ""
}
attempts = task[11]
# Külön adatbázis kapcsolat a feldolgozáshoz (hosszú AI hívás miatt)
async with AsyncSessionLocal() as process_db:
await self.process_single_record(process_db, r_id, base_info, attempts)
# GPU hűtés / Ollama rate limit
await asyncio.sleep(random.uniform(1.5, 3.5))
else:
logger.info("😴 Nincs feldolgozandó akta, az Alkimista pihen...")
await asyncio.sleep(15)
except Exception as e:
logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(TechEnricher().run())

View File

@@ -0,0 +1,168 @@
import asyncio
import logging
import datetime
import random
import sys
import json
import os
from sqlalchemy import text, func, update
from app.database import AsyncSessionLocal
from app.models.vehicle_definitions import VehicleModelDefinition
from app.services.ai_service import AIService
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] R3-Alchemist: %(message)s', stream=sys.stdout)
logger = logging.getLogger("Robot-3-Alchemist")
class TechEnricher:
"""
Vehicle Robot 3: Alchemist Pro (Sentinel Gateway Edition)
Az AIService 2.2-t használja (Ollama -> Groq Fallback).
Kinyeri a felszereltségi szintet (trim_level) és pótolja a hiányzó adatokat.
"""
def __init__(self):
self.max_attempts = 5
self.daily_ai_limit = int(os.getenv("AI_DAILY_LIMIT", "10000"))
self.ai_calls_today = 0
self.last_reset_date = datetime.date.today()
def check_budget(self) -> bool:
if datetime.date.today() > self.last_reset_date:
self.ai_calls_today = 0
self.last_reset_date = datetime.date.today()
return self.ai_calls_today < self.daily_ai_limit
def validate_merged_data(self, merged_kw: int, merged_ccm: int, v_class: str, fuel: str, current_attempts: int) -> tuple[bool, str]:
if merged_ccm > 18000:
return False, f"Irreális CCM érték ({merged_ccm})"
if merged_kw > 1500 and v_class not in ["truck", "other"]:
return False, f"Irreális KW érték ({merged_kw})"
if merged_kw == 0 and current_attempts < 3:
return False, "Hiányzó KW adat. Újrakutatás javasolt."
if merged_ccm == 0 and "elektr" not in fuel.lower() and v_class != "trailer" and current_attempts < 3:
return False, "Hiányzó CCM (belsőégésű motornál)."
return True, "OK"
async def process_single_record(self, db, record_id: int, base_info: dict, current_attempts: int):
v_ident = f"{base_info['make'].upper()} {base_info['m_name']} (ID: {record_id})"
attempt_str = f"[Próba: {current_attempts + 1}/{self.max_attempts}]"
try:
logger.info(f"🧠 AI dúsítás indul: {v_ident} {attempt_str}")
# Szigorú Prompt a Master AI Service-nek
prompt = f"""
Elemezd az alábbi járműadatokat és a webes kutatást! Készíts belőle egy JSON objektumot.
Jármű: {base_info['make']} {base_info['m_name']}
Hatósági adatok: {base_info['rdw_ccm']} ccm, {base_info['rdw_kw']} kW, Üzemanyag: {base_info['rdw_fuel']}
Webes szöveg: {base_info['web_context'][:2000]}
FELADATOK:
1. Keresd meg a felszereltségi szintet (trim_level) a modell nevéből vagy a szövegből (pl. AMG, Highline, Titanium, M-Sport, Elegance, ST-Line). Ha nincs, legyen üres string.
2. Ha az RDW adatokban a kW vagy a ccm 0, pótold a szövegből a helyes értéket!
KIZÁRÓLAG EGY ÉRVÉNYES JSON-T ADJ VISSZA! (A Groq/Gemini miatt kötelező a JSON szó használata).
Várt kulcsok: "kw" (int), "ccm" (int), "trim_level" (string), "transmission" (string), "drive_type" (string).
"""
# Hívjuk a te profi Gateway-edet! (_execute_ai_call átveszi a db session-t is a beállításokhoz)
ai_data = await AIService._execute_ai_call(db, prompt, model_key="text")
if not ai_data:
raise ValueError("Üres AI válasz (Minden fallback elbukott).")
# HIBRID MERGE
final_kw = base_info['rdw_kw'] if base_info['rdw_kw'] > 0 else int(ai_data.get("kw", 0) or 0)
final_ccm = base_info['rdw_ccm'] if base_info['rdw_ccm'] > 0 else int(ai_data.get("ccm", 0) or 0)
trim_level = str(ai_data.get("trim_level", ""))[:100]
# Sane-Check
is_valid, error_msg = self.validate_merged_data(final_kw, final_ccm, base_info['v_type'], base_info['rdw_fuel'], current_attempts)
if not is_valid:
raise ValueError(f"Validációs hiba: {error_msg}")
# Staging tábla frissítése (Arany minősítés)
await db.execute(
update(VehicleModelDefinition)
.where(VehicleModelDefinition.id == record_id)
.values(
status="gold_enriched",
engine_capacity=final_ccm,
power_kw=final_kw,
trim_level=trim_level if trim_level.lower() not in ["null", "none"] else "",
specifications=ai_data,
updated_at=func.now()
)
)
await db.commit()
logger.info(f"✨ ARANY REKORD KÉSZ: {v_ident} | Trim: {trim_level}")
self.ai_calls_today += 1
except Exception as e:
await db.rollback()
logger.warning(f"⚠️ Alkimista hiba - {v_ident}: {e}")
new_status = 'manual_review_needed' if current_attempts + 1 >= self.max_attempts else 'unverified'
await db.execute(
update(VehicleModelDefinition)
.where(VehicleModelDefinition.id == record_id)
.values(
attempts=current_attempts + 1,
last_error=str(e)[:200],
status=new_status,
updated_at=func.now()
)
)
await db.commit()
if new_status == 'unverified':
logger.info(f"♻️ Akta visszaküldve a Kutatónak (R2). {attempt_str}")
async def run(self):
logger.info(f"🚀 R3 Alchemist Pro ONLINE (Sentinel Gateway Integráció)")
while True:
if not self.check_budget():
logger.warning("💸 Napi AI limit kimerítve! Pihenés...")
await asyncio.sleep(3600); continue
try:
async with AsyncSessionLocal() as db:
query = text("""
UPDATE vehicle.vehicle_model_definitions
SET status = 'ai_synthesis_in_progress'
WHERE id = (
SELECT id FROM vehicle.vehicle_model_definitions
WHERE status = 'awaiting_ai_synthesis'
AND attempts < :max_attempts
AND is_manual = FALSE
ORDER BY priority_score DESC
FOR UPDATE SKIP LOCKED LIMIT 1
)
RETURNING id, make, marketing_name, vehicle_class, power_kw, engine_capacity, fuel_type, raw_search_context, attempts;
""")
result = await db.execute(query, {"max_attempts": self.max_attempts})
task = result.fetchone()
await db.commit()
if task:
base_info = {
"make": task[1], "m_name": task[2], "v_type": task[3] or "car",
"rdw_kw": task[4] or 0, "rdw_ccm": task[5] or 0,
"rdw_fuel": task[6] or "petrol", "web_context": task[7] or ""
}
async with AsyncSessionLocal() as process_db:
await self.process_single_record(process_db, task[0], base_info, task[8])
else:
await asyncio.sleep(10)
except Exception as e:
logger.error(f"💀 Kritikus hiba a főciklusban: {e}")
await asyncio.sleep(10)
if __name__ == "__main__":
asyncio.run(TechEnricher().run())

View File

@@ -0,0 +1,113 @@
import asyncio
import json
from playwright.async_api import async_playwright
async def test_scraper():
# Két probléma-fókuszú URL: a modern Aprilia és a régi, hibás HTML-ű BMW
test_urls = [
"https://www.autoevolution.com/moto/aprilia-rs-660-factory-2025.html",
"https://www.autoevolution.com/moto/bmw-f-650-gs-2011.html"
]
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
context = await browser.new_context(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
page = await context.new_page()
for url in test_urls:
print(f"\n{'='*60}")
print(f"🌍 MEGNYITÁS: {url}")
print(f"{'='*60}")
# A DOM betöltése megvárása
await page.goto(url, wait_until="domcontentloaded", timeout=60000)
await asyncio.sleep(2) # Várunk picit a JS futásra
# A TÖKÉLETESÍTETT AUTOEVOLUTION PARSZOLÓ
script = """
() => {
let results = {};
// 1. MÓDSZER: Régi motorok (pl. BMW F650GS) -> td.left és td.right
let leftCells = document.querySelectorAll('td.left');
leftCells.forEach(cell => {
let key = cell.innerText.replace(/:$/, '').trim();
let rightCell = cell.nextElementSibling;
if(rightCell && rightCell.classList.contains('right')) {
results[key] = rightCell.innerText.trim();
}
});
// 2. MÓDSZER: Modern motorok (pl. Aprilia) -> dt és dd
let dts = document.querySelectorAll('dt');
dts.forEach(dt => {
let key = dt.innerText.replace(/:$/, '').trim();
let dd = dt.nextElementSibling;
if(dd && dd.tagName.toLowerCase() === 'dd') {
results[key] = dd.innerText.trim();
}
});
// 3. MÓDSZER: Alternatív modern layout -> span.label és span.value
let specRows = document.querySelectorAll('.spec-row');
specRows.forEach(row => {
let label = row.querySelector('.label');
let value = row.querySelector('.value');
if(label && value) {
let key = label.innerText.replace(/:$/, '').trim();
if (!results[key]) {
results[key] = value.innerText.trim();
}
}
});
// 4. MÓDSZER: "Adler" típusú elavult leírások fallbackje -> Vastagított szöveg
if (Object.keys(results).length === 0) {
document.querySelectorAll('b, strong').forEach(b => {
let key = b.innerText.replace(/:$/, '').trim();
if(key.length > 2 && key.length < 30) {
let val = "";
// Ha a szöveg közvetlenül a tag után van (Text Node)
if(b.nextSibling && b.nextSibling.nodeType === 3) {
val = b.nextSibling.textContent.trim();
}
// Ha egy másik elemben van
else if (b.nextElementSibling && b.nextElementSibling.tagName !== 'B') {
val = b.nextElementSibling.innerText.trim();
}
if(val && !results[key]) {
results[key] = val;
}
}
});
}
return results;
}
"""
data = await page.evaluate(script)
if data and len(data) > 0:
# Kiszűrjük a zajt, csak a releváns műszaki adatokat hagyjuk meg
relevant_keys = ["Type", "Displacement", "Bore X Stroke", "Compression Ratio",
"Horsepower", "Torque", "Fuel System", "Gearbox", "Clutch",
"Final Drive", "Frame", "Front Suspension", "Rear Suspension",
"Front Brake", "Rear Brake", "Overall Length", "Overall Width",
"Seat Height", "Wheelbase", "Fuel Capacity", "Weight", "Dry Weight",
"Wet Weight", "Front", "Rear"]
filtered_data = {k: v for k, v in data.items() if any(rk.lower() in k.lower() for rk in relevant_keys)}
print("\n🟢 KINYERT ADATOK (DOM PARSZOLÓ):")
print(json.dumps(filtered_data if filtered_data else data, indent=2, ensure_ascii=False))
print(f"\n✅ Összesen {len(filtered_data if filtered_data else data)} műszaki paramétert találtam.")
else:
print("\n🔴 NULLA ADAT - A DOM parszoló nem talált egyezést.")
await browser.close()
if __name__ == "__main__":
asyncio.run(test_scraper())

View File

@@ -0,0 +1,113 @@
import asyncio
import json
import re
import requests
from sqlalchemy import text
from app.database import AsyncSessionLocal
# --- TECHNIKAI SZÓTÁR ÉS MAPPING ---
# Ez a szótár fordítja le az UltimateSpecs kulcsokat az adatbázis oszlopneveire
MAPPING = {
"Maximum power": "power_kw",
"Engine capacity": "engine_capacity",
"Maximum torque": "torque_nm",
"Top Speed": "max_speed",
"Acceleration 0 to 100 km/h": "acceleration_0_100",
"Curb Weight": "curb_weight",
"Wheelbase": "wheelbase",
"Num. of Seats": "seats",
"Drive wheels - Traction - Layout": "drive_type",
"Body": "body_type"
}
async def r5_test_run():
print("🚀 R5 Hibrid Robot indítása (Teszt üzemmód)...")
async with AsyncSessionLocal() as db:
# 1. KIVÁLASZTÁS: Kiveszünk egy olyan autót, ami még nincs dúsítva (R1 bázisból)
query = text("""
SELECT id, make, marketing_name, year_from, technical_code, fuel_type
FROM vehicle.vehicle_model_definitions
WHERE (power_kw IS NULL OR power_kw = 0 OR engine_capacity IS NULL OR engine_capacity = 0)
AND status IN ('manual_review_needed', 'research_failed_empty', 'pending', 'enrich_ready')
ORDER BY priority_score DESC
LIMIT 1
""")
target = (await db.execute(query)).fetchone()
if not target:
print("✨ Nincs feldolgozatlan autó az adatbázisban.")
return
t_id, make, model, year, tech_code, fuel = target
print(f"🎯 Célpont: {make} {model} ({year})")
print(f"📌 Technical Code: {tech_code or 'Nincs megadva'}")
# 2. RDW ADATOK (Holland hatósági bázis)
# Ha van technical_code (pl. Fiatnál a típusazonosító), az RDW-ből pontos adatot kapunk
rdw_data = {}
if tech_code:
print("🇳🇱 RDW adatok lekérése...")
# Az RDW API m9d7-ebf2 táblája tartalmazza a típus specifikációkat
rdw_url = f"https://opendata.rdw.nl/resource/m9d7-ebf2.json?handelsbenaming={tech_code.upper()}"
try:
res = requests.get(rdw_url, timeout=5).json()
if res:
rdw_data = {
"power_kw": int(float(res[0].get('nettomaximumvermogen', 0))),
"engine_capacity": int(res[0].get('cilinderinhoud', 0)),
"curb_weight": int(res[0].get('massa_ledig_voertuig', 0))
}
print("✅ RDW adatok sikeresen betöltve.")
except:
print("⚠️ RDW nem elérhető vagy nincs találat.")
# 3. ULTIMATESPECS ADATOK (Szimulált kaparás a kért logika alapján)
print("🏁 UltimateSpecs adatok gyűjtése...")
# Itt futna a Playwright scraper, ami kinyeri a táblázatot
# Példa nyers adatokra, amit az oldalról szedünk le:
raw_web_data = {
"Maximum power": "103 PS / 76 kW @ 5750 rpm",
"Engine capacity": "1581 cm3",
"Maximum torque": "144 Nm @ 4000 rpm",
"Top Speed": "180 km/h",
"Acceleration 0 to 100 km/h": "11.5 s",
"Curb Weight": "1090 kg",
"Wheelbase": "254 cm",
"Body": "Hatchback"
}
# 4. ÖSSZEFŰZÉS ÉS FORDÍTÁS
final_mdm_record = {
"id": t_id,
"make": make,
"marketing_name": model,
"year_from": year,
"fuel_type": fuel
}
# Alkalmazzuk a mappinget és a regex tisztítást
for web_key, db_key in MAPPING.items():
val = raw_web_data.get(web_key)
if val:
# Számértékek kinyerése (pl. "76 kW" -> 76, "1581 cm3" -> 1581)
numbers = re.findall(r'\d+', str(val))
if numbers:
# Ha több szám van (pl. kW és LE), a relevánsat választjuk
final_mdm_record[db_key] = numbers[1] if "kW" in str(val) and len(numbers)>1 else numbers[0]
else:
final_mdm_record[db_key] = val
# RDW adatok prioritása (ezek a legpontosabbak, felülírják a webet)
final_mdm_record.update({k: v for k, v in rdw_data.items() if v})
# --- TERMINÁL KIMENET ---
print("\n" + "="*50)
print("📊 VÉGLEGES MDM REKORD (ELŐNÉZET)")
print("="*50)
print(json.dumps(final_mdm_record, indent=2, ensure_ascii=False))
print("="*50)
print("\n[R5] Ha az adatok rendben vannak, mehet az élesítés?")
if __name__ == "__main__":
asyncio.run(r5_test_run())

View File

@@ -0,0 +1,62 @@
# /opt/docker/dev/service_finder/backend/app/workers/vehicle/vehicle_robot_1_5_heavy_eu1.0.py
import asyncio
import httpx
import logging
from sqlalchemy import text
from app.database import AsyncSessionLocal
logger = logging.getLogger("Robot-1-5-Heavy-EU")
logging.basicConfig(level=logging.INFO)
class HeavyEUHunter:
RDW_URL = "https://opendata.rdw.nl/resource/m9d7-ebf2.json"
@classmethod
async def fetch_rdw_heavy(cls, vehicle_type: str):
query_url = f"{cls.RDW_URL}?voertuigsoort={vehicle_type}&$select=merk,handelsbenaming&$limit=10000"
async with httpx.AsyncClient(timeout=30.0) as client:
try:
resp = await client.get(query_url)
return resp.json() if resp.status_code == 200 else []
except Exception as e:
logger.error(f"❌ RDW Error: {e}")
return []
@classmethod
async def run(cls):
logger.info("🚛 Robot 1.5 (EU Heavy Duty) indítása - Kötegelt mód...")
job_list = {
"Vrachtwagen": "truck",
"Bus": "bus",
"Kampeerauto": "rv"
}
async with AsyncSessionLocal() as db:
for rdw_name, internal_class in job_list.items():
logger.info(f"📥 {rdw_name} adatok letöltése...")
data = await cls.fetch_rdw_heavy(rdw_name)
if not data: continue
# A 10.000 adatot egyetlen listába gyűjtjük
insert_data = []
for item in data:
make = item.get('merk', '').upper().strip()
model = item.get('handelsbenaming', '').upper().strip()
if make and model:
insert_data.append({"make": make, "model": model, "v_class": internal_class})
if insert_data:
query = text("""
INSERT INTO vehicle.catalog_discovery
(make, model, vehicle_class, status, market, priority_score, source)
VALUES (:make, :model, :v_class, 'pending', 'EU', 20, 'RDW-HEAVY')
ON CONFLICT ON CONSTRAINT _make_model_market_year_uc DO NOTHING
""")
# Egyetlen SQL hívással beszúrjuk akár a 10.000 sort is!
await db.execute(query, insert_data)
await db.commit()
logger.info(f"{rdw_name}: {len(insert_data)} EU-s nagygép beküldve kötegelve.")
if __name__ == "__main__":
asyncio.run(HeavyEUHunter.run())

View File

@@ -0,0 +1,387 @@
#!/usr/bin/env python3
import asyncio
import json
import logging
import random
import urllib.parse
import sys
import signal
import re
from playwright.async_api import async_playwright
from sqlalchemy import text
from app.database import AsyncSessionLocal
# R2.3 - SENTINEL (Hardened & Obedient Edition)
logging.basicConfig(level=logging.INFO, format='%(asctime)s [R2.3-SENTINEL] %(message)s')
logger = logging.getLogger("R2.3")
# --- 1. SZŰRÉSEK ÉS TILTÓLISTÁK ---
# Csak olyan típusokat keresünk, amik nem utánfutók vagy munkagépek
JUNK_LIST = [
'SARIS', 'ANSSEMS', 'HAPERT', 'HUMBAUR', 'EDUARD', 'IFOR WILLIAMS', 'FENDT',
'HOBBY', 'ADRIA', 'PEECON', 'JAKO', 'KAWECO', 'POTTINGER', 'BOCKMANN',
'JOHN DEERE', 'CLAAS', 'IVECO', 'SCANIA', 'MAN', 'DAF', 'KNAUS', 'PÖSSL', 'HYMER', 'WESTFALIA'
]
# --- 2. FORDÍTÁSOK (DE/NL -> EN) ---
TRANSLATIONS = {
"3ER REIHE": "3 Series", "5ER REIHE": "5 Series", "1ER REIHE": "1 Series", "7ER REIHE": "7 Series",
"E-KLASSE": "E Class", "C-KLASSE": "C Class", "S-KLASSE": "S Class", "A-KLASSE": "A Class",
"REIHE": "Series", "KLASSE": "Class", "BESTELWAGEN": "Van"
}
class RobotScout:
def __init__(self):
self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
self.running = True
def clean_name(self, make, model):
"""Standardizált angol név előállítása."""
m = model.upper()
for de, en in TRANSLATIONS.items():
m = m.replace(de, en)
# Márkanév duplázódás törlése (pl. VOLVO VOLVO V60 -> VOLVO V60)
m = m.replace(make.upper(), "").strip()
return f"{make} {m}"
# --- COLUMN MAPPING for scraping ---
COLUMN_MAPPING = {
"horsepower": "power_kw",
"engine displacement": "engine_capacity",
"maximum torque": "torque_nm",
"top speed": "max_speed",
"curb weight": "curb_weight",
"wheelbase": "wheelbase",
"num. of seats": "seats"
}
def clean_number(self, val: str, key: str = "") -> int:
if not val or val == "-": return 0
try:
if "hp" in val.lower() or "kw" in val.lower():
kw_match = re.search(r'(\d+)\s*kw', val.lower())
if kw_match: return int(kw_match.group(1))
nums = re.findall(r'\d+', val.replace(' ', '').replace(',', '').replace('.', ''))
return int(nums[0]) if nums else 0
except: return 0
async def get_car_links(self, page, make, model, year, use_year=True):
"""Minden autós link kigyűjtése fallback mechanizmussal retry logikával."""
clean_model = self.clean_name(make, model)
search_query = f"{clean_model} {year}" if use_year else clean_model
url = f"https://www.ultimatespecs.com/index.php?q={urllib.parse.quote(search_query)}"
logger.info(f"🔎 KERESÉS: {search_query}")
async def _fetch_links():
await page.goto(url, wait_until="domcontentloaded", timeout=25000)
# 1. Ha direkt az adatlapon vagyunk
if any(x in page.url for x in ['/car-specs/', '/motorcycles-specs/']):
logger.info("🎯 Direkt találat!")
return [{"name": await page.title(), "url": page.url}]
# 2. Várakozás és linkek kigyűjtése
await asyncio.sleep(2)
variants = await page.evaluate("""
() => {
let results = [];
document.querySelectorAll('a').forEach(a => {
let href = a.getAttribute('href') || '';
let text = a.innerText.trim();
// Csak technikai adatlapokat gyűjtünk, reklámokat/kategóriákat nem
if ((href.includes('/car-specs/') || href.includes('/motorcycles-specs/'))
&& href.includes('.html') && text.length > 3) {
results.push({ name: text, url: href });
}
});
return results;
}
""")
# 3. Fallback: Ha nincs találat évvel, próbálja év nélkül
if not variants and use_year:
logger.info(" ↳ Nincs találat évszámmal, próbálkozom évszám nélkül...")
return await self.get_car_links(page, make, model, year, use_year=False)
return variants
try:
variants = await self._retry_with_backoff(
_fetch_links,
max_attempts=3,
base_delay=2,
exception_message=f"❌ Hálózati hiba a(z) {url} oldalon"
)
return variants if variants is not None else []
except Exception as e:
logger.error(f"❌ Hálózati hiba (végleges): {str(e)[:50]}")
return []
async def _retry_with_backoff(self, func, max_attempts=3, base_delay=2,
exception_message="Retry failed", retry_exceptions=True):
"""Helper function for retry logic with exponential backoff."""
for attempt in range(max_attempts):
try:
return await func()
except Exception as e:
if attempt == max_attempts - 1:
logger.error(f"{exception_message} after {max_attempts} attempts: {str(e)[:100]}")
raise
else:
delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
logger.warning(f"⚠️ Attempt {attempt + 1} failed: {str(e)[:50]}. Retrying in {delay:.1f}s...")
await asyncio.sleep(delay)
return None
async def scrape_car_details(self, page, url):
"""Scrape car specifications from a given Ultimate Specs URL with comprehensive data extraction and retry logic."""
async def _scrape():
await page.goto(url, wait_until="networkidle", timeout=30000)
# Parsing all specification tables and sections
full_specs = await page.evaluate("""
() => {
let results = {};
// 1. Collect all specification tables (existing logic)
document.querySelectorAll('table.table_specs, table.responsive').forEach(table => {
table.querySelectorAll('tr').forEach(row => {
let t = row.querySelector('.table_specs_title, .td_title, td:first-child');
let v = row.querySelector('.table_specs_value, .td_value, td:last-child');
if(t && v) {
let k = t.innerText.replace(':','').trim().toLowerCase();
let val = v.innerText.trim();
if(k && val && val !== "-") results[k] = val;
}
});
});
// 2. Collect section headers and their content for additional technical data
// Look for h2, h3, h4 elements that might contain section titles
const sections = {};
const headers = document.querySelectorAll('h2, h3, h4, .section-title, .specs-header');
headers.forEach(header => {
const title = header.innerText.trim();
if (title && title.length > 0) {
// Find the next table or div with specs after this header
let nextElement = header.nextElementSibling;
let sectionData = {};
// Look for tables or lists in the next few siblings
for (let i = 0; i < 5 && nextElement; i++) {
if (nextElement.tagName === 'TABLE') {
nextElement.querySelectorAll('tr').forEach(row => {
let t = row.querySelector('td:first-child');
let v = row.querySelector('td:last-child');
if(t && v) {
let k = t.innerText.replace(':','').trim().toLowerCase();
let val = v.innerText.trim();
if(k && val && val !== "-") {
sectionData[k] = val;
// Also add to main results with section prefix
results[`${title.toLowerCase().replace(/ /g, '_')}_${k}`] = val;
}
}
});
}
nextElement = nextElement.nextElementSibling;
}
sections[title.toLowerCase().replace(/ /g, '_')] = sectionData;
}
});
// 3. Extract specific known sections by looking for text patterns
const pageText = document.body.innerText.toLowerCase();
// Check for electric/hybrid sections
if (pageText.includes('electric engine') || pageText.includes('battery')) {
// Try to find battery voltage, capacity, etc.
const batteryRegex = /battery\s*voltage[:\s]*([\d\.]+)\s*v/gi;
const match = batteryRegex.exec(document.body.innerText);
if (match) results['battery_voltage_v'] = match[1];
}
// 4. Extract dimensions data
const dimensionPatterns = {
'wheelbase': /wheelbase[:\s]*([\d\.]+)\s*cm/gi,
'length': /length[:\s]*([\d\.]+)\s*cm/gi,
'width': /width[:\s]*([\d\.]+)\s*cm/gi,
'height': /height[:\s]*([\d\.]+)\s*cm/gi,
'curb_weight': /curb\s*weight[:\s]*([\d\.]+)\s*kg/gi,
'towing_capacity': /towing\s*capacity[:\s]*([\d\.]+)\s*kg/gi
};
for (const [key, regex] of Object.entries(dimensionPatterns)) {
const match = regex.exec(document.body.innerText);
if (match) results[key] = match[1];
}
// 5. Add sections data as a nested object
results['_sections'] = sections;
return results;
}
""")
return full_specs
try:
logger.info(f"🌐 Scraping: {url}")
full_specs = await self._retry_with_backoff(
_scrape,
max_attempts=3,
base_delay=2,
exception_message=f"❌ Scrape hiba a(z) {url} oldalon"
)
return full_specs
except Exception as e:
logger.error(f"❌ Scrape hiba (végleges): {str(e)[:100]}...")
return None
async def run(self):
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
context = await browser.new_context(user_agent=self.user_agent)
page = await context.new_page()
while self.running:
# --- A FÉK: 3-6 mp szigorú pihenő minden kör elején ---
wait = random.uniform(3, 6)
logger.info(f"💤 Várakozás {wait:.1f} mp...")
await asyncio.sleep(wait)
async with AsyncSessionLocal() as db:
# Következő feldolgozatlan autó (John Deere, Iveco, stb. kizárva)
target = (await db.execute(text("""
SELECT id, make, marketing_name, year_from FROM vehicle.vehicle_model_definitions
WHERE status IN ('pending', 'manual_review_needed')
AND NOT (make = ANY(:junks))
ORDER BY priority_score DESC LIMIT 1
"""), {"junks": JUNK_LIST})).fetchone()
if not target:
logger.info("✨ Minden tétel feldolgozva.")
break
t_id, make, model, year = target
logger.info(f"🚀 CÉLPONT: {make} {model} ({year}) [ID: {t_id}]")
try:
links = await self.get_car_links(page, make, model, year)
except Exception as e:
logger.error(f"❌ Hálózati hiba linkek lekérésekor: {str(e)[:100]}")
await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='research_failed_network' WHERE id=:id"), {"id": t_id})
await db.commit()
continue
if not links:
logger.warning(f"❌ Nem található adatlap. research_failed_empty rögzítése.")
await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='research_failed_empty' WHERE id=:id"), {"id": t_id})
await db.commit()
continue
# --- 1. SCRAPE THE FIRST LINK FOR IMMEDIATE ENRICHMENT ---
first_link = None
if links:
first_link = links[0]
full_url = first_link['url'] if first_link['url'].startswith('http') else f"https://www.ultimatespecs.com{first_link['url']}"
logger.info(f"⚡ Azonnali adatgyűjtés: {full_url}")
web_data = await self.scrape_car_details(page, full_url)
if web_data is None:
# Scraping failed after all retries
logger.error(f"❌ Scraping sikertelen minden próbálkozás után. research_failed_parsing rögzítése.")
await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='research_failed_parsing' WHERE id=:id"), {"id": t_id})
await db.commit()
# Continue to save links as variants anyway
web_data = {}
elif len(web_data) >= 5:
# Map scraped data to columns
updates = {col: self.clean_number(web_data.get(k)) for k, col in self.COLUMN_MAPPING.items()}
# Also extract fuel_type, transmission, etc. if possible
fuel_type = web_data.get('fuel type', 'Unknown')
transmission_type = web_data.get('transmission', 'Unknown')
drive_type = web_data.get('drive type', 'Unknown')
body_type = web_data.get('body type', 'Unknown')
engine_capacity = updates.get('engine_capacity', 0)
power_kw = updates.get('power_kw', 0)
# Update the original record with scraped data
await db.execute(text("""
UPDATE vehicle.vehicle_model_definitions
SET power_kw = :power_kw, engine_capacity = :engine_capacity,
torque_nm = :torque_nm, max_speed = :max_speed,
curb_weight = :curb_weight,
wheelbase = :wheelbase, seats = :seats,
fuel_type = :fuel_type, transmission_type = :transmission_type,
drive_type = :drive_type, body_type = :body_type,
specifications = specifications || :full_json,
status = 'awaiting_ai_synthesis', updated_at = NOW()
WHERE id = :id
"""), {
**updates,
"id": t_id,
"fuel_type": fuel_type,
"transmission_type": transmission_type,
"drive_type": drive_type,
"body_type": body_type,
"full_json": json.dumps(web_data)
})
logger.info(f"✅ AZONNALI PUBLIKÁLÁS: {make} {model} ({power_kw} kW)")
else:
logger.warning("⚠️ Scraping kevés adatot talált, csak linkek mentve.")
# --- 2. SAVE ALL LINKS AS NEW VARIANT RECORDS (including first if not enriched) ---
added = 0
for l in links:
full_url = l['url'] if l['url'].startswith('http') else f"https://www.ultimatespecs.com{l['url']}"
# JAVÍTÁS: column "source_url" hiba ellen raw_api_data-t nézünk
check_query = text("SELECT id FROM vehicle.vehicle_model_definitions WHERE raw_api_data->>'url' = :u")
exists = (await db.execute(check_query, {"u": full_url})).fetchone()
if not exists:
# Create normalized name from marketing name
normalized = l['name'].lower().replace(' ', '_').replace('-', '_').replace('.', '').replace(',', '')[:200]
await db.execute(text("""
INSERT INTO vehicle.vehicle_model_definitions
(make, marketing_name, normalized_name, year_from, status,
raw_api_data, priority_score, source, market,
technical_code, variant_code, version_code,
specifications, marketing_name_aliases, raw_search_context)
VALUES (:make, :name, :normalized, :year, 'awaiting_ai_synthesis',
:raw, 30, 'ultimatespecs', 'EU',
'UNKNOWN', 'UNKNOWN', 'UNKNOWN',
'{}'::jsonb, '[]'::jsonb, '')
"""), {
"make": make, "name": l['name'], "normalized": normalized,
"year": year, "raw": json.dumps({"url": full_url}), "priority": 30
})
added += 1
# Eredeti rekord archiválása (ha még nem publikáltuk)
if not web_data:
await db.execute(text("UPDATE vehicle.vehicle_model_definitions SET status='expanded_to_variants', updated_at=NOW() WHERE id=:id"), {"id": t_id})
await db.commit()
logger.info(f"✅ SIKER: {added} új variáció mentve. R4-R5 robotok értesítve.")
await browser.close()
if __name__ == "__main__":
scout = RobotScout()
# Handle CTRL+C
def stop_signal(sig, frame):
logger.info("🛑 LEÁLLÍTÁS (Kérés érzékelve)...")
scout.running = False
sys.exit(0)
signal.signal(signal.SIGINT, stop_signal)
try:
asyncio.run(scout.run())
except KeyboardInterrupt:
pass