refaktorálás javításai
This commit is contained in:
183
backend/app/services/deduplication_service.py
Normal file
183
backend/app/services/deduplication_service.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
DeduplicationService - Explicit deduplikáció a márka, technikai kód és jármű típus alapján.
|
||||
Integrálja a mapping_rules.py és mapping_dictionary.py fájlokat.
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional, Dict, Any
|
||||
from sqlalchemy import select, and_, or_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.vehicle_definitions import VehicleModelDefinition
|
||||
from app.workers.vehicle.mapping_rules import SOURCE_MAPPINGS, unify_data
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Fallback synonym dictionary used when no external mapping_dictionary is
# available. Each group maps a canonical (upper-case) name to the list of
# spellings that should resolve to it.
#
# All entries are stored in UPPER CASE because the lookups in this module
# upper-case their input before comparing; a mixed-case synonym would never
# match.
MAPPING_DICTIONARY = {
    "make_synonyms": {
        "BMW": ["BMW", "BAYERISCHE MOTOREN WERKE"],
        "MERCEDES": ["MERCEDES", "MERCEDES-BENZ", "MERCEDES BENZ"],
        "VOLKSWAGEN": ["VOLKSWAGEN", "VW"],
        "AUDI": ["AUDI"],
        "TOYOTA": ["TOYOTA"],
        "FORD": ["FORD"],
        # More makes...
    },
    "technical_code_synonyms": {
        # Example: "1.8 TSI" -> ["1.8 TSI", "1.8TSI", "1.8 TSI 180"]
    },
    "vehicle_class_synonyms": {
        "SUV": ["SUV", "SPORT UTILITY VEHICLE"],
        "SEDAN": ["SEDAN", "SALOON"],
        "HATCHBACK": ["HATCHBACK", "HATCH"],
        "COUPE": ["COUPE", "COUPÉ"],
    },
}
|
||||
|
||||
class DeduplicationService:
    """Service for identifying and handling duplicate vehicle model records.

    A record is identified by the (make, technical code, vehicle class)
    triple. Each value is normalized before it is compared against the
    ``VehicleModelDefinition`` table.
    """

    # Escape character for LIKE patterns built from caller-supplied values.
    _LIKE_ESCAPE = "/"

    @staticmethod
    def _resolve_synonym(value: str, synonym_table: Dict[str, Any]) -> str:
        """Map an upper-cased *value* to its canonical key in *synonym_table*.

        The comparison is case-insensitive on the table side as well, so a
        synonym stored in mixed case still matches.

        Args:
            value: already stripped and upper-cased input.
            synonym_table: mapping of canonical name -> list of synonyms.

        Returns:
            The canonical key if *value* equals it or one of its synonyms,
            otherwise *value* unchanged.
        """
        for canonical, synonyms in synonym_table.items():
            if value == canonical.upper():
                return canonical
            if any(value == synonym.upper() for synonym in synonyms):
                return canonical
        return value

    @classmethod
    def _contains_pattern(cls, value: str) -> str:
        """Build a ``%value%`` LIKE pattern with wildcards in *value* escaped.

        ``%`` and ``_`` (and the escape character itself) are prefixed with
        ``_LIKE_ESCAPE`` so that they are matched literally instead of acting
        as SQL wildcards.
        """
        esc = cls._LIKE_ESCAPE
        escaped = (
            value.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )
        return f"%{escaped}%"

    @staticmethod
    def normalize_make(make: str) -> str:
        """Normalize a make name using the make synonym table.

        Args:
            make: raw make name; ``None`` or an empty string is tolerated.

        Returns:
            The canonical make if a synonym matches, otherwise the stripped,
            upper-cased input. Empty input yields ``""``.
        """
        if not make:
            return ""
        make_upper = make.strip().upper()
        return DeduplicationService._resolve_synonym(
            make_upper, MAPPING_DICTIONARY["make_synonyms"]
        )

    @staticmethod
    def normalize_technical_code(technical_code: Optional[str]) -> str:
        """Normalize a technical code (e.g. an engine code).

        The value is stripped, upper-cased and every character outside
        ``A-Z0-9`` is removed (whitespace, dots, dashes, non-ASCII letters).

        Returns:
            The normalized code, or ``""`` when the input is empty/``None``.
        """
        if not technical_code:
            return ""
        import re

        code = technical_code.strip().upper()
        return re.sub(r'[^A-Z0-9]', '', code)

    @staticmethod
    def normalize_vehicle_class(vehicle_class: Optional[str]) -> str:
        """Normalize a vehicle class using the vehicle class synonym table.

        Returns:
            The canonical class if a synonym matches, otherwise the stripped,
            upper-cased input. Empty/``None`` input yields ``""``.
        """
        if not vehicle_class:
            return ""
        class_upper = vehicle_class.strip().upper()
        return DeduplicationService._resolve_synonym(
            class_upper, MAPPING_DICTIONARY["vehicle_class_synonyms"]
        )

    @classmethod
    async def find_duplicate(
        cls,
        session: AsyncSession,
        make: str,
        technical_code: str,
        vehicle_class: str,
        exclude_id: Optional[int] = None
    ) -> Optional[VehicleModelDefinition]:
        """Look for an existing record matching the normalized triple.

        Matching is a case-insensitive substring search (``ILIKE %value%``)
        on each of the three columns.

        Args:
            session: SQLAlchemy async session.
            make: make name (e.g. "BMW").
            technical_code: technical code (e.g. "N47").
            vehicle_class: vehicle class (e.g. "SEDAN").
            exclude_id: record id to exclude from the search (e.g. the record
                being updated).

        Returns:
            The matching ``VehicleModelDefinition`` with the lowest id, or
            ``None`` when nothing matches.
        """
        norm_make = cls.normalize_make(make)
        norm_technical_code = cls.normalize_technical_code(technical_code)
        norm_vehicle_class = cls.normalize_vehicle_class(vehicle_class)

        # NOTE(review): an empty normalized value produces the pattern "%%",
        # which matches every non-NULL value in that column. Confirm whether
        # callers can pass empty fields and what they expect in that case.
        stmt = select(VehicleModelDefinition).where(
            and_(
                VehicleModelDefinition.make.ilike(
                    cls._contains_pattern(norm_make), escape=cls._LIKE_ESCAPE
                ),
                VehicleModelDefinition.technical_code.ilike(
                    cls._contains_pattern(norm_technical_code),
                    escape=cls._LIKE_ESCAPE,
                ),
                VehicleModelDefinition.vehicle_class.ilike(
                    cls._contains_pattern(norm_vehicle_class),
                    escape=cls._LIKE_ESCAPE,
                ),
            )
        )
        # Compare against None explicitly so that an id of 0 is still excluded.
        if exclude_id is not None:
            stmt = stmt.where(VehicleModelDefinition.id != exclude_id)

        # Substring matching may hit several rows; scalar_one_or_none() would
        # raise MultipleResultsFound in that case. Fetch a single row, ordered
        # by id so the result is deterministic.
        stmt = stmt.order_by(VehicleModelDefinition.id).limit(1)

        result = await session.execute(stmt)
        duplicate = result.scalars().first()

        if duplicate:
            logger.info(f"Duplikátum találva: ID {duplicate.id} - {duplicate.make} {duplicate.technical_code} {duplicate.vehicle_class}")
        return duplicate

    @classmethod
    async def ensure_no_duplicate(
        cls,
        session: AsyncSession,
        make: str,
        technical_code: str,
        vehicle_class: str,
        exclude_id: Optional[int] = None
    ) -> bool:
        """Check that no duplicate exists.

        Returns:
            ``True`` when ``find_duplicate`` finds nothing, ``False`` when a
            duplicate exists.
        """
        duplicate = await cls.find_duplicate(
            session, make, technical_code, vehicle_class, exclude_id
        )
        return duplicate is None

    @classmethod
    async def deduplicate_and_merge(
        cls,
        session: AsyncSession,
        new_record: Dict[str, Any],
        source_name: str = "manual"
    ) -> Dict[str, Any]:
        """Check a new record for duplicates and return normalized data.

        If a duplicate is found, the existing record's values are returned;
        otherwise the normalized values of the new record are returned.

        Args:
            session: SQLAlchemy async session.
            new_record: data of the new record (make, technical_code,
                vehicle_class, ...).
            source_name: data source name passed to ``unify_data``.

        Returns:
            Dict with keys:
            - is_duplicate: bool
            - existing_id: id of the existing record if duplicate, else None
            - normalized_data: dict with make, technical_code, vehicle_class
        """
        unified = unify_data(new_record, source_name)

        # Fall back to the raw make (and finally to "") when the unified
        # value is missing or empty, so None never reaches the normalizers.
        make = unified.get("normalized_make") or new_record.get("make") or ""
        technical_code = new_record.get("technical_code") or ""
        vehicle_class = new_record.get("vehicle_class") or ""

        duplicate = await cls.find_duplicate(
            session, make, technical_code, vehicle_class
        )

        if duplicate:
            return {
                "is_duplicate": True,
                "existing_id": duplicate.id,
                "normalized_data": {
                    "make": duplicate.make,
                    "technical_code": duplicate.technical_code,
                    "vehicle_class": duplicate.vehicle_class,
                }
            }

        # No duplicate: return the normalized values of the new record.
        return {
            "is_duplicate": False,
            "existing_id": None,
            "normalized_data": {
                "make": cls.normalize_make(make),
                "technical_code": cls.normalize_technical_code(technical_code),
                "vehicle_class": cls.normalize_vehicle_class(vehicle_class),
            }
        }
|
||||
Reference in New Issue
Block a user