Files
service-finder/backend/app/workers/ocr_robot.py

66 lines
2.6 KiB
Python

import asyncio
import os
import logging
from PIL import Image
from sqlalchemy import select, update
from app.db.session import SessionLocal
from app.models.document import Document # Feltételezve
from app.models.identity import User
from app.services.ai_service import AIService
# Root logging is configured at import time so that INFO-level messages from
# this worker are emitted when it runs as a standalone script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-OCR-V3")

# Base directory under which processed images are stored. It can be overridden
# through the NAS_STORAGE_PATH environment variable.
NAS_BASE_PATH = os.environ.get("NAS_STORAGE_PATH", "/mnt/nas/user_vault")
class OCRRobot:
    """Batch worker that runs AI OCR on queued documents and archives the images.

    All functionality is exposed through class/static methods; no instance
    state is used.
    """

    @classmethod
    async def process_queue(cls):
        """Process up to 10 documents whose status is ``"pending_ocr"``.

        Only documents joined to a user whose ``subscription_plan`` is
        ``"PREMIUM_PLUS"`` or ``"VIP_PLUS"`` are selected. For each document:

        1. the temporary file is read and handed to
           ``AIService.analyze_document_image`` together with the doc type;
        2. if the result is truthy, the image is re-encoded as JPEG to
           ``NAS_BASE_PATH/<folder_slug>/<doc_type>/<doc id>.jpg``;
        3. ``ocr_data``, ``file_link`` and ``status = "processed"`` are set on
           the row and committed;
        4. only after a successful commit is the temporary file removed.

        A failure on one document is logged (with traceback) and rolled back,
        then the loop continues with the next document. Nothing is re-raised
        from the per-document handling. Documents with an empty OCR result are
        left untouched (still ``"pending_ocr"``) and a warning is logged.
        """
        async with SessionLocal() as db:
            # 1. Only queued documents that belong to premium-tier users.
            stmt = select(Document, User).join(User).where(
                Document.status == "pending_ocr",
                User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS"])
            ).limit(10)
            res = await db.execute(stmt)

            # Copy the plain column values out immediately. rollback() (and
            # commit(), unless the session factory sets expire_on_commit=False)
            # expires loaded instances; reading an expired attribute on an
            # async session would need implicit I/O and fail, which would
            # break every document after the first commit/rollback.
            tasks = [
                (doc, doc.id, doc.filename, doc.temp_path, doc.doc_type,
                 user.id, user.folder_slug)
                for doc, user in res.all()
            ]

            loop = asyncio.get_running_loop()
            for (doc, doc_id, filename, temp_path, doc_type,
                 user_id, folder_slug) in tasks:
                try:
                    logger.info("📸 OCR feldolgozás: %s (User: %s)", filename, user_id)

                    # 2. AI OCR call. The file read is blocking, so it runs in
                    # the default executor instead of on the event loop.
                    image_bytes = await loop.run_in_executor(
                        None, cls._read_bytes, temp_path
                    )
                    ocr_result = await AIService.analyze_document_image(image_bytes, doc_type)
                    if not ocr_result:
                        # Keep the document queued, but make the skip visible.
                        logger.warning(
                            "OCR returned no result for document %s; leaving it pending",
                            doc_id,
                        )
                        continue

                    # 3. Store the image as JPEG in the user's vault folder.
                    target_dir = os.path.join(NAS_BASE_PATH, folder_slug, doc_type)
                    final_path = os.path.join(target_dir, f"{doc_id}.jpg")
                    await loop.run_in_executor(
                        None, cls._store_image, temp_path, target_dir, final_path
                    )

                    # 4. Persist the result.
                    doc.ocr_data = ocr_result
                    doc.file_link = final_path
                    doc.status = "processed"
                    await db.commit()

                    # Remove the temporary file only once the commit succeeded;
                    # otherwise a failed commit would leave a still-pending row
                    # whose source file no longer exists.
                    try:
                        os.remove(temp_path)
                    except OSError as cleanup_err:
                        # The document is already processed and committed; a
                        # leftover temp file must not undo that.
                        logger.warning(
                            "Could not remove temp file %s: %s", temp_path, cleanup_err
                        )
                except Exception as e:
                    # Top-level boundary for a single document: log with
                    # traceback, discard pending changes, move on.
                    logger.exception("❌ OCR Hiba (%s): %s", doc_id, e)
                    await db.rollback()

    @staticmethod
    def _read_bytes(path):
        """Return the full binary content of the file at *path*."""
        with open(path, "rb") as f:
            return f.read()

    @classmethod
    def _store_image(cls, source, target_dir, target):
        """Create *target_dir* if needed and write *source* to *target* as JPEG."""
        os.makedirs(target_dir, exist_ok=True)
        cls.resize_and_save(source, target)

    @staticmethod
    def resize_and_save(source, target):
        """Re-encode the image at *source* as an RGB JPEG written to *target*.

        Despite the name, no resizing is performed here: the image is only
        converted to RGB and saved with ``quality=85`` and ``optimize=True``.

        Args:
            source: Path (or file object) accepted by ``PIL.Image.open``.
            target: Destination path for the JPEG file.
        """
        with Image.open(source) as img:
            img.convert('RGB').save(target, "JPEG", quality=85, optimize=True)
if __name__ == "__main__":
    # Script entry point: run a single pass over the OCR queue and exit.
    queue_pass = OCRRobot.process_queue()
    asyncio.run(queue_pass)