# service-finder/backend/app/workers/ocr_robot.py

import asyncio
import os
import logging
from PIL import Image
from sqlalchemy import select, update
from app.db.session import SessionLocal
from app.models.document import Document # Feltételezve
from app.models.identity import User
from app.services.ai_service import AIService

# Root logging is configured at import time (INFO and above); this worker
# logs through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-OCR-V3")

# Base directory under which processed documents are stored. It can be
# overridden with the NAS_STORAGE_PATH environment variable.
NAS_BASE_PATH = os.environ.get("NAS_STORAGE_PATH", "/mnt/nas/user_vault")

class OCRRobot:
    """Worker that runs AI OCR over queued documents of premium users.

    The public entry point is :meth:`process_queue`; :meth:`resize_and_save`
    is the image re-encoding step it uses when storing the result.
    """

    @classmethod
    async def process_queue(cls):
        """Process up to ten documents that are waiting in ``pending_ocr``.

        For every selected document the method:

        1. reads the temporary upload and sends it to
           ``AIService.analyze_document_image``;
        2. if a truthy result comes back, stores a JPEG copy under
           ``NAS_BASE_PATH/<folder_slug>/<doc_type>/<doc.id>.jpg``;
        3. writes ``ocr_data``, ``file_link`` and ``status="processed"`` and
           commits;
        4. removes the temporary file, but only after the commit succeeded.

        A failure in one document is logged with its traceback and rolled
        back; the remaining documents are still processed. Documents whose
        OCR result is empty are left untouched (they stay in ``pending_ocr``).
        """
        loop = asyncio.get_running_loop()

        async with SessionLocal() as db:
            # 1. Only queued documents whose owner has a premium plan.
            stmt = select(Document, User).join(User).where(
                Document.status == "pending_ocr",
                User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS"])
            ).limit(10)

            res = await db.execute(stmt)

            # Snapshot every scalar we need while the rows are freshly loaded.
            # rollback() expires loaded ORM state (and commit() does too unless
            # the session uses expire_on_commit=False); reading an expired
            # attribute from async code would require implicit I/O and fail,
            # so later iterations must not touch doc.* / user.* for reads.
            tasks = [
                (
                    doc,
                    doc.id,
                    doc.filename,
                    doc.temp_path,
                    doc.doc_type,
                    user.id,
                    user.folder_slug,
                )
                for doc, user in res.all()
            ]

            for doc, doc_id, filename, temp_path, doc_type, user_id, folder_slug in tasks:
                try:
                    logger.info(f"📸 OCR feldolgozás: {filename} (User: {user_id})")

                    # 2. AI OCR call. The file read is blocking, so it runs in
                    # the default executor instead of on the event loop.
                    image_bytes = await loop.run_in_executor(
                        None, cls._read_bytes, temp_path
                    )

                    ocr_result = await AIService.analyze_document_image(image_bytes, doc_type)

                    if not ocr_result:
                        # Nothing to persist; the document keeps its status.
                        logger.warning(
                            f"OCR returned no result ({doc_id}); document left in pending_ocr"
                        )
                        continue

                    # 3. Store the image as JPEG in its final location.
                    # NOTE(review): the original comment announced thumbnail /
                    # standard resizing, but resize_and_save only re-encodes.
                    target_dir = os.path.join(NAS_BASE_PATH, folder_slug, doc_type)
                    final_path = os.path.join(target_dir, f"{doc_id}.jpg")
                    await loop.run_in_executor(
                        None, cls._store_image, temp_path, target_dir, final_path
                    )

                    # 4. Database update. Assigning attributes does not need
                    # the expired values; the flush inside commit() handles it.
                    doc.ocr_data = ocr_result
                    doc.file_link = final_path
                    doc.status = "processed"

                    await db.commit()
                except Exception as e:
                    # logger.exception keeps the traceback; doc_id is the
                    # snapshot, so logging cannot itself trigger a lazy load.
                    logger.exception(f"❌ OCR Hiba ({doc_id}): {e}")
                    await db.rollback()
                else:
                    # Remove the temporary file only once the row is durably
                    # marked as processed; deleting it before the commit would
                    # make the document unrecoverable if the commit failed.
                    try:
                        os.remove(temp_path)
                    except OSError as e:
                        logger.warning(
                            f"Could not remove temp file {temp_path} ({doc_id}): {e}"
                        )

    @staticmethod
    def _read_bytes(path):
        """Return the full binary content of the file at *path*."""
        with open(path, "rb") as f:
            return f.read()

    @classmethod
    def _store_image(cls, source, target_dir, target):
        """Create *target_dir* if needed and write *source* to *target* as JPEG."""
        os.makedirs(target_dir, exist_ok=True)
        cls.resize_and_save(source, target)

    @staticmethod
    def resize_and_save(source, target):
        """Re-encode the image at *source* as an RGB JPEG written to *target*.

        The image is converted to RGB mode and saved with ``quality=85`` and
        ``optimize=True``.

        NOTE(review): despite the name, no resizing is performed here; the
        image is saved with its original dimensions.
        """
        with Image.open(source) as img:
            img.convert('RGB').save(target, "JPEG", quality=85, optimize=True)

# Script entry point: run a single pass over the OCR queue, then exit.
if __name__ == "__main__":
    asyncio.run(OCRRobot.process_queue())