# File-viewer metadata captured with the paste: 66 lines, 2.6 KiB, Python.
import asyncio
import logging
import os

from PIL import Image
from sqlalchemy import select, update

from app.db.session import SessionLocal
from app.models.document import Document  # assumed import path -- confirm
from app.models.identity import User
from app.services.ai_service import AIService

# Root logging is configured at import time so the robot logs at INFO level
# even when it is started directly as a script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Robot-OCR-V3")

# Base directory under which processed images are stored; overridable through
# the NAS_STORAGE_PATH environment variable.
NAS_BASE_PATH = os.getenv("NAS_STORAGE_PATH", "/mnt/nas/user_vault")

class OCRRobot:
    """Batch worker that runs AI OCR over queued documents of premium users.

    ``process_queue`` is the entry point; ``resize_and_save`` is the image
    conversion step it relies on.
    """

    @classmethod
    async def process_queue(cls):
        """Process one batch (at most 10) of documents waiting for OCR.

        For every ``Document`` with status ``"pending_ocr"`` whose owner is on
        the ``PREMIUM_PLUS`` or ``VIP_PLUS`` plan, the temporary upload is sent
        to ``AIService.analyze_document_image``. When a result comes back, the
        image is stored as JPEG under
        ``NAS_BASE_PATH/<folder_slug>/<doc_type>/<doc.id>.jpg`` and the row is
        updated (``ocr_data``, ``file_link``, ``status = "processed"``).

        A failure on one document is logged with its traceback and rolled
        back; the remaining documents of the batch are still processed.

        Returns:
            None.
        """
        loop = asyncio.get_running_loop()

        async with SessionLocal() as db:
            # 1. Only queued documents that belong to premium-plan users.
            # NOTE(review): documents that fail or yield no OCR result keep
            # status "pending_ocr" and the query has no ORDER BY, so the same
            # rows may be selected again on the next run -- consider a failure
            # status or a retry counter.
            stmt = (
                select(Document, User)
                .join(User)
                .where(
                    Document.status == "pending_ocr",
                    User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS"]),
                )
                .limit(10)
            )
            rows = (await db.execute(stmt)).all()

            # Snapshot every value needed later as plain Python objects.
            # commit()/rollback() may expire ORM instances (depending on the
            # session configuration) and an async session cannot lazy-load
            # expired attributes implicitly.
            jobs = [
                (
                    doc,
                    doc.id,
                    doc.filename,
                    doc.temp_path,
                    doc.doc_type,
                    user.id,
                    user.folder_slug,
                )
                for doc, user in rows
            ]

            for (doc, doc_id, filename, temp_path, doc_type,
                 user_id, folder_slug) in jobs:
                try:
                    logger.info(f"📸 OCR feldolgozás: {filename} (User: {user_id})")

                    # 2. AI OCR call. The blocking file read runs in the
                    # default executor so the event loop is not stalled.
                    image_bytes = await loop.run_in_executor(
                        None, cls._read_bytes, temp_path
                    )
                    ocr_result = await AIService.analyze_document_image(
                        image_bytes, doc_type
                    )
                    if not ocr_result:
                        # Nothing to persist; the document stays queued.
                        logger.warning(f"⚠️ OCR nem adott eredményt ({doc_id})")
                        continue

                    # 3. Store the image on the NAS as JPEG.
                    target_dir = os.path.join(NAS_BASE_PATH, folder_slug, doc_type)
                    final_path = os.path.join(target_dir, f"{doc_id}.jpg")
                    await loop.run_in_executor(
                        None, cls._store_image, temp_path, target_dir, final_path
                    )

                    # 4. Update the database row.
                    doc.ocr_data = ocr_result
                    doc.file_link = final_path
                    doc.status = "processed"
                    await db.commit()
                except Exception as exc:
                    # Boundary handler: one bad document must not abort the
                    # batch. logger.exception keeps the traceback.
                    logger.exception(f"❌ OCR Hiba ({doc_id}): {exc}")
                    await db.rollback()
                    continue

                # Delete the temporary upload only after the commit succeeded.
                # Otherwise a failed commit would leave a still-pending
                # document without its source image.
                cls._remove_temp_file(temp_path)

    @staticmethod
    def _read_bytes(path):
        """Return the full binary content of the file at *path*."""
        with open(path, "rb") as fh:
            return fh.read()

    @classmethod
    def _store_image(cls, source, target_dir, target):
        """Create *target_dir* if needed and write *source* to *target* as JPEG."""
        os.makedirs(target_dir, exist_ok=True)
        cls.resize_and_save(source, target)

    @staticmethod
    def _remove_temp_file(path):
        """Delete the temporary upload, logging instead of raising on failure."""
        try:
            os.remove(path)
        except OSError as exc:
            # The document is already committed as processed; a leftover
            # temp file is not a reason to fail.
            logger.warning(f"⚠️ Ideiglenes fájl törlése sikertelen ({path}): {exc}")

    @staticmethod
    def resize_and_save(source, target):
        """Convert the image at *source* to RGB and save it to *target* as JPEG.

        The file is written with ``quality=85`` and ``optimize=True``.

        NOTE(review): despite the name, no resizing is performed here; the
        pixel dimensions of the image are left unchanged.

        Args:
            source: Path of the image to read.
            target: Path of the JPEG file to write.
        """
        with Image.open(source) as img:
            img.convert('RGB').save(target, "JPEG", quality=85, optimize=True)

if __name__ == "__main__":
    # Script entry point: process a single batch of queued documents and exit.
    asyncio.run(OCRRobot.process_queue())