feat(infra): Stabilized Docker env, fixed circular imports, enabled AI Enricher Robot v1.1
This commit is contained in:
66
backend/app/workers/ocr_robot.py
Normal file
66
backend/app/workers/ocr_robot.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import asyncio
|
||||
import os
|
||||
import logging
|
||||
from PIL import Image
|
||||
from sqlalchemy import select, update
|
||||
from app.db.session import SessionLocal
|
||||
from app.models.document import Document # Feltételezve
|
||||
from app.models.identity import User
|
||||
from app.services.ai_service import AIService
|
||||
|
||||
# Named logger used by this worker for all of its messages.
logger = logging.getLogger("Robot-OCR-V3")
# Root logging is configured at import time with INFO as the threshold.
logging.basicConfig(level=logging.INFO)

# Base directory for stored documents; the NAS_STORAGE_PATH environment
# variable overrides the built-in default.
NAS_BASE_PATH = os.environ.get("NAS_STORAGE_PATH", "/mnt/nas/user_vault")
|
||||
|
||||
class OCRRobot:
    """Worker that runs AI OCR over queued documents and archives the image.

    A document is picked up when its status is ``"pending_ocr"`` and the
    joined user has a ``PREMIUM_PLUS`` or ``VIP_PLUS`` subscription plan.
    """

    @classmethod
    async def process_queue(cls):
        """Process one batch (at most 10) of documents waiting for OCR.

        For each document the temporary upload is read and passed to
        ``AIService.analyze_document_image``. When a result comes back, a
        JPEG copy is written to
        ``NAS_BASE_PATH/<folder_slug>/<doc_type>/<document id>.jpg``, the
        OCR data and file link are stored on the row, and its status is set
        to ``"processed"``. The temporary file is deleted only after the
        commit succeeded.

        Per-document failures are logged and rolled back without aborting
        the batch. Errors raised while opening the session or running the
        queue query propagate to the caller.
        """
        async with SessionLocal() as db:
            # 1. Only queued documents whose owner has a premium plan.
            stmt = (
                select(Document, User)
                .join(User)
                .where(
                    Document.status == "pending_ocr",
                    User.subscription_plan.in_(["PREMIUM_PLUS", "VIP_PLUS"]),
                )
                .limit(10)
            )
            res = await db.execute(stmt)

            # Snapshot every column value needed later while the rows are
            # freshly loaded. rollback() (and commit(), unless the session
            # factory sets expire_on_commit=False) expires loaded ORM
            # instances; reading an expired attribute afterwards needs a lazy
            # load, which an async session cannot perform implicitly. Without
            # the snapshot one failed document breaks the rest of the batch.
            tasks = [
                (
                    doc,
                    doc.id,
                    doc.filename,
                    doc.temp_path,
                    doc.doc_type,
                    user.id,
                    user.folder_slug,
                )
                for doc, user in res.all()
            ]

            for (
                doc,
                doc_id,
                filename,
                temp_path,
                doc_type,
                user_id,
                folder_slug,
            ) in tasks:
                try:
                    logger.info(f"📸 OCR feldolgozás: {filename} (User: {user_id})")

                    # 2. AI OCR call on the raw bytes of the temporary upload.
                    # NOTE(review): this read and the JPEG conversion below
                    # are synchronous and block the event loop while running.
                    with open(temp_path, "rb") as f:
                        image_bytes = f.read()

                    ocr_result = await AIService.analyze_document_image(
                        image_bytes, doc_type
                    )
                    if not ocr_result:
                        # Nothing to store; the row stays "pending_ocr" and
                        # will be selected again by a later run.
                        logger.warning(
                            f"⚠️ Üres OCR eredmény ({doc_id}), "
                            "a dokumentum várólistán marad"
                        )
                        continue

                    # 3. Store a JPEG copy in the user's vault directory.
                    # NOTE(review): folder_slug and doc_type are joined into
                    # the path unvalidated - confirm they cannot contain
                    # path separators or "..".
                    target_dir = os.path.join(NAS_BASE_PATH, folder_slug, doc_type)
                    os.makedirs(target_dir, exist_ok=True)
                    final_path = os.path.join(target_dir, f"{doc_id}.jpg")
                    cls.resize_and_save(temp_path, final_path)

                    # 4. Persist the result. Assigning to attributes does not
                    # read the old values, so it is safe on expired rows.
                    doc.ocr_data = ocr_result
                    doc.file_link = final_path
                    doc.status = "processed"
                    await db.commit()
                except Exception as e:
                    # Top-level boundary for one document: log with the
                    # traceback and undo any pending changes.
                    logger.exception(f"❌ OCR Hiba ({doc_id}): {e}")
                    await db.rollback()
                else:
                    # Delete the temporary file only once the commit went
                    # through; if the commit fails the upload must survive so
                    # the document can be retried. A failed delete leaves a
                    # stray file but the document is already processed.
                    try:
                        os.remove(temp_path)
                    except OSError as e:
                        logger.warning(
                            f"⚠️ Ideiglenes fájl törlése sikertelen ({doc_id}): {e}"
                        )

    @staticmethod
    def resize_and_save(source, target) -> None:
        """Re-encode the image at *source* as an RGB JPEG written to *target*.

        Despite the name, the image dimensions are not changed: the image is
        only converted to RGB mode and saved as an optimized JPEG at
        quality 85.
        """
        with Image.open(source) as img:
            img.convert("RGB").save(target, "JPEG", quality=85, optimize=True)
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: process a single batch of the OCR queue and exit.
    pending_batch = OCRRobot.process_queue()
    asyncio.run(pending_batch)
|
||||
Reference in New Issue
Block a user