# Directory the scanner writes page images to and where the PDFs are created.
_IMG_DIR = "/var/www/html/img"
# A4 paper size in millimetres and the resolution (DPI) the canvas is sized for.
_A4_SIZE_MM = (210, 297)
_SCAN_DPI = 200


def create_pdf(scanner):
    """Combine all finished scanned pages into one multi-page PDF.

    Every page image is pasted into the top-left corner of a white A4 canvas
    and the canvases are written to ``/var/www/html/img/out.pdf``.

    :param scanner: object whose ``get_pages()`` returns pages carrying a
        ``filename`` attribute (relative to ``/var/www/html/img``)
    :raises ValueError: if there is no page with a filename to write
    """
    canvas_size = tuple(int(mm * _SCAN_DPI / 25.4) for mm in _A4_SIZE_MM)
    images = []
    for page in scanner.get_pages():
        # A page without a filename has not been written to disk (yet);
        # formatting it into a path would try to open ".../None".
        if not page.filename:
            continue
        a4im = Image.new('RGB', canvas_size, (255, 255, 255))
        # Close the source file as soon as its pixels are copied.
        with Image.open(f"{_IMG_DIR}/{page.filename}") as img:
            # Paste at the upper-left corner. Passing img.getbbox() as a
            # 4-tuple region fails whenever the non-zero bounding box is
            # smaller than the image itself.
            a4im.paste(img, (0, 0))
        images.append(a4im)
    if not images:
        raise ValueError("No scanned pages available to build a PDF from")
    # Store the DPI the canvas was sized for, so the PDF page is physically
    # A4 instead of being interpreted at Pillow's default of 72 DPI.
    images[0].save(
        f"{_IMG_DIR}/out.pdf",
        save_all=True,
        append_images=images[1:],
        resolution=float(_SCAN_DPI),
    )


def ocr_pdf():
    """Run OCRmyPDF on ``out.pdf`` and write the result to ``final.pdf``."""
    ocrmypdf.ocr(f"{_IMG_DIR}/out.pdf", f"{_IMG_DIR}/final.pdf")
def order_points(pts):
    '''Rearrange four corner coordinates to the order:
    top-left, top-right, bottom-right, bottom-left.

    Accepts a plain sequence of ``[x, y]`` pairs as well as an OpenCV contour
    array of shape ``(N, 1, 2)``. Returns a nested list of ints.
    '''
    # Flatten to (N, 2): with the extra contour axis in place the axis-1
    # reductions below would no longer yield one value per point.
    pts = np.array(pts).reshape(-1, 2)
    rect = np.zeros((4, 2), dtype='float32')
    s = pts.sum(axis=1)
    # Top-left point will have the smallest sum.
    rect[0] = pts[np.argmin(s)]
    # Bottom-right point will have the largest sum.
    rect[2] = pts[np.argmax(s)]

    diff = np.diff(pts, axis=1)
    # Top-right point will have the smallest difference (y - x).
    rect[1] = pts[np.argmin(diff)]
    # Bottom-left will have the largest difference.
    rect[3] = pts[np.argmax(diff)]
    # Return the ordered coordinates.
    return rect.astype('int').tolist()


def find_dest(pts):
    '''Compute the axis-aligned destination rectangle for four ordered corners.

    ``pts`` must be ordered top-left, top-right, bottom-right, bottom-left.
    The rectangle starts at the origin; its width and height are the longer
    of the two opposing edges, truncated to ints.
    '''
    (tl, tr, br, bl) = pts
    # Finding the maximum width.
    width_bottom = np.hypot(br[0] - bl[0], br[1] - bl[1])
    width_top = np.hypot(tr[0] - tl[0], tr[1] - tl[1])
    max_width = max(int(width_bottom), int(width_top))

    # Finding the maximum height.
    height_right = np.hypot(tr[0] - br[0], tr[1] - br[1])
    height_left = np.hypot(tl[0] - bl[0], tl[1] - bl[1])
    max_height = max(int(height_right), int(height_left))
    # Final destination co-ordinates.
    destination_corners = [[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]]

    return order_points(destination_corners)


def _find_page_quad(img):
    '''Return the four-point outline of the page in ``img``, or ``None``.

    Only the five largest contours are examined; the first one whose polygon
    approximation has exactly four corners is returned.
    '''
    # Repeated closing operation to remove text from the document.
    kernel = np.ones((5, 5), np.uint8)
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)
    # GrabCut
    mask = np.zeros(img.shape[:2], np.uint8)
    bgd_model = np.zeros((1, 65), np.float64)
    fgd_model = np.zeros((1, 65), np.float64)
    rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
    cv2.grabCut(img, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
    # Mask values 0 and 2 are treated as background and blanked out.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img = img * mask2[:, :, np.newaxis]

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (11, 11), 0)
    # Edge detection.
    canny = cv2.Canny(gray, 0, 200)
    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))

    # Finding contours for the detected edges, keeping the five largest.
    contours, _hierarchy = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    for contour in candidates:
        # Approximate the contour.
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Accept only an approximation with exactly four points.
        if len(approx) == 4:
            return approx
    return None


def correct_image(img_path):
    '''Detect the page in the image at ``img_path`` and straighten it in place.

    The image is scaled down so its longest side is at most 1080 px, the page
    outline is detected and a perspective warp maps it onto an upright
    rectangle, which overwrites ``img_path``.

    NOTE: the file written back has the reduced working resolution, not the
    resolution of the input file.

    :param img_path: path of the image file to read and overwrite
    :returns: the corrected image; if no usable four-corner outline is found
        the resized original is returned and the file is left untouched
    :raises ValueError: if the file cannot be read as an image
    '''
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    # Resize image to workable size.
    dim_limit = 1080
    max_dim = max(img.shape[:2])
    if max_dim > dim_limit:
        resize_scale = dim_limit / max_dim
        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)
    # Keep the resized original; detection works on a modified copy.
    orig_img = img.copy()

    quad = _find_page_quad(img)
    if quad is None:
        # Warping with a non-quadrilateral outline is not possible.
        return orig_img

    corners = order_points(quad)
    destination_corners = find_dest(corners)
    width, height = destination_corners[2]
    if width <= 0 or height <= 0:
        # Degenerate outline: there is nothing to warp onto.
        return orig_img

    # Getting the homography; OpenCV requires float32 point arrays here.
    M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners),
                                    cv2.DECOMP_LU)
    # Perspective transform using homography.
    final = cv2.warpPerspective(orig_img, M, (width, height), flags=cv2.INTER_LINEAR)
    cv2.imwrite(img_path, final)
    return final
storage_path + self.status = Status.INITIALIZED def __get_device_id(self): """ @@ -27,6 +38,7 @@ class Scanner: :returns: Device id of the first scan device """ devs = self.api.list_devices(Libinsane.DeviceLocations.LOCAL_ONLY) + self.logger.info("Using device: %s", devs[0].get_dev_id()) return devs[0].get_dev_id() def __raw_to_img(self, params, img_bytes): @@ -44,15 +56,24 @@ class Scanner: def __write_file(self, scan_params, data, page_index, last_file): data = b"".join(data) if scan_params.get_format() == Libinsane.ImgFormat.RAW_RGB_24: + filesize = len(data) - img = self.__raw_to_img(scan_params, data) - filename = f"out{page_index}.png" - img.save(os.path.join(self.storage_path, filename), format="PNG") page = Page() - page.filename = filename + page.status = PageStatus.PROCESSING page.size_bytes = filesize self.scanned_pages.append(page) + img = self.__raw_to_img(scan_params, data) + filename = f"out{page_index}.jpeg" + img = self.tesseract.rotate_img(img) + img_path = os.path.join(self.storage_path, filename) + img.save(img_path, format="jpeg", quality=95) + #correct_image(img_path) + page.filename = filename + page.status = PageStatus.DONE + self.scanned_pages[page_index] = page if last_file: + #self.tesseract.create_pdf(scanner=self) + #ocr_pdf() self.status = Status.DONE def __set_defaults(self): @@ -61,23 +82,31 @@ class Scanner: opts = {opt.get_name(): opt for opt in opts} opts["sleeptimer"].set_value(1) opts["resolution"].set_value(200) + opts["swcrop"].set_value(True) + opts["swdeskew"].set_value(True) + opts["page-height"].set_value(300) + opts["mode"].set_value("Color") dev.close() def __scan(self): + self.logger.info("Scan requested") self.status = Status.RUNNING source = self.api.get_device(self.device_id) opts = source.get_options() opts = {opt.get_name(): opt for opt in opts} if opts["cover-open"].get_value() == True: + self.logger.warn("Cover open. 
class Tesseract:
    """Wrapper around a pyocr tool for orientation correction and PDF export."""

    def __init__(self, logger = logging.getLogger()):
        """Select the OCR tool to use.

        :param logger: logger that receives all messages of this instance
        :raises RuntimeError: if pyocr reports no available OCR tool
        """
        self.logger = logger
        tools = pyocr.get_available_tools()
        if not tools:
            self.logger.error("No OCR tool found")
            raise RuntimeError("No OCR tool found")
        # Prefer the libtesseract binding, whose PDF builder create_pdf()
        # uses. Fall back to the first available tool instead of indexing a
        # fixed position, which fails when fewer than two tools are installed.
        if pyocr.libtesseract in tools:
            self.tool = pyocr.libtesseract
        else:
            self.tool = tools[0]
        self.logger.info("Will use tool '%s'", self.tool.get_name())

    def rotate_img(self, image: Image.Image) -> Image.Image:
        """Rotate ``image`` by the angle reported by orientation detection.

        :param image: the scanned page
        :returns: the rotated image, or ``image`` unchanged when the
            orientation could not be detected
        """
        try:
            orientation = self.tool.detect_orientation(
                image,
                lang='deu'
            )
        except pyocr.PyocrException as exc:
            # Detection failure must not abort processing of the page.
            self.logger.warning("Tesseract: Orientation detection failed: %s", exc)
            return image
        self.logger.info("Tesseract: Rotate by %s degrees to correct (Confidence: %s)", orientation["angle"], orientation["confidence"])
        return image.rotate(orientation["angle"], expand=True)

    def create_pdf(self, scanner):
        """Build ``/var/www/html/img/out`` as a PDF from the scanner's pages.

        :param scanner: object whose ``get_pages()`` returns pages carrying a
            ``filename`` attribute (relative to ``/var/www/html/img``)
        """
        builder = pyocr.libtesseract.LibtesseractPdfBuilder()
        builder.set_lang("deu")
        builder.set_output_file("/var/www/html/img/out")
        for page in scanner.get_pages():
            # A page without a filename has no image file to add.
            if not page.filename:
                continue
            filename = f"/var/www/html/img/{page.filename}"
            self.logger.info(filename)
            img = Image.open(filename)
            builder.add_image(img)
        builder.build()