mirror of
https://github.com/BluemediaGER/ScanOS.git
synced 2024-11-25 09:35:30 +01:00
Add OCR and image correction
This commit is contained in:
parent
e74d3a4e27
commit
e93117aeb4
0
backend/app/backends/__init__.py
Normal file
0
backend/app/backends/__init__.py
Normal file
16
backend/app/backends/common.py
Normal file
16
backend/app/backends/common.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
from PIL import Image
|
||||
import ocrmypdf
|
||||
|
||||
def create_pdf(scanner, image_dir="/var/www/html/img", output_name="out.pdf", dpi=200):
    """Combine the scanned pages into one multi-page PDF of A4-sized sheets.

    Every page image is pasted into the top-left corner of a white A4 canvas
    (210 mm x 297 mm at ``dpi`` pixels per inch) and the canvases are written
    as one PDF to ``image_dir``.

    :param scanner: object providing ``get_pages()``; each returned page must
        expose a ``filename`` attribute naming an image inside ``image_dir``
    :param image_dir: directory that holds the page images and receives the PDF
    :param output_name: file name of the PDF that is written
    :param dpi: pixel density used to size the A4 canvas and stored in the PDF
    :returns: None; nothing is written when there is no page with a file name
    """
    # 25.4 mm per inch: convert the A4 dimensions from millimetres to pixels.
    a4_size = (int(210 * dpi / 25.4), int(297 * dpi / 25.4))

    sheets = []
    for page in scanner.get_pages():
        # Pages without a file name have no image on disk yet.
        if not page.filename:
            continue
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(f"{image_dir}/{page.filename}") as img:
            sheet = Image.new('RGB', a4_size, (255, 255, 255))
            # Paste at the origin. Using img.getbbox() as the box raises
            # ValueError as soon as the image has an all-black border, because
            # the box then no longer matches the image size.
            sheet.paste(img, (0, 0))
        sheets.append(sheet)

    if not sheets:
        return

    # Passing the resolution makes the physical page size of the PDF real A4
    # instead of the 72 DPI default.
    sheets[0].save(f"{image_dir}/{output_name}", save_all=True,
                   append_images=sheets[1:], resolution=dpi)
|
||||
|
||||
def ocr_pdf(input_path='/var/www/html/img/out.pdf', output_path='/var/www/html/img/final.pdf'):
    """Run OCRmyPDF on a PDF and write the result to a second file.

    The defaults reproduce the previously hard-coded locations, so calling
    ``ocr_pdf()`` without arguments behaves exactly as before.

    :param input_path: PDF that is handed to ``ocrmypdf.ocr`` as input
    :param output_path: destination file handed to ``ocrmypdf.ocr``
    """
    ocrmypdf.ocr(input_path, output_path)
|
0
backend/app/backends/email.py
Normal file
0
backend/app/backends/email.py
Normal file
|
@ -1,9 +1,11 @@
|
|||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
import app.scanner.enums as scan
|
||||
|
||||
class ScanPage(BaseModel):
|
||||
filename: str
|
||||
filename: Optional[str]
|
||||
size_bytes: int
|
||||
status: scan.PageStatus
|
||||
|
||||
class Config():
|
||||
orm_mode = True
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import threading
|
||||
import threading, logging
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Annotated
|
||||
|
||||
|
@ -7,13 +7,25 @@ from fastapi import FastAPI, Depends
|
|||
from app.data import models
|
||||
from app.data.database import SessionLocal, engine
|
||||
|
||||
from uvicorn.logging import DefaultFormatter
|
||||
|
||||
from app.scanner.scanner import Scanner
|
||||
from app.scanner.scanner import Status as ScannerStatus
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger()
|
||||
__syslog = logging.StreamHandler()
|
||||
__syslog.setFormatter(DefaultFormatter(fmt="%(levelprefix)s %(message)s", use_colors=True))
|
||||
logger.setLevel(logging.INFO)
|
||||
logger.addHandler(__syslog)
|
||||
|
||||
# Create database
|
||||
models.Base.metadata.create_all(bind=engine)
|
||||
|
||||
__scanner = Scanner("/var/www/html/img")
|
||||
# Set up scanner instance
|
||||
__scanner = Scanner("/var/www/html/img", logger)
|
||||
|
||||
# Preload scanner after FastAPI start
|
||||
@asynccontextmanager
|
||||
async def __lifespan(app: FastAPI):
|
||||
threading.Thread(target=__scanner.preload).start()
|
||||
|
|
|
@ -8,6 +8,10 @@ class Status(Enum):
|
|||
ERR_NO_PAPER = "err_no_paper"
|
||||
ERR_COVER_OPEN = "err_cover_open"
|
||||
|
||||
class PageStatus(Enum):
|
||||
PROCESSING = "processing"
|
||||
DONE = "done"
|
||||
|
||||
class Setting(Enum):
|
||||
PAPER_SOURCE = "source"
|
||||
COLOR_MODE = "color"
|
||||
|
|
97
backend/app/scanner/processing.py
Normal file
97
backend/app/scanner/processing.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def order_points(pts):
    '''Rearrange four corner coordinates to the order:
    top-left, top-right, bottom-right, bottom-left.

    pts may be a plain list of [x, y] pairs or a contour-style array with a
    singleton middle axis (shape (N, 1, 2)); both are flattened to (N, 2)
    before the corners are picked.

    Returns a list of four [x, y] lists with integer coordinates.'''
    # Normalise the input to shape (N, 2). Without this, an (N, 1, 2) array
    # makes sum(axis=1) return (N, 2) and argmin/argmax yield flat indices
    # that point at the wrong row or past the end of the array.
    pts = np.asarray(pts).reshape(-1, 2)
    rect = np.zeros((4, 2), dtype='float32')

    s = pts.sum(axis=1)
    # Top-left point will have the smallest sum.
    rect[0] = pts[np.argmin(s)]
    # Bottom-right point will have the largest sum.
    rect[2] = pts[np.argmax(s)]

    # np.diff along axis 1 yields y - x for every point.
    diff = np.diff(pts, axis=1)
    # Top-right point will have the smallest difference.
    rect[1] = pts[np.argmin(diff)]
    # Bottom-left will have the largest difference.
    rect[3] = pts[np.argmax(diff)]

    # Return the ordered coordinates as plain Python integers.
    return rect.astype('int').tolist()
|
||||
|
||||
def find_dest(pts):
    '''Build the upright destination rectangle for four ordered corners.

    pts holds the corners in the order top-left, top-right, bottom-right,
    bottom-left. The rectangle starts at the origin; its width and height are
    the longer of the two opposing edge lengths, truncated to integers.

    Returns the four rectangle corners after passing them through
    order_points.'''
    top_left, top_right, bottom_right, bottom_left = pts

    def edge_length(p, q):
        # Euclidean distance between two [x, y] points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Longest horizontal edge (bottom edge vs. top edge).
    bottom_width = int(edge_length(bottom_right, bottom_left))
    top_width = int(edge_length(top_right, top_left))
    target_width = max(bottom_width, top_width)

    # Longest vertical edge (right edge vs. left edge).
    right_height = int(edge_length(top_right, bottom_right))
    left_height = int(edge_length(top_left, bottom_left))
    target_height = max(right_height, left_height)

    return order_points([
        [0, 0],
        [target_width, 0],
        [target_width, target_height],
        [0, target_height],
    ])
|
||||
|
||||
def correct_image(img_path):
    '''Find the document inside an image and overwrite the file with a
    perspective-corrected crop of it.

    Steps: load the image, shrink it so its largest dimension is at most
    1080, remove text with a morphological close, separate foreground from
    background with GrabCut, detect edges with Canny, approximate the largest
    contours by polygons and warp the resized image so the detected corners
    become an upright rectangle. The result is written back to img_path.

    img_path: path of the image file that is read and overwritten.

    Returns the resized, uncorrected image (without writing anything) when no
    contour or no usable rectangle is found; returns None after a successful
    correction.

    Raises OSError when the image cannot be read.'''
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f"Could not read image: {img_path}")

    # Resize image to workable size
    dim_limit = 1080
    max_dim = max(img.shape)
    if max_dim > dim_limit:
        resize_scale = dim_limit / max_dim
        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)
    # Create a copy of resized original image for later use
    orig_img = img.copy()

    # Repeated Closing operation to remove text from the document.
    kernel = np.ones((5, 5), np.uint8)
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)

    # GrabCut
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
    # Mask values 0 and 2 are dropped, everything else is kept.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img = img * mask2[:, :, np.newaxis]

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (11, 11), 0)
    # Edge Detection.
    canny = cv2.Canny(gray, 0, 200)
    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))

    # Finding contours for the detected edges.
    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    # Keeping only the five largest detected contours.
    page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    if len(page) == 0:
        return orig_img

    # Detecting Edges through Contour approximation.
    corners = None
    for c in page:
        # Approximate the contour.
        epsilon = 0.02 * cv2.arcLength(c, True)
        corners = cv2.approxPolyDP(c, epsilon, True)
        # If our approximated contour has four points.
        if len(corners) == 4:
            break

    # approxPolyDP returns shape (N, 1, 2); flatten to (N, 2) so the corner
    # ordering works on plain x/y pairs.
    corners = order_points(corners.reshape(-1, 2))
    destination_corners = find_dest(corners)

    out_width, out_height = destination_corners[2]
    # A collapsed rectangle cannot be warped into; keep the file untouched.
    if out_width <= 0 or out_height <= 0:
        return orig_img

    # Getting the homography. OpenCV requires float32 point arrays here;
    # plain Python integer lists are rejected.
    M = cv2.getPerspectiveTransform(np.float32(corners),
                                    np.float32(destination_corners),
                                    cv2.DECOMP_LU)
    # Perspective transform using homography.
    final = cv2.warpPerspective(orig_img, M, (out_width, out_height),
                                flags=cv2.INTER_LINEAR)
    cv2.imwrite(img_path, final)
|
|
@ -1,8 +1,12 @@
|
|||
import gi, os, threading
|
||||
from typing import List
|
||||
import gi, os, threading, logging
|
||||
from typing import List, Optional
|
||||
|
||||
from PIL import Image
|
||||
from app.scanner.enums import Status
|
||||
from app.scanner.enums import Status, PageStatus
|
||||
from app.scanner.tesseract import Tesseract
|
||||
from app.scanner.processing import correct_image
|
||||
|
||||
#from app.backends.common import create_pdf, ocr_pdf
|
||||
|
||||
gi.require_version('Libinsane', '1.0')
|
||||
from gi.repository import Libinsane, GObject # type: ignore
|
||||
|
@ -14,10 +18,17 @@ class __LibinsaneSilentLogger(GObject.GObject, Libinsane.Logger):
|
|||
Libinsane.register_logger(__LibinsaneSilentLogger())
|
||||
|
||||
class Page:
|
||||
filename: str
|
||||
filename: Optional[str] = None
|
||||
size_bytes: int
|
||||
status: PageStatus
|
||||
|
||||
class Scanner:
|
||||
def __init__(self, storage_path, logger = logging.getLogger()):
|
||||
self.scanned_pages: List[Page] = []
|
||||
self.logger = logger
|
||||
self.tesseract = Tesseract(logger)
|
||||
self.storage_path = storage_path
|
||||
self.status = Status.INITIALIZED
|
||||
|
||||
def __get_device_id(self):
|
||||
"""
|
||||
|
@ -27,6 +38,7 @@ class Scanner:
|
|||
:returns: Device id of the first scan device
|
||||
"""
|
||||
devs = self.api.list_devices(Libinsane.DeviceLocations.LOCAL_ONLY)
|
||||
self.logger.info("Using device: %s", devs[0].get_dev_id())
|
||||
return devs[0].get_dev_id()
|
||||
|
||||
def __raw_to_img(self, params, img_bytes):
|
||||
|
@ -44,15 +56,24 @@ class Scanner:
|
|||
def __write_file(self, scan_params, data, page_index, last_file):
|
||||
data = b"".join(data)
|
||||
if scan_params.get_format() == Libinsane.ImgFormat.RAW_RGB_24:
|
||||
|
||||
filesize = len(data)
|
||||
img = self.__raw_to_img(scan_params, data)
|
||||
filename = f"out{page_index}.png"
|
||||
img.save(os.path.join(self.storage_path, filename), format="PNG")
|
||||
page = Page()
|
||||
page.filename = filename
|
||||
page.status = PageStatus.PROCESSING
|
||||
page.size_bytes = filesize
|
||||
self.scanned_pages.append(page)
|
||||
img = self.__raw_to_img(scan_params, data)
|
||||
filename = f"out{page_index}.jpeg"
|
||||
img = self.tesseract.rotate_img(img)
|
||||
img_path = os.path.join(self.storage_path, filename)
|
||||
img.save(img_path, format="jpeg", quality=95)
|
||||
#correct_image(img_path)
|
||||
page.filename = filename
|
||||
page.status = PageStatus.DONE
|
||||
self.scanned_pages[page_index] = page
|
||||
if last_file:
|
||||
#self.tesseract.create_pdf(scanner=self)
|
||||
#ocr_pdf()
|
||||
self.status = Status.DONE
|
||||
|
||||
def __set_defaults(self):
|
||||
|
@ -61,23 +82,31 @@ class Scanner:
|
|||
opts = {opt.get_name(): opt for opt in opts}
|
||||
opts["sleeptimer"].set_value(1)
|
||||
opts["resolution"].set_value(200)
|
||||
opts["swcrop"].set_value(True)
|
||||
opts["swdeskew"].set_value(True)
|
||||
opts["page-height"].set_value(300)
|
||||
opts["mode"].set_value("Color")
|
||||
dev.close()
|
||||
|
||||
def __scan(self):
|
||||
self.logger.info("Scan requested")
|
||||
self.status = Status.RUNNING
|
||||
source = self.api.get_device(self.device_id)
|
||||
|
||||
opts = source.get_options()
|
||||
opts = {opt.get_name(): opt for opt in opts}
|
||||
if opts["cover-open"].get_value() == True:
|
||||
self.logger.warn("Cover open. Can't scan.")
|
||||
self.status = Status.ERR_COVER_OPEN
|
||||
return
|
||||
|
||||
self.logger.info("Starting scan...")
|
||||
session = source.scan_start()
|
||||
try:
|
||||
page_index = 0
|
||||
while not session.end_of_feed() and page_index < 50:
|
||||
# Do not assume that all the pages will have the same size !
|
||||
self.logger.info("Processing page %s", page_index)
|
||||
# Do not assume that all the pages will have the same size
|
||||
scan_params = session.get_scan_parameters()
|
||||
img = []
|
||||
while not session.end_of_page():
|
||||
|
@ -88,15 +117,11 @@ class Scanner:
|
|||
t.start()
|
||||
page_index += 1
|
||||
if page_index == 0:
|
||||
self.logger.warn("No paper. Nothing to scan.")
|
||||
self.status = Status.ERR_NO_PAPER
|
||||
finally:
|
||||
session.cancel()
|
||||
source.close()
|
||||
|
||||
def __init__(self, storage_path):
|
||||
self.scanned_pages: List[Page] = []
|
||||
self.storage_path = storage_path
|
||||
self.status = Status.INITIALIZED
|
||||
|
||||
def preload(self):
|
||||
os.environ["LIBINSANE_NORMALIZER_SAFE_DEFAULTS"] = "0"
|
||||
|
|
34
backend/app/scanner/tesseract.py
Normal file
34
backend/app/scanner/tesseract.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
import logging
|
||||
|
||||
import pyocr
|
||||
import pyocr.libtesseract
|
||||
|
||||
from PIL import Image
|
||||
|
||||
class Tesseract:
    """Wrapper around a pyocr tool for orientation correction and PDF output."""

    def __init__(self, logger = logging.getLogger()):
        """Pick the OCR tool to use.

        :param logger: logger that receives all messages of this instance
        :raises RuntimeError: when pyocr reports no available OCR tool
        """
        self.logger = logger
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            self.logger.error("No OCR tool found")
            raise RuntimeError("No OCR tool found")
        # Prefer the libtesseract binding (the PDF builder below belongs to
        # it). Indexing a fixed position fails when fewer tools are installed
        # and can select a different tool when the list differs.
        if pyocr.libtesseract in tools:
            self.tool = pyocr.libtesseract
        else:
            self.tool = tools[0]
        self.logger.info("Will use tool '%s'", self.tool.get_name())

    def rotate_img(self, image: Image.Image) -> Image.Image:
        """Rotate an image by the angle the OCR tool detects.

        :param image: page image to analyse
        :returns: the rotated image, or the unchanged input when the tool
            cannot detect orientation or detection fails
        """
        if not self.tool.can_detect_orientation():
            self.logger.warning("Tesseract: OCR tool cannot detect orientation, leaving image unrotated")
            return image
        try:
            orientation = self.tool.detect_orientation(
                image,
                lang='deu'
            )
        except pyocr.PyocrException as exc:
            # Detection fails e.g. on pages without recognisable text; a
            # failed detection must not abort processing of the page.
            self.logger.warning("Tesseract: Orientation detection failed: %s", exc)
            return image
        self.logger.info("Tesseract: Rotate by %s degrees to correct (Confidence: %s)", orientation["angle"], orientation["confidence"])
        return image.rotate(orientation["angle"], expand=True)

    def create_pdf(self, scanner, image_dir="/var/www/html/img", output_name="out"):
        """Build a PDF from all pages of a scanner with the libtesseract PDF builder.

        The defaults reproduce the previously hard-coded locations.

        :param scanner: object providing ``get_pages()``; each page must expose
            a ``filename`` attribute naming an image inside ``image_dir``
        :param image_dir: directory that holds the page images and receives the output
        :param output_name: output file name passed to the builder (given without extension)
        """
        builder = pyocr.libtesseract.LibtesseractPdfBuilder()
        builder.set_lang("deu")
        builder.set_output_file(f"{image_dir}/{output_name}")
        for page in scanner.get_pages():
            filename = f"{image_dir}/{page.filename}"
            self.logger.info(filename)
            img = Image.open(filename)
            builder.add_image(img)
        builder.build()
|
|
@ -16,6 +16,7 @@ pycairo==1.24.0
|
|||
pydantic==1.10.12
|
||||
pydantic_core==2.6.3
|
||||
PyGObject==3.44.1
|
||||
pytesseract==0.3.10
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
PyYAML==6.0.1
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
<script setup lang="ts">
|
||||
import { ref } from 'vue';
|
||||
import LoadingSpinner from '@/components/LoadingSpinner.vue';
|
||||
import type { ScannedPage as ScannedPageType } from '@/types/scanner'
|
||||
|
||||
const props = defineProps({
|
||||
imgUrl: String
|
||||
scannedPage: ScannedPageType
|
||||
})
|
||||
|
||||
const imgLoaded = ref(false)
|
||||
|
@ -12,7 +13,7 @@ const imgLoaded = ref(false)
|
|||
<div class="p-2">
|
||||
<div class="w-full h-full rounded-lg shadow-lg bg-white flex justify-center items-center">
|
||||
<LoadingSpinner v-if="!imgLoaded" class="w-10 h-10 text-gray-600" />
|
||||
<img v-if="imgUrl" v-show="imgLoaded" :src="imgUrl" @load="imgLoaded=true" class="w-full h-full rounded-lg object-cover">
|
||||
<img v-if="scannedPage.status === 'done'" v-show="imgLoaded" :src="'/img/' + scannedPage.filename" @load="imgLoaded=true" class="w-full h-full rounded-lg object-cover">
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
16
frontend/src/types/scanner.d.ts
vendored
Normal file
16
frontend/src/types/scanner.d.ts
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
export interface ScannedPage {
|
||||
filename?: string;
|
||||
size_bytes: number;
|
||||
status: "processing" | "done";
|
||||
}
|
||||
|
||||
export interface ScanStatus {
|
||||
pages: Array<ScannedPage>;
|
||||
status:
|
||||
| "initialized"
|
||||
| "idle"
|
||||
| "running"
|
||||
| "done"
|
||||
| "err_no_paper"
|
||||
| "err_cover_open";
|
||||
}
|
|
@ -34,7 +34,7 @@ axios.post('/api/scan')
|
|||
<template>
|
||||
<dev class="w-full h-full flex flex-col">
|
||||
<div class="w-full h-full p-2 flex flex-row flex-wrap overflow-auto">
|
||||
<ScannedPage v-for="page in data.pages" :key="page.filename" class="w-1/5 h-1/2" :imgUrl="'/img/' + page.filename" />
|
||||
<ScannedPage v-for="page in data.pages" :scannedPage="page" class="w-1/5 h-1/2" />
|
||||
<ScannedPage v-if="data.status==='running'" class="w-1/5 h-1/2" />
|
||||
</div>
|
||||
<div class="w-full h-28 p-4 flex">
|
||||
|
|
Loading…
Reference in a new issue