mirror of
https://github.com/BluemediaGER/ScanOS.git
synced 2024-11-12 21:05:28 +01:00
35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
import logging
|
|
|
|
import pyocr
|
|
import pyocr.libtesseract
|
|
|
|
from PIL import Image
|
|
|
|
class Tesseract:
    """Thin wrapper around pyocr for deskewing scans and building a PDF.

    On construction an OCR backend is selected from the tools pyocr reports
    as available. ``rotate_img`` uses that backend's orientation detection,
    and ``create_pdf`` feeds scanned pages to pyocr's libtesseract PDF
    builder.
    """

    # Tesseract language code used for orientation detection and PDF output.
    LANG = "deu"
    # Directory holding the scanned page images; the PDF output path is
    # derived from it as well.
    IMAGE_DIR = "/var/www/html/img"

    def __init__(self, logger = logging.getLogger()):
        """Pick the OCR backend to use.

        Args:
            logger: Logger that receives all messages of this instance.
                Defaults to the root logger.

        Raises:
            RuntimeError: If pyocr reports no available OCR tool.
        """
        self.logger = logger
        tools = pyocr.get_available_tools()
        if not tools:
            # Fail with a clear message instead of an IndexError on the
            # lookup below.
            self.logger.error("No OCR tool found")
            raise RuntimeError("No OCR tool found")
        # Select libtesseract by identity rather than by list position: a
        # positional lookup breaks as soon as fewer backends are installed.
        # create_pdf relies on the libtesseract PDF builder, so that backend
        # is preferred; otherwise fall back to the first available tool.
        if pyocr.libtesseract in tools:
            self.tool = pyocr.libtesseract
        else:
            self.tool = tools[0]
        self.logger.info("Will use tool '%s'", self.tool.get_name())

    def rotate_img(self, image: Image.Image) -> Image.Image:
        """Return a copy of ``image`` rotated by the detected orientation.

        Args:
            image: The scanned page to analyse.

        Returns:
            A new image rotated by the angle the OCR tool reported, with the
            canvas expanded so that no content is cropped.
        """
        orientation = self.tool.detect_orientation(image, lang=self.LANG)
        angle = orientation["angle"]
        self.logger.info(
            "Tesseract: Rotate by %s degrees to correct (Confidence: %s)",
            angle,
            orientation["confidence"],
        )
        return image.rotate(angle, expand=True)

    def create_pdf(self, scanner):
        """Build a PDF from all pages of ``scanner``.

        Args:
            scanner: Object whose ``get_pages()`` yields pages exposing a
                ``filename`` attribute relative to ``IMAGE_DIR``.
        """
        builder = pyocr.libtesseract.LibtesseractPdfBuilder()
        builder.set_lang(self.LANG)
        builder.set_output_file(f"{self.IMAGE_DIR}/out")
        opened_images = []
        try:
            for page in scanner.get_pages():
                filename = f"{self.IMAGE_DIR}/{page.filename}"
                self.logger.info(filename)
                img = Image.open(filename)
                # Track the handle first so it is released even if adding
                # the page to the builder fails.
                opened_images.append(img)
                builder.add_image(img)
            builder.build()
        finally:
            # Close only after build(): the builder may still need the image
            # data until the PDF has been written.
            for img in opened_images:
                img.close()
|