import logging

import pyocr
import pyocr.libtesseract
from PIL import Image


class Tesseract:
    """Thin wrapper around pyocr for orientation correction and PDF output.

    On construction an OCR tool is picked from ``pyocr.get_available_tools()``.
    ``rotate_img`` uses that tool to detect and undo a page's rotation, and
    ``create_pdf`` feeds the scanned page images to pyocr's libtesseract PDF
    builder.
    """

    # Language code passed to pyocr for orientation detection and PDF building.
    LANG = "deu"
    # Directory the scanned page images are read from.
    IMAGE_DIR = "/var/www/html/img"
    # Output path handed to the PDF builder.
    # NOTE(review): pyocr is expected to append ".pdf" to this path - confirm.
    PDF_OUTPUT = "/var/www/html/img/out"

    def __init__(self, logger=logging.getLogger()):
        """Select the OCR tool to use.

        Args:
            logger: Logger that receives all messages emitted by this object.
                Defaults to the root logger.

        Raises:
            RuntimeError: If pyocr reports no available OCR tool. (This path
                used to log the error and then fail with an ``IndexError``.)
        """
        self.logger = logger
        tools = pyocr.get_available_tools()
        if not tools:
            self.logger.error("No OCR tool found")
            raise RuntimeError("No OCR tool found")
        # Keep the historical preference for the second tool in pyocr's list,
        # but fall back to the only tool instead of raising IndexError.
        # NOTE(review): index 1 is presumably libtesseract when both Tesseract
        # bindings are installed - confirm against pyocr's tool ordering.
        self.tool = tools[1] if len(tools) > 1 else tools[0]
        self.logger.info("Will use tool '%s'", self.tool.get_name())

    def rotate_img(self, image: Image.Image) -> Image.Image:
        """Return ``image`` rotated by the angle the OCR tool detects.

        Args:
            image: Page image to analyse.

        Returns:
            A rotated image; ``expand=True`` is passed to ``Image.rotate``.
        """
        orientation = self.tool.detect_orientation(image, lang=self.LANG)
        angle = orientation["angle"]
        self.logger.info(
            "Tesseract: Rotate by %s degrees to correct (Confidence: %s)",
            angle,
            orientation["confidence"],
        )
        return image.rotate(angle, expand=True)

    def create_pdf(self, scanner) -> None:
        """Build a PDF from every page the scanner provides.

        Args:
            scanner: Object whose ``get_pages()`` yields pages that carry a
                ``filename`` attribute, resolved relative to ``IMAGE_DIR``.
        """
        builder = pyocr.libtesseract.LibtesseractPdfBuilder()
        builder.set_lang(self.LANG)
        builder.set_output_file(self.PDF_OUTPUT)

        for page in scanner.get_pages():
            filename = f"{self.IMAGE_DIR}/{page.filename}"
            self.logger.info(filename)
            # Close each image file once it has been handed to the builder so
            # file handles do not pile up on multi-page scans.
            with Image.open(filename) as img:
                builder.add_image(img)

        builder.build()