October 19, 2024

$ sudo apt -y install tesseract-ocr tesseract-ocr-jpn libtesseract-dev libleptonica-dev tesseract-ocr-script-jpan tesseract-ocr-script-jpan-vert

$ tesseract --list-langs
List of available languages (5):
Japanese
Japanese_vert
eng
jpn
osd

$ tesseract javascript_logo.png outbase -l eng
$ cat outbase.txt
JavaScript

Calling Tesseract from Python with pyocr

from PIL import Image, ImageEnhance
import pyocr

# Run Tesseract OCR on an image through pyocr and store the text in readText.
# NOTE(review): fBytesIO is not defined in this snippet — presumably a
# file-like object (e.g. io.BytesIO) holding the image bytes; confirm.
img_pil = Image.open(fBytesIO)

# Pre-process for OCR: convert to grayscale ('L' mode), then raise the
# contrast (factor 1.0 would leave the image unchanged).
img = img_pil.convert('L')
enhancer = ImageEnhance.Contrast(img)
img_c = enhancer.enhance(2.0)

# tesseract_layout is forwarded to Tesseract as the page segmentation mode;
# 6 means "assume a single uniform block of text".
builder = pyocr.builders.TextBuilder(tesseract_layout=6)

tools = pyocr.get_available_tools()
if not tools:
    # Fail with a clear message instead of an IndexError on tools[0] below.
    raise RuntimeError("No OCR tool found (is tesseract installed?)")

# Use the first available tool and pass the builder created above
# (the original passed the undefined name `build`).
readText = tools[0].image_to_string(
    img_c,
    lang='eng',
    builder=builder
)