diff --git a/src/pyocr/builders.py b/src/pyocr/builders.py index b6f3af5..188be1f 100644 --- a/src/pyocr/builders.py +++ b/src/pyocr/builders.py @@ -304,8 +304,9 @@ class TextBuilder(BaseBuilder): def __init__(self, tesseract_layout=3, cuneiform_dotmatrix=False, cuneiform_fax=False, cuneiform_singlecolumn=False): + from .tesseract import psm_parameter + tess_flags = [psm_parameter(), str(tesseract_layout)] file_ext = ["txt"] - tess_flags = ["-psm", str(tesseract_layout)] cun_args = ["-f", "text"] # Add custom cuneiform parameters if needed for par, arg in [(cuneiform_dotmatrix, "--dotmatrix"), @@ -561,8 +562,9 @@ class WordBoxBuilder(BaseBuilder): """ def __init__(self, tesseract_layout=1): + from .tesseract import psm_parameter + tess_flags = [psm_parameter(), str(tesseract_layout)] file_ext = ["html", "hocr"] - tess_flags = ["-psm", str(tesseract_layout)] tess_conf = ["hocr"] cun_args = ["-f", "hocr"] super(WordBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf, @@ -637,8 +639,9 @@ class LineBoxBuilder(BaseBuilder): """ def __init__(self, tesseract_layout=1): + from .tesseract import psm_parameter + tess_flags = [psm_parameter(), str(tesseract_layout)] file_ext = ["html", "hocr"] - tess_flags = ["-psm", str(tesseract_layout)] tess_conf = ["hocr"] cun_args = ["-f", "hocr"] super(LineBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf, diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py index 22cc48d..52d348f 100755 --- a/src/pyocr/tesseract.py +++ b/src/pyocr/tesseract.py @@ -161,6 +161,12 @@ def can_detect_orientation(): ) +def psm_parameter(): + """Return the psm option string depending on the Tesseract version.""" + version = get_version() + return "--psm" if version[0] > 3 else "-psm" + + def detect_orientation(image, lang=None): """ Arguments: @@ -178,7 +184,7 @@ def detect_orientation(image, lang=None): """ _set_environment() with temp_dir() as tmpdir: - command = [TESSERACT_CMD, "input.bmp", 'stdout', "-psm", "0"] + command = [TESSERACT_CMD, "input.bmp", 'stdout', psm_parameter(), "0"] version = get_version() if version[0] >= 4: # XXX: temporary fix to remove once Tesseract 4 is stable