diff options
Diffstat (limited to 'tests/avocado/tesseract_utils.py')
-rw-r--r-- | tests/avocado/tesseract_utils.py | 46 |
1 files changed, 46 insertions, 0 deletions
# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import re
import logging

from avocado.utils import process
from avocado.utils.path import find_command, CmdNotFoundError


def _tesseract_major_version(version_output):
    """Extract the major version number from ``tesseract --version`` text.

    The version is expected to be the second whitespace-separated token
    (e.g. ``tesseract 4.1.1``); an optional leading ``v`` is tolerated
    (e.g. ``tesseract v5.0.0``).

    :param version_output: text captured from the command
    :returns: the major version as an int, or None when the text does not
              contain a recognizable version token
    """
    tokens = version_output.split()
    if len(tokens) < 2:
        return None
    match = re.match(r'v?(\d+)', tokens[1])
    if match is None:
        return None
    return int(match.group(1))


def tesseract_available(expected_version):
    """Tell whether tesseract is installed with the expected major version.

    :param expected_version: major version number (int) to compare against
    :returns: True when the ``tesseract`` command is found and the major
              version it reports equals ``expected_version``; False when
              the command is missing, the version cannot be parsed, or
              the versions differ
    """
    try:
        find_command('tesseract')
    except CmdNotFoundError:
        return False
    # NOTE(review): process.run() presumably raises if the command exits
    # with a non-zero status -- confirm against the avocado documentation.
    res = process.run('tesseract --version')
    # The version banner is looked up on stdout first, then on stderr,
    # since either stream may carry it.
    for text in (res.stdout_text, res.stderr_text):
        major = _tesseract_major_version(text)
        if major is not None:
            return major == expected_version
    return False


def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
    """Run tesseract on an image and return the recognized text lines.

    :param image_path: path of the image handed to tesseract
    :param tesseract_args: extra command line arguments, inserted before
                           the image path
    :param tesseract_version: major version of the tesseract binary; for
                              version 4, ``--oem 1`` is appended to the
                              arguments
    :returns: list of the non-empty output lines, stripped of surrounding
              whitespace
    """
    console_logger = logging.getLogger('tesseract')
    console_logger.debug(image_path)
    if tesseract_version == 4:
        tesseract_args += ' --oem 1'
    # "stdout" as output base makes tesseract print the text instead of
    # writing it to a file.
    proc = process.run("tesseract {} {} stdout".format(tesseract_args,
                                                       image_path))
    lines = []
    for line in proc.stdout_text.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines