Skip to content

Commit

Permalink
feat: implement command line for running OCR over screenshot image files
Browse files Browse the repository at this point in the history
Includes config for building executable using PyOxidizer.
  • Loading branch information
cofiem committed Feb 19, 2023
1 parent 1c95807 commit ea48ba6
Show file tree
Hide file tree
Showing 10 changed files with 438 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/

# idea
.idea/
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# screenshot-ocr

Extract text from screenshots.
82 changes: 82 additions & 0 deletions pyoxidizer.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
def make_exe():
    # Build the `screenshot-ocr` executable target from the default Python
    # distribution, embedding the `screenshot_ocr` package.
    dist = default_python_distribution()
    policy = dist.make_python_packaging_policy()
    python_config = dist.make_python_interpreter_config()

    # Run a Python module as __main__ when the interpreter starts.
    python_config.run_module = "screenshot_ocr.main"

    # Produce a PythonExecutable from a Python distribution, embedded
    # resources, and other options. The returned object represents the
    # standalone executable that will be built.
    exe = dist.to_python_executable(
        name="screenshot-ocr",

        # If no argument passed, the default `PythonPackagingPolicy` for the
        # distribution is used.
        packaging_policy=policy,

        # If no argument passed, the default `PythonInterpreterConfig` is used.
        config=python_config,
    )

    # Read Python files from the current directory (".") and add them to our
    # embedded context, taking just the resources belonging to the
    # `screenshot_ocr` Python package.
    exe.add_python_resources(exe.read_package_root(
        path=".",
        packages=["screenshot_ocr"],
    ))

    # Return our `PythonExecutable` instance so it can be built and
    # referenced by other consumers of this target.
    return exe

def make_embedded_resources(exe):
    # Convert the executable built by `make_exe` into its embedded-resources
    # form; this backs the `resources` target registered further down.
    return exe.to_embedded_resources()

def make_install(exe):
    # Describe the installed file layout for the application: a manifest
    # holding the executable produced by `make_exe`.

    # Create an object that represents our installed application file layout.
    files = FileManifest()

    # Add the generated executable to our install layout in the root directory.
    files.add_python_resource("screenshot-ocr", exe)

    return files

def make_msi(exe):
    # Build the Windows installer target from the executable.
    #
    # See the full docs for more. But this will convert your Python executable
    # into a `WiXMSIBuilder` Starlark type, which will be converted to a Windows
    # .msi installer when it is built.
    #
    # NOTE(review): the version string below is hard-coded here; keep it in
    # step with the package version when releasing.
    return exe.to_wix_msi_builder(
        # Simple identifier of your app.
        "screenshot-ocr",
        # The name of your application.
        "Screenshot OCR",
        # The version of your application.
        "0.1.0",
        # The author/manufacturer of your application.
        "Mark C"
    )


# Dynamically enable automatic code signing.
def register_code_signers():
    # You will need to run with `pyoxidizer build --var ENABLE_CODE_SIGNING 1` for
    # this if block to be evaluated.
    #
    # NOTE: no code signer is configured yet. The function returns early when
    # the variable is unset and simply falls off the end when it is set, so it
    # currently registers nothing in either case.
    if not VARS.get("ENABLE_CODE_SIGNING"):
        return


# Call our function to set up automatic code signers.
register_code_signers()

# Tell PyOxidizer about the build targets defined above.
# - "exe" has no dependencies; every other target depends on it.
# - "resources" is flagged as the default target for build scripts.
# - "install" is flagged as the default target.
# - "msi_installer" is only built when requested.
register_target("exe", make_exe)
register_target("resources", make_embedded_resources, depends=["exe"], default_build_script=True)
register_target("install", make_install, depends=["exe"], default=True)
register_target("msi_installer", make_msi, depends=["exe"])

# Resolve whatever targets the invoker of this configuration file is requesting
# be resolved.
resolve_targets()
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
black
pyoxidizer
Empty file added screenshot_ocr/__init__.py
Empty file.
149 changes: 149 additions & 0 deletions screenshot_ocr/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import argparse
import logging
import pathlib
import shutil
import sys
import typing

from screenshot_ocr import tesseract, files

logger = logging.getLogger(__name__)


def build_args(args: list[str] = None) -> argparse.Namespace:
    """Parse the command line options for the screenshot OCR program.

    Args:
        args: The argument strings to parse. When ``None``, argparse reads
            them from ``sys.argv``.

    Returns:
        The parsed namespace with ``tesseract_exe``, ``tesseract_data``,
        ``input_dir`` and ``output_dir`` (each a ``pathlib.Path`` or ``None``
        when not given) and the boolean ``no_move_images``.
    """
    # prog is set for pyOxidizer, due to issue: https://github.com/indygreg/PyOxidizer/issues/307
    parser = argparse.ArgumentParser(
        description="Extract text from screenshots.", prog="screenshot-ocr"
    )

    # Every path-valued option has the same shape, so declare them as data.
    path_options = (
        ("--tesseract-exe", "path to the Tesseract executable file"),
        ("--tesseract-data", "path to the Tesseract data directory"),
        ("--input-dir", "path to the folder containing the input images"),
        ("--output-dir", "path to the folder that will contain processed images"),
    )
    for flag, help_text in path_options:
        parser.add_argument(flag, type=pathlib.Path, help=help_text)

    parser.add_argument(
        "--no-move-images",
        action="store_true",
        help="don't move image files to the output directory (image files are moved by default)",
    )

    return parser.parse_args(args)


def norm_args(args: list[str] = None):
    """Parse the command line and fill in defaults for options not supplied.

    Each value given on the command line wins. Otherwise the value is looked
    up, preferring the Windows-registry based lookup over the default-path
    guess.

    Args:
        args: The argument strings to parse, passed through to ``build_args``.

    Returns:
        A dict with the keys ``tesseract_exe``, ``tesseract_data``,
        ``input_dir``, ``output_dir`` and ``no_move_images``. The first three
        hold whatever the lookup helpers returned when no option was given,
        which may be ``None`` when nothing could be found.

    Raises:
        ValueError: If ``--output-dir`` was not given and the user documents
            directory could not be determined.
    """
    parsed_args = build_args(args)

    tesseract_install_dir_reg = tesseract.get_tesseract_install_dir_win_reg()
    tesseract_install_dir_guess = tesseract.get_tesseract_install_dir_win_guess()

    # Each name holds the result of the matching lookup (these two were
    # previously swapped, which made the guess win over the registry).
    downloads_dir_reg = files.get_user_downloads_dir_win_reg()
    downloads_dir_guess = files.get_user_downloads_dir_win_guess()

    documents_dir_guess = files.get_user_documents_dir_win_guess()

    # Tesseract exe: command line, then registry install dir, then guessed dir.
    tesseract_exe = (
        parsed_args.tesseract_exe
        or tesseract.get_tesseract_executable_win_guess(tesseract_install_dir_reg)
        or tesseract.get_tesseract_executable_win_guess(tesseract_install_dir_guess)
    )

    # Tesseract tessdata: same precedence as the executable.
    tesseract_data = (
        parsed_args.tesseract_data
        or tesseract.get_tesseract_data_dir_win_guess(tesseract_install_dir_reg)
        or tesseract.get_tesseract_data_dir_win_guess(tesseract_install_dir_guess)
    )

    # input dir
    input_dir = parsed_args.input_dir or downloads_dir_reg or downloads_dir_guess

    # output dir
    output_dir = parsed_args.output_dir
    if not output_dir:
        if not documents_dir_guess:
            # Without this guard `None / "Tesseract"` fails with an opaque
            # TypeError (e.g. on non-Windows or when %USERPROFILE% is unset).
            raise ValueError(
                "Could not determine the user documents directory; "
                "specify the output directory using --output-dir."
            )
        output_dir = documents_dir_guess / "Tesseract"

    logger.info("Using Tesseract executable: '%s'.", tesseract_exe)
    logger.info("Using Tesseract data: '%s'.", tesseract_data)
    logger.info("Using input directory: '%s'.", input_dir)
    logger.info("Using output directory: '%s'.", output_dir)

    return {
        "tesseract_exe": tesseract_exe,
        "tesseract_data": tesseract_data,
        "input_dir": input_dir,
        "output_dir": output_dir,
        "no_move_images": parsed_args.no_move_images,
    }


def get_image_text(
    exe_path: pathlib.Path,
    data_dir: pathlib.Path,
    image_dir: pathlib.Path,
) -> typing.Iterator[typing.Tuple[pathlib.Path, str]]:
    """Yield each screenshot file in a directory with its extracted text.

    This is a generator: Tesseract is run for a file only when that item is
    requested by the consumer.

    Args:
        exe_path: Path to the Tesseract executable, passed to
            ``tesseract.run_tesseract``.
        data_dir: Path to the Tesseract data directory, passed to
            ``tesseract.run_tesseract``.
        image_dir: Directory searched by ``files.find_ff_screenshot_files``.

    Yields:
        ``(image_file, output_text)`` pairs, one per screenshot file found.
    """
    for image_file in files.find_ff_screenshot_files(image_dir):
        yield image_file, tesseract.run_tesseract(exe_path, data_dir, image_file)


def run_program(args: list[str] = None) -> None:
    """Run OCR over the screenshot files and store the extracted text.

    For every screenshot found in the input directory, a ``.txt`` file named
    after the image is written to the output directory, and the image itself
    is moved there unless ``--no-move-images`` was given.

    Args:
        args: The command line argument strings, passed through to
            ``norm_args``.
    """
    logger.info("Starting Screenshot OCR...")

    # get the arguments
    normalised_arguments = norm_args(args)
    tesseract_exe = normalised_arguments["tesseract_exe"]
    tesseract_data = normalised_arguments["tesseract_data"]
    input_dir = normalised_arguments["input_dir"]
    output_dir = normalised_arguments["output_dir"]
    move_images = not normalised_arguments["no_move_images"]

    # exist_ok=True already tolerates an existing directory, so no separate
    # exists() check is needed.
    output_dir.mkdir(parents=True, exist_ok=True)

    count = 0

    # find the image files and extract the text from each
    for image_file, output_text in get_image_text(
        tesseract_exe, tesseract_data, input_dir
    ):
        if move_images:
            # move the image file to the output dir
            shutil.move(image_file, output_dir / image_file.name)

        # Create a text file with the same name as the image file. The suffix
        # is appended to the whole stem: with_suffix() would cut the stem at
        # its last dot (e.g. a time written as "10.30.15").
        text_file = output_dir / f"{image_file.stem}.txt"
        # Explicit UTF-8 so OCR output is not limited by the locale encoding.
        text_file.write_text(output_text, encoding="utf-8")

        # log the image file name and extracted text
        logger.info("%s: %s", image_file.name, output_text)

        count += 1

    logger.info("Found and processed %s image file(s).", count)
    logger.info("...finished.")
90 changes: 90 additions & 0 deletions screenshot_ocr/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import logging
import pathlib
import sys

from screenshot_ocr import utils

logger = logging.getLogger(__name__)


def get_user_downloads_dir_win_guess():
    """Guess the user downloads directory from the Windows user profile.

    Returns:
        ``None`` when not running on Windows or when ``%USERPROFILE%`` is
        unset or blank; otherwise the result of ``utils.guess_path`` for the
        ``Downloads`` entry under the user profile directory.
    """
    if sys.platform != "win32":
        logger.debug("Cannot use Windows default path on non-Windows platform.")
        return None

    import os

    user_profile = os.environ.get("USERPROFILE", "")
    if user_profile.strip():
        profile_dir = pathlib.Path(user_profile)
        return utils.guess_path(profile_dir, "Downloads", "user downloads")

    logger.debug("The Windows current user profile path %USERPROFILE% is not set.")
    return None


def get_user_downloads_dir_win_reg():
    """Look up the user downloads directory in the Windows registry.

    Reads the value named ``{374DE290-123F-4565-9164-39C4925E467B}`` from the
    current user's ``Explorer\\Shell Folders`` key.

    Returns:
        The directory as a ``pathlib.Path``, or ``None`` when not running on
        Windows, when the key or value cannot be read, or when the value is
        empty or not a ``REG_SZ`` string.
    """
    if sys.platform != "win32":
        logger.debug("Cannot use Windows registry on non-Windows platform.")
        return None

    import winreg

    key_value = None
    key_type = None
    try:
        # The handle is a context manager, so it is closed even if the query
        # raises.
        with winreg.OpenKeyEx(
            winreg.HKEY_CURRENT_USER,
            r"SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders",
        ) as shell_folders_key:
            key_value, key_type = winreg.QueryValueEx(
                shell_folders_key, "{374DE290-123F-4565-9164-39C4925E467B}"
            )
    except OSError:
        # A missing key or value is reported as "not found" so that callers
        # can fall back to another lookup instead of crashing.
        logger.debug("Could not read user downloads directory from Windows registry.")

    if key_value and key_type == winreg.REG_SZ:
        logger.debug(
            f"Found user downloads directory from Windows registry: '{key_value}'."
        )
        return pathlib.Path(key_value)

    logger.debug("Could not find user downloads directory in Windows registry.")
    return None


def get_user_documents_dir_win_guess():
    """Guess the user documents directory from the Windows user profile.

    Returns:
        ``None`` when not running on Windows or when ``%USERPROFILE%`` is
        unset or blank; otherwise the result of ``utils.guess_path`` for the
        ``Documents`` entry under the user profile directory.
    """
    on_windows = sys.platform == "win32"
    if not on_windows:
        logger.debug("Cannot use Windows default path on non-Windows platform.")
        return None

    import os

    profile = os.environ.get("USERPROFILE")
    profile_is_set = bool(profile) and bool(profile.strip())
    if not profile_is_set:
        logger.debug("The Windows current user profile path %USERPROFILE% is not set.")
        return None

    return utils.guess_path(pathlib.Path(profile), "Documents", "user documents")


def find_ff_screenshot_files(image_dir: pathlib.Path):
    """Yield the FireFox screenshot files found directly in a directory.

    A file matches when it is a regular file with the suffix ``.png`` whose
    stem starts with ``"Screenshot "`` and ends with ``"Facebook"``.
    Sub-directories are not searched.

    Args:
        image_dir: The directory to look in.

    Yields:
        The path of each matching file.
    """
    logger.info(
        f"Looking for files in '{image_dir}' "
        "that match the pattern 'Screenshot [date] Facebook.png'."
    )
    for candidate in image_dir.iterdir():
        stem = candidate.stem
        if (
            candidate.is_file()
            and candidate.suffix == ".png"
            and stem.startswith("Screenshot ")
            and stem.endswith("Facebook")
        ):
            yield candidate


def arrange_image_file_text(
    input_file: pathlib.Path, output_dir: pathlib.Path, image_text: str
) -> None:
    """Placeholder that is not implemented yet; it does nothing.

    Args:
        input_file: Currently unused.
        output_dir: Currently unused.
        image_text: Currently unused.
    """
    return None
16 changes: 16 additions & 0 deletions screenshot_ocr/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import logging


def run():
    """Configure logging at DEBUG level and run the command line program."""
    log_format = "%(asctime)s [%(levelname)-8s] %(message)s"
    logging.basicConfig(format=log_format, level=logging.DEBUG)

    # The cli module is imported only after logging has been configured.
    from screenshot_ocr import cli as program

    program.run_program()


# Start the program when this module is executed as the main module
# (for example with `python -m screenshot_ocr.main`).
if __name__ == "__main__":
    run()
Loading

0 comments on commit ea48ba6

Please sign in to comment.