-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement command line for running OCR over screenshot image files
Includes config for building executable using PyOxidizer.
- Loading branch information
Showing
10 changed files
with
438 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,3 +127,6 @@ dmypy.json | |
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# idea | ||
.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# screenshot-ocr | ||
|
||
Extract text from screenshots. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
def make_exe():
    # Describe the standalone `screenshot-ocr` executable.
    #
    # Returns the `PythonExecutable` so it can be built and referenced by the
    # other targets registered in this file.
    distribution = default_python_distribution()
    packaging_policy = distribution.make_python_packaging_policy()

    # The embedded interpreter runs this module as __main__ on start-up.
    interpreter_config = distribution.make_python_interpreter_config()
    interpreter_config.run_module = "screenshot_ocr.main"

    # Both the packaging policy and the interpreter config are passed
    # explicitly; omitting either would select the distribution defaults.
    executable = distribution.to_python_executable(
        name="screenshot-ocr",
        packaging_policy=packaging_policy,
        config=interpreter_config,
    )

    # Read Python files from the current directory and embed only the
    # resources that belong to the `screenshot_ocr` package.
    package_resources = executable.read_package_root(
        path=".",
        packages=["screenshot_ocr"],
    )
    executable.add_python_resources(package_resources)

    return executable
|
||
def make_embedded_resources(exe):
    # Convert the executable definition into its embedded-resources form.
    embedded_resources = exe.to_embedded_resources()
    return embedded_resources
|
||
def make_install(exe):
    # Build the file layout of the installed application: a manifest that
    # holds the generated executable under the name `screenshot-ocr`.
    manifest = FileManifest()
    manifest.add_python_resource("screenshot-ocr", exe)
    return manifest
|
||
def make_msi(exe):
    # Convert the Python executable into a `WiXMSIBuilder`, which becomes a
    # Windows .msi installer when it is built.
    app_id = "screenshot-ocr"  # simple identifier of the app
    app_name = "Screenshot OCR"  # display name of the application
    app_version = "0.1.0"  # version of the application
    app_author = "Mark C"  # author/manufacturer of the application

    return exe.to_wix_msi_builder(app_id, app_name, app_version, app_author)
|
||
|
||
# Dynamically enable automatic code signing.
def register_code_signers():
    # Code signing is opt-in: run `pyoxidizer build --var ENABLE_CODE_SIGNING 1`
    # to get past this guard. No signers are configured beyond the guard yet.
    signing_requested = VARS.get("ENABLE_CODE_SIGNING")
    if not signing_requested:
        return
|
||
|
||
# Set up automatic code signers (a no-op unless ENABLE_CODE_SIGNING is set).
register_code_signers()

# Register the build targets defined above with PyOxidizer.
register_target("exe", make_exe)
register_target(
    "resources",
    make_embedded_resources,
    depends=["exe"],
    default_build_script=True,
)
register_target(
    "install",
    make_install,
    depends=["exe"],
    default=True,
)
register_target(
    "msi_installer",
    make_msi,
    depends=["exe"],
)

# Resolve whichever targets the invoker of this configuration file requested.
resolve_targets()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
black | ||
pyoxidizer |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import argparse | ||
import logging | ||
import pathlib | ||
import shutil | ||
import sys | ||
import typing | ||
|
||
from screenshot_ocr import tesseract, files | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def build_args(args: typing.Optional[list[str]] = None) -> argparse.Namespace:
    """Parse the command line arguments of the program.

    Args:
        args: The argument strings to parse. When ``None``, argparse falls
            back to the arguments of the running process.

    Returns:
        The parsed namespace with the attributes ``tesseract_exe``,
        ``tesseract_data``, ``input_dir``, ``output_dir`` (each a
        ``pathlib.Path`` or ``None`` when not given) and ``no_move_images``
        (``bool``).
    """
    # prog is set for pyOxidizer, due to issue:
    # https://github.com/indygreg/PyOxidizer/issues/307
    parser = argparse.ArgumentParser(
        description="Extract text from screenshots.", prog="screenshot-ocr"
    )

    # All path options share the same shape, so declare them as data.
    path_options = (
        ("--tesseract-exe", "path to the Tesseract executable file"),
        ("--tesseract-data", "path to the Tesseract data directory"),
        ("--input-dir", "path to the folder containing the input images"),
        ("--output-dir", "path to the folder that will contain processed images"),
    )
    for flag, help_text in path_options:
        parser.add_argument(flag, type=pathlib.Path, help=help_text)

    parser.add_argument(
        "--no-move-images",
        action="store_true",
        help="don't move image files to the output directory (image files are moved by default)",
    )
    return parser.parse_args(args)
|
||
|
||
def norm_args(args: typing.Optional[list[str]] = None) -> dict:
    """Parse the arguments and fill in defaults for anything not supplied.

    For every setting the explicit command line value wins. Otherwise the
    Windows registry lookup is tried first and the default-path guess second.
    Lookups are only performed for settings that were not given on the
    command line.

    Args:
        args: The argument strings to parse, passed through to ``build_args``.

    Returns:
        A dict with the keys ``tesseract_exe``, ``tesseract_data``,
        ``input_dir``, ``output_dir`` and ``no_move_images``.

    Raises:
        ValueError: If no output directory was given and the user documents
            directory could not be determined.
    """
    parsed_args = build_args(args)

    # Tesseract executable and tessdata directory
    tesseract_exe = parsed_args.tesseract_exe
    tesseract_data = parsed_args.tesseract_data
    if not tesseract_exe or not tesseract_data:
        # Candidate install dirs in order of preference: registry, then guess.
        install_dirs = (
            tesseract.get_tesseract_install_dir_win_reg(),
            tesseract.get_tesseract_install_dir_win_guess(),
        )
        for install_dir in install_dirs:
            if not tesseract_exe:
                tesseract_exe = tesseract.get_tesseract_executable_win_guess(
                    install_dir
                )
            if not tesseract_data:
                tesseract_data = tesseract.get_tesseract_data_dir_win_guess(
                    install_dir
                )

    # input dir: registry value first, default-path guess second
    # (the original had the two lookups assigned to swapped names)
    input_dir = parsed_args.input_dir
    if not input_dir:
        input_dir = files.get_user_downloads_dir_win_reg()
    if not input_dir:
        input_dir = files.get_user_downloads_dir_win_guess()

    # output dir
    output_dir = parsed_args.output_dir
    if not output_dir:
        documents_dir = files.get_user_documents_dir_win_guess()
        if not documents_dir:
            # Joining None with "Tesseract" would fail with an opaque TypeError.
            raise ValueError(
                "Could not determine the output directory; "
                "please specify --output-dir."
            )
        output_dir = documents_dir / "Tesseract"

    logger.info(f"Using Tesseract executable: '{tesseract_exe}'.")
    logger.info(f"Using Tesseract data: '{tesseract_data}'.")
    logger.info(f"Using input directory: '{input_dir}'.")
    logger.info(f"Using output directory: '{output_dir}'.")

    return {
        "tesseract_exe": tesseract_exe,
        "tesseract_data": tesseract_data,
        "input_dir": input_dir,
        "output_dir": output_dir,
        "no_move_images": parsed_args.no_move_images,
    }
|
||
|
||
def get_image_text(
    exe_path: pathlib.Path,
    data_dir: pathlib.Path,
    image_dir: pathlib.Path,
) -> typing.Iterator[typing.Tuple[pathlib.Path, str]]:
    """Yield each screenshot file together with its extracted text.

    This is a generator, so Tesseract is run lazily, one image per iteration.

    Args:
        exe_path: Path to the Tesseract executable.
        data_dir: Path to the Tesseract data directory.
        image_dir: Directory that is searched for screenshot files.

    Yields:
        ``(image_file, output_text)`` pairs, where ``output_text`` is whatever
        ``tesseract.run_tesseract`` returned for that image.
    """
    for image_file in files.find_ff_screenshot_files(image_dir):
        output_text = tesseract.run_tesseract(exe_path, data_dir, image_file)
        yield image_file, output_text
|
||
|
||
def run_program(args: typing.Optional[list[str]] = None) -> None:
    """Run OCR over the screenshot files and store the extracted text.

    For every screenshot found in the input directory, the image is moved to
    the output directory (unless ``--no-move-images`` was given) and a ``.txt``
    file with the same base name is written there with the extracted text.

    Args:
        args: The command line argument strings, passed to ``norm_args``.
    """
    logger.info("Starting Screenshot OCR...")

    # get the arguments
    settings = norm_args(args)
    tesseract_exe = settings["tesseract_exe"]
    tesseract_data = settings["tesseract_data"]
    input_dir = settings["input_dir"]
    output_dir = settings["output_dir"]
    move_images = not settings["no_move_images"]

    # exist_ok makes a separate exists() check unnecessary
    output_dir.mkdir(parents=True, exist_ok=True)

    count = 0

    # find the image files and extract the text from each
    for image_file, output_text in get_image_text(
        tesseract_exe, tesseract_data, input_dir
    ):
        if move_images:
            # move the image file to the output dir
            shutil.move(image_file, output_dir / image_file.name)

        # Create a text file named after the image file. Appending ".txt" to
        # the stem (rather than with_suffix) keeps stems that contain a dot
        # intact. UTF-8 is explicit so OCR output with characters outside the
        # platform default encoding can always be written.
        text_file = output_dir / f"{image_file.stem}.txt"
        text_file.write_text(output_text, encoding="utf-8")

        # log the image file name and extracted text
        logger.info(f"{image_file.name}: {output_text}")

        count += 1

    logger.info(f"Found and processed {count} image file(s).")
    logger.info("...finished.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import logging | ||
import pathlib | ||
import sys | ||
|
||
from screenshot_ocr import utils | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def get_user_downloads_dir_win_guess():
    """Guess the user downloads directory from the Windows user profile path.

    Returns ``None`` on non-Windows platforms and when ``USERPROFILE`` is
    unset or blank. Otherwise returns the result of ``utils.guess_path`` for
    the ``Downloads`` entry below the profile directory.
    """
    if sys.platform != "win32":
        logger.debug("Cannot use Windows default path on non-Windows platform.")
        return None

    import os

    user_profile = os.environ.get("USERPROFILE", "")
    if user_profile.strip():
        profile_dir = pathlib.Path(user_profile)
        return utils.guess_path(profile_dir, "Downloads", "user downloads")

    logger.debug("The Windows current user profile path %USERPROFILE% is not set.")
    return None
|
||
|
||
def get_user_downloads_dir_win_reg():
    """Look up the user downloads directory in the Windows registry.

    Reads the ``{374DE290-123F-4565-9164-39C4925E467B}`` value from the
    current user's ``Shell Folders`` key.

    Returns:
        The directory as a ``pathlib.Path``, or ``None`` when not running on
        Windows, when the key or value cannot be read, or when the value is
        empty or not of type ``REG_SZ``.
    """
    if sys.platform != "win32":
        logger.debug("Cannot use Windows registry on non-Windows platform.")
        return None

    import winreg

    key_value = None
    key_type = None
    try:
        # The context manager closes the key handle even if the query fails.
        with winreg.OpenKeyEx(
            winreg.HKEY_CURRENT_USER,
            r"SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders",
        ) as shell_folders:
            key_value, key_type = winreg.QueryValueEx(
                shell_folders, "{374DE290-123F-4565-9164-39C4925E467B}"
            )
    except OSError:
        # A missing key or value is reported as "not found" below instead of
        # propagating to the caller.
        pass

    if key_value and key_type == winreg.REG_SZ:
        logger.debug(
            f"Found user downloads directory from Windows registry: '{key_value}'."
        )
        return pathlib.Path(key_value)

    logger.debug("Could not find user downloads directory in Windows registry.")
    return None
|
||
|
||
def get_user_documents_dir_win_guess():
    """Guess the user documents directory from the Windows user profile path.

    Returns ``None`` on non-Windows platforms and when ``USERPROFILE`` is
    unset or blank. Otherwise returns the result of ``utils.guess_path`` for
    the ``Documents`` entry below the profile directory.
    """
    if sys.platform != "win32":
        logger.debug("Cannot use Windows default path on non-Windows platform.")
        return None

    import os

    user_profile = os.environ.get("USERPROFILE", "")
    if user_profile.strip():
        profile_dir = pathlib.Path(user_profile)
        return utils.guess_path(profile_dir, "Documents", "user documents")

    logger.debug("The Windows current user profile path %USERPROFILE% is not set.")
    return None
|
||
|
||
def find_ff_screenshot_files(image_dir: pathlib.Path):
    """Yield the Firefox screenshot files found directly in ``image_dir``.

    A directory entry is yielded when it is a regular file with the ``.png``
    suffix whose stem starts with ``"Screenshot "`` and ends with
    ``"Facebook"``.
    """
    logger.info(
        f"Looking for files in '{image_dir}' "
        "that match the pattern 'Screenshot [date] Facebook.png'."
    )
    for candidate in image_dir.iterdir():
        is_match = (
            candidate.is_file()
            and candidate.suffix == ".png"
            and candidate.stem.startswith("Screenshot ")
            and candidate.stem.endswith("Facebook")
        )
        if is_match:
            yield candidate
|
||
|
||
def arrange_image_file_text(
    input_file: pathlib.Path, output_dir: pathlib.Path, image_text: str
) -> None:
    """Placeholder that is not implemented yet; it does nothing."""
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import logging | ||
|
||
|
||
def run():
    """Configure DEBUG-level logging and start the command line program."""
    log_format = "%(asctime)s [%(levelname)-8s] %(message)s"
    logging.basicConfig(format=log_format, level=logging.DEBUG)

    # The cli module is imported only after logging has been configured.
    from screenshot_ocr import cli

    cli.run_program()


if __name__ == "__main__":
    run()
Oops, something went wrong.