initial code commit

RaphaelaHeil · May 17, 2021 · c59fb11 · c59fb11
1 parent 6959820
commit c59fb11
Show file tree

Hide file tree

Showing 9 changed files with 655 additions and 1 deletion.
diff --git a/0004.png b/0004.png
diff --git a/0004_struck.png b/0004_struck.png
diff --git a/README.md b/README.md
@@ -1 +1,23 @@
-# strikethrough-generation
+# Synthetic Strikethrough Generation
+
+This package generates synthetic strikethrough and applies it to a given word image. Strikethrough strokes are generated based on image statistics.
+
+To get started, install the required packages (cf. [requirements.txt](requirements.txt)) and run [example.py](example.py).
+
+### Generation Example
+Input|Output
+---|---
+![clean word image spelling 'landlord'](0004.png)|![word image struck through with a wavy line](0004_struck.png)
+## License
+MIT License, see [LICENSE](LICENSE) for details.
+
+## Citation
+If you find this work useful, please consider citing this repository or the related paper:
+```
+@INPROCEEDINGS{heil2021strikethrough,
+  author={Heil, Raphaela and Vats, Ekta and Hast, Anders},
+  booktitle={2021 International Conference on Document Analysis and Recognition (ICDAR)},
+  title={{Strikethrough Removal from Handwritten Words Using CycleGANs}},
+  year={2021},
+  pubstate={to appear}}
+```
diff --git a/example.py b/example.py
@@ -0,0 +1,15 @@
+import cv2
+
+import matplotlib.pyplot as plt
+
+from strikethrough_generator import StrokeType, StrikeThroughGenerator
+
+if __name__ == "__main__":
+    stg = StrikeThroughGenerator(drawFromStrokeTypes=[StrokeType.ZIG_ZAG])
+    original = cv2.imread('0004.png',cv2.CV_8UC1)
+    output, strike_type = stg.generateStruckWord(original)
+
+    output, _ = stg.generateStruckWord(original)
+
+    plt.imshow(output, cmap="gray")
+    plt.show()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+opencv-python>=4.5.1.48
+numpy>=1.19.5
+imgaug>=0.4.0
+scipy>=1.5.4
+scikit-image>=0.17.2
diff --git a/strikethrough_generator/__init__.py b/strikethrough_generator/__init__.py
@@ -0,0 +1,5 @@
+from .backgroundremoval import backgroundRemoval
+from .core_extraction import extractCoreRegion
+from .generator import StrikeThroughGenerator, StrokeType, OptionsKeys
+
+__all__ = ["backgroundRemoval", "extractCoreRegion", "StrikeThroughGenerator", "StrokeType", "OptionsKeys"]
diff --git a/strikethrough_generator/backgroundremoval.py b/strikethrough_generator/backgroundremoval.py
@@ -0,0 +1,109 @@
+"""
+Code to remove background noise from a handwritten word image. Original MATLAB code by Anders Hast
+([email protected]), adapted to Python by Raphaela Heil ([email protected]).
+
+See also: P. Singh, E. Vats and A. Hast, "Learning Surrogate Models of Document Image Quality Metrics for Automated
+Document Image Processing," 2018 13th IAPR International Workshop on Document Analysis Systems (DAS), 2018, pp. 67-72,
+doi: 10.1109/DAS.2018.14.
+"""
+
+from math import ceil
+
+import numpy as np
+from scipy import signal
+from skimage import filters
+
+
+def __calculateGaussianKernel(width=5, sigma=1.):
+    """
+    Build a square 2D Gaussian kernel whose entries sum to one.
+
+    Args:
+        width: number of samples per axis; odd values give a grid centred on zero
+        sigma: standard deviation of the Gaussian
+
+    Returns:
+        2D array of shape (width, width), normalised to a total of 1
+    """
+    # sample positions along one axis, e.g. width=5 yields [-2, -1, 0, 1, 2]
+    ax = np.arange(-width // 2 + 1., width // 2 + 1.)
+    xx, yy = np.meshgrid(ax, ax)
+
+    # unnormalised Gaussian evaluated on the grid
+    kernel = np.exp(-0.5 * (np.square(xx) + np.square(yy)) / np.square(sigma))
+
+    # normalise so that the kernel entries sum to one
+    return kernel / np.sum(kernel)
+
+
+def __calculateMaskParameters(arraySize, sz):
+    if type(arraySize) == tuple:
+        if sz == 0:
+            kernelSize = ceil(max(arraySize))
+        else:
+            scale = min(arraySize) / sz
+            width = ceil(arraySize[0] / scale)
+            height = ceil(arraySize[1] / scale)
+            kernelSize = ceil(max((width, height)))
+
+        if kernelSize % 2 == 0:
+            kernelSize = kernelSize + 1
+
+        sigma = kernelSize / 6.0
+    else:
+        if sz == 0:
+            kernelSize = ceil(arraySize)
+        else:
+            scale = arraySize / sz
+            kernelSize = ceil(arraySize / scale)
+
+        if kernelSize % 2 == 0:
+            kernelSize = kernelSize + 1
+
+        sigma = kernelSize / 6.0
+
+    return kernelSize, sigma
+
+
+def __applyFilters(image: np.ndarray, so, sz: int) -> np.ndarray:
+    imageShape = image.shape
+    N, sigma = __calculateMaskParameters(so, sz)
+    kernel = __calculateGaussianKernel(N, sigma)
+    divisor = signal.fftconvolve(np.ones(imageShape).astype('float'), kernel, 'same')
+    numerator = signal.fftconvolve(image.astype('float'), kernel, 'same')
+    filteredImage = np.divide(numerator, divisor)
+    return filteredImage
+
+
+def __blurryBandpassFilter(image: np.ndarray, blurringMaskSize: int, threshold: float) -> np.ndarray:
+    if blurringMaskSize > 1:
+        thickMask = __applyFilters(image, blurringMaskSize, 0)
+    else:
+        thickMask = image
+
+    p2 = __applyFilters(image, image.shape, 300)
+    im2 = p2 - thickMask
+
+    th2 = filters.threshold_otsu(p2 - thickMask)
+    thresholdedImage = im2 > (th2 * threshold)
+    return thresholdedImage
+
+
+def __thinBandpassFilter(image, noiseMaskSize, enhanceContrast) -> np.ndarray:
+    if noiseMaskSize > 1:
+        thinMask = __applyFilters(image, noiseMaskSize, 0)
+    else:
+        thinMask = image
+
+    p2 = __applyFilters(image, image.shape, 100)
+    im2 = p2 - thinMask
+
+    nim2 = np.zeros(im2.shape)
+    nim2[im2 > 0] = im2[im2 > 0]
+
+    if enhanceContrast:
+        nim2 = nim2 - nim2.min()
+        nim2 = nim2 / nim2.max()
+
+    return nim2
+
+
+def backgroundRemoval(image: np.ndarray, blurringMaskSize: int, noiseMaskSize: int, threshold: float,
+                      enhanceContrast: bool) -> np.ndarray:
+    nim1 = __blurryBandpassFilter(image, blurringMaskSize, threshold)
+    nim2 = __thinBandpassFilter(image, noiseMaskSize, enhanceContrast)
+
+    if enhanceContrast:
+        nim2 = nim2 - nim2.min()
+        nim2 = nim2 / nim2.max()
+
+    result = 255 - (nim1 * nim2)
+
+    return result
diff --git a/strikethrough_generator/core_extraction.py b/strikethrough_generator/core_extraction.py
@@ -0,0 +1,65 @@
+"""
+Core extraction based on:
+
+A. Papandreou, B. Gatos,
+Slant estimation and core-region detection for handwritten Latin words,
+Pattern Recognition Letters, Volume 35, 2014, Pages 16-22, ISSN 0167-8655,
+https://doi.org/10.1016/j.patrec.2012.08.005.
+
+Implemented by R.Heil, 2021
+"""
+import itertools
+from typing import Tuple
+
+import numpy as np
+
+
+def __runCountForLine__(line: np.ndarray, inkValue: int = 255) -> int:
+    groups = []
+    for _, g in itertools.groupby(line, lambda x: x == inkValue):
+        groups.append(list(g))
+    count = len([x for x in groups if inkValue in x])
+    return count
+
+
+def __countRuns__(image: np.ndarray) -> np.ndarray:
+    """
+    Count the ink runs along axis 1 of `image`, using the default ink value of the per-line counter.
+
+    Args:
+        image: pixel array; for a 2D image each row is processed as one line
+
+    Returns:
+        array of run counts, one per line
+    """
+    return np.apply_along_axis(__runCountForLine__, axis=1, arr=image)
+
+
+def __calculateThreshold__(lines, t: float = 0.15) -> float:
+    """
+    Calculate the threshold as the fraction `t` of the mean of `lines`.
+
+    Args:
+        lines: sequence of numeric profile values; must not be empty, as its length is used as divisor
+        t: fraction of the mean that is used as threshold
+
+    Returns:
+        t * sum(lines) / len(lines)
+    """
+    return t / len(lines) * sum(lines)
+
+
+def __findCoreRegion__(booleanHorizontalProfile: np.ndarray, horizontalBlackRunProfile: np.ndarray) -> Tuple[int, int]:
+    total = []
+    borders = []
+    current = 0
+    start = 0
+    for i, x in enumerate(booleanHorizontalProfile):
+        if current == 0:
+            start = i
+        if x == 1:
+            current += horizontalBlackRunProfile[i]
+        else:
+            if current > 0:
+                total.append(current)
+                borders.append((start, i - 1))
+                current = 0
+                start = i + 1
+    if current > 0:
+        total.append(current)
+        borders.append((start, len(booleanHorizontalProfile) - 1))
+
+    return borders[np.argmax(total)]
+
+
+def extractCoreRegion(image: np.ndarray, thresholdModifier: float = 0.15) -> Tuple[int, int]:
+    horizontalProfile = np.sum(image, 1)
+    counts = __countRuns__(image)
+
+    horizontalBlackRunProfile = counts * counts * horizontalProfile
+
+    threshold = __calculateThreshold__(horizontalBlackRunProfile, thresholdModifier)
+    booleanHorizontalProfile = (horizontalBlackRunProfile > threshold) * 1
+
+    return __findCoreRegion__(booleanHorizontalProfile, horizontalBlackRunProfile)