From 2effecdf03948b7c1feb07b5245662677c50d993 Mon Sep 17 00:00:00 2001 From: ollynowell Date: Thu, 13 Jun 2024 16:31:24 +0100 Subject: [PATCH] Sort the PDFMiner text objects by descending y0, then ascending x0, before applying the row-grouping algorithm, to avoid missing columns --- camelot/parsers/stream.py | 1 + 1 file changed, 1 insertion(+) diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index 266a0e95..c70bfb0d 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -129,6 +129,7 @@ def _group_rows(text, row_tol=2): rows = [] temp = [] + text.sort(key=lambda x: (-x.y0, x.x0)) for t in text: # is checking for upright necessary? # if t.get_text().strip() and all([obj.upright for obj in t._objs if