From e426b474d59e8dd81a1d1adafa95627d89864a2c Mon Sep 17 00:00:00 2001 From: Aryan Tandon <113663996+PhoenixAlpha23@users.noreply.github.com> Date: Sat, 30 Nov 2024 17:21:09 +0530 Subject: [PATCH] Update app.py Shorten the description --- app.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/app.py b/app.py index b69b932..75e85a7 100644 --- a/app.py +++ b/app.py @@ -185,22 +185,11 @@ def main(): initialize_session_state() # App title and description - st.title("Text Extraction from Images and PDFs using Tesseract OCR") - st.markdown('## You can upload multiple images or PDF files to extract text from.') - st.write('Tesseract Page Segmentation Modes (PSM) control how Tesseract analyzes and interprets document layouts:') - st.write(""" - PSM 3: Automatic page segmentation with orientation and script detection.\n - PSM 4: Assumes single column of text.\n - PSM 6: Assumes single uniform block of text.\n - PSM 11: Treats each line as a single text line.\n - PSM 12: Considers each word as a separate entity.\n - - Choose based on your document's structure: - - Complex layouts: PSM 3 - Simple, single-column documents: PSM 4 or 6 - Need line-by-line extraction: PSM 11 - Word-level processing: PSM 12.""") + st.title("Text Extraction using Tesseract OCR") + st.markdown('## Upload multiple images or PDF files to extract text from.') + st.write('From the list of Tesseract Page Segmentation Modes (PSM) on the left,\n you control how Tesseract analyzes and interprets document with varying layouts:') + st.write(""" Automatic detection works fine for most documents,\n + You can Choose a different one based on your document's structure from the list.\n""") # File uploader uploaded_files = st.file_uploader(