Update app.py

Shorten the description
PhoenixAlpha23 · Nov 30, 2024 · e426b47
1 parent 2cb73be
commit e426b47
Showing 1 changed file with 5 additions and 16 deletions.
diff --git a/app.py b/app.py
@@ -185,22 +185,11 @@ def main():
     initialize_session_state()
 
     # App title and description
-    st.title("Text Extraction from Images and PDFs using Tesseract OCR")
-    st.markdown('## You can upload multiple images or PDF files to extract text from.')
-    st.write('Tesseract Page Segmentation Modes (PSM) control how Tesseract analyzes and interprets document layouts:')
-    st.write("""
-    PSM 3: Automatic page segmentation with orientation and script detection.\n
-    PSM 4: Assumes single column of text.\n
-    PSM 6: Assumes single uniform block of text.\n
-    PSM 11: Treats each line as a single text line.\n
-    PSM 12: Considers each word as a separate entity.\n
-
-    Choose based on your document's structure:
-
-    Complex layouts: PSM 3
-    Simple, single-column documents: PSM 4 or 6
-    Need line-by-line extraction: PSM 11
-    Word-level processing: PSM 12.""")
+    st.title("Text Extraction using Tesseract OCR")
+    st.markdown('## Upload multiple images or PDF files to extract text from.')
+    st.write('From the list of Tesseract Page Segmentation Modes (PSM)  on the left,\n you control how Tesseract analyzes and interprets document with varying layouts:')
+    st.write(""" Automatic detection works fine for most documents,\n
+    You can Choose a different one based on your document's structure from the list.\n""")
 
     # File uploader
     uploaded_files = st.file_uploader(