-
Notifications
You must be signed in to change notification settings - Fork 0
/
ocr.py
32 lines (27 loc) · 936 Bytes
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
'''
This program converts a photo to text and extracts useful information from it.
The photo-to-text conversion uses the pytesseract library.
The data extraction uses basic Python string slicing.
'''
# Import the required libraries
import pytesseract
from PIL import Image
# Tesseract is not on PATH on this machine, so point pytesseract at the
# executable explicitly.
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\Admin\AppData\Local\Programs\Tesseract-OCR\tesseract.exe"

# Load the image and run OCR on it. The context manager releases the image's
# file handle as soon as the text has been produced instead of leaving it to
# the garbage collector.
with Image.open(r"C:\Users\Admin\Pictures\Papa hisaab dataset\1 (1).jpg") as img:
    text = pytesseract.image_to_string(img)

# Pull each field out of the OCR text by fixed character offsets.
# NOTE(review): these offsets presumably match the layout of this one scanned
# voucher; any change in the OCR output shifts them - verify before reusing
# the script on other images.
purity = text[108:129]
net_wt = text[162:165] + " : " + text[170:177]
gross_wt = text[141:154] + text[155:161]
voucher = text[26:43]
Date = text[55:74]

# Print the extracted fields: date (followed by a single space), voucher,
# purity and gross weight are written back to back, then an empty line,
# then the net weight.
print(Date, end=" ")
print(voucher, end="")
print(purity, end="")
print(gross_wt)
print()
print(net_wt)