-
Notifications
You must be signed in to change notification settings - Fork 0
/
Pdf2Audio.py
38 lines (29 loc) · 898 Bytes
/
Pdf2Audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/python
import PyPDF2
import os
from gtts import gTTS
import re
# Pdf2Audio: prompt for a PDF path, pull the words and numbers out of every
# page, print them, and save them as spoken audio in "listen_pdf.mp3".

file_path = input("Enter full path to PDF to read: ")
if not os.path.exists(file_path):
    print("File does not exist!")
    # Stop here: carrying on would only fail inside open() with a traceback.
    raise SystemExit(1)

# Filter that keeps only runs of letters and digits; compiled once rather
# than being looked up again for every page.
word_pattern = re.compile(r'[a-zA-Z0-9]+')

# The reader is handed the open file handle, so the handle stays open while
# pages are read; the context manager closes it afterwards.
words = []
with open(file_path, 'rb') as f:
    pdffile = PyPDF2.PdfFileReader(f)
    # get number of pages in pdf
    no_of_pages = pdffile.getNumPages()
    # iterate through the pages, collecting the words found on each one
    for pageno in range(no_of_pages):
        page = pdffile.getPage(pageno)
        content = page.extractText()
        words.extend(word_pattern.findall(content))

# Every word is prefixed with one space, which reproduces the text this
# script has always generated; join() builds it in a single pass instead of
# re-copying the growing string for each word.
string_words = ''.join(' ' + word for word in words)

# convert string of words to mp3 file
print(string_words)
if not string_words:
    # No words were extracted from any page, so there is nothing to speak.
    # NOTE(review): gTTS is expected to reject empty text - confirm.
    print("No readable text found in PDF!")
    raise SystemExit(1)
tts = gTTS(text=string_words, lang='en')
tts.save("listen_pdf.mp3")