-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext.html
57 lines (52 loc) · 1.8 KB
/
text.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>GET TEXT FROM UPLOADED PDF</title>
<script type="text/javascript" src="/pdf.js"></script>
</head>
<body>
<canvas id="the-canvas"></canvas>
<script type="text/javascript">
// Page setup: configure the PDF.js globals and work out which PDF to open.
const BASE_URI = window.location.origin;

// The worker option is switched off, but the worker script location is still
// configured so PDF.js has a same-origin path to it.
PDFJS.disableWorker = true;
PDFJS.workerSrc = `${BASE_URI}/pdf.worker.js`;

// The document location comes from the `pdf` query parameter ("?pdf=PDF-URI").
// URLSearchParams finds the parameter by name wherever it sits in the query
// string and percent-decodes it, so the PDF URI should be URL-encoded when it
// contains characters such as "&" or "+". A missing parameter yields ''.
// `var` is kept on purpose: in a classic script it makes `url` a property of
// `window`, exactly as the original declaration did.
var url = new URLSearchParams(window.location.search).get('pdf') || '';
if (url === '') {
  console.warn('No PDF specified: open this page as "?pdf=PDF-URI".');
}
/**
 * Extract the plain text of the leading pages of a PDF through PDF.js.
 *
 * @param {string} pdfUrl - Document source, handed unchanged to `PDFJS.getDocument`.
 * @param {number} [maxPages=7] - Upper bound on how many pages (counted from
 *   page 1) are read.
 * @returns {Promise<string>} Resolves with the text of the pages that were
 *   read, concatenated in page order with no separator between text items or
 *   pages. Rejects when `pdfUrl` is empty or when loading or reading the
 *   document fails.
 */
function gettext(pdfUrl, maxPages = 7) {
  if (!pdfUrl) {
    // Fail fast with a readable reason instead of passing an empty source to PDF.js.
    return Promise.reject(new Error('gettext: no PDF URL given (use "?pdf=PDF-URI")'));
  }

  return PDFJS.getDocument(pdfUrl).then((pdf) => {
    // Prefer the document's `numPages`; fall back to the `pdfInfo` field the
    // previous implementation read, so either document shape keeps working.
    const totalPages = typeof pdf.numPages === 'number' ? pdf.numPages : pdf.pdfInfo.numPages;
    const pageCount = Math.min(totalPages, maxPages);

    // Start every page request up front; Promise.all preserves page order.
    const pageTexts = [];
    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber += 1) {
      pageTexts.push(
        pdf.getPage(pageNumber)
          .then((page) => page.getTextContent())
          .then((content) => content.items.map((item) => item.str).join(''))
      );
    }

    return Promise.all(pageTexts).then((texts) => texts.join(''));
  });
}
// Run the extraction for the requested document: print the text on success,
// or the failure reason if loading or reading the PDF fails.
gettext(url).then(
  (text) => {
    console.log(text);
  },
  (reason) => {
    console.error(reason);
  }
);
</script>
</body>
</html>