forked from tleyden/open-ocr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
go_tesseract_engine.go
98 lines (71 loc) · 2.1 KB
/
go_tesseract_engine.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
package ocrworker
import (
"errors"
"fmt"
"os"
"github.com/GeertJohan/go.leptonica"
"github.com/GeertJohan/go.tesseract"
"github.com/couchbaselabs/logg"
)
// TESSERACT_MODEL_DIR is passed as the first argument to tesseract.NewTess
// when an OCR handle is created (presumably the directory holding the
// Tesseract trained-data files; confirm against the go.tesseract docs).
const TESSERACT_MODEL_DIR = "/usr/local/share/tessdata"
// TESSERACT_LANG is passed as the second argument to tesseract.NewTess
// (presumably the language code of the model to load; confirm against the
// go.tesseract docs).
const TESSERACT_LANG = "eng"
// GoTesseractEngine performs OCR through the go.tesseract and go.leptonica
// bindings. It has no fields, so the zero value is ready to use.
type GoTesseractEngine struct{}
// ProcessRequest runs OCR for ocrRequest and returns the recognised text.
//
// The image source is chosen from the request: a non-empty ImgUrl is handed
// to ProcessImageUrl, otherwise non-empty ImgBytes are handed to
// ProcessImageBytes. A request that carries neither is rejected with an
// error before any temp file is created or Tesseract is started.
//
// On failure the returned OcrResult is the zero value and must be ignored.
func (t GoTesseractEngine) ProcessRequest(ocrRequest OcrRequest) (OcrResult, error) {
	switch {
	case ocrRequest.ImgUrl != "":
		return t.ProcessImageUrl(ocrRequest.ImgUrl)
	case len(ocrRequest.ImgBytes) > 0:
		return t.ProcessImageBytes(ocrRequest.ImgBytes)
	default:
		// Nothing to recognise: fail here with a clear message instead of
		// writing an empty temp file and letting the image loader fail.
		return OcrResult{}, errors.New("ocr request has neither an image url nor image bytes")
	}
}
// ProcessImageBytes runs OCR over an image supplied as raw bytes.
//
// The bytes are written to a temporary file first, because the leptonica
// lib can't seem to handle byte arrays; the file is removed when this
// method returns. Any error from creating or writing the file is returned
// unchanged together with a zero OcrResult.
func (t GoTesseractEngine) ProcessImageBytes(imgBytes []byte) (OcrResult, error) {
	imgPath, err := createTempFileName()
	if err != nil {
		return OcrResult{}, err
	}
	// Best-effort cleanup of the temp file once OCR has finished.
	defer os.Remove(imgPath)

	if err = saveBytesToFileName(imgBytes, imgPath); err != nil {
		return OcrResult{}, err
	}

	return t.processImageFile(imgPath)
}
// ProcessImageUrl runs OCR over the image found at imgUrl.
//
// The URL's content is saved to a temporary file first, because the
// leptonica lib can't seem to handle byte arrays; the file is removed when
// this method returns. Any error from creating the file or saving the
// content is returned unchanged together with a zero OcrResult.
func (t GoTesseractEngine) ProcessImageUrl(imgUrl string) (OcrResult, error) {
	logg.LogTo("OCR_TESSERACT", "ProcessImageUrl()")

	imgPath, err := createTempFileName()
	if err != nil {
		return OcrResult{}, err
	}
	// Best-effort cleanup of the temp file once OCR has finished.
	defer os.Remove(imgPath)

	if err = saveUrlContentToFileName(imgUrl, imgPath); err != nil {
		return OcrResult{}, err
	}

	return t.processImageFile(imgPath)
}
// processImageFile runs Tesseract over the image file at tmpFileName and
// returns the recognised text.
//
// A Tesseract handle is created with TESSERACT_MODEL_DIR and TESSERACT_LANG
// for this call, and both it and the leptonica pix loaded from the file are
// closed before returning. On failure a zero OcrResult is returned together
// with an error that names the step that failed and wraps the underlying
// cause.
func (t GoTesseractEngine) processImageFile(tmpFileName string) (OcrResult, error) {
	tess, err := tesseract.NewTess(TESSERACT_MODEL_DIR, TESSERACT_LANG)
	if err != nil {
		return OcrResult{}, fmt.Errorf("initializing tesseract (dir %q, lang %q): %w",
			TESSERACT_MODEL_DIR, TESSERACT_LANG, err)
	}
	defer tess.Close()

	pix, err := leptonica.NewPixFromFile(tmpFileName)
	if err != nil {
		return OcrResult{}, fmt.Errorf("loading image %q: %w", tmpFileName, err)
	}
	defer pix.Close()

	// set the image to the tesseract instance
	tess.SetImagePix(pix)

	// Retrieve the text once and reuse it for both the log line and the
	// result, instead of asking the engine for it twice.
	text := tess.Text()

	// Log under the same key the rest of this engine uses rather than
	// printing the recognised text straight to stdout.
	logg.LogTo("OCR_TESSERACT", "ocr text: %v", text)

	return OcrResult{Text: text}, nil
}