Summary
I am running tesseract and gosseract on the same image, which contains a single line of text. Tesseract finds the text; gosseract does not.
Reproducibility
Reproducibility Frequency
- Run
tesseract d2.pbm - --psm 13
and it will show the output
- Run
go run main.go
and it will not show any output
go.mod:
module gosstest
go 1.19
require github.com/otiai10/gosseract/v2 v2.4.0
main.go:
package main
import (
"fmt"
"os"
"github.com/otiai10/gosseract/v2"
)
// main runs the reproduction and exits with a non-zero status if any step
// fails, so a failure cannot be mistaken for an empty OCR result.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run reads the image file, feeds it to gosseract with the same page
// segmentation mode as the command-line invocation (--psm 13), and prints the
// recognized text to stdout. The gosseract version and any mismatch against
// the expected text are reported on stderr.
//
// It returns an error as soon as a step fails instead of continuing with
// partial state. It is separate from main so that the deferred Close runs
// before the process exits.
func run() error {
	const (
		want     = "BPJAZGAP"
		filename = "d2.pbm"
	)

	// Read via the constant so the path in the error message always matches
	// the file that was actually opened.
	buf, err := os.ReadFile(filename)
	if err != nil {
		return fmt.Errorf("reading %q: %w", filename, err)
	}

	fmt.Fprintln(os.Stderr, gosseract.Version())

	ocr := gosseract.NewClient()
	// Close's error is deliberately ignored: nothing useful can be done with
	// it at teardown.
	defer ocr.Close()

	// Both setters return an error; dropping it could hide the reason for an
	// empty result.
	if err := ocr.SetPageSegMode(gosseract.PSM_RAW_LINE); err != nil { // --psm 13
		return fmt.Errorf("setting page segmentation mode: %w", err)
	}
	if err := ocr.SetImageFromBytes(buf); err != nil {
		return fmt.Errorf("setting image from %q: %w", filename, err)
	}

	got, err := ocr.Text()
	if err != nil {
		return fmt.Errorf("recognizing text: %w", err)
	}

	if got != want {
		fmt.Fprintf(os.Stderr, "want %q but got %q\n", want, got)
	}
	fmt.Println(got)
	return nil
}
d2.pbm:
P1 42 8
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 1 1 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 0 1 1 1 1 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 0
0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0
0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0
0 1 0 0 1 0 1 1 1 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 0 0 1 0 1 1 0 1 1 1 1 0 1 1 1 0 0 0
0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0
0 1 1 1 0 0 1 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 1 1 1 0 0 1 1 1 0 1 0 0 1 0 1 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Environment
Linux chieftec 6.0.15-300.fc37.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Dec 21 18:33:23 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
GO111MODULE=""
GOARCH="amd64"
GOBIN=""
GOCACHE="/home/jot/.cache/go-build"
GOENV="/home/jot/.config/go/env"
GOEXE=""
GOEXPERIMENT=""
GOFLAGS=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOINSECURE=""
GOMODCACHE="/home/jot/go/pkg/mod"
GONOPROXY=""
GONOSUMDB=""
GOOS="linux"
GOPATH="/home/jot/go"
[project.zip](https://github.com/otiai10/gosseract/files/10347142/project.zip)
GOPRIVATE=""
GOPROXY="direct"
GOROOT="/usr/lib/golang"
GOSUMDB="off"
GOTMPDIR=""
GOTOOLDIR="/usr/lib/golang/pkg/tool/linux_amd64"
GOVCS=""
GOVERSION="go1.19.4"
GCCGO="gccgo"
GOAMD64="v1"
AR="ar"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
GOMOD="/home/jot/work/gosseract/go.mod"
GOWORK=""
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -Wl,--no-gc-sections -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build1563688176=/tmp/go-build -gno-record-gcc-switches"
go version go1.19.4 linux/amd64
tesseract 5.2.0
leptonica-1.82.0
libgif 5.2.1 : libjpeg 6b (libjpeg-turbo 2.1.3) : libpng 1.6.37 : libtiff 4.4.0 : zlib 1.2.12 : libwebp 1.2.4
Found AVX2
Found AVX
Found FMA
Found SSE4.1