blob: e773e8b55a4fc91e58de8053662024b66f75814e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
#!/bin/sh
# OCR a range of pages from a scanned PDF into one plain-text file.
#
# Usage: <this-script> file.pdf
#
# Every page from STARTPAGE to ENDPAGE (inclusive, 1-based) is rendered to a
# monochrome TIFF with ImageMagick's `convert`, recognised with `tesseract`,
# and the recognised text is appended to OUTPUT. Existing content of OUTPUT
# is kept; new text is added after it.
#
# Exit status: 0 on success, 2 on a usage error, 1 on any other failure.
set -eu

STARTPAGE=1     # page number of the first page of the PDF to convert
ENDPAGE=12      # page number of the last page of the PDF to convert
SOURCE="${1:-}" # file name of the PDF (first command-line argument)
OUTPUT=book.txt # final output file
RESOLUTION=600  # resolution the scanner used (the higher, the better)

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ -z "$SOURCE" ]; then
  printf 'usage: %s file.pdf\n' "${0##*/}" >&2
  exit 2
fi
[ -r "$SOURCE" ] || die "cannot read input file: $SOURCE"

for tool in convert tesseract; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done

# Keep intermediate files in a private scratch directory so nothing in the
# current directory is overwritten, and remove it on every exit path.
workdir=$(mktemp -d) || die "cannot create temporary directory"
cleanup() {
  rm -rf -- "${workdir:?}"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Make sure the output file exists even when the page range is empty.
touch "$OUTPUT"

i=$STARTPAGE
while [ "$i" -le "$ENDPAGE" ]; do
  printf 'processing page %s\n' "$i"

  # The "[N]" suffix selects a single page; the index is zero-based, hence
  # the "- 1". -density precedes the input file name, as in the original
  # command, so that it is set before the PDF page is read.
  convert -monochrome -density "$RESOLUTION" \
    "${SOURCE}[$((i - 1))]" "$workdir/page.tif" ||
    die "convert failed on page $i"

  # tesseract appends ".txt" to the output base name it is given.
  tesseract "$workdir/page.tif" "$workdir/page" ||
    die "tesseract failed on page $i"

  cat -- "$workdir/page.txt" >>"$OUTPUT"

  # Drop the per-page files so stale text can never be appended twice.
  rm -f -- "$workdir/page.tif" "$workdir/page.txt"

  i=$((i + 1))
done
|