aboutsummaryrefslogtreecommitdiff
path: root/graphics/tesseract
diff options
context:
space:
mode:
Diffstat (limited to 'graphics/tesseract')
-rw-r--r--graphics/tesseract/README3
-rw-r--r--graphics/tesseract/patches/tesseract-2.03-java.patch17
-rw-r--r--graphics/tesseract/patches/tesseract-2.03-missing_includes.patch37
-rw-r--r--graphics/tesseract/patches/tesseract-2.03-patch.patch53
-rw-r--r--graphics/tesseract/tesseract.SlackBuild89
-rw-r--r--graphics/tesseract/tesseract.info10
6 files changed, 189 insertions, 20 deletions
diff --git a/graphics/tesseract/README b/graphics/tesseract/README
index 31bfc8b694aa0..f9be7a1c4c17a 100644
--- a/graphics/tesseract/README
+++ b/graphics/tesseract/README
@@ -10,11 +10,12 @@ all available language packs. Note that you can install more than one
(or even all) of the language packs, as they do not conflict with each
other. The build script defaults to use English, but this is easily
changed by passing an alternate value on the command line.
+
Here is the relevant code from the build script:
# Language pack(s) to use
# We'll install English by default, but you can pass another one (or all)
# of them on the command line (space delimited). If you pass more than one
# (again, space delimited), you must enclose the string in quotes. Examples:
# TESSLANG=fra ./tesseract.SlackBuild
- # TESSLANG="deu eng fra ita nld spa" ./tesseract.SlackBuild
+ # TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild
TESSLANG=${TESSLANG:-eng} # Default to English
diff --git a/graphics/tesseract/patches/tesseract-2.03-java.patch b/graphics/tesseract/patches/tesseract-2.03-java.patch
new file mode 100644
index 0000000000000..f384dbdfc1dd0
--- /dev/null
+++ b/graphics/tesseract/patches/tesseract-2.03-java.patch
@@ -0,0 +1,17 @@
+Index: tesseract-2.03/java/makefile
+===================================================================
+--- tesseract-2.03.orig/java/makefile 2008-04-24 21:52:12.000000000 +0200
++++ tesseract-2.03/java/makefile 2008-04-24 21:53:03.000000000 +0200
+@@ -39,8 +39,11 @@
+ clean :
+ rm -f ScrollView.jar *.class
+
++distclean : clean
++ rm -f Makefile
++
+ # all-am does nothing, to make the java part optional.
+-all all-am :
++all all-am install :
+
+ # dist runs the autoconf makefile to archive the files correctly.
+ dist distdir :
diff --git a/graphics/tesseract/patches/tesseract-2.03-missing_includes.patch b/graphics/tesseract/patches/tesseract-2.03-missing_includes.patch
new file mode 100644
index 0000000000000..24931c08a67a8
--- /dev/null
+++ b/graphics/tesseract/patches/tesseract-2.03-missing_includes.patch
@@ -0,0 +1,37 @@
+diff -Nur a/viewer/scrollview.cpp b/viewer/scrollview.cpp
+--- a/viewer/scrollview.cpp 2008-04-21 19:06:24.000000000 -0500
++++ b/viewer/scrollview.cpp 2009-06-22 21:33:53.747817922 -0500
+@@ -36,6 +36,9 @@
+ #include <vector>
+ #include <string>
+
++#include <string.h>
++#include <limits.h>
++
+ #include "svutil.h"
+
+ #ifdef HAVE_LIBLEPT
+diff -Nur a/viewer/svmnode.cpp b/viewer/svmnode.cpp
+--- a/viewer/svmnode.cpp 2008-04-18 23:44:06.000000000 -0500
++++ b/viewer/svmnode.cpp 2009-06-22 21:34:12.375586651 -0500
+@@ -28,6 +28,8 @@
+
+ #include <iostream>
+
++#include <string.h>
++
+ #include "scrollview.h"
+
+ // Create the empty root menu node. with just a caption. All other nodes should
+diff -Nur a/viewer/svutil.cpp b/viewer/svutil.cpp
+--- a/viewer/svutil.cpp 2008-04-21 19:07:25.000000000 -0500
++++ b/viewer/svutil.cpp 2009-06-22 21:34:31.419367272 -0500
+@@ -38,6 +38,8 @@
+
+ #include <iostream>
+ #include <string>
++#include <string.h>
++#include <stdlib.h>
+
+ const int kBufferSize = 65536;
+ const int kMaxMsgSize = 4096;
diff --git a/graphics/tesseract/patches/tesseract-2.03-patch.patch b/graphics/tesseract/patches/tesseract-2.03-patch.patch
new file mode 100644
index 0000000000000..f2868dfa5c89d
--- /dev/null
+++ b/graphics/tesseract/patches/tesseract-2.03-patch.patch
@@ -0,0 +1,53 @@
+*** a/ccmain/baseapi.cpp
+--- b/ccmain/baseapi.cpp
+***************
+*** 954,960 ****
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ OUTLINE *outline = it.data();
+ outline->compute_bb();
+! result.bounding_union(outline->bounding_box());
+ }
+ return result;
+ }
+--- 954,960 ----
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ OUTLINE *outline = it.data();
+ outline->compute_bb();
+! result = result.bounding_union(outline->bounding_box());
+ }
+ return result;
+ }
+***************
+*** 966,972 ****
+ for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
+ C_BLOB *blob = c_it.data();
+ //bboxes.push(tessy_rectangle(blob->bounding_box()));
+! result.bounding_union(blob->bounding_box());
+ }
+ return result;
+ }
+--- 966,972 ----
+ for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
+ C_BLOB *blob = c_it.data();
+ //bboxes.push(tessy_rectangle(blob->bounding_box()));
+! result = result.bounding_union(blob->bounding_box());
+ }
+ return result;
+ }
+***************
+*** 1026,1032 ****
+ for (int i = 0; i < n; i++) {
+ PBLOB *blob = it.data();
+ TBOX current = pblob_get_bbox(blob);
+! bln_rect.bounding_union(current);
+
+ TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
+ str, *len);
+--- 1026,1032 ----
+ for (int i = 0; i < n; i++) {
+ PBLOB *blob = it.data();
+ TBOX current = pblob_get_bbox(blob);
+! bln_rect = bln_rect.bounding_union(current);
+
+ TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
+ str, *len);
diff --git a/graphics/tesseract/tesseract.SlackBuild b/graphics/tesseract/tesseract.SlackBuild
index bfc36e41bd847..63835762dd2d5 100644
--- a/graphics/tesseract/tesseract.SlackBuild
+++ b/graphics/tesseract/tesseract.SlackBuild
@@ -1,14 +1,39 @@
#!/bin/sh
-# Pierre Cazenave revision date 10/11/2007
+# Pierre Cazenave 10/11/2007.
+# Updated 25/01/2009.
+# Updated 08/04/2009.
+# Updated 28/05/2009 for Slackware64.
# Modified by Robby Workman <rworkman@slackbuilds.org> for better
-# consistency with our other scripts
+# consistency with our other scripts
+# Thanks to S+*n_Pe*rm*n for a bug report from OCRopus.
+
+# Copyright 2009 Pierre Cazenave <pwcazenave {at} gmail [dot] com>
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PRGNAM=tesseract
-VERSION=2.01
+VERSION=2.03
ARCH=${ARCH:-i486}
-BUILD=${BUILD:-2}
+BUILD=${BUILD:-5}
TAG=${TAG:-_SBo}
+
CWD=$(pwd)
TMP=${TMP:-/tmp/SBo}
PKG=$TMP/package-$PRGNAM
@@ -19,17 +44,22 @@ OUTPUT=${OUTPUT:-/tmp}
# of them on the command line (space delimited). If you pass more than one
# (again, space delimited), you must enclose the string in quotes. Examples:
# TESSLANG=fra ./tesseract.SlackBuild
-# TESSLANG="deu eng fra ita nld spa" ./tesseract.SlackBuild
+# TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild
TESSLANG=${TESSLANG:-eng} # Default to English
if [ "$ARCH" = "i486" ]; then
SLKCFLAGS="-O2 -march=i486 -mtune=i686"
+ LIBDIRSUFFIX=""
elif [ "$ARCH" = "i686" ]; then
SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+ LIBDIRSUFFIX=""
elif [ "$ARCH" = "x86_64" ]; then
SLKCFLAGS="-O2 -fPIC"
+ LIBDIRSUFFIX="64"
fi
+set -e
+
rm -rf $TMP/$PRGNAM-$VERSION $PKG
mkdir -p $TMP $PKG $OUTPUT
cd $TMP
@@ -38,9 +68,21 @@ cd $PRGNAM-$VERSION
chown -R root:root .
chmod -R u+w,go+r-w,a-s .
+# Patch the source with a fix from a bug report by sister project OCRopus
+# http://ocropus.googlecode.com/svn/trunk/tesseract-2.03-patch.diff
+# Thanks to S+*n_Pe*rm*n for the bug report.
+patch -p1 < $CWD/patches/tesseract-2.03-patch.patch
+# Also patch for the java make install error.
+# http://tesseract-ocr.googlegroups.com/attach/cd42bea980dbe946/java
+# (renamed from java to tesseract-2.03-java.patch)
+patch -p1 < $CWD/patches/tesseract-2.03-java.patch
+# Add some missing includes required by the gcc shipped in Slackware 13.0
+patch -p1 < $CWD/patches/tesseract-2.03-missing_includes.patch
+
CFLAGS="$SLKCFLAGS" \
./configure \
--prefix=/usr \
+ --libdir=/usr/lib${LIBDIRSUFFIX} \
--sysconfdir=/etc \
--localstatedir=/var \
--mandir=/usr/man \
@@ -48,16 +90,29 @@ CFLAGS="$SLKCFLAGS" \
make
-# Let's extract the desired language tarballs
+# Let's extract the desired language tarballs, with a hack for the different
+# version numbers: deu-f, por and vie ship as 2.01 tarballs, the rest as 2.00.
+# I shied away from wildcards as they're a bit unpredictable...
for _language in $(echo "$TESSLANG") ; do
- if [ -r $CWD/tesseract-2.00.$_language.tar.gz ]; then
- tar xf $CWD/tesseract-2.00.$_language.tar.gz
- SUCCESS=yes
+ if [ "$_language" = "deu-f" -o "$_language" = "por" -o "$_language" = "vie" ]; then
+ if [ -r $CWD/tesseract-2.01.$_language.tar.gz ]; then
+ tar xf $CWD/tesseract-2.01.$_language.tar.gz
+ SUCCESS=yes
+ else
+ echo "$CWD/tesseract-2.01.$_language.tar.gz not found."
+ sleep 5
+ fi;
else
- echo "$CWD/tesseract-2.00.$_language.tar.gz not found."
- sleep 5
- fi ;
+ if [ -r $CWD/tesseract-2.00.$_language.tar.gz ]; then
+ tar xf $CWD/tesseract-2.00.$_language.tar.gz
+ SUCCESS=yes
+ else
+ echo "$CWD/tesseract-2.00.$_language.tar.gz not found."
+ sleep 5
+ fi;
+ fi
done
+
if [ ! "$SUCCESS" = "yes" ]; then
echo "No language packs were found, so this package will not work as is."
echo "See $CWD/README for more information."
@@ -67,16 +122,22 @@ fi
make install DESTDIR=$PKG
( cd $PKG
- find . | xargs file | grep "executable" | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null
- find . | xargs file | grep "shared object" | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null
+ find . | xargs file | grep "executable" | grep ELF | cut -f 1 -d : | \
+ xargs strip --strip-unneeded 2> /dev/null || true
+ find . | xargs file | grep "shared object" | grep ELF | cut -f 1 -d : | \
+ xargs strip --strip-unneeded 2> /dev/null || true # as above: keep set -e from aborting if nothing to strip
)
mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
cp -a AUTHORS COPYING ChangeLog INSTALL NEWS README ReleaseNotes \
$PKG/usr/doc/$PRGNAM-$VERSION
cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
+cat $CWD/README > $PKG/usr/doc/$PRGNAM-$VERSION/README$TAG
find $PKG/usr/doc/$PRGNAM-$VERSION -type f -exec chmod 644 {} \;
+# Remove zero-size files (left over for language packs that may not have been installed).
+find $PKG/usr/share/tessdata -type f -size 0 -exec rm {} +
+
mkdir -p $PKG/install
cat $CWD/slack-desc > $PKG/install/slack-desc
diff --git a/graphics/tesseract/tesseract.info b/graphics/tesseract/tesseract.info
index de9f56aac5a6f..848deec1ee01f 100644
--- a/graphics/tesseract/tesseract.info
+++ b/graphics/tesseract/tesseract.info
@@ -1,8 +1,8 @@
PRGNAM="tesseract"
-VERSION="2.01"
+VERSION="2.03"
HOMEPAGE="http://code.google.com/p/tesseract-ocr/"
-DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-2.01.tar.gz"
-MD5SUM="fb0e6e7652b985049c11a4bc8e593885"
+DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-2.03.tar.gz"
+MD5SUM="5777b70b11df16c1ac9aa155d7cfc553"
MAINTAINER="Pierre Cazenave"
-EMAIL="pwcazenave@gmail.com"
-APPROVED="rworkman"
+EMAIL="pwcazenave <at> gmail {dot} com"
+APPROVED="dsomero,rworkman"