aboutsummaryrefslogtreecommitdiff
path: root/graphics/tesseract
diff options
context:
space:
mode:
authorRobby Workman <rworkman@slackbuilds.org>2010-07-01 00:07:58 -0500
committerRobby Workman <rworkman@slackbuilds.org>2010-07-01 00:07:58 -0500
commit11a829a47ef725367469241bc7feed272f61c602 (patch)
tree59121bfabb683ae1a667560850aa2b34b57495c7 /graphics/tesseract
parent38bdc4c5b2280d75c7b6372b400dcdf250ac6a72 (diff)
graphics/tesseract: Added (OCR engine)
This was dropped from the 13.1 repo due to build failure, but it's needed by tucan, so let's fix it and add it back. Signed-off-by: Robby Workman <rworkman@slackbuilds.org>
Diffstat (limited to 'graphics/tesseract')
-rw-r--r--graphics/tesseract/README21
-rw-r--r--graphics/tesseract/slack-desc19
-rw-r--r--graphics/tesseract/svutil.cpp-include_stdio_h.diff11
-rw-r--r--graphics/tesseract/tesseract.SlackBuild144
-rw-r--r--graphics/tesseract/tesseract.info12
5 files changed, 207 insertions, 0 deletions
diff --git a/graphics/tesseract/README b/graphics/tesseract/README
new file mode 100644
index 000000000000..f9be7a1c4c17
--- /dev/null
+++ b/graphics/tesseract/README
@@ -0,0 +1,21 @@
+Tesseract is a commercial quality OCR engine originally developed at HP
+between 1985 and 1995. In 1995, this engine was among the top 3 evaluated
+by UNLV. It was open-sourced by HP and UNLV in 2005.
+
+You will need to get one of the language packs in order to do anything
+useful with tesseract, and that language pack tarball should be present
+in the same directory as the SlackBuild script when the package is created.
+See http://code.google.com/p/tesseract-ocr/downloads/list for a list of
+all available language packs. Note that you can install more than one
+(or even all) of the language packs, as they do not conflict with each
+other. The build script defaults to English, but this is easily
+changed by setting TESSLANG on the command line.
+
+Here is the relevant code from the build script:
+ # Language pack(s) to use
+ # We'll install English by default, but you can pass another one (or all)
+ # of them on the command line (space delimited). If you pass more than one
+ # (again, space delimited), you must enclose the string in quotes. Examples:
+ # TESSLANG=fra ./tesseract.SlackBuild
+ # TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild
+ TESSLANG=${TESSLANG:-eng} # Default to English
diff --git a/graphics/tesseract/slack-desc b/graphics/tesseract/slack-desc
new file mode 100644
index 000000000000..2136326be897
--- /dev/null
+++ b/graphics/tesseract/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description. Line
+# up the first '|' above the ':' following the base package name, and the '|' on
+# the right side marks the last column you can put a character in. You must make
+# exactly 11 lines for the formatting to be correct. It's also customary to
+# leave one space after the ':'.
+
+ |-----handy-ruler--------------------------------------------------|
+tesseract: Tesseract (OCR Engine)
+tesseract:
+tesseract: Tesseract is a commercial quality OCR engine originally developed
+tesseract: at HP between 1985 and 1995. In 1995, this engine was among the
+tesseract: top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in
+tesseract: 2005.
+tesseract:
+tesseract: http://code.google.com/p/tesseract-ocr/
+tesseract:
+tesseract:
+tesseract:
diff --git a/graphics/tesseract/svutil.cpp-include_stdio_h.diff b/graphics/tesseract/svutil.cpp-include_stdio_h.diff
new file mode 100644
index 000000000000..b2a55a9514fc
--- /dev/null
+++ b/graphics/tesseract/svutil.cpp-include_stdio_h.diff
@@ -0,0 +1,11 @@
+diff -Nur tesseract-2.04.orig//viewer/svutil.cpp tesseract-2.04/viewer/svutil.cpp
+--- tesseract-2.04.orig//viewer/svutil.cpp 2009-06-03 11:29:38.000000000 -0500
++++ tesseract-2.04/viewer/svutil.cpp 2010-07-01 00:03:45.253070024 -0500
+@@ -35,6 +35,7 @@
+ #include <signal.h>
+ #include <stdlib.h>
+ #include <string.h>
++#include <stdio.h>
+ #include <netdb.h>
+ #include <sys/socket.h>
+ #ifdef __linux__
diff --git a/graphics/tesseract/tesseract.SlackBuild b/graphics/tesseract/tesseract.SlackBuild
new file mode 100644
index 000000000000..95f010a6cf41
--- /dev/null
+++ b/graphics/tesseract/tesseract.SlackBuild
@@ -0,0 +1,144 @@
+#!/bin/sh
+
+# Pierre Cazenave 10/11/2007.
+# Updated 25/01/2009.
+# Updated 08/04/2009.
+# Updated 28/05/2009 for Slackware64.
+# Modified by Robby Workman <rworkman@slackbuilds.org> for better
+# consistency with our other scripts
+# Thanks to S+*n_Pe*rm*n for a bug report from OCRopus.
+
+# Copyright 2009 Pierre Cazenave <pwcazenave {at} gmail [dot] com>
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+PRGNAM=tesseract
+VERSION=2.04
+BUILD=${BUILD:-1} # build number, overridable from the environment
+TAG=${TAG:-_SBo} # package tag, overridable from the environment
+
+if [ -z "$ARCH" ]; then
+  ARCH=$( uname -m ) # no ARCH given: start from the machine name...
+  case "$ARCH" in # ...and fold the x86/ARM variants into one name each
+    i?86) ARCH=i486 ;;
+    arm*) ARCH=arm ;;
+  esac
+fi
+
+CWD=$(pwd) # current directory when the script starts
+TMP=${TMP:-/tmp/SBo} # build area
+PKG=${TMP}/package-${PRGNAM} # staging root for the package contents
+OUTPUT=${OUTPUT:-/tmp} # where the finished package is written
+
+# Language pack(s) to use
+# We'll install English by default, but you can pass another one (or all)
+# of them on the command line (space delimited). If you pass more than one
+# (again, space delimited), you must enclose the string in quotes. Examples:
+# TESSLANG=fra ./tesseract.SlackBuild
+# TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild
+TESSLANG=${TESSLANG:-eng} # Default to English
+
+# Per-architecture compiler flags and library directory suffix; the suffix is
+# appended to /usr/lib for --libdir. Unknown ARCH values get plain -O2.
+LIBDIRSUFFIX=""
+case "$ARCH" in
+  i486)
+    SLKCFLAGS="-O2 -march=i486 -mtune=i686" ;;
+  i686)
+    SLKCFLAGS="-O2 -march=i686 -mtune=i686" ;;
+  x86_64)
+    SLKCFLAGS="-O2 -fPIC"; LIBDIRSUFFIX="64" ;;
+  *)
+    SLKCFLAGS="-O2" ;;
+esac
+
+set -e # from here on, abort the build at the first failing command
+
+rm -rf "$TMP/$PRGNAM-$VERSION" "$PKG"
+mkdir -p "$TMP" "$PKG" "$OUTPUT"
+cd "$TMP"
+tar xvf "$CWD/$PRGNAM-$VERSION.tar.gz"
+cd "$PRGNAM-$VERSION"
+chown -R root:root .
+chmod -R u+w,go+r-w,a-s .
+
+patch -p1 < "$CWD/svutil.cpp-include_stdio_h.diff" # Fix missing snprintf()
+
+# Tesseract is C++ (see viewer/svutil.cpp), so export CXXFLAGS as well as CFLAGS.
+CFLAGS="$SLKCFLAGS" CXXFLAGS="$SLKCFLAGS" \
+./configure \
+  --prefix=/usr \
+  --libdir="/usr/lib${LIBDIRSUFFIX}" \
+  --sysconfdir=/etc \
+  --localstatedir=/var \
+  --mandir=/usr/man \
+  --disable-static \
+  --build="$ARCH-slackware-linux"
+
+make
+
+# Let's extract the desired language tarballs, with a hack for the different
+# version numbers for certain language files. I shied away from wildcards as
+# they're a bit unpredictable...
+for _language in $(echo "$TESSLANG") ; do
+ if [ "$_language" == "deu-f" -o "$_language" == "por" -o "$_language" == "vie" ]; then
+ if [ -r $CWD/tesseract-2.01.$_language.tar.gz ]; then
+ tar xf $CWD/tesseract-2.01.$_language.tar.gz
+ SUCCESS=yes
+ else
+ echo "$CWD/tesseract-2.01.$_language.tar.gz not found."
+ sleep 5
+ fi;
+ else
+ if [ -r $CWD/tesseract-2.00.$_language.tar.gz ]; then
+ tar xf $CWD/tesseract-2.00.$_language.tar.gz
+ SUCCESS=yes
+ else
+ echo "$CWD/tesseract-2.00.$_language.tar.gz not found."
+ sleep 5
+ fi;
+ fi
+done
+
+if [ ! "$SUCCESS" = "yes" ]; then
+ echo "No language packs were found, so this package will not work as is."
+ echo "See $CWD/README for more information."
+ exit 1
+fi
+
+make install DESTDIR="$PKG"
+
+# Strip ELF files; both xargs stages are NUL-delimited so whitespace in a path
+# cannot split it. Strip failures are deliberately ignored (best effort).
+find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" \
+  | grep ELF | cut -f 1 -d : | tr '\n' '\0' | xargs -0 strip --strip-unneeded 2> /dev/null || true
+
+mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION" "$PKG/install"
+cp -a AUTHORS COPYING ChangeLog INSTALL NEWS README ReleaseNotes \
+  "$PKG/usr/doc/$PRGNAM-$VERSION"
+cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
+cat "$CWD/README" > "$PKG/usr/doc/$PRGNAM-$VERSION/README.$TAG"
+find "$PKG/usr/doc/$PRGNAM-$VERSION" -type f -exec chmod 644 {} \;
+
+# remove zero size files (from extra potentially not installed language packs).
+find "$PKG/usr/share/tessdata" -type f -size 0 -exec rm {} +
+
+cat "$CWD/slack-desc" > "$PKG/install/slack-desc"
+cd "$PKG"
+/sbin/makepkg -l y -c n "$OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}"
diff --git a/graphics/tesseract/tesseract.info b/graphics/tesseract/tesseract.info
new file mode 100644
index 000000000000..115d24f6191c
--- /dev/null
+++ b/graphics/tesseract/tesseract.info
@@ -0,0 +1,12 @@
+PRGNAM="tesseract"
+VERSION="2.04"
+HOMEPAGE="http://code.google.com/p/tesseract-ocr/"
+DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-2.04.tar.gz \
+ http://tesseract-ocr.googlecode.com/files/tesseract-2.00.eng.tar.gz"
+MD5SUM="b44eba1a9f4892ac62e484c807fe0533 \
+ b8291d6b3a63ce7879d688e845e341a9"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+MAINTAINER="Pierre Cazenave"
+EMAIL="pwcazenave <at> gmail {dot} com"
+APPROVED="rworkman"