From e502945912c3ccc6d55a6819bc921cf5f47cc4fd Mon Sep 17 00:00:00 2001 From: Pierre Cazenave Date: Fri, 30 Jul 2010 02:46:02 -0500 Subject: graphics/ocropus: Added (document analysis and OCR system) Signed-off-by: Robby Workman --- graphics/ocropus/README | 9 +++ graphics/ocropus/ocrodata-env.diff | 15 +++++ graphics/ocropus/ocropus.SlackBuild | 109 ++++++++++++++++++++++++++++++++++++ graphics/ocropus/ocropus.info | 10 ++++ graphics/ocropus/ocroscript.1 | 43 ++++++++++++++ graphics/ocropus/slack-desc | 19 +++++++ graphics/ocropus/usr-local.diff | 22 ++++++++ 7 files changed, 227 insertions(+) create mode 100644 graphics/ocropus/README create mode 100644 graphics/ocropus/ocrodata-env.diff create mode 100644 graphics/ocropus/ocropus.SlackBuild create mode 100644 graphics/ocropus/ocropus.info create mode 100644 graphics/ocropus/ocroscript.1 create mode 100644 graphics/ocropus/slack-desc create mode 100644 graphics/ocropus/usr-local.diff diff --git a/graphics/ocropus/README b/graphics/ocropus/README new file mode 100644 index 000000000000..804acf85ff04 --- /dev/null +++ b/graphics/ocropus/README @@ -0,0 +1,9 @@ +OCRopus is a state-of-the-art document analysis and OCR system, featuring +pluggable layout analysis, pluggable character recognition, statistical +natural language modeling, and multi-lingual capabilities. + +The system is being developed with the generous support from Google and +other organizations; the primary developers are at the IUPR Research +Group at the DFKI Research Center. + +This requires tesseract and iulib. diff --git a/graphics/ocropus/ocrodata-env.diff b/graphics/ocropus/ocrodata-env.diff new file mode 100644 index 000000000000..04cfd5d9af84 --- /dev/null +++ b/graphics/ocropus/ocrodata-env.diff @@ -0,0 +1,15 @@ +Description: Respect the OCRODATA environment variable for all lua scripts. 
+Author: Jakub Wilk + +Index: ocropus-0.3.1/ocroscript/ocrotoplevel.cc +=================================================================== +--- ocropus-0.3.1.orig/ocroscript/ocrotoplevel.cc 2009-11-26 18:47:54.000000000 +0100 ++++ ocropus-0.3.1/ocroscript/ocrotoplevel.cc 2009-11-26 18:47:54.000000000 +0100 +@@ -471,6 +471,7 @@ + lua_call(L, 0, 0); + + // handle OCRODATA environment variable as a directory ++ if(getenv("OCRODATA")) ocroscripts = getenv("OCRODATA"); + lua_pushstring(L, ocrodata); + lua_setglobal(L, "ocrodata"); + diff --git a/graphics/ocropus/ocropus.SlackBuild b/graphics/ocropus/ocropus.SlackBuild new file mode 100644 index 000000000000..e8c2ce60b019 --- /dev/null +++ b/graphics/ocropus/ocropus.SlackBuild @@ -0,0 +1,109 @@ +#!/bin/sh + +# Slackware build script for OCROpus. + +# Copyright 2010 Pierre Cazenave +# All rights reserved. +# +# Redistribution and use of this script, with or without modification, is +# permitted provided that the following conditions are met: +# +# 1. Redistributions of this script must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+PRGNAM=ocropus
+VERSION=${VERSION:-0.3.1}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+# Name suffix of the directory entered after unpacking (differs from VERSION).
+DIRVER=${DIRVER:-0.3}
+# Derive the package architecture from the machine type unless ARCH is preset.
+if [ -z "$ARCH" ]; then
+  case "$( uname -m )" in
+    i?86) ARCH=i486 ;;
+    arm*) ARCH=arm ;;
+       *) ARCH=$( uname -m ) ;;
+  esac
+fi
+
+CWD=$(pwd)
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+# Compiler flags and the lib directory suffix passed to configure, per ARCH.
+if [ "$ARCH" = "i486" ]; then
+  SLKCFLAGS="-O2 -march=i486 -mtune=i686"
+  LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+  SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+  LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+  SLKCFLAGS="-O2 -fPIC"
+  LIBDIRSUFFIX="64"
+else
+  SLKCFLAGS="-O2"
+  LIBDIRSUFFIX=""
+fi
+
+set -e # from here on, stop at the first failing command
+# Paths are quoted so a CWD/TMP/OUTPUT containing whitespace cannot split them.
+rm -rf "$PKG"
+mkdir -p "$TMP" "$PKG" "$OUTPUT"
+cd "$TMP"
+rm -rf "$PRGNAM-$DIRVER"
+tar xvf "$CWD/$PRGNAM-$VERSION.tar.gz"
+cd "$PRGNAM-$DIRVER"
+chown -R root:root .
+chmod -R u+w,go+r-w,a-s .
+
+# Debian patch to fix hardcoded /usr/local paths in some source files
+patch -p1 < "$CWD/usr-local.diff"
+# Debian patch to fix behaviour of the OCRODATA environment variable
+patch -p1 < "$CWD/ocrodata-env.diff"
+
+CFLAGS="$SLKCFLAGS" \
+CXXFLAGS="$SLKCFLAGS" \
+./configure \
+  --prefix=/usr \
+  --sysconfdir=/etc \
+  --localstatedir=/var \
+  --libdir="/usr/lib${LIBDIRSUFFIX}" \
+  --mandir=/usr/man \
+  --docdir="/usr/doc/$PRGNAM-$VERSION" \
+  --with-tesseract=/usr \
+  --with-iulib=/usr \
+  --without-fst \
+  --without-SDL \
+  --without-leptonica \
+  --build="$ARCH-slackware-linux"
+
+make
+make install DESTDIR="$PKG"
+# Best-effort strip of ELF files; names reach file(1) NUL-delimited, errors ignored.
+find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+  | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+# Add Debian's manpage
+mkdir -p "$PKG/usr/man/man1"
+gzip -9c "$CWD/ocroscript.1" > "$PKG/usr/man/man1/ocroscript.1.gz"
+# Ship upstream's documentation together with a copy of this build script.
+mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
+cp -a CHANGES COPYING DIRS INSTALL README "$PKG/usr/doc/$PRGNAM-$VERSION"
+cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
+
+mkdir -p "$PKG/install"
+cat "$CWD/slack-desc" > "$PKG/install/slack-desc"
+
+cd "$PKG"
+/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz} diff --git a/graphics/ocropus/ocropus.info b/graphics/ocropus/ocropus.info new file mode 100644 index 000000000000..a38b5bc68066 --- /dev/null +++ b/graphics/ocropus/ocropus.info @@ -0,0 +1,10 @@ +PRGNAM="ocropus" +VERSION="0.3.1" +HOMEPAGE="http://sites.google.com/site/ocropus/" +DOWNLOAD="http://ocropus.googlecode.com/files/ocropus-0.3.1.tar.gz" +MD5SUM="2a1b66419ae69ef031d5e6269db15bb5" +DOWNLOAD_x86_64="" +MD5SUM_x86_64="" +MAINTAINER="Pierre Cazenave" +EMAIL="pwcazenave < at > gmail {dot} com" +APPROVED="rworkman" diff --git a/graphics/ocropus/ocroscript.1 b/graphics/ocropus/ocroscript.1 new file mode 100644 index 000000000000..d8087203f732 --- /dev/null +++ b/graphics/ocropus/ocroscript.1 @@ -0,0 +1,43 @@ +.TH ocroscript 1 "June 06, 2008" +.SH NAME +ocropus \- command line OCR tool +.SH SYNOPSIS +.B ocroscript +.RI "