aboutsummaryrefslogtreecommitdiff
path: root/academic/cld2
diff options
context:
space:
mode:
authorBenjamin Trigona-Harany <slackbuilds@jaxartes.net>2013-10-27 01:16:04 -0500
committerRobby Workman <rworkman@slackbuilds.org>2013-10-27 23:39:01 -0500
commitd4f5065b3f8e788a12cad1186c006039d0dd5cb6 (patch)
tree51d0cc7e6b8bb91e4f2a341c3cf72b0e057c686a /academic/cld2
parent12cd740b634feae442197491f27e04852bf1f9ff (diff)
academic/cld2: Added (Compact Language Detection)
Signed-off-by: Robby Workman <rworkman@slackbuilds.org>
Diffstat (limited to 'academic/cld2')
-rw-r--r--academic/cld2/README9
-rw-r--r--academic/cld2/cld2.SlackBuild124
-rw-r--r--academic/cld2/cld2.info10
-rw-r--r--academic/cld2/slack-desc19
4 files changed, 162 insertions, 0 deletions
diff --git a/academic/cld2/README b/academic/cld2/README
new file mode 100644
index 000000000000..164357bf4fb5
--- /dev/null
+++ b/academic/cld2/README
@@ -0,0 +1,9 @@
+The Compact Language Detection library can detect the language of UTF8-encoded
+text. CLD2 supports over 160 languages and can parse both plain text and HTML.
+
+The Slackware script builds two shared libraries, libcld2 and libcld2_full. The
+libcld2 library can recognise the core 83 languages while the libcld2_full
+library has support for the entire set of 160+ language tables.
+
+The script also builds a command-line tool, cld2, which is compiled against the
+libcld2_full library.
diff --git a/academic/cld2/cld2.SlackBuild b/academic/cld2/cld2.SlackBuild
new file mode 100644
index 000000000000..e3f560ee39f8
--- /dev/null
+++ b/academic/cld2/cld2.SlackBuild
@@ -0,0 +1,124 @@
+#!/bin/sh
+
+# SlackBuild script for cld2
+
+# Copyright 2013 Benjamin Trigona-Harany <slackbuilds@jaxartes.net>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Package identity. VERSION, BUILD and TAG can be overridden from the environment.
+PRGNAM=cld2
+VERSION=${VERSION:-20130728}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+
+# When the caller did not set ARCH, derive it from `uname -m`:
+# every i?86 maps to i486, every arm* maps to arm, anything else is used as-is.
+if [ -z "$ARCH" ]; then
+ case "$( uname -m )" in
+ i?86) ARCH=i486 ;;
+ arm*) ARCH=arm ;;
+ *) ARCH=$( uname -m ) ;;
+ esac
+fi
+
+# CWD is the directory the script was started from; the source tarball,
+# slack-desc and this script itself are later read from there.
+# TMP and OUTPUT can be overridden from the environment; PKG is the staging tree.
+CWD=$(pwd)
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+
+# Per-ARCH compiler flags and library directory suffix.
+# LIBDIRSUFFIX is "64" only for x86_64, so libraries land in /usr/lib64 there
+# and in /usr/lib everywhere else.
+if [ "$ARCH" = "i486" ]; then
+ SLKCFLAGS="-O2 -march=i486 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+ SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+ SLKCFLAGS="-O2 -fPIC"
+ LIBDIRSUFFIX="64"
+else
+ SLKCFLAGS="-O2"
+ LIBDIRSUFFIX=""
+fi
+
+# From here on, abort the build as soon as any command fails.
+set -e
+
+# Start from an empty staging tree and a freshly extracted source tree:
+# remove leftovers of a previous run, then unpack the tarball found in CWD.
+rm -rf $PKG
+mkdir -p $TMP $PKG $OUTPUT
+rm -rf $TMP/$PRGNAM-$VERSION
+cd $TMP
+tar xvf $CWD/$PRGNAM-$VERSION.tar.bz2
+cd $PRGNAM-$VERSION
+
+# Build libcld2.so, libcld2_full.so and the cld2 command-line tool inside
+# internal/, then install all three into the staging tree.
+# The subshell keeps the directory change local to this step.
+# -fPIC is passed explicitly to both -shared builds: SLKCFLAGS only carries it
+# on x86_64, and shared objects must be position-independent on every ARCH.
+( cd internal
+ g++ -shared -fPIC $SLKCFLAGS \
+ cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
+ compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
+ generated_entities.cc generated_language.cc generated_ulscript.cc \
+ getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
+ tote.cc utf8statetable.cc \
+ cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
+ cld_generated_cjk_delta_bi_4.cc generated_distinct_bi_0.cc \
+ cld2_generated_quadchrome0715.cc cld2_generated_deltaoctachrome0614.cc \
+ cld2_generated_distinctoctachrome0604.cc cld_generated_score_quad_octa_1024_256.cc \
+ -o libcld2.so
+
+ # Same core sources as above, linked with a different set of generated tables.
+ g++ -shared -fPIC $SLKCFLAGS \
+ cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
+ compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
+ generated_entities.cc generated_language.cc generated_ulscript.cc \
+ getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
+ tote.cc utf8statetable.cc \
+ cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
+ cld_generated_cjk_delta_bi_32.cc generated_distinct_bi_0.cc \
+ cld2_generated_quad0720.cc cld2_generated_deltaocta0527.cc \
+ cld2_generated_distinctocta0527.cc cld_generated_score_quad_octa_1024_256.cc \
+ -o libcld2_full.so
+
+ # The command-line tool is linked against libcld2_full.so.
+ g++ $SLKCFLAGS \
+ compact_lang_det_test.cc -I. -L. libcld2_full.so \
+ -o cld2
+
+ install -D -m 0755 cld2 $PKG/usr/bin/cld2
+ install -D -m 0755 libcld2.so $PKG/usr/lib${LIBDIRSUFFIX}/libcld2.so
+ install -D -m 0755 libcld2_full.so $PKG/usr/lib${LIBDIRSUFFIX}/libcld2_full.so
+)
+
+# install header files (directories spelled out: brace expansion is not POSIX sh)
+mkdir -p $PKG/usr/include/cld2/internal $PKG/usr/include/cld2/public
+( cd internal
+ cp generated_language.h generated_ulscript.h integral_types.h lang_script.h $PKG/usr/include/cld2/internal
+)
+( cd public
+ cp compact_lang_det.h encodings.h $PKG/usr/include/cld2/public
+)
+
+# Strip ELF executables and shared objects found in the staging tree.
+# strip's error output is discarded and a failing pipeline is tolerated
+# (|| true), so this step never aborts the build under set -e.
+find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" \
+ | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+# Ship the LICENSE and a copy of this build script as package documentation.
+mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
+cp -a \
+ LICENSE \
+ $PKG/usr/doc/$PRGNAM-$VERSION
+cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
+
+# Copy the package description from the build directory into the package.
+mkdir -p $PKG/install
+cat $CWD/slack-desc > $PKG/install/slack-desc
+
+# Create the package archive from the staging tree; PKGTYPE defaults to tgz.
+cd $PKG
+/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}
diff --git a/academic/cld2/cld2.info b/academic/cld2/cld2.info
new file mode 100644
index 000000000000..b2c6d15b3ca3
--- /dev/null
+++ b/academic/cld2/cld2.info
@@ -0,0 +1,10 @@
+PRGNAM="cld2"
+VERSION="20130728"
+HOMEPAGE="https://code.google.com/p/cld2/"
+DOWNLOAD="http://jaxartes.net/files/cld2-20130728.tar.bz2"
+MD5SUM="3ead394982e394bbd42525a7f51f9891"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+REQUIRES=""
+MAINTAINER="Benjamin Trigona-Harany"
+EMAIL="slackbuilds@jaxartes.net"
diff --git a/academic/cld2/slack-desc b/academic/cld2/slack-desc
new file mode 100644
index 000000000000..671603178cc5
--- /dev/null
+++ b/academic/cld2/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct. It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+ |-----handy-ruler------------------------------------------------------|
+cld2: cld2 (Compact Language Detection)
+cld2:
+cld2: The Compact Language Detection library can detect the language of
+cld2: text, even with a very small amount of sample data. CLD2 supports
+cld2: over 160 languages and works on both plain text and HTML.
+cld2:
+cld2: Home: https://code.google.com/p/cld2/
+cld2:
+cld2:
+cld2:
+cld2: