diff options
author | Benjamin Trigona-Harany <slackbuilds@jaxartes.net> | 2013-10-27 01:16:04 -0500 |
---|---|---|
committer | Robby Workman <rworkman@slackbuilds.org> | 2013-10-27 23:39:01 -0500 |
commit | d4f5065b3f8e788a12cad1186c006039d0dd5cb6 (patch) | |
tree | 51d0cc7e6b8bb91e4f2a341c3cf72b0e057c686a /academic | |
parent | 12cd740b634feae442197491f27e04852bf1f9ff (diff) |
academic/cld2: Added (Compact Language Detection)
Signed-off-by: Robby Workman <rworkman@slackbuilds.org>
Diffstat (limited to 'academic')
-rw-r--r-- | academic/cld2/README | 9 | ||||
-rw-r--r-- | academic/cld2/cld2.SlackBuild | 124 | ||||
-rw-r--r-- | academic/cld2/cld2.info | 10 | ||||
-rw-r--r-- | academic/cld2/slack-desc | 19 |
4 files changed, 162 insertions, 0 deletions
diff --git a/academic/cld2/README b/academic/cld2/README new file mode 100644 index 000000000000..164357bf4fb5 --- /dev/null +++ b/academic/cld2/README @@ -0,0 +1,9 @@ +The Compact Language Detection library can detect the language of UTF8-encoded +text. CLD2 supports over 160 languages and can parse both plain text and HTML. + +The Slackware script builds two shared libraries, libcld2 and libcld2_full. The +libcld2 library can recognise the core 83 languages while the libcld2_full +library has support for the entire set of 160+ language tables. + +The script also builds a commandline tool, cld2, which is compiled against the +libcld2_full library. diff --git a/academic/cld2/cld2.SlackBuild b/academic/cld2/cld2.SlackBuild new file mode 100644 index 000000000000..e3f560ee39f8 --- /dev/null +++ b/academic/cld2/cld2.SlackBuild @@ -0,0 +1,124 @@ +#!/bin/sh + +# SlackBuild script for cld2 + +# Copyright 2013 Benjamin Trigona-Harany <slackbuilds@jaxartes.net> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +PRGNAM=cld2 +VERSION=${VERSION:-20130728} +BUILD=${BUILD:-1} +TAG=${TAG:-_SBo} + +if [ -z "$ARCH" ]; then + case "$( uname -m )" in + i?86) ARCH=i486 ;; + arm*) ARCH=arm ;; + *) ARCH=$( uname -m ) ;; + esac +fi + +CWD=$(pwd) +TMP=${TMP:-/tmp/SBo} +PKG=$TMP/package-$PRGNAM +OUTPUT=${OUTPUT:-/tmp} + +if [ "$ARCH" = "i486" ]; then + SLKCFLAGS="-O2 -march=i486 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "i686" ]; then + SLKCFLAGS="-O2 -march=i686 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "x86_64" ]; then + SLKCFLAGS="-O2 -fPIC" + LIBDIRSUFFIX="64" +else + SLKCFLAGS="-O2" + LIBDIRSUFFIX="" +fi + +set -e + +rm -rf $PKG +mkdir -p $TMP $PKG $OUTPUT +rm -rf $TMP/$PRGNAM-$VERSION +cd $TMP +tar xvf $CWD/$PRGNAM-$VERSION.tar.bz2 +cd $PRGNAM-$VERSION + +( cd internal + g++ -shared $SLKCFLAGS \ + cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \ + compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \ + generated_entities.cc generated_language.cc generated_ulscript.cc \ + getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \ + tote.cc utf8statetable.cc \ + cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \ + cld_generated_cjk_delta_bi_4.cc generated_distinct_bi_0.cc \ + cld2_generated_quadchrome0715.cc cld2_generated_deltaoctachrome0614.cc \ + cld2_generated_distinctoctachrome0604.cc cld_generated_score_quad_octa_1024_256.cc \ + -o libcld2.so + + g++ -shared $SLKCFLAGS \ + 
cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \ + compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \ + generated_entities.cc generated_language.cc generated_ulscript.cc \ + getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \ + tote.cc utf8statetable.cc \ + cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \ + cld_generated_cjk_delta_bi_32.cc generated_distinct_bi_0.cc \ + cld2_generated_quad0720.cc cld2_generated_deltaocta0527.cc \ + cld2_generated_distinctocta0527.cc cld_generated_score_quad_octa_1024_256.cc \ + -o libcld2_full.so + + g++ $SLKCFLAGS \ + compact_lang_det_test.cc -I. -L. libcld2_full.so \ + -o cld2 + + install -D -m 0755 cld2 $PKG/usr/bin/cld2 + install -D -m 0755 libcld2.so $PKG/usr/lib${LIBDIRSUFFIX}/libcld2.so + install -D -m 0755 libcld2_full.so $PKG/usr/lib${LIBDIRSUFFIX}/libcld2_full.so +) + +# install header files +mkdir -p $PKG/usr/include/cld2/{internal,public} +( cd internal + cp generated_language.h generated_ulscript.h integral_types.h lang_script.h $PKG/usr/include/cld2/internal +) +( cd public + cp compact_lang_det.h encodings.h $PKG/usr/include/cld2/public +) + +find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" \ + | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true + +mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION +cp -a \ + LICENSE \ + $PKG/usr/doc/$PRGNAM-$VERSION +cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild + +mkdir -p $PKG/install +cat $CWD/slack-desc > $PKG/install/slack-desc + +cd $PKG +/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz} diff --git a/academic/cld2/cld2.info b/academic/cld2/cld2.info new file mode 100644 index 000000000000..b2c6d15b3ca3 --- /dev/null +++ b/academic/cld2/cld2.info @@ -0,0 +1,10 @@ +PRGNAM="cld2" +VERSION="20130728" +HOMEPAGE="https://code.google.com/p/cld2/" +DOWNLOAD="http://jaxartes.net/files/cld2-20130728.tar.bz2" 
+MD5SUM="3ead394982e394bbd42525a7f51f9891" +DOWNLOAD_x86_64="" +MD5SUM_x86_64="" +REQUIRES="" +MAINTAINER="Benjamin Trigona-Harany" +EMAIL="slackbuilds@jaxartes.net" diff --git a/academic/cld2/slack-desc b/academic/cld2/slack-desc new file mode 100644 index 000000000000..671603178cc5 --- /dev/null +++ b/academic/cld2/slack-desc @@ -0,0 +1,19 @@ +# HOW TO EDIT THIS FILE: +# The "handy ruler" below makes it easier to edit a package description. +# Line up the first '|' above the ':' following the base package name, and +# the '|' on the right side marks the last column you can put a character in. +# You must make exactly 11 lines for the formatting to be correct. It's also +# customary to leave one space after the ':' except on otherwise blank lines. + + |-----handy-ruler------------------------------------------------------| +cld2: cld2 (Compact Language Detection) +cld2: +cld2: The Compact Language Detection library can detect the language of +cld2: text, even with a very small amount of sample data. CLD2 supports +cld2: over 160 languages and works on both plain text and HTML. +cld2: +cld2: Home: https://code.google.com/p/cld2/ +cld2: +cld2: +cld2: +cld2: |