aboutsummaryrefslogtreecommitdiff
path: root/academic/clark-ugene
diff options
context:
space:
mode:
authorPetar Petrov <slackalaxy@gmail.com>2018-12-24 09:09:59 +0700
committerWilly Sudiarto Raharjo <willysr@slackbuilds.org>2018-12-24 09:09:59 +0700
commitf6b1cc61cacf8d462a563b885065902a78d72fa7 (patch)
tree16b50bc50f77e1255fb29274f52b8217beec1270 /academic/clark-ugene
parent6e7811910300d45a4b302c25204c7b97ec24019c (diff)
academic/clark-ugene: Added (supervised sequence classification).
Signed-off-by: Willy Sudiarto Raharjo <willysr@slackbuilds.org>
Diffstat (limited to 'academic/clark-ugene')
-rw-r--r--academic/clark-ugene/README39
-rw-r--r--academic/clark-ugene/clark-ugene.SlackBuild103
-rw-r--r--academic/clark-ugene/clark-ugene.info10
-rw-r--r--academic/clark-ugene/slack-desc19
4 files changed, 171 insertions, 0 deletions
diff --git a/academic/clark-ugene/README b/academic/clark-ugene/README
new file mode 100644
index 000000000000..4e9386f2ff5f
--- /dev/null
+++ b/academic/clark-ugene/README
@@ -0,0 +1,39 @@
+This is Ugene's (http://ugene.net/) fork of the CLARK tool
+(http://clark.cs.ucr.edu/Tool/), which supports building the DB
+directly from gzip- and 7z-packed RefSeq files.
+
+CLARK: CLAssifier based on Reduced K-mers
+
+The problem of DNA sequence classification is central to several
+application domains in molecular biology, genomics, metagenomics and
+genetics. The problem is computationally challenging due to the size of
+datasets generated by modern sequencing instruments and the growing size
+of reference sequence databases.
+
+CLARK is a novel method for supervised sequence classification based on
+discriminative k-mers. Somewhat unique among other metagenomic and
+genomic classification methods, CLARK provides a confidence score for
+its assignments which can be used in downstream analysis. The utility of
+CLARK is demonstrated on two distinct specific classification problems:
+
+1) the assignment of metagenomic reads to known bacterial genomes
+2) the assignment of BAC clones and transcripts to chromosome arms (in
+   the absence of a finished assembly for the reference genome).
+
+Three classifiers or variants in the CLARK framework are provided:
+CLARK (default): created for powerful workstations, it may require a
+significant amount of RAM to run with a large database (e.g., all
+bacterial genomes from NCBI/RefSeq). This classifier queries k-mers
+with exact matching.
+
+CLARK-l (light): created for workstations with limited memory, this
+software tool provides precise classification on small metagenomes.
+Indeed, for metagenomics analysis, CLARK-l works with a sparse or
+"light" database (up to 4 GB of RAM) that is built using distant and
+non-overlapping k-mers. This classifier queries k-mers with exact
+matching.
+
+CLARK-S (spaced): created for powerful workstations, this classifier
+exploits spaced k-mers; it requires higher RAM usage than CLARK or
+CLARK-l, but it does offer a higher sensitivity. CLARK-S completes the
+CLARK series of classifiers.
diff --git a/academic/clark-ugene/clark-ugene.SlackBuild b/academic/clark-ugene/clark-ugene.SlackBuild
new file mode 100644
index 000000000000..bd2a8e55c170
--- /dev/null
+++ b/academic/clark-ugene/clark-ugene.SlackBuild
@@ -0,0 +1,103 @@
+#!/bin/sh
+
+# Slackware build script for clark-ugene
+
+# Copyright 2018 Petar Petrov slackalaxy@gmail.com
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# VERSION, COMMIT, BUILD and TAG may be overridden from the environment.
+SRCNAM=clark
+PRGNAM=${SRCNAM}-ugene
+VERSION=${VERSION:-git_23801a2}
+COMMIT=${COMMIT:-23801a2738b7c104c8a7a1402d50d94fb5b4036b}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+
+# Detect the target architecture unless ARCH was supplied by the caller.
+if [ -z "$ARCH" ]; then
+  case "$( uname -m )" in
+    i?86) ARCH=i586 ;;
+    arm*) ARCH=arm ;;
+       *) ARCH=$( uname -m ) ;;
+  esac
+fi
+
+# CWD is where the source tarball, slack-desc and this script are read from.
+CWD=$(pwd)
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+
+if [ "$ARCH" = "i586" ]; then
+  SLKCFLAGS="-O2 -march=i586 -mtune=i686"
+elif [ "$ARCH" = "i686" ]; then
+  SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+elif [ "$ARCH" = "x86_64" ]; then
+  SLKCFLAGS="-O2 -fPIC"
+else
+  SLKCFLAGS="-O2"
+fi
+
+set -e
+
+# Unpack the source tarball for COMMIT, then normalise ownership and modes.
+rm -rf "$PKG"
+mkdir -p "$TMP" "$PKG" "$OUTPUT"
+cd "$TMP"
+rm -rf "$SRCNAM-$COMMIT"
+tar xvf "$CWD/$SRCNAM-$COMMIT.tar.gz"
+cd "$SRCNAM-$COMMIT"
+chown -R root:root .
+find -L . \
+ \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
+  -o -perm 511 \) -exec chmod 755 {} \; -o \
+ \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
+  -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
+
+# Configure and compile in a separate build directory.
+mkdir -p build
+cd build
+  cmake \
+    -DCMAKE_C_FLAGS:STRING="$SLKCFLAGS" \
+    -DCMAKE_CXX_FLAGS:STRING="$SLKCFLAGS" \
+    -DCMAKE_INSTALL_PREFIX=/usr \
+    -DCMAKE_BUILD_TYPE=Release ..
+  make
+cd ..
+
+# Install everything found in bin/, plus builddb.sh from the source root.
+for i in bin/*; do
+  install -D -m755 "$i" "$PKG/usr/bin/${i##*/}"
+done
+install -D -m755 builddb.sh "$PKG/usr/bin/builddb.sh"
+
+# Best-effort strip of ELF files; failures are deliberately ignored.
+find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+  | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
+cp -a README.md "$PKG/usr/doc/$PRGNAM-$VERSION"
+cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
+
+mkdir -p "$PKG/install"
+cat "$CWD/slack-desc" > "$PKG/install/slack-desc"
+
+cd "$PKG"
+/sbin/makepkg -l y -c n "$OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}"
diff --git a/academic/clark-ugene/clark-ugene.info b/academic/clark-ugene/clark-ugene.info
new file mode 100644
index 000000000000..3e139342ebcc
--- /dev/null
+++ b/academic/clark-ugene/clark-ugene.info
@@ -0,0 +1,10 @@
+PRGNAM="clark-ugene"
+VERSION="git_23801a2"
+HOMEPAGE="https://github.com/ugeneunipro/clark"
+DOWNLOAD="UNSUPPORTED"
+MD5SUM=""
+DOWNLOAD_x86_64="https://github.com/ugeneunipro/clark/archive/23801a2/clark-23801a2738b7c104c8a7a1402d50d94fb5b4036b.tar.gz"
+MD5SUM_x86_64="f73ffa62e4ae6241f07d4d9fc814b455"
+REQUIRES=""
+MAINTAINER="Petar Petrov"
+EMAIL="slackalaxy@gmail.com"
diff --git a/academic/clark-ugene/slack-desc b/academic/clark-ugene/slack-desc
new file mode 100644
index 000000000000..56b5c45e66d7
--- /dev/null
+++ b/academic/clark-ugene/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct. It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+           |-----handy-ruler------------------------------------------------------|
+clark-ugene: clark-ugene (Ugene's forked CLAssifier based on Reduced K-mers)
+clark-ugene:
+clark-ugene: CLARK is a novel method for supervised sequence classification based
+clark-ugene: on discriminative k-mers. Somewhat unique among other metagenomic
+clark-ugene: and genomic classification methods, CLARK provides a confidence
+clark-ugene: score for its assignments which can be used in downstream analysis.
+clark-ugene:
+clark-ugene: Home: https://github.com/ugeneunipro/clark
+clark-ugene:
+clark-ugene: This is Ugene's (http://ugene.net/) fork of the CLARK tool
+clark-ugene: (http://clark.cs.ucr.edu/)