aboutsummaryrefslogtreecommitdiff
path: root/academic
diff options
context:
space:
mode:
authorPetar Petrov <slackalaxy@gmail.com>2022-09-09 03:10:09 +0100
committerDave Woodfall <dave@slackbuilds.org>2022-09-09 03:10:09 +0100
commit0a280237574259ba25509219a906256638c1c388 (patch)
tree1295140103ac6eaa06e315f58c0c99240f237f0e /academic
parent1f0ff62f35d302c3ca0868682d5475a5b5a80f55 (diff)
academic/vCAPS_coevolution: Added (Coevolution Analysis)
Signed-off-by: Dave Woodfall <dave@slackbuilds.org>
Diffstat (limited to 'academic')
-rw-r--r--academic/vCAPS_coevolution/01_caps_verbose.patch114
-rw-r--r--academic/vCAPS_coevolution/02_caps_sort_input.patch38
-rw-r--r--academic/vCAPS_coevolution/README33
-rw-r--r--academic/vCAPS_coevolution/References14
-rw-r--r--academic/vCAPS_coevolution/slack-desc19
-rw-r--r--academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild129
-rw-r--r--academic/vCAPS_coevolution/vCAPS_coevolution.info10
7 files changed, 357 insertions, 0 deletions
diff --git a/academic/vCAPS_coevolution/01_caps_verbose.patch b/academic/vCAPS_coevolution/01_caps_verbose.patch
new file mode 100644
index 0000000000000..7f64d80f34f84
--- /dev/null
+++ b/academic/vCAPS_coevolution/01_caps_verbose.patch
@@ -0,0 +1,114 @@
+diff -pruN orig/caps.cpp new/caps.cpp
+--- orig/caps.cpp 2012-12-15 17:13:23.000000000 +0200
++++ new/caps.cpp 2020-09-09 23:07:46.080566000 +0300
+@@ -14,7 +14,7 @@
+ #include <gsl/gsl_statistics.h>
+ #include<sys/time.h>
+ #include<iomanip>
+-
++#include <bits/stdc++.h>
+
+
+
+@@ -69,6 +69,8 @@
+ const gsl_rng_type * T;
+ gsl_rng *r;
+
++vector<double> totaltempnew;
++double alphathresh = 0;
+ int main(int argc, char *argv[]){
+
+
+@@ -543,16 +545,27 @@ int main(int argc, char *argv[]){
+
+
+ print_splash(output);
++ OUTPUT << "\n\File1: " << files[i] << endl;
+ vec1.print_to_fasta(output.c_str());
++ OUTPUT << "\n\nFile2: " << files[j] << endl;
+ vec2.print_to_fasta(output.c_str());
+ int length1 = vec1.sequences[0].length();
+ int length2 = vec2.sequences[0].length();
+
++ OUTPUT << "\n\nLength1: " << length1 << endl;
++ OUTPUT << "Length2: " << length2 << endl;
+
+
+ if(tree_in ==0){
+ tree1 = create_input_tree(vec1.names, vec1.sequences);
+ tree2 = create_input_tree(vec2.names, vec2.sequences);
++
++ // Output the CAPS generated trees to the .out file of each pair
++ string temptre1 = TreeTemplateTools::treeToParenthesis(*tree1, true);
++ string temptre2 = TreeTemplateTools::treeToParenthesis(*tree2, true);
++ OUTPUT << "\n" << endl;
++ OUTPUT << "CAPS generated tree 1: " << temptre1 << endl;
++ OUTPUT << "CAPS generated tree 2: " << temptre2 << endl;
+ }/*else if(tree_in ==1 && variable==1){
+
+ std::auto_ptr<DistanceMatrix> DS;
+@@ -666,6 +679,7 @@ int main(int argc, char *argv[]){
+ int value = floor(((totaltemp.size())*(1-(threshval))))+1;
+
+ threshold = totaltemp[value];
++ totaltempnew = totaltemp;
+
+
+ /*=======================================================*/
+@@ -870,6 +884,30 @@ int Chi_squared (int num_pairs, int num_
+
+ } /* ----- end of function Chi_squared ----- */
+
++/*
++ * === FUNCTION ======================================================================
++ * Name: find_alpha
++ * Description: Find the index of an element in a vector totaltemp
++ * Help from: https://www.geeksforgeeks.org/how-to-find-index-of-a-given-element-in-a-vector-in-cpp/
++ * https://stackoverflow.com/questions/8647635/elegant-way-to-find-closest-value-in-a-vector-from-above
++ * Author: Petar Petrov, University of Turku (Finland); pebope@utu.fi
++ * =====================================================================================
++ */
++double getIndex(std::vector<double> const& v, double K)
++{
++ auto const it = std::lower_bound(v.begin(), v.end(), fabs(K));
++ //auto it = std::upper_bound(v.begin(), v.end(), fabs(K));
++
++ if (it != v.end()) {
++ int index = distance(v.begin(), it);
++ alphathresh = (((int)1+(double)v.size()-(int)index)/(double)v.size());
++ return alphathresh;
++ //cerr << index << "\t" << alphathresh << endl;
++ }
++ else {
++ cerr << "ELEMENT NOT FOUND!" << endl;
++ }
++}
+
+
+
+@@ -890,9 +928,9 @@ int print_inter(vector<double>& Correl1,
+ output << endl << endl;
+
+ output << "Coevolving Pairs of amino acid sites\n";
+- output << "=============================================================================\n";
+- output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\n\n";
+- output << "=============================================================================\n";
++ output << "================================================================================================================================\n";
++ output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\tP-value1\tP-value2\tMean P-value\tCorrelation1\tCorrelation2\n\n";
++ output << "================================================================================================================================\n";
+
+ //double mean = average_vec<double>(Correl);
+ //double SD = SD_vf(Correl, mean);
+@@ -951,9 +989,11 @@ int print_inter(vector<double>& Correl1,
+
+ // }
+
++ double Alpha1 = getIndex(totaltempnew, Correl1[cor]);
++ double Alpha2 = getIndex(totaltempnew, Correl2[cor]);
+ //if(bootval>=bootcut && re1<=8 && re2<=8 ){
+ if(bootval>=bootcut){
+- output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << endl;
++ output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << "\t" << Alpha1 << "\t" << Alpha2 << "\t" << (Alpha1+Alpha2)/2 << "\t" << Correl1[cor] << "\t" << Correl2[cor] << endl;
+ signif.push_back(((Correl1[cor]+Correl2[cor])/2));
+ ++pairs;
+ vector<int> tem;
diff --git a/academic/vCAPS_coevolution/02_caps_sort_input.patch b/academic/vCAPS_coevolution/02_caps_sort_input.patch
new file mode 100644
index 0000000000000..917bca7cabead
--- /dev/null
+++ b/academic/vCAPS_coevolution/02_caps_sort_input.patch
@@ -0,0 +1,38 @@
+diff -pruN old/caps.cpp new/caps.cpp
+--- old/caps.cpp 2022-01-20 09:30:24.409886691 +0200
++++ new/caps.cpp 2022-01-20 09:39:10.669845265 +0200
+@@ -1,4 +1,5 @@
+ #include<iostream>
++#include <algorithm>
+ #include<fstream>
+ #include"BCFasta.h"
+ #include"file_manip.h"
+@@ -15,7 +16,7 @@
+ #include<sys/time.h>
+ #include<iomanip>
+ #include <bits/stdc++.h>
+-
++#include <vector>
+
+
+ #include <Seq/SequenceApplicationTools.h>
+@@ -69,6 +70,10 @@
+ const gsl_rng_type * T;
+ gsl_rng *r;
+
++// make sure filenames are sorted!
++// https://stackoverflow.com/a/34757557
++bool compareFunction (std::string a, std::string b) {return a<b;}
++
+ vector<double> totaltempnew;
+ double alphathresh = 0;
+ int main(int argc, char *argv[]){
+@@ -189,6 +194,8 @@ int main(int argc, char *argv[]){
+
+ vector<string> files;
+ files = Folder_to_vector(mystring.c_str());
++ // make sure filenames are sorted!
++ std::sort(files.begin(),files.end(),compareFunction);
+
+ Fasta_vector file;
+ file.ref_num=0;
diff --git a/academic/vCAPS_coevolution/README b/academic/vCAPS_coevolution/README
new file mode 100644
index 0000000000000..b8b4a71a4bcaa
--- /dev/null
+++ b/academic/vCAPS_coevolution/README
@@ -0,0 +1,33 @@
+vCAPS: (verbose) Coevolution Analysis using Protein Sequences
+
+CAPS is aimed at measuring the coevolution between amino acid sites
+belonging to the same protein (intra-molecular coevolution) or to two
+functionally or physically interacting proteins (inter-molecular
+coevolution). The Software implements an improved method to detect
+intra-molecular coevolution as published in Genetics (Fares and Travers,
+2006) and also inter-protein coevolution analysis. The improved scoring
+of amino acid sites is obtained by maximum likelihood ancestral state
+reconstruction along with simulations to assess significance.
+
+This is a modified version of "CAPS_coevolution", used in the AutoCoEv
+pipeline:
+https://github.com/mattilalab/autocoev
+
+It applies two _unofficial_ patches:
+- 01_caps_verbose: makes the program output its generated trees, as well
+ as the p-value for each correlated amino acid pair
+- 02_caps_sort_input: introduce a function to sort input file names
+
+The produced executable is called "vCAPS" and can be installed along
+"caps" from CAPS_coevolution. Building CAPS from source requires the
+Bio++ 1.9 suite libraries, but make sure the current versions of the
+bppsuite (and its dependencies) are NOT installed at build time.
+
+Citing CAPS:
+CAPS: coevolution analysis using protein sequences. Fares MA, McNally D.
+Bioinformatics. 2006 Nov 15;22(22):2821-2. PMID: 17005535
+
+The mathematical model has been described separately:
+A novel method for detecting intramolecular coevolution: adding a
+further dimension to selective constraints analyses. Fares MA, Travers
+SA. Genetics. 2006 May;173(1):9-23. PMID: 16547113
diff --git a/academic/vCAPS_coevolution/References b/academic/vCAPS_coevolution/References
new file mode 100644
index 0000000000000..e8236cd282c47
--- /dev/null
+++ b/academic/vCAPS_coevolution/References
@@ -0,0 +1,14 @@
+If you use CAPS in your research, please include the following citations:
+
+CAPS: coevolution analysis using protein sequences.
+Fares MA, McNally D.
+Bioinformatics. 2006 Nov 15;22(22):2821-2.
+PMID: 17005535
+https://www.ncbi.nlm.nih.gov/pubmed/17005535
+
+The mathematical model has been described separately:
+A novel method for detecting intramolecular coevolution: adding a further dimension to selective constraints analyses.
+Fares MA, Travers SA.
+Genetics. 2006 May;173(1):9-23.
+PMID: 16547113
+https://www.ncbi.nlm.nih.gov/pubmed/16547113
diff --git a/academic/vCAPS_coevolution/slack-desc b/academic/vCAPS_coevolution/slack-desc
new file mode 100644
index 0000000000000..5cd78f0770b38
--- /dev/null
+++ b/academic/vCAPS_coevolution/slack-desc
@@ -0,0 +1,19 @@
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct. It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+ |-----handy-ruler------------------------------------------------------|
+vCAPS_coevolution: vCAPS_coevolution (Coevolution Analysis using Protein Sequences)
+vCAPS_coevolution:
+vCAPS_coevolution: CAPS is aimed at measuring the coevolution between amino acid
+vCAPS_coevolution: sites belonging to the same protein (intra-molecular coevolution)
+vCAPS_coevolution: or to two functionally or physically interacting proteins (inter-
+vCAPS_coevolution: molecular coevolution). In addition, a test which assesses
+vCAPS_coevolution: whether two proteins are interacting is implemented.
+vCAPS_coevolution:
+vCAPS_coevolution: Home: http://bioinf.gen.tcd.ie/~faresm/software/software.html
+vCAPS_coevolution:
+vCAPS_coevolution:
diff --git a/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild b/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild
new file mode 100644
index 0000000000000..f00aea698e20e
--- /dev/null
+++ b/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+# Slackware build script for vCAPS_coevolution
+
+# Copyright 2020-2022 Petar Petrov slackalaxy@gmail.com
+# All rights reserved.
+#
+# Redistribution and use of this script, with or without modification, is
+# permitted provided that the following conditions are met:
+#
+# 1. Redistributions of this script must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+cd $(dirname $0) ; CWD=$(pwd)
+
+PRGNAM=vCAPS_coevolution
+VERSION=${VERSION:-2.0_2UN}
+BUILD=${BUILD:-1}
+TAG=${TAG:-_SBo}
+PKGTYPE=${PKGTYPE:-tgz}
+
+SRCNAM=caps
+SRCVER=2.0
+BINNAM=vCAPS
+
+if [ -z "$ARCH" ]; then
+ case "$( uname -m )" in
+ i?86) ARCH=i586 ;;
+ arm*) ARCH=arm ;;
+ *) ARCH=$( uname -m ) ;;
+ esac
+fi
+
+if [ ! -z "${PRINT_PACKAGE_NAME}" ]; then
+ echo "$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE"
+ exit 0
+fi
+
+TMP=${TMP:-/tmp/SBo}
+PKG=$TMP/package-$PRGNAM
+OUTPUT=${OUTPUT:-/tmp}
+
+if [ "$ARCH" = "i586" ]; then
+ SLKCFLAGS="-O2 -march=i586 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "i686" ]; then
+ SLKCFLAGS="-O2 -march=i686 -mtune=i686"
+ LIBDIRSUFFIX=""
+elif [ "$ARCH" = "x86_64" ]; then
+ SLKCFLAGS="-O2 -fPIC"
+ LIBDIRSUFFIX="64"
+else
+ SLKCFLAGS="-O2"
+ LIBDIRSUFFIX=""
+fi
+
+LIBDIRPATH="-Wl,-rpath,/usr/lib${LIBDIRSUFFIX}/Bpp1.9"
+
+set -e
+
+rm -rf $PKG
+mkdir -p $TMP $PKG $OUTPUT
+cd $TMP
+rm -rf ${SRCNAM}${SRCVER}_src
+unzip $CWD/${SRCNAM}2_src.zip
+cd ${SRCNAM}${SRCVER}_src
+chown -R root:root .
+find -L . \
+ \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
+ -o -perm 511 \) -exec chmod 755 {} \; -o \
+ \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
+ -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
+
+# This is needed for gcc in Slackware 14.2
+sed -i "s:CC=g++ -g:CC=g++-5 -g -std=c++11:" Makefile
+
+# Use our CFLAGS and the custom (legacy) lib path
+sed -i "s:CFLAGS=:CFLAGS=$SLKCFLAGS $LIBDIRPATH:" Makefile
+
+# Find the legacy bpp libraries
+sed -i "s:-lbpp-phyl:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-phyl:g" Makefile
+sed -i "s:-lbpp-numcalc:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-numcalc:g" Makefile
+sed -i "s:-lbpp-utils:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-utils:g" Makefile
+sed -i "s:-lbpp-seq:-L/usr/lib64${LIBDIRSUFFIX}/Bpp1.9 -lbpp-seq:g" Makefile
+
+# Rename the produced executable
+sed -i "s:-o caps:-o $BINNAM:" Makefile
+
+# Use our patches
+patch -p1 -i $CWD/01_caps_verbose.patch
+patch -p1 -i $CWD/02_caps_sort_input.patch
+
+# we already specified g++-5 above, so no need for this
+#source /etc/profile.d/gcc5.sh
+
+make all
+
+# Install the binary produced from our patched source, as "vCAPS"
+install -D -m755 $BINNAM $PKG/usr/bin/$BINNAM
+
+find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \
+ | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
+
+mkdir -p $PKG/usr/share/$PRGNAM
+cp -a sample structures trees TLR1.fa.out $PKG/usr/share/$PRGNAM
+
+mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
+cp -a \
+ caps_manual.pdf \
+ $PKG/usr/doc/$PRGNAM-$VERSION
+cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
+cat $CWD/References > $PKG/usr/doc/$PRGNAM-$VERSION/References
+
+mkdir -p $PKG/install
+cat $CWD/slack-desc > $PKG/install/slack-desc
+
+cd $PKG
+/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.$PKGTYPE
diff --git a/academic/vCAPS_coevolution/vCAPS_coevolution.info b/academic/vCAPS_coevolution/vCAPS_coevolution.info
new file mode 100644
index 0000000000000..48545099b1826
--- /dev/null
+++ b/academic/vCAPS_coevolution/vCAPS_coevolution.info
@@ -0,0 +1,10 @@
+PRGNAM="vCAPS_coevolution"
+VERSION="2.0_2UN"
+HOMEPAGE="http://bioinf.gen.tcd.ie/caps/home.html"
+DOWNLOAD="https://raw.githubusercontent.com/slackalaxy/sources/main/caps2_src.zip"
+MD5SUM="0914007c32ed22a9cb8a47b55cd18a39"
+DOWNLOAD_x86_64=""
+MD5SUM_x86_64=""
+REQUIRES="bpp1.9-phyl"
+MAINTAINER="Petar Petrov"
+EMAIL="slackalaxy@gmail.com"