1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
diff -pruN orig/caps.cpp new/caps.cpp
--- orig/caps.cpp 2012-12-15 17:13:23.000000000 +0200
+++ new/caps.cpp 2020-09-09 23:07:46.080566000 +0300
@@ -14,7 +14,7 @@
#include <gsl/gsl_statistics.h>
#include<sys/time.h>
#include<iomanip>
-
+#include <bits/stdc++.h>
@@ -69,6 +69,8 @@
const gsl_rng_type * T;
gsl_rng *r;
+vector<double> totaltempnew;
+double alphathresh = 0;
int main(int argc, char *argv[]){
@@ -543,16 +545,27 @@ int main(int argc, char *argv[]){
print_splash(output);
+ OUTPUT << "\n\File1: " << files[i] << endl;
vec1.print_to_fasta(output.c_str());
+ OUTPUT << "\n\nFile2: " << files[j] << endl;
vec2.print_to_fasta(output.c_str());
int length1 = vec1.sequences[0].length();
int length2 = vec2.sequences[0].length();
+ OUTPUT << "\n\nLength1: " << length1 << endl;
+ OUTPUT << "Length2: " << length2 << endl;
if(tree_in ==0){
tree1 = create_input_tree(vec1.names, vec1.sequences);
tree2 = create_input_tree(vec2.names, vec2.sequences);
+
+ // Output the CAPS generated trees to the .out file of each pair
+ string temptre1 = TreeTemplateTools::treeToParenthesis(*tree1, true);
+ string temptre2 = TreeTemplateTools::treeToParenthesis(*tree2, true);
+ OUTPUT << "\n" << endl;
+ OUTPUT << "CAPS generated tree 1: " << temptre1 << endl;
+ OUTPUT << "CAPS generated tree 2: " << temptre2 << endl;
}/*else if(tree_in ==1 && variable==1){
std::auto_ptr<DistanceMatrix> DS;
@@ -666,6 +679,7 @@ int main(int argc, char *argv[]){
int value = floor(((totaltemp.size())*(1-(threshval))))+1;
threshold = totaltemp[value];
+ totaltempnew = totaltemp;
/*=======================================================*/
@@ -870,6 +884,30 @@ int Chi_squared (int num_pairs, int num_
} /* ----- end of function Chi_squared ----- */
+/*
+ * === FUNCTION ======================================================================
+ * Name: find_alpha
+ * Description: Find the index of an element in a vector totaltemp
+ * Help from: https://www.geeksforgeeks.org/how-to-find-index-of-a-given-element-in-a-vector-in-cpp/
+ * https://stackoverflow.com/questions/8647635/elegant-way-to-find-closest-value-in-a-vector-from-above
+ * Author: Petar Petrov, University of Turku (Finland); pebope@utu.fi
+ * =====================================================================================
+ */
+double getIndex(std::vector<double> const& v, double K)
+{
+ auto const it = std::lower_bound(v.begin(), v.end(), fabs(K));
+ //auto it = std::upper_bound(v.begin(), v.end(), fabs(K));
+
+ if (it != v.end()) {
+ int index = distance(v.begin(), it);
+ alphathresh = (((int)1+(double)v.size()-(int)index)/(double)v.size());
+ return alphathresh;
+ //cerr << index << "\t" << alphathresh << endl;
+ }
+ else {
+ cerr << "ELEMENT NOT FOUND!" << endl;
+ }
+}
@@ -890,9 +928,9 @@ int print_inter(vector<double>& Correl1,
output << endl << endl;
output << "Coevolving Pairs of amino acid sites\n";
- output << "=============================================================================\n";
- output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\n\n";
- output << "=============================================================================\n";
+ output << "================================================================================================================================\n";
+ output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\tP-value1\tP-value2\tMean P-value\tCorrelation1\tCorrelation2\n\n";
+ output << "================================================================================================================================\n";
//double mean = average_vec<double>(Correl);
//double SD = SD_vf(Correl, mean);
@@ -951,9 +989,11 @@ int print_inter(vector<double>& Correl1,
// }
+ double Alpha1 = getIndex(totaltempnew, Correl1[cor]);
+ double Alpha2 = getIndex(totaltempnew, Correl2[cor]);
//if(bootval>=bootcut && re1<=8 && re2<=8 ){
if(bootval>=bootcut){
- output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << endl;
+ output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << "\t" << Alpha1 << "\t" << Alpha2 << "\t" << (Alpha1+Alpha2)/2 << "\t" << Correl1[cor] << "\t" << Correl2[cor] << endl;
signif.push_back(((Correl1[cor]+Correl2[cor])/2));
++pairs;
vector<int> tem;
|