aboutsummaryrefslogtreecommitdiff
path: root/academic/vCAPS_coevolution/caps_verbose.patch
blob: 7f64d80f34f84ccf8cdb3ed14759b48b153d380c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
diff -pruN orig/caps.cpp new/caps.cpp
--- orig/caps.cpp	2012-12-15 17:13:23.000000000 +0200
+++ new/caps.cpp	2020-09-09 23:07:46.080566000 +0300
@@ -14,7 +14,7 @@
 #include <gsl/gsl_statistics.h>
 #include<sys/time.h>
 #include<iomanip>
-
+#include <bits/stdc++.h> 
 
 
 
@@ -69,6 +69,8 @@
 const gsl_rng_type * T;
 gsl_rng *r;
 
+vector<double> totaltempnew;
+double alphathresh = 0;
 int main(int argc, char *argv[]){
 
 
@@ -543,16 +545,27 @@ int main(int argc, char *argv[]){
 
 
 					print_splash(output);
+					OUTPUT << "\n\File1: " << files[i] << endl;
 					vec1.print_to_fasta(output.c_str());
+					OUTPUT << "\n\nFile2: " << files[j] << endl;
 					vec2.print_to_fasta(output.c_str());
 					int length1 = vec1.sequences[0].length();			
 					int length2 = vec2.sequences[0].length();
 
+					OUTPUT << "\n\nLength1: " << length1 << endl;
+					OUTPUT << "Length2: " << length2 << endl;
 
 
 					if(tree_in ==0){
 						tree1 = create_input_tree(vec1.names, vec1.sequences);
 						tree2 = create_input_tree(vec2.names, vec2.sequences);
+						
+						// Output the CAPS generated trees to the .out file of each pair
+						string temptre1 = TreeTemplateTools::treeToParenthesis(*tree1, true);
+						string temptre2 = TreeTemplateTools::treeToParenthesis(*tree2, true);
+						OUTPUT << "\n" << endl;
+						OUTPUT << "CAPS generated tree 1: " << temptre1 << endl;
+						OUTPUT << "CAPS generated tree 2: " << temptre2 << endl;
 					}/*else if(tree_in ==1 && variable==1){
 
 					   std::auto_ptr<DistanceMatrix> DS;
@@ -666,6 +679,7 @@ int main(int argc, char *argv[]){
 					int value = floor(((totaltemp.size())*(1-(threshval))))+1;
 
 					threshold = totaltemp[value];
+					totaltempnew = totaltemp;
 
 
 					/*=======================================================*/
@@ -870,6 +884,30 @@ int Chi_squared (int num_pairs, int num_
 
 }		/* -----  end of function Chi_squared  ----- */
 
+/* 
+ * ===  FUNCTION  ======================================================================
+ *         Name:  find_alpha
+ *  Description:  Find the index of an element in a vector totaltemp
+ *    Help from:  https://www.geeksforgeeks.org/how-to-find-index-of-a-given-element-in-a-vector-in-cpp/
+ *                https://stackoverflow.com/questions/8647635/elegant-way-to-find-closest-value-in-a-vector-from-above
+ *       Author:  Petar Petrov, University of Turku (Finland); pebope@utu.fi
+ * =====================================================================================
+ */
+double getIndex(std::vector<double> const& v, double K) 
+{ 
+    auto const it = std::lower_bound(v.begin(), v.end(), fabs(K));
+    //auto it = std::upper_bound(v.begin(), v.end(), fabs(K));
+  
+    if (it != v.end()) { 
+        int index = distance(v.begin(), it);
+	alphathresh = (((int)1+(double)v.size()-(int)index)/(double)v.size());
+	return alphathresh;
+        //cerr << index << "\t" << alphathresh << endl; 
+    } 
+    else { 
+        cerr << "ELEMENT NOT FOUND!" << endl; 
+    } 
+} 
 
 
 
@@ -890,9 +928,9 @@ int print_inter(vector<double>& Correl1,
 	output << endl << endl;
 
 	output << "Coevolving Pairs of amino acid sites\n";
-	output << "=============================================================================\n";
-	output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\n\n";
-	output << "=============================================================================\n";
+	output << "================================================================================================================================\n";
+	output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\tP-value1\tP-value2\tMean P-value\tCorrelation1\tCorrelation2\n\n";
+	output << "================================================================================================================================\n";
 
 	//double mean = average_vec<double>(Correl);
 	//double SD = SD_vf(Correl, mean);
@@ -951,9 +989,11 @@ int print_inter(vector<double>& Correl1,
 
 						//	}
 
+						double Alpha1 = getIndex(totaltempnew, Correl1[cor]);
+						double Alpha2 = getIndex(totaltempnew, Correl2[cor]);
 						//if(bootval>=bootcut && re1<=8 && re2<=8 ){
 						if(bootval>=bootcut){
-							output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << endl; 
+							output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << "\t" << Alpha1 << "\t" << Alpha2 << "\t" << (Alpha1+Alpha2)/2 << "\t" << Correl1[cor] << "\t" << Correl2[cor] << endl; 
 							signif.push_back(((Correl1[cor]+Correl2[cor])/2));
 							++pairs;
 							vector<int> tem;