1 module dparasail.result;
2 
3 import std.stdio;
4 import std.conv;
5 import std.utf;
6 
7 // import bio.std.hts.bam.cigar;
8 import dparasail.cigar;
9 import std.algorithm : map, filter;
10 import std.algorithm.iteration : sum;
11 import std.array : array;
12 import dparasail.alignment;
13 import dparasail.memory;
14 import parasail;
15 
16 /**
17 * Stores alignment results (scores, cigar, alignment starts).
18 * Contains pointers to underlying parasail data. Owns parasail_result_t.
19 */
20 struct ParasailResult
21 {
22     private Parasail* alnSettings;
23     private string s1;
24     private string s2;
25     private const char* seq1;
26     private const char* seq2;
27     private const int seq1Len;
28     private const int seq2Len;
29     //0-based
30     private int queryPos;
31     private int refPos;
32     private Cigar p_cigar;
33     private bool hasCigar;
34     private ParasailResultPtr result;
35 
36     private int num_gaps;
37     private int num_mis;
38     private int num_matches;
39     private int total_length;
40 
41     /// need to use a constructor
42     @disable this();
43 
44     /// ctor for strings
45     this(Parasail * alnSettings, parasail_result_t * result, string s1, string s2)
46     {
47         this.result = ParasailResultPtr(result);
48         this.alnSettings = alnSettings;
49         this.s1 = s1;
50         this.s2 = s2;
51         auto seq1 = toUTFz!(const char*)(s1);
52         auto seq2 = toUTFz!(const char*)(s2);
53         auto seq1Len = cast(const int) s1.length;
54         auto seq2Len = cast(const int) s2.length;
55         this.seq1 = seq1;
56         this.seq1Len = seq1Len;
57         this.seq2 = seq2;
58         this.seq2Len = seq2Len;
59         assert(alnSettings.scoreMatrix);
60         if(parasail_result_is_trace(this.result))
61         {
62             this.hasCigar = true; 
63             this.p_cigar = this.get_cigar();  
64         }
65     }
66 
67     /// starting position of the alignment 
68     /// on the reference/target sequence
69     int position()
70     {
71         return this.refPos;
72     }
73 
74     /// starting position of the alignment 
75     /// on the query sequence
76     int queryPosition()
77     {
78         return this.queryPos;
79     }
80 
81     /// starting position of the alignment 
82     /// on the reference/target sequence
83     int endQuery()
84     {
85         return this.result.end_query;
86     }
87 
88     /// starting position of the alignment 
89     /// on the query sequence
90     int endRef()
91     {
92         return this.result.end_ref;
93     }
94 
95     /// alignment score
96     int score()
97     {
98         return this.result.score;    
99     }
100 
101     /// get identity
102     float identity()
103     {
104         if(parasail_result_is_trace(this.result)){
105             if(total_length == 0)
106                 cigarStats;
107             return float(num_matches) / float(total_length);
108         }
109         else if(parasail_result_is_stats(this.result))
110             return float(parasail_result_get_matches(this.result)) / float(parasail_result_get_length(this.result));
111         throw new Exception("Alignment type is not trace or stats");
112     }
113     
114     /// get similarity
115     float similarity()
116     {
117         if(parasail_result_is_trace(this.result)){
118             if(total_length == 0)
119                 cigarStats;
120             return float(num_matches + num_mis) / float(total_length);
121         }
122         else if(parasail_result_is_stats(this.result))
123             return float(parasail_result_get_similar(this.result)) / float(parasail_result_get_length(this.result));
124         throw new Exception("Alignment type is not trace or stats");
125     }
126 
127     /// get identity
128     float gapRate()
129     {
130         if(parasail_result_is_trace(this.result)){
131             if(total_length == 0)
132                 cigarStats;
133             return float(num_gaps) / float(total_length);
134         }
135         else if(parasail_result_is_stats(this.result))
136             return 1.0 - (float(parasail_result_get_similar(this.result)) / float(parasail_result_get_length(this.result)));
137         throw new Exception("Alignment type is not trace or stats");
138     }
139 
140     private void cigarStats(){
141         assert(total_length == 0);
142         assert(num_matches == 0);
143         assert(num_mis == 0);
144         assert(num_gaps == 0);
145         auto coords = AlignedCoordinatesItr(cigar).filter!(x => x.cigar_op != Ops.SOFT_CLIP).array;
146         foreach (i, op; coords)
147         {
148             switch(op.cigar_op){
149                 // should no longer happen
150                 // case Ops.SOFT_CLIP:
151                 //     continue;
152                 case Ops.DIFF:
153                     num_mis++;
154                     break;
155                 case Ops.DEL:
156                 case Ops.INS:
157                     num_gaps++;
158                     break;
159                 default:
160                     num_matches++; 
161                     break;
162             }
163             total_length++;
164         }
165     }
166 
167     /// get cigar string of alignment
168     Cigar cigar(){
169         if(hasCigar)    
170             return this.p_cigar;
171         else
172             throw new Exception("This alignment type cannot produce cigar.");
173     }
174 
175     /// get cigar from result and parse it
176     /// default cigar from parasail is not compatible with 
177     /// a bam per say. It needs to be padded with soft-clips,
178     /// and have leading and lagging INDELs removed.
179     /// Alignment positions also have to modified.
180     private Cigar get_cigar()
181     {
182         // Get cigar from result
183         parasail_cigar_t* cigar = parasail_result_get_cigar(this.result, seq1, seq1Len, seq2, seq2Len,
184                 alnSettings.scoreMatrix);
185         this.queryPos = cigar.beg_query;
186         this.refPos = cigar.beg_ref;
187         Cigar cigar_string = Cigar(((cast(CigarOp*) cigar.seq)[0 .. cigar.len]).dup);
188         parasail_cigar_free(cigar);
189         
190         //if *
191         if (cigar_string.length == 0)
192         {
193             return cigar_string;
194         }
195         //if 30I8M make 30S8M
196         if (cigar_string[0].op == Ops.INS)
197         {
198             //move queryPos as well as it is accounted for
199             cigar_string[0] = CigarOp(cigar_string[0].length + this.queryPos, Ops.SOFT_CLIP);
200             this.queryPos += cigar_string[0].length;
201         }
202         //else if 30D8M make 8M and move ref start
203         else if (cigar_string[0].op == Ops.DEL)
204         {
205             this.refPos = this.refPos + cigar_string[0].length;
206             cigar_string = Cigar(cigar_string[1 .. $]);
207             //cigar_string[0]=CigarOperation(cigar_string[0].length+this.queryPos,'S');
208         }
209         //else if begin query not 0 add softclip
210         else if (this.queryPos != 0)
211         {
212             assert(cigar_string[0].op != Ops.SOFT_CLIP);
213             cigar_string = Cigar(CigarOp(this.queryPos, Ops.SOFT_CLIP) ~ cigar_string[]);
214             this.queryPos = cigar_string[0].length;
215         }
216         ///////////////////////////////////////////////////////////
217         int q_bases_covered = cast(int) cigar_string[].filter!(x => x.is_query_consuming())
218             .map!(x => x.length)
219             .sum;
220         if (cigar_string[$ - 1].op == Ops.INS)
221         {
222             cigar_string[$ - 1] = CigarOp(cigar_string[$ - 1].length + this.seq1Len - q_bases_covered,
223                     Ops.SOFT_CLIP);
224             q_bases_covered = cigar_string[].filter!(x => x.is_query_consuming())
225                 .map!(x => x.length)
226                 .sum;
227         }
228         else if (cigar_string[$ - 1].op == Ops.DEL)
229         {
230             cigar_string = Cigar(cigar_string[0 .. ($ - 1)]);
231         }
232         else if (q_bases_covered != this.seq1Len)
233         {
234             cigar_string = Cigar(cigar_string[] ~ CigarOp(this.seq1Len - q_bases_covered,
235                     Ops.SOFT_CLIP));
236             q_bases_covered = cigar_string[].filter!(x => x.is_query_consuming())
237                 .map!(x => x.length)
238                 .sum;
239         }
240         assert(q_bases_covered == seq1Len);
241         // parasail_cigar_free(cigar);  
242         // Bug caused bc Cigar ctor takes reference not copy
243         // Cigar implicit dtor destroys array ptr which cleans up our problem
244         return cigar_string;
245     }
246 
247     /// display parasail representation of alignment
248     void writeAlignment()
249     {
250         parasail_traceback_generic(seq1, seq1Len, seq2, seq2Len, toUTFz!(char*)("Query:"),
251                 toUTFz!(char*)("Target:"), alnSettings.scoreMatrix, result,
252                 '|', '*', '*', 60, 7, 1);
253     }
254 
255     /// clean up 
256     private void close()
257     {
258         parasail_result_free(result);
259     }
260 }