1 module dparasail;
2 import std.stdio;
3 import std.conv;
4 import std.utf;
5 // import bio.std.hts.bam.cigar;
6 import dhtslib.cigar;
7 import std.algorithm:map,filter;
8 import std.algorithm.iteration:sum;
9 public import parasail;
10 
11 /*
12 * As a note nothing about this struct is thread-safe
13 * so just don't
14 *
15 */
16 struct parasail_query{
17     char * seq1;
18     char * seq2;
19     int seq1Len;
20     int seq2Len;
21     //0-based
22     int beg_query;
23     int beg_ref;
24     Cigar cigar;
25     parasail_result_t* result;
26     Cigar get_cigar(parasail_matrix_t* score_matrix){
27         parasail_cigar_t* cigar;
28         Cigar cigar_string;
29         cigar=parasail_result_get_cigar(result,seq1,seq1Len,seq2,seq2Len,score_matrix);
30         beg_query=cigar.beg_query;
31         beg_ref=cigar.beg_ref;
32         cigar_string = Cigar(cigar.seq,cigar.len);
33         //if *
34         if(cigar_string.ops.length==0){
35             return cigar_string;
36         }
37         //if 30I8M make 30S8M
38         if(cigar_string.ops[0].op==Ops.INS){
39             //move beg_query as well as it is accounted for
40             cigar_string.ops[0]=CigarOp(cigar_string.ops[0].length+this.beg_query,Ops.SOFT_CLIP);
41             this.beg_query=0;
42         }
43         //else if 30D8M make 8M and move ref start
44         else if(cigar_string.ops[0].op==Ops.DEL){
45             this.beg_ref=this.beg_ref+cigar_string.ops[0].length;
46             cigar_string=Cigar(cigar_string.ops[1..$]);
47             //cigar_string[0]=CigarOperation(cigar_string[0].length+this.beg_query,'S');
48         }
49         //else if begin query not 0 add softclip
50         else if(this.beg_query!=0){
51             assert(cigar_string.ops[0].op!=Ops.SOFT_CLIP);
52             cigar_string=Cigar(CigarOp(this.beg_query,Ops.SOFT_CLIP)~cigar_string.ops);
53             this.beg_query=0;
54         }
55         ///////////////////////////////////////////////////////////
56         int q_bases_covered=cast(int) cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum;
57         if(cigar_string.ops[$-1].op==Ops.INS){
58             cigar_string.ops[$-1]=CigarOp(cigar_string.ops[$-1].length+this.seq1Len-q_bases_covered,Ops.SOFT_CLIP);
59             q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum;
60         }
61         else if(cigar_string.ops[$-1].op==Ops.DEL){
62             cigar_string=Cigar(cigar_string.ops[0..($-1)]);
63         }
64         else if(q_bases_covered!=this.seq1Len){
65             cigar_string=Cigar(cigar_string.ops~CigarOp(this.seq1Len-q_bases_covered,Ops.SOFT_CLIP));
66             q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum;
67         }
68         assert(q_bases_covered==seq1Len);
69         // parasail_cigar_free(cigar);  
70         // Bug caused bc Cigar ctor takes reference not copy
71         // Cigar implicit dtor destroys array ptr which cleans up our problem
72         return cigar_string;
73     }
74     void close(){
75         parasail_result_free(result);
76     }
77 }
78 struct Parasail{
79     parasail_matrix_t* score_matrix = null;
80     int gap;
81     int open;
82     @disable this();
83     this(string matrix, int open, int gap){
84         this.score_matrix=parasail_matrix_lookup(toUTFz!(char *)(matrix));
85         this.open=open;
86         this.gap=gap;
87     }
88     this(string alphabet,int match,int mismatch, int open, int gap){
89         this.score_matrix=parasail_matrix_create(toUTFz!(char *)(alphabet),match,mismatch);
90         this.open=open;
91         this.gap=gap;
92     }
93     parasail_query sw_striped(string s1,string s2){
94         return sw_striped(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length);
95     }
96     parasail_query sw_striped(char *s1,char * s2, int s1Len,int s2Len){
97         parasail_query p;
98         p.seq1=s1;
99         p.seq2=s2;
100         p.seq1Len=s1Len;
101         p.seq2Len=s2Len;
102         p.result=sw_striped(p);
103         p.cigar=p.get_cigar(score_matrix);
104         return p;
105     }
106     parasail_result_t* sw_striped(parasail_query p){
107         return parasail_sw_trace_striped_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix);
108     }
109     parasail_query nw_scan(string s1,string s2){
110         return nw_scan(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length);
111     }
112     parasail_query nw_scan(char *s1,char * s2, int s1Len,int s2Len){
113         parasail_query p;
114         p.seq1=s1;
115         p.seq2=s2;
116         p.seq1Len=s1Len;
117         p.seq2Len=s2Len;
118         p.result=nw_scan(p);
119         p.cigar=p.get_cigar(score_matrix);
120         return p;
121     }
122     parasail_result_t* nw_scan(parasail_query p){
123         return parasail_nw_trace_scan_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix);
124     }
125 
126     parasail_query aligner(string alg, string option)(string s1,string s2){
127         return aligner!(alg,option)(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length);
128     }
129     parasail_query aligner(string alg, string option)(char *s1,char * s2, int s1Len,int s2Len){
130         parasail_query p;
131         p.seq1=s1;
132         p.seq2=s2;
133         p.seq1Len=s1Len;
134         p.seq2Len=s2Len;
135         p.result=aligner!(alg,option)(p);
136         p.cigar=p.get_cigar(score_matrix);
137         return p;
138     }
139     parasail_result_t* aligner(string alg, string option)(parasail_query p){
140         mixin("return parasail_"~alg~"_trace_"~option~"_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix);");
141     }
142     
143 
144     void close(){
145         parasail_matrix_free(score_matrix);
146     }
147 }
148 
149 unittest{
150     import std.stdio;
151     auto p=Parasail("dnafull",3,2);
152     auto q=p.sw_striped("GATTA","GACTA");
153     writeln(q.seq1[0..q.seq1Len]);
154     writeln(q.get_cigar(p.score_matrix).toString());
155     writeln(q.beg_query);
156     writeln(q.beg_ref);
157     assert(q.get_cigar(p.score_matrix).toString()=="2=1X2=");
158     q=p.sw_striped("GGCTTCTGATCAGGCTTCT","GACTA");
159     writeln(q.seq1[0..q.seq1Len]);
160     writeln(q.get_cigar(p.score_matrix).toString());
161     writeln(q.beg_query);
162     writeln(q.beg_ref);
163     assert(q.get_cigar(p.score_matrix).toString()=="7S2=1D1=1I1=7S");
164     q=p.sw_striped("GACTAA","GGCTTCTGATCAGGCTTCT");
165     writeln(q.seq1[0..q.seq1Len]);
166     writeln(q.get_cigar(p.score_matrix).toString());
167     writeln(q.beg_query);
168     writeln(q.beg_ref);
169     q=p.nw_scan("GATTA","GACTA");
170     writeln(q.seq1[0..q.seq1Len]);
171     writeln(q.get_cigar(p.score_matrix).toString());
172     writeln(q.beg_query);
173     writeln(q.beg_ref);
174     assert(q.get_cigar(p.score_matrix).toString()=="2=1X2=");
175     q=p.nw_scan("GGCTTCTGATCAGGCTTCT","GACTA");
176     writeln(q.seq1[0..q.seq1Len]);
177     writeln(q.get_cigar(p.score_matrix).toString());
178     writeln(q.beg_query);
179     writeln(q.beg_ref);
180     assert(q.get_cigar(p.score_matrix).toString()=="1=1X2=4I1=10S");
181     q=p.nw_scan("GACTAA","GGCTTCTGATCAGGCTTCT");
182     writeln(q.seq1[0..q.seq1Len]);
183     writeln(q.get_cigar(p.score_matrix).toString());
184     writeln(q.beg_query);
185     writeln(q.beg_ref);
186     // auto cigar=parasail_result_get_cigar(q.result,q.seq1,q.seq1Len,q.seq2,q.seq2Len,q.score_matrix);
187     // writeln(parasail_cigar_decode(cigar));
188 }
189