1 module dparasail;
2 import std.stdio;
3 import std.conv;
4 import std.utf;
5 // import bio.std.hts.bam.cigar;
6 import dhtslib.cigar;
7 import std.algorithm:map,filter;
8 import std.algorithm.iteration:sum;
9 extern(C):
10 
11 //Struct declarations
12 struct parasail_result_t {
13     int score;
14     int end_query;
15     int end_ref;
16     int flag;
17     void *extra;
18 }
19 struct parasail_matrix_t;
20 struct parasail_cigar_t{
21     uint *seq;
22     int len;
23     int beg_query;
24     int beg_ref;
25 }
26 //Matrix functions
27 parasail_matrix_t* parasail_matrix_lookup(char * matrix);
28 parasail_matrix_t* parasail_matrix_create(
29         const char *alphabet, const int match, const int mismatch);
30 void parasail_matrix_free(parasail_matrix_t* matrix);
31 
32 //SW functions
33 
34 parasail_result_t* parasail_sw_trace_striped_16(
35         const char * s1,const int s1Len,
36         const char *s2,const int s2Len,
37         const int open,const int gap,
38         const parasail_matrix_t* matrix
39         );
40 
41 parasail_result_t* parasail_nw_trace_scan_16(
42     const char * s1,const int s1Len,
43     const char *s2,const int s2Len,
44     const int open,const int gap,
45     const parasail_matrix_t* matrix
46     );
47 
48 void parasail_result_free(parasail_result_t *result);
49 
50 //Cigar Functions
51 void parasail_cigar_free(parasail_cigar_t *cigar);
52 char parasail_cigar_decode_op(uint cigar_int);
53 uint parasail_cigar_decode_len(uint cigar_int);
54 char* parasail_cigar_decode(parasail_cigar_t *cigar);
55 parasail_cigar_t* parasail_result_get_cigar(
56         parasail_result_t *result,
57         const char *seqA, int lena,
58         const char *seqB, int lenb,
59         const parasail_matrix_t *matrix);
60 
61 
62 
63 //D wrapping
64 extern(D):
65 
66 /*
67 * As a note nothing about this struct is thread-safe
68 * so just don't
69 *
70 */
71 struct parasail_query{
72     char * seq1;
73     char * seq2;
74     int seq1Len;
75     int seq2Len;
76     //0-based
77     int beg_query;
78     int beg_ref;
79     Cigar cigar;
80     parasail_result_t* result;
81     Cigar get_cigar(parasail_matrix_t* score_matrix){
82         parasail_cigar_t* cigar;
83         Cigar cigar_string;
84         cigar=parasail_result_get_cigar(result,seq1,seq1Len,seq2,seq2Len,score_matrix);
85         beg_query=cigar.beg_query;
86         beg_ref=cigar.beg_ref;
87         cigar_string = Cigar(cigar.seq,cigar.len);
88         //if *
89         if(cigar_string.ops.length==0){
90             return cigar_string;
91         }
92         //if 30I8M make 30S8M
93         if(cigar_string.ops[0].op==Ops.INS){
94             //move beg_query as well as it is accounted for
95             cigar_string.ops[0]=CigarOp(cigar_string.ops[0].length+this.beg_query,Ops.SOFT_CLIP);
96             this.beg_query=0;
97         }
98         //else if 30D8M make 8M and move ref start
99         else if(cigar_string.ops[0].op==Ops.DEL){
100             this.beg_ref=this.beg_ref+cigar_string.ops[0].length;
101             cigar_string=Cigar(cigar_string.ops[1..$]);
102             //cigar_string[0]=CigarOperation(cigar_string[0].length+this.beg_query,'S');
103         }
104         //else if begin query not 0 add softclip
105         else if(this.beg_query!=0){
106             assert(cigar_string.ops[0].op!=Ops.SOFT_CLIP);
107             cigar_string=Cigar(CigarOp(this.beg_query,Ops.SOFT_CLIP)~cigar_string.ops);
108             this.beg_query=0;
109         }
110         ///////////////////////////////////////////////////////////
111         int q_bases_covered=cast(int) cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum;
112         if(cigar_string.ops[$-1].op==Ops.INS){
113             cigar_string.ops[$-1]=CigarOp(cigar_string.ops[$-1].length+this.seq1Len-q_bases_covered,Ops.SOFT_CLIP);
114             q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum;
115         }
116         else if(cigar_string.ops[$-1].op==Ops.DEL){
117             cigar_string=Cigar(cigar_string.ops[0..($-1)]);
118         }
119         else if(q_bases_covered!=this.seq1Len){
120             cigar_string=Cigar(cigar_string.ops~CigarOp(this.seq1Len-q_bases_covered,Ops.SOFT_CLIP));
121             q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum;
122         }
123         assert(q_bases_covered==seq1Len);
124         // parasail_cigar_free(cigar);  
125         // Bug caused bc Cigar ctor takes reference not copy
126         // Cigar implicit dtor destroys array ptr which cleans up our problem
127         return cigar_string;
128     }
129     void close(){
130         parasail_result_free(result);
131     }
132 }
133 struct Parasail{
134     parasail_matrix_t* score_matrix = null;
135     int gap;
136     int open;
137     @disable this();
138     this(string matrix, int open, int gap){
139         this.score_matrix=parasail_matrix_lookup(toUTFz!(char *)(matrix));
140         this.open=open;
141         this.gap=gap;
142     }
143     this(string alphabet,int match,int mismatch, int open, int gap){
144         this.score_matrix=parasail_matrix_create(toUTFz!(char *)(alphabet),match,mismatch);
145         this.open=open;
146         this.gap=gap;
147     }
148     parasail_query sw_striped(string s1,string s2){
149         return sw_striped(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length);
150     }
151     parasail_query sw_striped(char *s1,char * s2, int s1Len,int s2Len){
152         parasail_query p;
153         p.seq1=s1;
154         p.seq2=s2;
155         p.seq1Len=s1Len;
156         p.seq2Len=s2Len;
157         p.result=sw_striped(p);
158         p.cigar=p.get_cigar(score_matrix);
159         return p;
160     }
161     parasail_result_t* sw_striped(parasail_query p){
162         return parasail_sw_trace_striped_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix);
163     }
164     parasail_query nw_scan(string s1,string s2){
165         return nw_scan(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length);
166     }
167     parasail_query nw_scan(char *s1,char * s2, int s1Len,int s2Len){
168         parasail_query p;
169         p.seq1=s1;
170         p.seq2=s2;
171         p.seq1Len=s1Len;
172         p.seq2Len=s2Len;
173         p.result=nw_scan(p);
174         p.cigar=p.get_cigar(score_matrix);
175         return p;
176     }
177     parasail_result_t* nw_scan(parasail_query p){
178         return parasail_nw_trace_scan_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix);
179     }
180     void close(){
181         parasail_matrix_free(score_matrix);
182     }
183 }
184 
185 unittest{
186     import std.stdio;
187     auto p=Parasail("dnafull",3,2);
188     auto q=p.sw_striped("GATTA","GACTA");
189     writeln(q.seq1[0..q.seq1Len]);
190     writeln(q.get_cigar(p.score_matrix).toString());
191     writeln(q.beg_query);
192     writeln(q.beg_ref);
193     assert(q.get_cigar(p.score_matrix).toString()=="2=1X2=");
194     q=p.sw_striped("GGCTTCTGATCAGGCTTCT","GACTA");
195     writeln(q.seq1[0..q.seq1Len]);
196     writeln(q.get_cigar(p.score_matrix).toString());
197     writeln(q.beg_query);
198     writeln(q.beg_ref);
199     assert(q.get_cigar(p.score_matrix).toString()=="7S2=1D1=1I1=7S");
200     q=p.sw_striped("GACTAA","GGCTTCTGATCAGGCTTCT");
201     writeln(q.seq1[0..q.seq1Len]);
202     writeln(q.get_cigar(p.score_matrix).toString());
203     writeln(q.beg_query);
204     writeln(q.beg_ref);
205     q=p.nw_scan("GATTA","GACTA");
206     writeln(q.seq1[0..q.seq1Len]);
207     writeln(q.get_cigar(p.score_matrix).toString());
208     writeln(q.beg_query);
209     writeln(q.beg_ref);
210     assert(q.get_cigar(p.score_matrix).toString()=="2=1X2=");
211     q=p.nw_scan("GGCTTCTGATCAGGCTTCT","GACTA");
212     writeln(q.seq1[0..q.seq1Len]);
213     writeln(q.get_cigar(p.score_matrix).toString());
214     writeln(q.beg_query);
215     writeln(q.beg_ref);
216     assert(q.get_cigar(p.score_matrix).toString()=="1=1X2=4I1=10S");
217     q=p.nw_scan("GACTAA","GGCTTCTGATCAGGCTTCT");
218     writeln(q.seq1[0..q.seq1Len]);
219     writeln(q.get_cigar(p.score_matrix).toString());
220     writeln(q.beg_query);
221     writeln(q.beg_ref);
222     // auto cigar=parasail_result_get_cigar(q.result,q.seq1,q.seq1Len,q.seq2,q.seq2Len,q.score_matrix);
223     // writeln(parasail_cigar_decode(cigar));
224 }
225