1 module dparasail; 2 import std.stdio; 3 import std.conv; 4 import std.utf; 5 // import bio.std.hts.bam.cigar; 6 import dhtslib.cigar; 7 import std.algorithm:map,filter; 8 import std.algorithm.iteration:sum; 9 extern(C): 10 11 //Struct declarations 12 struct parasail_result_t { 13 int score; 14 int end_query; 15 int end_ref; 16 int flag; 17 void *extra; 18 } 19 struct parasail_matrix_t; 20 struct parasail_cigar_t{ 21 uint *seq; 22 int len; 23 int beg_query; 24 int beg_ref; 25 } 26 //Matrix functions 27 parasail_matrix_t* parasail_matrix_lookup(char * matrix); 28 parasail_matrix_t* parasail_matrix_create( 29 const char *alphabet, const int match, const int mismatch); 30 void parasail_matrix_free(parasail_matrix_t* matrix); 31 32 //SW functions 33 34 parasail_result_t* parasail_sw_trace_striped_16( 35 const char * s1,const int s1Len, 36 const char *s2,const int s2Len, 37 const int open,const int gap, 38 const parasail_matrix_t* matrix 39 ); 40 41 parasail_result_t* parasail_nw_trace_scan_16( 42 const char * s1,const int s1Len, 43 const char *s2,const int s2Len, 44 const int open,const int gap, 45 const parasail_matrix_t* matrix 46 ); 47 48 void parasail_result_free(parasail_result_t *result); 49 50 //Cigar Functions 51 void parasail_cigar_free(parasail_cigar_t *cigar); 52 char parasail_cigar_decode_op(uint cigar_int); 53 uint parasail_cigar_decode_len(uint cigar_int); 54 char* parasail_cigar_decode(parasail_cigar_t *cigar); 55 parasail_cigar_t* parasail_result_get_cigar( 56 parasail_result_t *result, 57 const char *seqA, int lena, 58 const char *seqB, int lenb, 59 const parasail_matrix_t *matrix); 60 61 62 63 //D wrapping 64 extern(D): 65 66 /* 67 * As a note nothing about this struct is thread-safe 68 * so just don't 69 * 70 */ 71 struct parasail_query{ 72 char * seq1; 73 char * seq2; 74 int seq1Len; 75 int seq2Len; 76 //0-based 77 int beg_query; 78 int beg_ref; 79 Cigar cigar; 80 parasail_result_t* result; 81 Cigar get_cigar(parasail_matrix_t* score_matrix){ 82 parasail_cigar_t* cigar; 83 Cigar cigar_string; 84 cigar=parasail_result_get_cigar(result,seq1,seq1Len,seq2,seq2Len,score_matrix); 85 beg_query=cigar.beg_query; 86 beg_ref=cigar.beg_ref; 87 cigar_string = Cigar(cigar.seq,cigar.len); 88 //if * 89 if(cigar_string.ops.length==0){ 90 return cigar_string; 91 } 92 //if 30I8M make 30S8M 93 if(cigar_string.ops[0].op==Ops.INS){ 94 //move beg_query as well as it is accounted for 95 cigar_string.ops[0]=CigarOp(cigar_string.ops[0].length+this.beg_query,Ops.SOFT_CLIP); 96 this.beg_query=0; 97 } 98 //else if 30D8M make 8M and move ref start 99 else if(cigar_string.ops[0].op==Ops.DEL){ 100 this.beg_ref=this.beg_ref+cigar_string.ops[0].length; 101 cigar_string=Cigar(cigar_string.ops[1..$]); 102 //cigar_string[0]=CigarOperation(cigar_string[0].length+this.beg_query,'S'); 103 } 104 //else if begin query not 0 add softclip 105 else if(this.beg_query!=0){ 106 assert(cigar_string.ops[0].op!=Ops.SOFT_CLIP); 107 cigar_string=Cigar(CigarOp(this.beg_query,Ops.SOFT_CLIP)~cigar_string.ops); 108 this.beg_query=0; 109 } 110 /////////////////////////////////////////////////////////// 111 int q_bases_covered=cast(int) cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 112 if(cigar_string.ops[$-1].op==Ops.INS){ 113 cigar_string.ops[$-1]=CigarOp(cigar_string.ops[$-1].length+this.seq1Len-q_bases_covered,Ops.SOFT_CLIP); 114 q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 115 } 116 else if(cigar_string.ops[$-1].op==Ops.DEL){ 117 cigar_string=Cigar(cigar_string.ops[0..($-1)]); 118 } 119 else if(q_bases_covered!=this.seq1Len){ 120 cigar_string=Cigar(cigar_string.ops~CigarOp(this.seq1Len-q_bases_covered,Ops.SOFT_CLIP)); 121 q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 122 } 123 assert(q_bases_covered==seq1Len); 124 // parasail_cigar_free(cigar); 125 // Bug caused bc Cigar ctor takes reference not copy 126 // Cigar implicit dtor destroys array ptr which cleans up our problem 127 return cigar_string; 128 } 129 void close(){ 130 parasail_result_free(result); 131 } 132 } 133 struct Parasail{ 134 parasail_matrix_t* score_matrix = null; 135 int gap; 136 int open; 137 @disable this(); 138 this(string matrix, int open, int gap){ 139 this.score_matrix=parasail_matrix_lookup(toUTFz!(char *)(matrix)); 140 this.open=open; 141 this.gap=gap; 142 } 143 this(string alphabet,int match,int mismatch, int open, int gap){ 144 this.score_matrix=parasail_matrix_create(toUTFz!(char *)(alphabet),match,mismatch); 145 this.open=open; 146 this.gap=gap; 147 } 148 parasail_query sw_striped(string s1,string s2){ 149 return sw_striped(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 150 } 151 parasail_query sw_striped(char *s1,char * s2, int s1Len,int s2Len){ 152 parasail_query p; 153 p.seq1=s1; 154 p.seq2=s2; 155 p.seq1Len=s1Len; 156 p.seq2Len=s2Len; 157 p.result=sw_striped(p); 158 p.cigar=p.get_cigar(score_matrix); 159 return p; 160 } 161 parasail_result_t* sw_striped(parasail_query p){ 162 return parasail_sw_trace_striped_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix); 163 } 164 parasail_query nw_scan(string s1,string s2){ 165 return nw_scan(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 166 } 167 parasail_query nw_scan(char *s1,char * s2, int s1Len,int s2Len){ 168 parasail_query p; 169 p.seq1=s1; 170 p.seq2=s2; 171 p.seq1Len=s1Len; 172 p.seq2Len=s2Len; 173 p.result=nw_scan(p); 174 p.cigar=p.get_cigar(score_matrix); 175 return p; 176 } 177 parasail_result_t* nw_scan(parasail_query p){ 178 return parasail_nw_trace_scan_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix); 179 } 180 void close(){ 181 parasail_matrix_free(score_matrix); 182 } 183 } 184 185 unittest{ 186 import std.stdio; 187 auto p=Parasail("dnafull",3,2); 188 auto q=p.sw_striped("GATTA","GACTA"); 189 writeln(q.seq1[0..q.seq1Len]); 190 writeln(q.get_cigar(p.score_matrix).toString()); 191 writeln(q.beg_query); 192 writeln(q.beg_ref); 193 assert(q.get_cigar(p.score_matrix).toString()=="2=1X2="); 194 q=p.sw_striped("GGCTTCTGATCAGGCTTCT","GACTA"); 195 writeln(q.seq1[0..q.seq1Len]); 196 writeln(q.get_cigar(p.score_matrix).toString()); 197 writeln(q.beg_query); 198 writeln(q.beg_ref); 199 assert(q.get_cigar(p.score_matrix).toString()=="7S2=1D1=1I1=7S"); 200 q=p.sw_striped("GACTAA","GGCTTCTGATCAGGCTTCT"); 201 writeln(q.seq1[0..q.seq1Len]); 202 writeln(q.get_cigar(p.score_matrix).toString()); 203 writeln(q.beg_query); 204 writeln(q.beg_ref); 205 q=p.nw_scan("GATTA","GACTA"); 206 writeln(q.seq1[0..q.seq1Len]); 207 writeln(q.get_cigar(p.score_matrix).toString()); 208 writeln(q.beg_query); 209 writeln(q.beg_ref); 210 assert(q.get_cigar(p.score_matrix).toString()=="2=1X2="); 211 q=p.nw_scan("GGCTTCTGATCAGGCTTCT","GACTA"); 212 writeln(q.seq1[0..q.seq1Len]); 213 writeln(q.get_cigar(p.score_matrix).toString()); 214 writeln(q.beg_query); 215 writeln(q.beg_ref); 216 assert(q.get_cigar(p.score_matrix).toString()=="1=1X2=4I1=10S"); 217 q=p.nw_scan("GACTAA","GGCTTCTGATCAGGCTTCT"); 218 writeln(q.seq1[0..q.seq1Len]); 219 writeln(q.get_cigar(p.score_matrix).toString()); 220 writeln(q.beg_query); 221 writeln(q.beg_ref); 222 // auto cigar=parasail_result_get_cigar(q.result,q.seq1,q.seq1Len,q.seq2,q.seq2Len,q.score_matrix); 223 // writeln(parasail_cigar_decode(cigar)); 224 } 225