1 module dparasail; 2 import std.stdio; 3 import std.conv; 4 import std.utf; 5 // import bio.std.hts.bam.cigar; 6 import dhtslib.cigar; 7 import std.algorithm:map,filter; 8 import std.algorithm.iteration:sum; 9 public import parasail; 10 11 /* 12 * As a note nothing about this struct is thread-safe 13 * so just don't 14 * 15 */ 16 struct parasail_query{ 17 char * seq1; 18 char * seq2; 19 int seq1Len; 20 int seq2Len; 21 //0-based 22 int beg_query; 23 int beg_ref; 24 Cigar cigar; 25 parasail_result_t* result; 26 Cigar get_cigar(parasail_matrix_t* score_matrix){ 27 parasail_cigar_t* cigar; 28 Cigar cigar_string; 29 cigar=parasail_result_get_cigar(result,seq1,seq1Len,seq2,seq2Len,score_matrix); 30 beg_query=cigar.beg_query; 31 beg_ref=cigar.beg_ref; 32 cigar_string = Cigar(cigar.seq,cigar.len); 33 //if * 34 if(cigar_string.ops.length==0){ 35 return cigar_string; 36 } 37 //if 30I8M make 30S8M 38 if(cigar_string.ops[0].op==Ops.INS){ 39 //move beg_query as well as it is accounted for 40 cigar_string.ops[0]=CigarOp(cigar_string.ops[0].length+this.beg_query,Ops.SOFT_CLIP); 41 this.beg_query=0; 42 } 43 //else if 30D8M make 8M and move ref start 44 else if(cigar_string.ops[0].op==Ops.DEL){ 45 this.beg_ref=this.beg_ref+cigar_string.ops[0].length; 46 cigar_string=Cigar(cigar_string.ops[1..$]); 47 //cigar_string[0]=CigarOperation(cigar_string[0].length+this.beg_query,'S'); 48 } 49 //else if begin query not 0 add softclip 50 else if(this.beg_query!=0){ 51 assert(cigar_string.ops[0].op!=Ops.SOFT_CLIP); 52 cigar_string=Cigar(CigarOp(this.beg_query,Ops.SOFT_CLIP)~cigar_string.ops); 53 this.beg_query=0; 54 } 55 /////////////////////////////////////////////////////////// 56 int q_bases_covered=cast(int) cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 57 if(cigar_string.ops[$-1].op==Ops.INS){ 58 cigar_string.ops[$-1]=CigarOp(cigar_string.ops[$-1].length+this.seq1Len-q_bases_covered,Ops.SOFT_CLIP); 59 q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 60 } 61 else if(cigar_string.ops[$-1].op==Ops.DEL){ 62 cigar_string=Cigar(cigar_string.ops[0..($-1)]); 63 } 64 else if(q_bases_covered!=this.seq1Len){ 65 cigar_string=Cigar(cigar_string.ops~CigarOp(this.seq1Len-q_bases_covered,Ops.SOFT_CLIP)); 66 q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 67 } 68 assert(q_bases_covered==seq1Len); 69 // parasail_cigar_free(cigar); 70 // Bug caused bc Cigar ctor takes reference not copy 71 // Cigar implicit dtor destroys array ptr which cleans up our problem 72 return cigar_string; 73 } 74 void close(){ 75 parasail_result_free(result); 76 } 77 } 78 struct Parasail{ 79 parasail_matrix_t* score_matrix = null; 80 int gap; 81 int open; 82 @disable this(); 83 this(string matrix, int open, int gap){ 84 this.score_matrix=parasail_matrix_lookup(toUTFz!(char *)(matrix)); 85 this.open=open; 86 this.gap=gap; 87 } 88 this(string alphabet,int match,int mismatch, int open, int gap){ 89 this.score_matrix=parasail_matrix_create(toUTFz!(char *)(alphabet),match,mismatch); 90 this.open=open; 91 this.gap=gap; 92 } 93 parasail_query sw_striped(string s1,string s2){ 94 return sw_striped(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 95 } 96 parasail_query sw_striped(char *s1,char * s2, int s1Len,int s2Len){ 97 parasail_query p; 98 p.seq1=s1; 99 p.seq2=s2; 100 p.seq1Len=s1Len; 101 p.seq2Len=s2Len; 102 p.result=sw_striped(p); 103 p.cigar=p.get_cigar(score_matrix); 104 return p; 105 } 106 parasail_result_t* sw_striped(parasail_query p){ 107 return parasail_sw_trace_striped_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix); 108 } 109 parasail_query nw_scan(string s1,string s2){ 110 return nw_scan(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 111 } 112 parasail_query nw_scan(char *s1,char * s2, int s1Len,int s2Len){ 113 parasail_query p; 114 p.seq1=s1; 115 p.seq2=s2; 116 p.seq1Len=s1Len; 117 p.seq2Len=s2Len; 118 p.result=nw_scan(p); 119 p.cigar=p.get_cigar(score_matrix); 120 return p; 121 } 122 parasail_result_t* nw_scan(parasail_query p){ 123 return parasail_nw_trace_scan_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix); 124 } 125 126 parasail_query aligner(string alg, string option)(string s1,string s2){ 127 return aligner!(alg,option)(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 128 } 129 parasail_query aligner(string alg, string option)(char *s1,char * s2, int s1Len,int s2Len){ 130 parasail_query p; 131 p.seq1=s1; 132 p.seq2=s2; 133 p.seq1Len=s1Len; 134 p.seq2Len=s2Len; 135 p.result=aligner!(alg,option)(p); 136 p.cigar=p.get_cigar(score_matrix); 137 return p; 138 } 139 parasail_result_t* aligner(string alg, string option)(parasail_query p){ 140 mixin("return parasail_"~alg~"_trace_"~option~"_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix);"); 141 } 142 143 144 void close(){ 145 parasail_matrix_free(score_matrix); 146 } 147 } 148 149 unittest{ 150 import std.stdio; 151 auto p=Parasail("dnafull",3,2); 152 auto q=p.sw_striped("GATTA","GACTA"); 153 writeln(q.seq1[0..q.seq1Len]); 154 writeln(q.get_cigar(p.score_matrix).toString()); 155 writeln(q.beg_query); 156 writeln(q.beg_ref); 157 assert(q.get_cigar(p.score_matrix).toString()=="2=1X2="); 158 q=p.sw_striped("GGCTTCTGATCAGGCTTCT","GACTA"); 159 writeln(q.seq1[0..q.seq1Len]); 160 writeln(q.get_cigar(p.score_matrix).toString()); 161 writeln(q.beg_query); 162 writeln(q.beg_ref); 163 assert(q.get_cigar(p.score_matrix).toString()=="7S2=1D1=1I1=7S"); 164 q=p.sw_striped("GACTAA","GGCTTCTGATCAGGCTTCT"); 165 writeln(q.seq1[0..q.seq1Len]); 166 writeln(q.get_cigar(p.score_matrix).toString()); 167 writeln(q.beg_query); 168 writeln(q.beg_ref); 169 q=p.nw_scan("GATTA","GACTA"); 170 writeln(q.seq1[0..q.seq1Len]); 171 writeln(q.get_cigar(p.score_matrix).toString()); 172 writeln(q.beg_query); 173 writeln(q.beg_ref); 174 assert(q.get_cigar(p.score_matrix).toString()=="2=1X2="); 175 q=p.nw_scan("GGCTTCTGATCAGGCTTCT","GACTA"); 176 writeln(q.seq1[0..q.seq1Len]); 177 writeln(q.get_cigar(p.score_matrix).toString()); 178 writeln(q.beg_query); 179 writeln(q.beg_ref); 180 assert(q.get_cigar(p.score_matrix).toString()=="1=1X2=4I1=10S"); 181 q=p.nw_scan("GACTAA","GGCTTCTGATCAGGCTTCT"); 182 writeln(q.seq1[0..q.seq1Len]); 183 writeln(q.get_cigar(p.score_matrix).toString()); 184 writeln(q.beg_query); 185 writeln(q.beg_ref); 186 // auto cigar=parasail_result_get_cigar(q.result,q.seq1,q.seq1Len,q.seq2,q.seq2Len,q.score_matrix); 187 // writeln(parasail_cigar_decode(cigar)); 188 } 189