1 module dparasail; 2 import std.stdio; 3 import std.conv; 4 import std.utf; 5 // import bio.std.hts.bam.cigar; 6 import dhtslib.cigar; 7 import std.algorithm:map,filter; 8 import std.algorithm.iteration:sum; 9 public import parasail; 10 11 /* 12 * As a note nothing about this struct is thread-safe 13 * so just don't 14 * 15 */ 16 struct parasail_query{ 17 char * seq1; 18 char * seq2; 19 int seq1Len; 20 int seq2Len; 21 //0-based 22 int beg_query; 23 int beg_ref; 24 Cigar cigar; 25 parasail_result_t* result; 26 Cigar get_cigar(parasail_matrix_t* score_matrix){ 27 parasail_cigar_t* cigar; 28 Cigar cigar_string; 29 cigar=parasail_result_get_cigar(result,seq1,seq1Len,seq2,seq2Len,score_matrix); 30 beg_query=cigar.beg_query; 31 beg_ref=cigar.beg_ref; 32 cigar_string = Cigar(cigar.seq,cigar.len); 33 //if * 34 if(cigar_string.ops.length==0){ 35 return cigar_string; 36 } 37 //if 30I8M make 30S8M 38 if(cigar_string.ops[0].op==Ops.INS){ 39 //move beg_query as well as it is accounted for 40 cigar_string.ops[0]=CigarOp(cigar_string.ops[0].length+this.beg_query,Ops.SOFT_CLIP); 41 this.beg_query=0; 42 } 43 //else if 30D8M make 8M and move ref start 44 else if(cigar_string.ops[0].op==Ops.DEL){ 45 this.beg_ref=this.beg_ref+cigar_string.ops[0].length; 46 cigar_string=Cigar(cigar_string.ops[1..$]); 47 //cigar_string[0]=CigarOperation(cigar_string[0].length+this.beg_query,'S'); 48 } 49 //else if begin query not 0 add softclip 50 else if(this.beg_query!=0){ 51 assert(cigar_string.ops[0].op!=Ops.SOFT_CLIP); 52 cigar_string=Cigar(CigarOp(this.beg_query,Ops.SOFT_CLIP)~cigar_string.ops); 53 this.beg_query=0; 54 } 55 /////////////////////////////////////////////////////////// 56 int q_bases_covered=cast(int) cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 57 if(cigar_string.ops[$-1].op==Ops.INS){ 58 cigar_string.ops[$-1]=CigarOp(cigar_string.ops[$-1].length+this.seq1Len-q_bases_covered,Ops.SOFT_CLIP); 59 q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 60 } 61 else if(cigar_string.ops[$-1].op==Ops.DEL){ 62 cigar_string=Cigar(cigar_string.ops[0..($-1)]); 63 } 64 else if(q_bases_covered!=this.seq1Len){ 65 cigar_string=Cigar(cigar_string.ops~CigarOp(this.seq1Len-q_bases_covered,Ops.SOFT_CLIP)); 66 q_bases_covered=cigar_string.ops.filter!(x=>x.is_query_consuming()).map!(x=>x.length).sum; 67 } 68 assert(q_bases_covered==seq1Len); 69 // parasail_cigar_free(cigar); 70 // Bug caused bc Cigar ctor takes reference not copy 71 // Cigar implicit dtor destroys array ptr which cleans up our problem 72 return cigar_string; 73 } 74 void close(){ 75 parasail_result_free(result); 76 } 77 } 78 struct Parasail{ 79 parasail_matrix_t* score_matrix = null; 80 int gap; 81 int open; 82 @disable this(); 83 this(string matrix, int open, int gap){ 84 this.score_matrix=parasail_matrix_lookup(toUTFz!(char *)(matrix)); 85 this.open=open; 86 this.gap=gap; 87 } 88 this(string alphabet,int match,int mismatch, int open, int gap){ 89 this.score_matrix=parasail_matrix_create(toUTFz!(char *)(alphabet),match,mismatch); 90 this.open=open; 91 this.gap=gap; 92 } 93 parasail_query sw_striped(string s1,string s2){ 94 return sw_striped(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 95 } 96 parasail_query sw_striped(char *s1,char * s2, int s1Len,int s2Len){ 97 parasail_query p; 98 p.seq1=s1; 99 p.seq2=s2; 100 p.seq1Len=s1Len; 101 p.seq2Len=s2Len; 102 p.result=sw_striped(p); 103 p.cigar=p.get_cigar(score_matrix); 104 return p; 105 } 106 parasail_result_t* sw_striped(parasail_query p){ 107 return parasail_sw_trace_striped_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix); 108 } 109 parasail_query nw_scan(string s1,string s2){ 110 return nw_scan(toUTFz!(char *)(s1),toUTFz!(char *)(s2),cast(int) s1.length,cast(int) s2.length); 111 } 112 parasail_query nw_scan(char *s1,char * s2, int s1Len,int s2Len){ 113 parasail_query p; 114 p.seq1=s1; 115 p.seq2=s2; 116 p.seq1Len=s1Len; 117 p.seq2Len=s2Len; 118 p.result=nw_scan(p); 119 p.cigar=p.get_cigar(score_matrix); 120 return p; 121 } 122 parasail_result_t* nw_scan(parasail_query p){ 123 return parasail_nw_trace_scan_16(p.seq1,p.seq1Len,p.seq2,p.seq2Len,open,gap,score_matrix); 124 } 125 void close(){ 126 parasail_matrix_free(score_matrix); 127 } 128 } 129 130 unittest{ 131 import std.stdio; 132 auto p=Parasail("dnafull",3,2); 133 auto q=p.sw_striped("GATTA","GACTA"); 134 writeln(q.seq1[0..q.seq1Len]); 135 writeln(q.get_cigar(p.score_matrix).toString()); 136 writeln(q.beg_query); 137 writeln(q.beg_ref); 138 assert(q.get_cigar(p.score_matrix).toString()=="2=1X2="); 139 q=p.sw_striped("GGCTTCTGATCAGGCTTCT","GACTA"); 140 writeln(q.seq1[0..q.seq1Len]); 141 writeln(q.get_cigar(p.score_matrix).toString()); 142 writeln(q.beg_query); 143 writeln(q.beg_ref); 144 assert(q.get_cigar(p.score_matrix).toString()=="7S2=1D1=1I1=7S"); 145 q=p.sw_striped("GACTAA","GGCTTCTGATCAGGCTTCT"); 146 writeln(q.seq1[0..q.seq1Len]); 147 writeln(q.get_cigar(p.score_matrix).toString()); 148 writeln(q.beg_query); 149 writeln(q.beg_ref); 150 q=p.nw_scan("GATTA","GACTA"); 151 writeln(q.seq1[0..q.seq1Len]); 152 writeln(q.get_cigar(p.score_matrix).toString()); 153 writeln(q.beg_query); 154 writeln(q.beg_ref); 155 assert(q.get_cigar(p.score_matrix).toString()=="2=1X2="); 156 q=p.nw_scan("GGCTTCTGATCAGGCTTCT","GACTA"); 157 writeln(q.seq1[0..q.seq1Len]); 158 writeln(q.get_cigar(p.score_matrix).toString()); 159 writeln(q.beg_query); 160 writeln(q.beg_ref); 161 assert(q.get_cigar(p.score_matrix).toString()=="1=1X2=4I1=10S"); 162 q=p.nw_scan("GACTAA","GGCTTCTGATCAGGCTTCT"); 163 writeln(q.seq1[0..q.seq1Len]); 164 writeln(q.get_cigar(p.score_matrix).toString()); 165 writeln(q.beg_query); 166 writeln(q.beg_ref); 167 // auto cigar=parasail_result_get_cigar(q.result,q.seq1,q.seq1Len,q.seq2,q.seq2Len,q.score_matrix); 168 // writeln(parasail_cigar_decode(cigar)); 169 } 170