B 0`8D@sdZddlZddlmZGdddeZGdddeZeZGdd d eZGd d d eZ d d Z ddZ GdddeZ ddZ ddZddZiZxeddD]\ZZeee<qWiZddZdS)z Simple Smith-Waterman aligner N)StringIOc@s$eZdZdZdddZd ddZdS) ScoringMatrixz Read scoring matrix from a file or string Matrix should be space-delimited in a format like: A C G T A 1 0 0 0 C 0 1 0 0 G 0 0 1 0 T 0 0 0 1 Rows and Columns must be in the same order NrcCs|s |s t|rt|}n t|}g|_d|_||_xf|D]^}|ddks<|sVq<|jst||_t|j|_ q<|}|j dd|ddDq-sz*ScoringMatrix.__init__..) AssertionErroropenrscoresbaseswildcard_scorestripsplitlen base_countextendclose)selffilenametextrfslinecolsrrr __init__s      "zScoringMatrix.__init__cCsl|jr |r ||ks||kr |jSd}d}x.t|jD] \}}||krH|}||kr4|}q4W|j||j|S)Nr)r enumeraterrr)ronetwowildcardZone_idxZtwo_idxibrrr score1szScoringMatrix.score)NNr)N)__name__ __module__ __qualname____doc__rr$rrrr r s rc@s eZdZdddZd ddZdS) IdentityScoringMatrixr cCs||_||_dS)N)matchmismatch)rr+r,rrr rAszIdentityScoringMatrix.__init__NcCs.|r||ks||kr|jS||kr(|jS|jS)N)r+r,)rrr r!rrr r$Es zIdentityScoringMatrix.score)r r*)N)r%r&r'rr$rrrr r)@s r)c@s&eZdZdddZddZddZdS) MatrixNcCs ||_||_|g|||_dS)N)rowsrvalues)rr.rinitrrr rQszMatrix.__init__cCs|j||j|S)N)r/r)rrowcolrrr getVsz Matrix.getcCs||j||j|<dS)N)r/r)rr1r2valrrr setYsz Matrix.set)N)r%r&r'rr3r5rrrr r-Ps r-c@s*eZdZd ddZdd d Zdd d ZdS)LocalAlignmentr*TFNc Cs:||_||_||_||_||_||_||_||_| |_dS)N) scoring_matrix gap_penaltygap_extension_penaltygap_extension_decayverboseprefer_gap_runs globalalignr! full_query) rr8r9r:r;r=r<r>r!r?rrr r^szLocalAlignment.__init__c Cs2|}|}|}|}tt|dt|dd}x"td|jD]} || ddqBWx"td|jD]} |d| dqfWd} d} d} xtd|jD]} xtd|jD]} || d| dd|j || d|| d|j }d}d}|| d| ddkr|| d| d}|| d| ddkrDd}nN|j sh|| d| d|j }n*|| d| dt d|j ||j }n|| d| d|j}|| | dddkrL|| | dd}|| | dddkrd}nN|j s || | dd|j }n*|| | ddt d|j ||j }n|| | dd|j}|jsv|jrt|||}nt|||d}|jsd}d}|r||kr|d|df}nf|r||kr|d|df}nF||kr|d df}n0||kr|ddf}n||kr$|ddf}nd }|d| krF|d} | } | } || | |qWqW|jr|jd} |jd} || | d}n|jr|jd} d} d} x@td|jD]0}|| |d| kr|} || |d} qW|jd} || | d}n | } | } | }d }g}g}x|| | \}}}|jrN| dkrp| dkrpPn"|jrd| dkrpPn |dkrpP|| | f|||d kr| d8} | d8} n*|dkr| d8} n|dkr| d8} nPqW||jr |||||t|t| | f| t|}t||| | || ||||j|j S) Nr )r rr)rr"r)rdrr"rBm)rrrr@)upperr-rranger.r5rr3r8r$r!r;r:minr9r>r?maxr=appendreverser< dump_matrixprint _reduce_cigar Alignment)rrefqueryref_name query_namercorig_ref orig_querymatrixr1r2Zmax_valZmax_rowZmax_colZmm_valZins_runZdel_runZins_valZdel_valZcell_valr4copZalnpathZrunlencigarrrr alignis:,,                   zLocalAlignment.alignc Cstjdtjd|tjdxt|jD]}|dkrPtjdntj||dxnt|jD]`}||kr||krtjdqptjd|||d|||d||f|krd nd fqpWtjdq6WdS) Nz - z  r-r z *z %5s%s%s$rA)sysstdoutwritejoinrFr.rr3) rrOrPrVrYZshow_rowZshow_colr1r2rrr rKs  BzLocalAlignment.dump_matrix)r*r*r7TFFNF)r@r@F)r*r*)r%r&r'rr[rKrrrr r6]s r6cCshd}d}g}x@|D]8}|r,||kr,|d7}n|rF|||fd}|}qW|rd|||f|S)Nr )rIrE) operationscountlastretrXrrr rM s   rMcCs*d}x |D]\}}|d||f7}q W|S)Nr@z%s%sr)rZoutnumrXrrr _cigar_strsric@sFeZdZdddZddZedd Zed d Zdej fd d Z dS)rNr@FNc Cs||_||_||_||_||_||_||_||_| |_| |_ | |_ d|_ d|_ ||_ ||_||_||_d} d} d|_d|_|j}|j}x|jD]\}}|dkr| |7} | |7} xt|D]F}|j||j|kr|jd7_n|jd7_|d7}|d7}qWq|dkr8| |7} ||7}|j|7_q|dkr| |7} ||7}|j|7_qW|| |_|| |_|j|jdkrt|j|j|j|_nd|_dS)NrMr ID)rPrOq_posr_posrZr$r_nameq_namerSr>r!r_offsetr_regionrUrErTmatches mismatchesrFq_endr_endridentity)rrPrOrmrnrZr$rQrRrSr>r!Zq_lenZr_lenr"jrdrXkrrr r%s\      zAlignment.__init__cCs||_||_||_dS)N)rorqrr)rrOoffsetZregionrrr set_ref_offset`szAlignment.set_ref_offsetc Csd}d}d}g}x|jD]\}}|dkrxJt|D]>}|j|j|||j|j||krh|d7}q2|d7}q2W||7}||7}n:|dkr||7}|d|7}n|dkr||7}|d|7}t|}qWd}x |D]\} }|d| |f7}qW|S)Nrr@rjXrkrlz%s%s)rZrFrPrmrOrnrM) rqposrposZ ext_cigar_strZworkingrdrXryrgrhrrr extended_cigar_stres.(     zAlignment.extended_cigar_strcCs t|jS)N)rirZ)rrrr cigar_strszAlignment.cigar_strcCsx|j}|j}d}d}d}d}d} xr|jD]f\} } | dkr|| 7}| | 7} xt| D]|} ||j|7}||j|7}|j||j|ks|jr|j||jks|j||jkr|d7}n|d7}|d7}|d7}qVWq*| dkr$| | 7} xt| D]*} |d7}||j|7}|d 7}|d7}qWq*| d krr|| 7}xZt| D],} ||j|7}|d7}|d 7}|d7}q@Wq*| d kr*|d 7}|d 7}|d 7}q*W|j r| d|j |j rdndt |jf|j r|jr| d|j |jfn| d|j t |jf|jd|jd|j|jd|j|jdg} tdd| D}d|}d|}d d|}|j}|j s||j}n|j}x|r&|r&|r&|j s| ||dn| |||r|d|}|d|}|d|}||d}||d}||d}n|}|}|}d}d}d}| ||j s`xD|D]}|dkrB|d7}qBWn"x |D]}|dkrf|d8}qfW|j s| d|n| d|d| || || d| |||jd| |x |D]}|dkr|d7}qW| d||jqW| d|j| d|j|jdf| d|jf| d|jdS)Nr@rrj|.r rlr]rArkNz-//-z zQuery: %s%s (%s nt) z (reverse-compliment)zRef : %s (%s) zRef : %s (%s nt) cSsg|]}tt|qSr)rstr)rrrrr r sz"Alignment.dump..z Query: %%%ss z Ref : %%%ss z %s r\z %s z Score: %s zMatches: %s (%.1f%%) dzMismatches: %s z CIGAR: %s )rnrmrZrFrUrTrPrOr!rprarSrrorrrurqrvrHr$rsrwrtr)rwraprgr"rxqrDrqlenZrlenrdrXryZposlensmaxlenZq_preZr_preZm_prer~r}Z qfragmentZ mfragmentZ rfragmentbaserrr dumps:    (0                 zAlignment.dump)r@r@FFN) r%r&r'rr{propertyrrr_r`rrrrr rN$s  ;  rNcsfdd}|S)Nc3sd}d}d}dkr tj}d}nt}xx|D]p}|ddkr|rR|rR|||fV|dddd}|d}t|dkr|d}nd}d}q.||7}q.W|r|r|||fVdkr|dS)Nr@r]stdinr>r rA)r_rr rrrr)seqnamecommentsfrspl)fnamerr gens,      zfasta_gen..genr)rrr)rr fasta_gens rcsfdd}|S)Nc3sdfVdS)Nr@rr)rrrr r#szseq_gen..genr)rrrr)rrr seq_gen"src Csd}d}yh|d}xX|D]P}d|kr|d\}}|dkr|d}|d}dd|dd D\}}qWWn YnX|r|r||dd |||ffSdS) NrA=rF:rcSsg|] }t|qSr)int)rrrrr r 7sz"extract_region..r r]z%s:%s-%s)r) rrOstartattrsattrryvrendrrr extract_region)s    $rZ atcgATCGNnZ tagcTAGCNncCsT|tkrt|Sg}x(|dddD]}|t|q(Wd|t|<t|S)Nr*r@)__cacherErI __revcomprb)rrfsrrr revcompKsr)r(r_iorobjectrr)ZNucleotideScoringMatrixr-r6rMrirNrrrrzipar#rrrrrr s( 7  0[$