PK f}7 META-INF/PK e}7 YMETA-INF/MANIFEST.MFManifest-Version: 1.0 Ant-Version: Apache Ant 1.6.5 Created-By: 1.5.0_09-b03 (Sun Microsystems Inc.) Main-Class: applications.MainWindow Class-Path: lib/jaligner.jar lib/swing-layout-1.0.jar X-COMMENT: Main-Class will be added automatically by build PK f}7 applications/PK f}7images/PK e}7 jaligner/PK f}7jaligner/formats/PK f}7jaligner/matrix/PK f}7jaligner/test/PK f}7jaligner/util/PK e}7javax/PK f}7 javax/jnlp/PK f}7learnedMatrices/PK e}7org/PK e}7 org/jdesktop/PK f}7org/jdesktop/layout/PK f}7scoringMatrices/PK e}7"6կapplications/Checker.class1j 9: ;<= > ?@ A BC DEF 9G H I J KLMN BO BPQ RS ; TUrfLjava/io/RandomAccessFile;(Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/Checker;fileLjava/lang/String;check()VlinelenIlineNummain([Ljava/lang/String;)Vargs[Ljava/lang/String;ck SourceFile Checker.java - java/io/File  java/io/RandomAccessFiler V java/io/IOException WXY Z[\ ]^java/lang/StringBuilderlonger than 60: _` _a bXc d  empty line:> ef gh wrong at: i-applications/Checker ,-java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;java/lang/Stringlength()Ijava/lang/SystemoutLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;toStringjava/io/PrintStreamprintlnindexOf(Ljava/lang/String;)I startsWith(Ljava/lang/String;)Zclose  ! *Y+M*Y,M"  #* $%&' () *+,-!7* L=>++ =< Y O Y /+%+ Y * L~*L"F 7;Wa j!"#%(')#4.+ /0 10&'() 23!LY*2L+"- ./#45 6)78PK e}7g}2}2'applications/ComplexityCalculator.class1j x| R R  R R R    R R ?  , , , ,   4 6 6    6@ R R?@4 R R   R R R  R R R ! 
"#$%& R'() R*+, R-./ R012 R34 R56 R789 R:; R<=>?@ RA B RCDalphabetLjava/util/Vector; scoringMatrix[[Dprob[D newAlphabetnewScoringMatrix()VCodeLineNumberTableLocalVariableTablethis#Lapplications/ComplexityCalculator;initializeAlphabetgetProb(Ljava/lang/String;)ViIstrLjava/lang/String;indexlensubStrletter clearProbcalculateEntropy(Ljava/lang/String;)DentropyDcalculateReciprocalProrecProprintScoringMatrixjreadScoringMatrixkfLjava/io/File;rfLjava/io/RandomAccessFile;exLjava/io/IOException;fileNamelinescorenormalizeScoringMatrixPownormalizeScoringMatrixminmax normalizeMoresumcalculateRecProWScoringMatrixvirecProWScoringMatrixcalculateModifiedEntropymodEntr singleProbpscalculateModifiedEntropy2calculateNorModifiedEntropystrLeninitializeNewAlphabetsicalculateNor2LetterEntropylet2EntrnewProbletterscomputeNewScoringMatrixindex1index2lets1lets2letter1letter2testNewScoringMatrix'(Ljava/lang/String;Ljava/lang/String;)Vstr1str2normalizeNewScoringMatrixPow!calculate2LetterEntropyWScoMatrix$calculateNor2LetterEntropyWScoMatrixcalculateSeqUnNormComplexity(Ljava/util/Vector;)DlcrssumLencalculateSeqNormComplexitycalculateComplexitymain([Ljava/lang/String;)Vargs[Ljava/lang/String;cc SourceFileComplexityCalculator.java {| }~java/util/Vector z | yzA EFRNCQEGHLKMFPSTWYV GHjava/lang/String IJ KL IM N OP QRS TUjava/lang/StringBuilder VW VX YZ[ \ ] java/io/File java/io/RandomAccessFiler ^ _Z `Za bc djava/io/IOExceptionknowledge/blosum62Matrix eH fg  entropy: ] reciprocal probablity: !applications/ComplexityCalculator en hFrecreciprocal probablity:sco-reciprocal probablity with a scoring matrix: memodified entropy: me2modified entropy version2: nmenormalized modified entropy: n2let2-letter entropy: 2letsco (2-letter entropy with a scoring matrix: n2letsco3normalized 2-letter entropy with a scoring matrix: test seqQEANQEYQEPVCSPVPEPEPEPEPEPEPPPPEPQPEPEPQPLPDPAPLPEEAEPEP ]i java/lang/Objectadd(Ljava/lang/Object;)Zlength()I 
substring(II)Ljava/lang/String;indexOf(Ljava/lang/Object;)I(I)Ljava/lang/String;java/lang/Mathlog(D)Dpow(DD)Djava/lang/SystemoutLjava/io/PrintStream;append(D)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintprintln#(Ljava/io/File;Ljava/lang/String;)VreadLinetrimjava/lang/IntegerparseInt(Ljava/lang/String;)Iclosesize elementAt(I)Ljava/lang/Object;equals(D)V Rxyz{|}~z|p2****Y*#1 2R*Y *  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  W*  WZ )3= G!Q"[#e$o%y&'()*+,-./ L=+>+: Y!:62":* #=**1cR$:α* 34567%8/9=:E6K<H5LLJE B9V<*R@A@BXI*%*+&+96=*1+**1oR(*1*1'kgcI(* FGH IJK(L9MPJVO4AXXVF`I*%*+&+96:*1(**1o*1(R(*1kI()(kI(. UVW XYZ([C\MYS^^_4>``^Nx<r=^*21&+,Y-*21./012(+,Y-/0*21./012+3& defg@ieekkqdwm  auxF ~ Y!M Y!N4Y+5:6Y78:6E9M66,,`":N*2-;R`6ӄ<:x{=Bqst'u1v7x:yDzR{a}gymusx{}f =0:3*I^'Q}~~vn3<-=*2>*21(Rӱ&,2  "03?< =>36#*2*21gdoR܄ͱ28>4'6?<9SH>KH6'*21cH6*2*21'oR.  "(2FLR4+!NSQ*@A*B*%*+&+99 Y!:6 6  {9 * 1f6  I+  `":* #6 * 2 1 Cc9  * 2 1c9   'k9  c9 'kI(E'kgI(gI(b &)36BNZetf EO 6o , &) I99*@A*G*H*%*+&+96  * * 1oR : 6 6  S9* 1>6   * 1* 2 1kc9   Rc9 `6 6   )    1oR(  1  1'kgcI (n&0AGMPZ]isz ) l$ SW - &M~ P  I99*@A*B*H*%*+&+96  * * 1oR : 6  :96   * 1* 2 1kc9   R 6  (  1  1'kgcI (Z&0AGMW Z d { p ) ]$ P> &Mf~ I99*@A*G*H*%*+&+96  * * 1oR : 6 6  S9* 1>6   * 1* 2 1kc9   Rc9 `6 6   )    1oR(  1  1'kgcI +9 ( oI(v !"#$&%0&A%G'M(P)Z*]+i,s-,123)6796;<= ) l$ SW - &M~ P  S* I<=E>7*,Y-* J 0* J 01 WɄBCDEFDLCRG*: HSK I*K:6R6+9+: Y!: g96  1": * #61cR$: 6  3 1#  1oR( 1 1'kgcI (oI(^KLM NON$P'Q.R:S@TLVUW`XlYtTz[\]_[abp C7 }8  ~'.1:  *@A Y!L Y!M Y!N Y!:9669*J L*J M+"N,":* -#6 * #6 * 2 1c9+"N,":* -#6 * #6 * 2 1c9*2>oRbQ^ghi'j*k5l@mCnPo]qdrlsvtvwxyz|}lkf v] R 8-'*~*K*L*+#>*,#64 ,<.=*2>*21(RH>MH6'*21cH6*2*21'oRB (.46?ALX^i}H #2Da"8Q6TZI:6R6+9+: Y!: g96  1": * #61cR$: : 9 9 66P9 1=6 1*21kc9   R  c9 `66)  1 oR( 1 1'kgcI(!  
#*6<HQ\hpv} ?7 #U- ~#*-6 }~ cI:6R6+9+: Y!: g96  1": * #61cR$: : 9 9 66P9 1=6 1*21kc9   R  c9 `66)  1 oR( 1 1'kgcI(oI("  #*6<HQ\hpv} ?7 #U-  ~#*-6 }~ :I:6R699 Y!: Y!: 6  +I\+ J :  9g96  1 ": * #61cR $: ̄ : 9 966P9 1=6 1*21kc9   R c9`66)  1oR( 1 1'kgcI(%  #);GRZ`l u   !"$2!8' c7 >b #U -::z8 1~#&)2 ; ~ FI:6R699 Y!: Y!: 6  +Ic+ J :  9c9g96  1 ": * #61cR $: ̄ : 9 966P9 1=6 1*21kc9   R c9`66)  1oR( 1 1'kgcI(oI('-. /0/ 1#2)3;4G5R6Z7a8g9s;|<=>94BCDEFGIJIMNO EST&V9S?XDY j7 >i #U-FFzD =~##& )2 ; ~ w;+,Y-M0*+N.1O+,Y-P0*+Q.1O^_:`;; RYSL+T*2UV%+,Y-M0+*2N.1O*2WV%+,Y-X0+*2Q.1O*2YV%+,Y-Z0+*2[.1O*2\V%+,Y-]0+*2^.1Oa*2_V%+,Y-`0+*2a.1O4*2bV%+,Y-c0+*2d.1O*2eV%+,Y-f0+*2g.1O*2hV1+K+L+i+,Y-j0+*2k.1O*2lV1+K+L+i+,Y-m0+*2n.1Oh*2oV+*2*2pP*2qV>YM,r W,s W,t W+T+K+L+i++,uv +*2w)de fg9hDifjqklmnopqr%sGtRuVvZw^xz{|}~  0zPK e}7/,applications/ComplexityCutoffGenerator.class1 N  K   K K             K    :   * @  1 1 1 1 1 7 7 :   K K K 1 * K *?   K KCD ConstantValueSEQNUMIlcrRfLjava/io/RandomAccessFile;seqRfLCRsLjava/util/Vector;'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this(Lapplications/ComplexityCutoffGenerator;lcrFileLjava/lang/String;seqFile printLCRs()VilengetLCRs(Ljava/lang/String;)VlinestrgetSeq()Ljava/lang/String;seq getLcrSeqsstartendindex getMeanStd&(Ljava/util/Vector;)Ljava/lang/String;elevmeanStdmeanstdnumgeneratetmpIgLjava/lang/Integer;cc#Lapplications/ComplexityCalculator;lcrLineseqLinecomstanDevrpreRk1k2lessZfirst generatorLjava/util/Random;randomscomVmain([Ljava/lang/String;)Vargs[Ljava/lang/String;cvpsc SourceFileComplexityCutoffGenerator.java Zijava/util/Vector XY java/io/File Zmjava/io/RandomAccessFile Z UV WVjava/io/IOException  java/lang/StringBuilder java/lang/String  q m i q yeah m hi q> - ********************java/lang/Double   !applications/ComplexityCalculator i i i ijava/util/Random   java/lang/Integer Z    lm pq sm  Z wx cutoff values is : i i&applications/ComplexityCutoffGenerator Z[ 
ijava/lang/Object#(Ljava/io/File;Ljava/lang/String;)Vsize()Ijava/lang/SystemoutLjava/io/PrintStream; elementAt(I)Ljava/lang/Object;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringjava/io/PrintStreamprintprintlntrimlengthindexOf(Ljava/lang/String;)I substring(I)Ljava/lang/String;add(Ljava/lang/Object;)Z(II)Ljava/lang/String;readLine startsWith(Ljava/lang/String;)ZparseIntremove(ILjava/lang/Object;)V doubleValue()D(D)Ljava/lang/StringBuilder;java/lang/Mathpow(DD)Dsqrt(D)DinitializeAlphabetinitializeNewAlphabetcomputeNewScoringMatrixnormalizeNewScoringMatrixPownextInt(I)I(I)V(Ljava/lang/Object;)Iseek(J)VcalculateSeqUnNormComplexity(Ljava/util/Vector;)D(D)V parseDouble(Ljava/lang/String;)Dclearclose KNOPQERSQTUVWVXYZ[\B**YY+N*Y- Y,N*Y- N=@ ]& &/="@!A#^4%_`AabBcdBefBgfhi\?* <=,Y*ղ]'()2(8*>+^  .jS?cd7kSlm\X+N-N- N-:-=*-WN*-W-`N* ]:0123467#8(95::=H>SAWB^*#0jSXcdXnfVofpq\NY!LY!M* "M,/,#$&,MY+,L* "MӧN+HK ]& FHI%J*K=LHPKOLQ^*LabNcdFrf>nfsm\;+Y!M>6* g*M,%>,&6,`&6*'W+dM*'W*,(* )]JVWX[\ ]-^4_?`KbRc_fjgth~ilmn^H?EtSK9uScdrfofvS}jSwx\Y Y!M99+ 6 6   A+ *+JY),)c9)-/c9  o9 o9-/g909Y,,M,,]Frstuv+wDxJyVu\{d|l}y~^R+1yPEjS cdzY{f |P}P~S i\»1Y2L+3+4+5+6Y!MY!NY!:999 6 6 6 66667Y8:Y:Y:B ڢ7 ڶ96  :Y ;:<W* =* =66 * "M,#$,* "M*,> * ":#$*?N*-@+*A9*YBW*C:6D9`D9   0oEk3#YG,66H*I* J* J:j ]7  1:IOXjz} &5=FSaty^=n{fFevSabcd f(rf1f4P7|P:P =jS @S CS F|SIySLvOsXjaaYjXY \OKY*2*2LL+M]^dPK e}7M-\,applications/CumulativeDFDataGenerator.class1} J K L MN OPQ RS J T U VW XY Z[@` \]^ J _` a b cde J f ghTOTALNUMI ConstantValueminDmax complexities[D()VCodeLineNumberTableLocalVariableTablethis(Lapplications/CumulativeDFDataGenerator;getComplexities(Ljava/lang/String;)VfLjava/io/File;rfLjava/io/RandomAccessFile;lineLjava/lang/String;exLjava/io/IOException;comFileindexgenerateibinNuminterval percentagessummain([Ljava/lang/String;)Vargs[Ljava/lang/String;cddfg SourceFileCumulativeDFDataGenerator.java *+ %& '& () java/io/File *2java/io/RandomAccessFiler *ijava/lang/String jk lkm no p+java/io/IOExceptionq 
rst uvjava/lang/StringBuilder wx wy zk{ |2&applications/CumulativeDFDataGenerator 12 =2java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;trimjava/lang/Double parseDouble(Ljava/lang/String;)Dclosejava/lang/IntegerparseInt(Ljava/lang/String;)Ijava/lang/SystemoutLjava/io/PrintStream;append(D)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringjava/io/PrintStreamprintln! !"#$%&'&()*+,R****- . /012,k=Y+NY- : Y : :" :*R :N**1**d1PS-> ! "'#,$3%>&A'K)P+T,^-j..H E34956 078T9:k/0k;8i<"=2, **g+oI+`:66g*1*`(kc>*1*`(kc1cR!1cR1cR96J1oR1c9Y*`(kc-N1234'5>7A8X9g<j=yA4CDEFGDJ.Rl>"N>"/0?8@&A)<"RB& CD,WYL+*2+*2-MNOP.EFG0HIPK e}76?66applications/Edge.class1* #$ # % & '()sourceLapplications/Vertex;sinkweightF()VCodeLineNumberTableLocalVariableTablethisLapplications/Edge;.(Lapplications/Vertex;Lapplications/Vertex;F)Vsosiw getSource()Lapplications/Vertex;getSink getWeight()F setSource(Lapplications/Vertex;)VsetSink SourceFile Edge.java applications/Vertex applications/Edgejava/lang/Object    Z **Y*Y*    l**+*,*% *   /* /*! /*& >*+ *+  >*+ ./ !"PK e}7%p11applications/Gbm.class1 lm n o p q rs l t u v w x y z { | } ~  X - 0l - 0 0 0  0 -  = b @ @   Fl F F F   0  b R R     Rl   0   R R  b  b F kl  R R  k   R R R R k k k             R R R <  R k k             0  l @    b l o   +  @ I<61 ! " #$ % & '() *+A ? ,- ./0 l 123 4 5 6 789} :; <=> :? 
@ A B C D E FG l H I JK LM LN @O 0P Q R STUcomCutD ConstantValuefLjava/io/File;rfLjava/io/RandomAccessFile;verticesLjava/util/Vector; subVerticesedgessubEdges vertexQueuelps repeatMatrix[[FnonRepeatMatrixfVecNor[F fVecUnNor scoringMatrix[[Dalphabet maskedSeqLjavax/swing/JTextPane;LCRLocsconsoleLjavax/swing/JTextArea;wholeSeqLjava/lang/String;DEBUGZ LOOKAHEAD FORESIGHTIZ(Ljava/lang/String;Ljavax/swing/JTextPane;Ljavax/swing/JTextPane;Ljavax/swing/JTextArea;)VCodeLineNumberTableLocalVariableTableithisLapplications/Gbm;fileNamet2t3t4initializeAlphabet()VreadRNRMatrices(Ljava/io/File;)VindexjrfmrowexLjava/io/IOException;readScoringMatrix(Ljava/lang/String;)VklinescorereadScoringMatrix_SEANesLjava/util/Scanner;firstprintMatricesRowByRowcreateFirstVectorwindowlen tmpWindowletter printVector(I)Vmark tmpVectorconstructSingleVertex(II)Vstartend startLetter endLettervLapplications/Vertex; dummySourceworkOnFirstWindowletter2letter1colcheckProbablity(II)Z construct difference1F difference2 addVertices(Ljava/lang/String;II)VstartPostmpIndexconstructVertices(Ljava/lang/String;I)Vstrth4similarpreviousVecUnNOr oldLetter newLettercheckSimilarity'(Ljava/lang/String;Ljava/lang/String;)Zc1c2findLetterPercentageVer(F)VlFappearedLettersperposInt[Ljava/lang/Integer;computePercentage(FLjava/util/Vector;)VlalconstructEdges(III)Vmth1th2th3pqv1v2Lapplications/Edge;checkConditions (IIIIIII)Z satisfiedfoomodifyVertexQueue'(Ljava/util/Vector;Ljava/util/Vector;)VtmpQueue tmpVerticesprintVertexQueueprevious printVertices printEdgestmpEdges sourceVersinkVerlengthaddDummySourceaddToSubVerticesVertexQueue(Lapplications/Vertex;)V addToSubEdgesfindLongestPath()Ljava/util/Vector;lpfindLongestPath_SEAN traverseEdgesversrcsnkvalidstmpELjava/util/Enumeration;wsrcLPsnkLPtraverseSubEdgessourceLPsinkLPtraverseSubEdges_wLookahead(Lapplications/Vertex;I)FfSight numVerEdges lookbehindweightenmaxW findEdges(Lapplications/Vertex;)Ifind*(Lapplications/Vertex;Ljava/util/Vector;)I 
revSubEdgesmodifyVertexInEdges*(Lapplications/Vertex;Ljava/util/Vector;)VtraverseVerticesmaxVermaxLpconstructLongestPath)(Lapplications/Vertex;)Ljava/util/Vector;printLongestPath(Ljava/util/Vector;)V identifyLCRscheckExistence(Lapplications/Vertex;)Zcopyvt1vt2clusterSubEdges tmpSubEdgesextractConnectedGraphsameexistsisrfoundworkOnSequence(Ljava/lang/String;IIII)IgenerateSequence_OLD&(Ljava/lang/String;)Ljava/lang/String;sequencestrTmplastSeqgenerateSequence_GUI'Ljavax/swing/text/BadLocationException;"Ljava/util/NoSuchElementException;dLjavax/swing/text/Document;norm%Ljavax/swing/text/SimpleAttributeSet;headprintPositionsposprintLCRBlocks difference LCRBlocks getPositions(I)Ljava/util/Vector; sortPositions&(Ljava/util/Vector;)Ljava/util/Vector; positionscurrent tmpLCRBlocksextend;(IIILjava/lang/String;Ljava/lang/String;)Ljava/util/Vector;decendPoslimit directionseqdecRegspointer startDecPos endDecPoscom1com2extRegcc#Lapplications/ComplexityCalculator; shareLetterstr1str2sharedcheckContribution9(Ljava/lang/String;Ljava/util/Vector;Ljava/lang/String;)Z currentBlock contributedregsblock appendLcrs8(Ljava/util/Vector;Ljava/util/Vector;)Ljava/util/Vector;lcrs appendedLcrs lowComRegstmpLcrs pickUpDrop8(Ljava/util/Vector;Ljava/lang/String;)Ljava/util/Vector;comcbStartcbEndcombine lcrBlockStart lcrBlockEnd extendToLeftblocks frontLcrsbackLcrs tmpBLOCKStmpBlock isFirstBlock mergePurgeendIndex startIndex nextBlockcheckCombinedSubBlock((Ljava/lang/String;Ljava/lang/String;D)Zseq1seq2cCutdelete findAlignment8(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;s1Ljaligner/Sequence;s2 alignmentLjaligner/Alignment; similarLenLjava/lang/Exception;aliPos checkLeftRegs+(IIIILjava/lang/String;D)Ljava/util/Vector;aliStartaliEndleft addToResult startResultendLeftresult tmpResult checkAdjBlockM(IILjava/lang/String;Ljava/lang/String;DLjava/lang/String;)Ljava/util/Vector;index2 aliStart2aliEnd2aliSeq1aliSeq2 
aliStart1aliEnd1decOrNotstart1end1adjBlockindex1start2end2checkDeletability:(Ljava/util/Vector;ILjava/lang/String;D)Ljava/util/Vector;maxIndexreadSampledLenRepPerfilterdiff1diff2shortestlongestrSizefromBacksampleseqLenrangeprePer preLongest singleCommax postProcess9(ILjava/lang/String;Ljava/lang/String;)Ljava/util/Vector;computeLCRPercentage'(Ljava/util/Vector;Ljava/lang/String;)V printLCRs9(Ljava/util/Vector;Ljava/lang/String;Ljava/lang/String;)VLCRs posBlocksmsLapplications/Masker;newSeqcolorMaskedSeqB(Ljava/lang/String;Ljava/lang/String;Ljavax/swing/text/Document;)VoldSeqdoctmpyellowHighlightdashseqPosstartt(IIIILjava/lang/String;)VidnextId SourceFileGbm.java *  )*   java/util/Vector               A VWRNCQEGHLKMPSTWYVjava/io/RandomAccessFiler Xjava/lang/String YZ [Z \] ^_` ab ^c d*java/io/IOException"> Problem reasing R NR matrices! e f4 java/io/File 4 g]java/util/Scanner hZi jkNon-Repeat matrix: l m4java/lang/StringBuilder fn fo pZ q4 m*Repeat matrix: r \s pcapplications/Vertex t0 u >4 TU GHv wx P4 Z[ yW zr {| }Z ~Zjava/lang/Integer | The percentage is: f /  = applications/Edge  * *   w *All vertices in the queue:  r  All vertices in the graph: All edges in the graph:   lp:  indegree:   weight:   V  |        y k * $This is a vertex with outdegree zero      *   Wwrong1wrong2wrong3 wrong4wrong5 * ^_ uv * >  !#javax/swing/text/SimpleAttributeSet Z  r %javax/swing/text/BadLocationException java/util/NoSuchElementException- r!applications/ComplexityCalculator * * *  g  right   BLOSUM62   rjaligner/formats/Pair java/lang/Exceptionfront  '( , 12backknowledge/sampledLenRepPer p D,:  AB     E WXapplications/Masker  ^_     \   STapplications/Gbmjava/lang/Objectadd(Ljava/lang/Object;)Z#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;trimindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;java/lang/Float 
parseFloat(Ljava/lang/String;)F(I)Ljava/lang/String;closejavax/swing/JTextAreaappendparseIntnextjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintln(F)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringprint()I(Ljava/lang/Object;)I)(Ljava/lang/String;Ljava/lang/String;IF)VsetPredecessorjava/lang/Mathabs(F)Fequalssize elementAt(I)Ljava/lang/Object;getStartLetter getEndLettergetapplications/MergeSort mergeSort([Ljava/lang/Comparable;)V(I)Ljava/lang/StringBuilder; incIndegree incOutdegree setElementAt(Ljava/lang/Object;I)V.(Lapplications/Vertex;Lapplications/Vertex;F)V(I)IcleargetPredecessor()Lapplications/Vertex; getIndegree getWeight()FgetLP getSourcegetSink(ILjava/lang/Object;)VisEmpty()Zremoveelements()Ljava/util/Enumeration;java/util/EnumerationhasMoreElements nextElement()Ljava/lang/Object;setLP decIndegreegetLBLP setSourcesetSink startsWith(Ljava/lang/String;)ZnextLinejavax/swing/JTextPane getDocument()Ljavax/swing/text/Document;javax/swing/text/Document getLength insertString5(ILjava/lang/String;Ljavax/swing/text/AttributeSet;)VintValueinitializeNewAlphabetcomputeNewScoringMatrixnormalizeNewScoringMatrixPow!calculate2LetterEntropyWScoMatrix(Ljava/lang/String;)D lastElement$calculateNor2LetterEntropyWScoMatrixjaligner/util/SequenceParserparse'(Ljava/lang/String;)Ljaligner/Sequence;jaligner/matrix/MatrixLoaderload,(Ljava/lang/String;)Ljaligner/matrix/Matrix;jaligner/SmithWatermanGotohalignV(Ljaligner/Sequence;Ljaligner/Sequence;Ljaligner/matrix/Matrix;FF)Ljaligner/Alignment;jaligner/Alignment getSimilarityformat((Ljaligner/Alignment;)Ljava/lang/String;java/lang/Double(D)Ljava/lang/String; parseDouble maskReturnjava/awt/ColorYELLOWLjava/awt/Color;javax/swing/text/StyleConstants setBackground9(Ljavax/swing/text/MutableAttributeSet;Ljava/awt/Color;)VsetBold*(Ljavax/swing/text/MutableAttributeSet;Z)VhasNext       Eb****,*-**+*Y *Y *Y *Y *Y *Y *****6* Q Z?9 ABCDS#W.X9YDZO[Z\e]q^}_`abcbd!>"#$%&'()*R*Y *W*W*W*W*W*W*W* 
W*!W*"W*#W*$W*%W*&W*'W*(W*)W**W*+W*,W Zh ijk)l3m=nGoQp[qerosytuvwxyz{|}! #$+, -Y+./M0Y1N6d,2N,2N-3N6E-456*2-67Q*2-7Q-`8N,2W,2W,2W6d,2N,2N-3N6E-456*2-67Q*2-7Q-`8N,9 M*;<: "',6>D[irx~ !p >4-/I.h"4-I.h" /0 12 #$ 34F ~0Y1M0Y1N=Y+>:-Y./:6E2M66,,`63N*2-?R`6ӄ9:x{: B'17:DRagmsx{}!f =0.:35*I"^'Q}12~#$~%v6n78,r MN:6-Y+./:62W6M62M@Y,A:6* BW6*2B?RՄ: : Z "(.8;AKUZ`cv|!p "N..1Q"j92#$67 :; |<=*QCDE<C=)CFYG*20H4IJKײCLCLCLCLCME<C=)CFYG*20H4IJKײCLCLCL R8>DJPV\dlt!4,. F"n,.fF"#$>4:+N=+N0Y1:62-6:*O6**0 bQ-8N6**0Q6**0nQ >!,<BHR `f!p"!#!\ ,-5"K"i"#$?@AxBCDFM *M*M>$CFYG,0HPIJKܲCL & () *,-.9-?/E1!*'"F#$FEAF GH>`QN`Q:RY- S:RYTT S:U* W 4567,839=:!H>#$>I>J7K/L!MN,ONP4A *+V+N66666dq6+`6M`S+```6N*,O6*-O6*21*W*`X >?@A B#C.D8EIFSG]HlIwJKAN!\ I?Q.`Rw"#$? @.~0{STU6>*20*0fY8*20*0fY8> STU*V2W4Y!>6#$606S4V WX* YXZ[b+N60Y1:66dF+`6:*O6*21*W*``dX . ^_`ab*c5dCeMf[aah!RL"b#$b?b-b\\@SBP]^_+N66RYZ::+6: 0Y1: 0Y1: * [6  d 6: +  `6:  d6: * O6 * * 0 fQ* * 0nQ* O6 * * 0 bQ* * 0nQ*   \ r Zmn opq r2s8t;uDvMwYxdyoz{|}~!z oZ- #$`a@ bMNc  ? )d 2e ;\ fgb>+,]>   !*#$hi bjk * ^=Y NRYZ:6O* _R:`:-O6 -Wa:-O6 -W-^=#n8b:6"-c0:bYdSe6-2f:CFYGIPIJKӲCL r"07?ELS[ahnsy!71`?)-R"`%"`0"#$lX@mMNymnXgopqrr,^>#n8CFYGgIhiI#HjIHJE6.,_0:CFYGIPIJKҲCL &  7@Kekq!HK`:1"r#$rsXrtm@ gnXuvL0Y1:0Y1:6666 * ^6 RYZ: RYZ: kYl: 6 d* cR:  `:?6 a:?66`6 * cR:  `:?6 a:?6 * m3 n o*  pkY  q: *  Wd6y7 v'9BN\cjqx{ !{JwE5#$xyz  KL".{| 's 0}N 9~N B9   `6ddd6  r6  Ed=83- dd 66 .  #3CTZ]!f `#$`"`.`{`|`x`y`z] S sRYTT SN*s*-W !*#$ON*gCtERYZL+M*^>6*cRL+uM,eCFYG+`II+aIPI+vhwI+xHwI+yHwI,`II,aIJEJCFYG+`II+aIPI+vhwI+xHwI+yHJE8 2 #059 !4"#$MNNsDY M * M* MRYZN-:,^6CFYGzIhJE6,cRN-u:gCFYG-`II-aIPI-vhwI-xHwI-yHwI`IIaIJEJCFYG-`II-aIPI-vhwI-xHwI-yHJE6 B !$*DNX^c!HG"#$w !MN$N*sD Y M * M* MkYlNRYZ:RYZ:0Y1:,^6CFYG{IhJE6,ckN-|:-}:CFYG`IIaI~IyHIvhI`IIaI~IyHIvhI-HJEa B$% &()!*3+<,B-\.f/p0v1|2.4!\ _"#$w!9*N3N<`B*YRYTT SL*+*+ 89:;!#$ ONZ* +*s*+W ? @AB!#$ONLRYZMkYlN* ^6d6** cRMkY+, qN* -ױ & FGHI$J1K<LEIKN!>,"L#$LONDMN<93s-RYZL**RL*+*M, RSTV&]+^! -#$%MN+|*LRYZM**RM*,+ cd ef g(i! 
*#$( MNPMNY :::* : 8 8 8 6 6 * *+6  Ak:|M}N,y8 -y8 8+,W6  ^>k:|M}N8,yb-y W ^^<k:|M}N,y8  b8 - -,U* -O6 -* - p*--v6  *-W*  W 2nop qrstu"v%w+y5z<|B}CMY_ekqx ",17<EO!Ew P#$PNNNLN C@9=41X".X %+X ((. +%" o+=>kYl:RYZ:RYZ:0Y1: 8 8 8 * ^6 6 * *+6  =  * *  _k:|:+y8 }:* O6 y8 8 b  b8  U*  p*v6  *W*  W* ^6 >ECE=4=/ ' (1=GNTVky %*!o" +#$+N)J'< 9N N(`+X.X 1X :@ =. <>6kYl:RYZ:RYZ:0Y1: 8 8 8 * ^6 6 * *+6  >  * *  _k:|:+y8 }:* O6y8 *b8   b   b8  U* p*v6 *W*  W* ^6 69CE>'>"  + ),/3<?IPVXm {     #$ ()*+",'-/.4193!{"<#$<N<:J7<.9%N N)`,X / X 3 X <@ ? B FRYZ:RYZ:kYl:* :8 +8QGk:+}|:}:*bF%%8 F;< =>?&@*D.E7HAIMJYK`LgMxNOV!\ #$NX }NtNk9&b*^X[=* ^>6RYZ:kYl:1,* ck:|:+ 6ք= :^_ ` abc)d6e=fFgLiRkWlYn!H[#$[NY" Q@ NEMN<9W>,^66RYZ:kYl:/),ck:|:+ 6؄> :wxy z{|(}2~9BHNSU!RW#$WNWU"O@ LCMN:9hRYZMkYlN* ^66G* ckN-|M,+ -+-}M,+-+* -p :#05=EJRWag!>K"h#$hN`MNX9O@fRYZNkYl:,^66G,ck:|N-+ +}N-+ +,p :!,2:CIQW_e!HK"f#$fNf^MNU9O@YRYZLRYZM* ^> 886** cRL+y8 8+M*,: 2 (5;CGIOV!R"-"Y#$QMNINAs>X:XV5Y MRYTT SN+:-,u:, ")3!45#$5N-ONMN_=RYZN0Y1:+^6=:+cRNCFYG-`II-aIPIJKƲCL *  !*RX^!>_#$_]" UMNL`FG **L+  ! #$ \* +O=  ! #$MN -0+^>kYl:6+ck:,W ")/!>"0#$00+s"9*\Y LkYlMRYZNRYZ:* x* kM+,W,|N6* ^6K* A* _kM,|:-+,W* W* ^6*+*  R  !+7=BEN_lr{ !"!HEXwNO@#$9MN!N* c<RYZMkYlNY : :* kN* -W-|M* ,W* , CE-}M* ,W,W* , CE-|M<66* ^6wp* _kN-|:,E* W* -W-}:* WW* CE6* ^6RM66* ^6* _kN-|:, 6j-}: , X*6  )* WW*  CE* kN* -W* ^6k6* ^6t* _kN-|: , R* W* -W-}: * 6  )*  W W*  CE6}6w* NS'( )*+',+-7.@/E0N1Y2a3f4o5v6789:;<=>?@ABCDEFG J KLMO%P(Q+R4T@UMVSW\XbZh[q\y]~^_`acdegikmnopqrst uvw!x&y0z8{D|L~ORU[^b!WNwy@yJ haN SvN!+ [N (3w+04'@c#$a< YMNQ9H J*+6*6* +* s* s**:*W 6  &-17ADG!\ 7 J#$J`JxJyJzJa @"530Y1M+N6-M--C+E*2N-/-&-3NFYG,I-IJM*2N-$-FYG-II,IJM:, : J   '/<AT_cl!>12#$`  ~I MYN0Y1:+::@Y*A:6:zp:*M,,FYGIIJ-*M,,FYGIIJ-: :: :<23:FYGIIJ::: :(FYGIIIJ:H ! 
'*/2AHPqy!z 9 9 9 #$` ':;*F+^=0Y1N6,+c0NCFYG-IPIJKԲCL "  9?E!4/"F#$FAs 9` t0Y1M+^>6Z+c0M,56,6:,`8:??d6CFYG,IwIJKCL 6    ( 1 ; H Ngms!\ (?-16I;,JH]"t#$tl` gs Y M0Y1N*^66Y :RYZ:6l*c:^6 6   G cR:`N,-O6 ,-WaN,-O6 ,-W , V%.7ELV b!h"o#u${%&'(+!p OK. LN@ 1o"#$5`s-%}.tMNUY M0Y1N+^666b:6 +c0NbY-dSeY :0Y1: 2d6`Q: 6  T 2fN 26`(FYG IIQIJW-: 6,-W FYG IIQIJW^6Y : 6  I c0N-56 - 6: - `8:  ? ?d6  -W   #0123#4-576D4J8O9X:a;l<u=>?ABCEF=HIJK LMN(O2P?QERLKRU!&$"xX" -- 2J ? M" U#$UME`?@<9#2opXaI S ĻY :666 9 9 d6:Y:]C6d6}dt9 6:9   E6`6 946`6 ˜"FYGhI hJrdic^9 6:9   46`6 ˜"FYGhI hJ61`6 ˜"FYGhI hJ^:6`6`v9 d6:9   E6d6946d6  ˜"FYGhI hJW`f`9 d6:9   46d6  ˜"FYGhI hJW,'d6 FYGhI hJ^ .KZ [\]#^,_1`6a;b@cJdMeRfahjjtl}pqrtxy{|}!@FQW`"-6BKSV\e!M:4#$\     #,gK>+:0Y1:N626:,56>N 8: . #+16>FI!H+-K#$KKIF`=B? s6,:0Y1:6^6SN_0:56  6?6  `8?6 - d 6:*+6 : %1:GUajmp!z :3- G&I UJ s#$ssspmda"Z@~ +N,:-0W-  ! "#!4 #$  IY NY :Y :+:0Y1:0Y1:6 6 6 6 6 Y:-ss66660:56  `8?60:56  6?6  `8?6  "  d`6 6660:56  6?6  `8?6 6 6 6 *   ,Nd6 *   ,N,N`6 *   ,:956  6?d6 `8?6,696-^t-_0:56  6?6—(*,6-,6  d6 6 6FYGhIhJW68—/*,6,6  `6-^6 -^60:56  `8?61^6  d0:56  6?6%˜  6  6  `6FYGhIhJW -^˜ W n[()/*2+>,G-L.Q/V0[1c2g3l4r5u6x9:;<>?@ABDEFGHIM QRS*T8U;W@YEZH[K]\`aagcue}ghijknrsuvwxz{ |~ ?BPafov|&;CF!|B o r u xI#$I IA8/,&#/2 5 8- ;\ >  G +M0Y1N0Y1:,^66`,_0N-56-`8?6,`_0:56  6?6  d  AFYG-6II `8IJN,W,W,-,^6WQ6,^6, ^#,6>KXan~!p >KzJad nWI #$ @" F6Y:FYG+I,IJ9)6 *   9@C!HF#$FFFC :9 G0Y1N+:,:˸θ:6YN:-@C *  & - 3 @CE!\ 2 ,! &"#-$E9%G#$GG?&'(w Y :9 Y:     @ d`d69  $FYGhI`dhJd`d> `d69  #FYG`hIhJW B  $) /!C$K%l'y(+,.!f #$)*IJ +   , +N0Y1:0Y1:66,^,_0:56`8?6 6  [+^E+_0:566?6   6 +`6+W6 r+ ^3456#7.879E:H;M<V=a>j?w@~ACDGIKM6Q!z w- 7n-E`. H] "#$/+0 .12;Y :9 d6: -56 - 6?6 - `8?6 d6:0Y1:ն]* : * :N56 w56 6?6 `6?60Y1:0Y1:ն] `d `d6:`d`d6:`8:56  6?6 `8?6ն]`d`d6: `d `d6:6ն]O6FYG`dhI`dhJW*:*:6FYG`dhI`dhJW*:*:FYG `dhI `dhJW* :*: ,V W XZ[+\8]E^N_X`eboewfghijklpsvwxy&z<~QT^af.8! +O+34567*89T:;#$;;;<;=;;;E 2/ / $ > +? 
8@EN&ABm Y :0Y1:66 +_0:56  6?6 `8?6  +d_0:* -ն:^:+^d0+`_0:* -ڶ:^ ڶW F "+8FJVgoy!f #$C /;< +~- DBY L=Y۷>M-Y,./N-2:+W-2:-9M+<?: . #(/8<?@!>*#6@12B#$:MER$0+NY :-^8990Y1: 0Y1: -^Y:     6  P- _0:  56, 6?d `8?6:   9޶W 6 6699*:,N660Y1:996 ^ _0:3:566?6w56 ` 6?656`89gk96A:d6!d6""!gk96gk96 691 gk96  W^669,_0:  9 9609 66 -^8Y :  -_0:  56 ,  6?d  `8?6:   9Y :*-,:^6!6"!e!d_0:  ڶ] !6"-W6##! #_0: - #" -W-^8-W -^8- e ,4=BGLQ]hq+4BPZ`gnu  !$%&()!*(+1,J-U.^0}46789:;<=>?BCDFGCLO QTVW%Y([+\._!t%q3-TV" n0F!u)G"-3 H+IJs$5#{J!xK"^- = " ~.{CxuoLiMf]NZnWOTP1/0#$00. %@XQR# ` , STWY :Y :*:^ *:*,:*:*,:*,:*,- . d efg!h)l2n:pCqLsTt!HW#$W"WWE NE >UV? {! #$`WX +^60Y1:Y:::Y: 6  /+ c0: FYGIwI IJ: 3:,3:*:*,*: : e N(2=X^er{! =` +3" 9 #$YE@Z[\](} ^_:Y:Y:@Y,A:66 6 6 : rB:-6  6?6 `N6?6 + 6: -- + `6: --  `6 + `+N6: -- : > j  #&),08>FMVcv!9 #$`Zab c :;#I&J )d ,e 0 fgD0Y1:0Y1:0Y1:@Y*A: 6 6 *:*:5(56 : 6: `8:: CLCE :6 * s* s*s6  * Q* Q 3:*6 * : :?: .: "(+.39AFQZ^hvz          ! "!" "  12 #$xyzaE `hi(:; +- .< jkPK e}7hQcc#applications/LenRepPerSampler.class1 A Q  > >                  @ @    >   > > combinedRfLjava/io/RandomAccessFile; sampledRf()VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/LenRepPerSampler;samplerItmpIgLjava/lang/Integer;jindexmi generatorLjava/util/Random;lineLjava/lang/String;lenrepPer aveRepPerDexpecValexpecSqutmpD shortestLen longestLensortedR[Ljava/lang/Integer;randomsLjava/util/Vector;kreadSampledLenRepPer()Ljava/util/Vector;rfv useSample(I)Vdiff1diff2index2pershortestlongestlimitfoundZrangeprePer preLongestmain([Ljava/lang/String;)Vargs[Ljava/lang/String;lrps SourceFileLenRepPerSampler.java EF java/io/FileR/cise/research/tamer/xli/LCR/data/swissprot/repeatPercentage/combinedLenRepPer.txt Ejava/io/RandomAccessFile E BCknowledge/sampledLenRepPerrw DCjava/io/IOExceptionjava/util/Randomjava/lang/Stringjava/lang/Integerjava/util/Vector Ep    p java/lang/StringBuilderline:     F F kl , :1: 2: 3: applications/LenRepPerSampler 
opjava/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)VnextInt(I)IindexOf(Ljava/lang/Object;)Iadd(Ljava/lang/Object;)Zapplications/MergeSort mergeSort([Ljava/lang/Comparable;)VintValue()Ijava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnseek(J)VreadLine()Ljava/lang/String;trimappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString(Ljava/lang/String;)I substring(II)Ljava/lang/String;(I)Ljava/lang/String;java/lang/Double parseDouble(Ljava/lang/String;)Djava/lang/Mathpow(DD)D writeBytessqrt(D)Dabs(D)Ljava/lang/StringBuilder;close elementAt(I)Ljava/lang/Object;parseInt >ABCDCEFG9*YL*Y+Y L*Y+  L47 H"&478I &JK8LM9NOPFGܻ YLY:Y:Y:99 9 9 99:Y:69+۶=Y:WS642=* 6*::Y !""#$%&6':`(:)9   *oc9   ,.*oc9  c9p>* Y "%"#/j*o9  ,.g09  g19  c9* Y "%" 2%"23"#/99 9 4*5* 5: H3!#"/#5$=%F(I)Q*Y+],g-r.z/05789:;<;=>?@AB CD)E0F6G:HUI[JcKsL}MNOPQR7UVYXZIY.QRg STURVR(QR6WR9XRLMNOYZ[\]\#^\&_`)a` ,b` /c` 2d`5e`=fgFhiIjRklGBYLY MY,N-:+W-:-5M+<? H. ]_`a#b(c/d8f<i?h@jI>*JKmC#[\@LMBNO:niopGm)I*6:6Y:9 6 6  7::8&6'96 %&6`'96 :&6`()9 1 +gkI6Y ;"2#$n h d6d6+gkI6Y <"2#$( gkI6Y =" 2#$  69 H"opr stuv"w.x5y>zK{T|d}m~{!%(I_qRYrR>VRTsR{t`KuR dvR )NO)]R'w`!Pi xyz\{` XR  |R }~GO>Y?L+*29@HI OPK e}7:Wdapplications/MainWindow$1.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$1;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! 
"#  # $%applications/MainWindow$1java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$0008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}77u   applications/MainWindow$10.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$10;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$10java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$9008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* Z   A *+ \]    PK e}7_   applications/MainWindow$11.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$11;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$11java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$10008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* g   A *+ ij    PK e}7#g   applications/MainWindow$12.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$12;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! 
"#  # $%applications/MainWindow$12java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$11008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* q   A *+ st    PK e}7ԉ   applications/MainWindow$13.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$13;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$13java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$12008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* {   A *+ }~    PK e}7+   applications/MainWindow$14.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$14;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$14java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$13008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7XC   applications/MainWindow$15.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$15;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! 
"#  # $%applications/MainWindow$15java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$14008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7*   applications/MainWindow$16.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$16;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$16java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$15008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7\W   applications/MainWindow$17.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$17;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$17java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$16008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7:|applications/MainWindow$2.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$2;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! 
"#  # $%applications/MainWindow$2java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$1008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7applications/MainWindow$3.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$3;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$3java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$2008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7iPapplications/MainWindow$4.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$4;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$4java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$3008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7ICapplications/MainWindow$5.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$5;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! 
"#  # $%applications/MainWindow$5java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$4008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7W\applications/MainWindow$6.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$6;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$6java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$5008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+*    A *+     PK e}7[applications/MainWindow$7.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$7;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$7java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$6008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* 0   A *+ 23    PK e}7yapplications/MainWindow$8.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$8;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! 
"#  # $%applications/MainWindow$8java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$7008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* H   A *+ JK    PK e}71Aapplications/MainWindow$9.class1&    this$0Lapplications/MainWindow;(Lapplications/MainWindow;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLapplications/MainWindow$9;actionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent; SourceFileMainWindow.javaEnclosingMethod! "#  # $%applications/MainWindow$9java/lang/Objectjava/awt/event/ActionListenerapplications/MainWindowinitComponents()V access$8008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)V0  4 *+* P   A *+ RS    PK e}7 . IkIkapplications/MainWindow.class1                               !       )  ,        5     ;  >   B                  U  X   \              k  n  q               ! !  ! ! ! ! ! ! !                 !      , , ! ," )#$% !&' !() !*+ !,-. ! 5/ 50   12 > >3 > 45 >6 7 >8 ;  9  : 5;<  = 5>? @ AB 5C D  E  F UGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~  U U X X X   \  , :! n >! n" q q D! q" n I! k N! Q! T! W! Z! _! b!      B ! > B   !  ! ! ! v v v z z  z !   ! 
U \                     arg1Ljava/lang/String;arg2arg3Iarg4arg5arg6gLapplications/Gbm;AboutLjavax/swing/JMenuItem; AboutDialogLjavax/swing/JDialog;Vars VarsDialog clipboardLjavax/swing/JTextArea;console consoleButtonLjavax/swing/JRadioButton;jButton1Ljavax/swing/JButton;jButton2jButton3jButton4jButton5jButton6jButton7jButton9 jComboBox1Ljavax/swing/JComboBox; jComboBox2jFormattedTextField1!Ljavax/swing/JFormattedTextField;jFormattedTextField2jFormattedTextField3jLabel1Ljavax/swing/JLabel;jLabel2jLabel3jLabel4jLabel5jMenu1Ljavax/swing/JMenu;jMenu2jMenu3 jMenuBar1Ljavax/swing/JMenuBar; jMenuItem1 jMenuItem3 jMenuItem4 jMenuItem5 jMenuItem6 jMenuItem7 jMenuItem8jPanel1Ljavax/swing/JPanel;jPanel10jPanel2jPanel3jPanel4jPanel5jPanel6jPanel7jPanel8jPanel9 jScrollPane1Ljavax/swing/JScrollPane; jScrollPane10 jScrollPane2 jScrollPane3 jScrollPane4 jScrollPane5 jScrollPane6 jScrollPane7 jScrollPane8 jScrollPane9 jScrollPaneA jSplitPane1Ljavax/swing/JSplitPane; jSplitPane2 jSplitPane3 jTextArea1 jTextArea2 jToolBar1Ljavax/swing/JToolBar; lcrPosAreaLjavax/swing/JTextPane; lcrPosButton maskSeqArea maskSeqButtonseqArea seqButton()VCodeLineNumberTableLocalVariableTablethisLapplications/MainWindow; lostOwnershipH(Ljava/awt/datatransfer/Clipboard;Ljava/awt/datatransfer/Transferable;)Vc!Ljava/awt/datatransfer/Clipboard;t$Ljava/awt/datatransfer/Transferable;initComponentsAboutDialogLayout!Lorg/jdesktop/layout/GroupLayout;VarsDialogLayout jPanel8Layout jPanel7Layout jPanel9LayoutjPanel10Layout jPanel1Layout jPanel2Layout jPanel3Layout jPanel4Layout jPanel5Layout 
jPanel6LayoutlayoutVarsActionPerformed(Ljava/awt/event/ActionEvent;)VevtLjava/awt/event/ActionEvent;AboutActionPerformedjMenuItem7ActionPerformedjMenuItem6ActionPerformedjMenuItem5ActionPerformedjMenuItem4ActionPerformedjMenuItem3ActionPerformedjButton5ActionPerformede'Ljavax/swing/text/BadLocationException;dLjavax/swing/text/Document;jButton4ActionPerformedjButton3ActionPerformedjButton2ActionPerformedjButton1ActionPerformedfileLjava/io/File;fcLjavax/swing/JFileChooser; returnValjButton7ActionPerformedjMenuItem8ActionPerformedjButton9ActionPerformediurlLjava/net/URL; Ljava/lang/NullPointerException;Ljava/net/URISyntaxException;charIndcaretPosLjava/util/Vector;seq$jRadioButtonSequence4ActionPerformedjTextAreaSequence4CaretUpdate!(Ljavax/swing/event/CaretEvent;)VLjavax/swing/event/CaretEvent;jTextAreaSequence4MouseClicked(Ljava/awt/event/MouseEvent;)VLjava/awt/event/MouseEvent;jTextAreaSequence4FocusGained(Ljava/awt/event/FocusEvent;)VLjava/awt/event/FocusEvent;$jRadioButtonSequence3ActionPerformedjTextAreaSequence3CaretUpdatejTextAreaSequence3MouseClickedjTextAreaSequence3FocusGained$jRadioButtonSequence2ActionPerformedjTextAreaSequence2CaretUpdatejTextAreaSequence2MouseClickedjTextAreaSequence2FocusGained$jRadioButtonSequence1ActionPerformedjTextAreaSequence1CaretUpdatejTextAreaSequence1MouseClickedjTextAreaSequence1FocusGainedjMenu1ActionPerformedjMenuItem1ActionPerformedmain([Ljava/lang/String;)Vargs[Ljava/lang/String;m access$0008(Lapplications/MainWindow;Ljava/awt/event/ActionEvent;)Vx0x1 access$100 access$200 access$300 access$400 access$500 access$600 access$700 access$800 access$900 access$1000 access$1100 access$1200 access$1300 access$1400 access$1500 access$1600 SourceFileMainWindow.java *+ .+ /+ 0+ 1+ 2+ 3+ e+ C+ d+ D+ B+ 4+ 9+ :+ ;+ <+         javax/swing/JDialog javax/swing/JScrollPane javax/swing/JTextArea      javax/swing/JToolBar javax/swing/JButton       javax/swing/JSplitPane    javax/swing/JPanel javax/swing/JRadioButton   javax/swing/JTextPane  
                   javax/swing/JComboBox javax/swing/JLabel  javax/swing/JFormattedTextField             javax/swing/JMenuBar javax/swing/JMenu javax/swing/JMenuItem            GBM v1.0 beta  java/awt/Color    java/awt/FontCourier        WThis graphical interface is based on an algorithm, Graph-based method (GBM), formulated by Dr. Xuehui Li at University of Florida. This program, subsequent versions, and further information, may be obtained from http://cap5510.wordpress.com, or the author's website http://www.cise.ufl.edu/~sm2. Version 1.0 was released on November 30, 2007.    org/jdesktop/layout/GroupLayout      ! " #$% #( )* #+ ,* -Variables --------- 1) Scoring matrix: Choose from a number of PAM and BLOSUM scoring matrices. BLOSUM62 is the default. 2) Threshold 1 & 3: These are used in determining the weight of an edge between two vertices. Refer to the paper on the algorithm for more information.** 3) Threshold 2: The window size used when constructing the vertices. Refer to the paper on the algorithm for more information.** 4) Learned matrix: combinedMatrices90 means the matrices are generated with ( a = 0.9* ) in the forget rate fomula ( a^k ). matricesWForgetComparision.txt is the comparition between the non-repeat matrix and the repeat matrix in every file. 
**http://cap5510.wordpress.com .Graph-Based Algorithm v1.0javax/swing/ImageIcon/ 01/images/open.gif2 34 5 67 8applications/MainWindow$1 InnerClasses 9 :; #</images/cut.gifapplications/MainWindow$2/images/copy.gifapplications/MainWindow$3/images/paste.gifapplications/MainWindow$4/images/delete.gifapplications/MainWindow$5/images/print.gif/images/close.gifapplications/MainWindow$6 = > ?TahomaMasked sequence@ ABjava/awt/Insets C DE #F GH I LCR positions #J Kjava/awt/Dimension L MNSequence OConsole #P #Q javax/swing/DefaultComboBoxModeljava/lang/StringBLOSUM30BLOSUM35BLOSUM40BLOSUM45BLOSUM50BLOSUM55BLOSUM60BLOSUM62BLOSUM65BLOSUM70BLOSUM80BLOSUM85BLOSUM90 BLOSUM100PAM10PAM20PAM30PAM40PAM50PAM60PAM70PAM80PAM90PAM100PAM110PAM120PAM130PAM140PAM150PAM160PAM170PAM180PAM190PAM200PAM210PAM220PAM230PAM240PAM250PAM260PAM270PAM280PAM290PAM300PAM310PAM320PAM330PAM340PAM350PAM360PAM370PAM380PAM390PAM400PAM410PAM420PAM430PAM440PAM450PAM460PAM470PAM480PAM490PAM500 R ST U VScoring Matrix "W3 Threshold 115 Threshold 25 Threshold 3 XcombinedMatricesColByColcombinedMatricesColByCol085combinedMatricesColByCol090combinedMatricesColByCol095combinedMatricesRowByRowcombinedMatricesRowByRow085combinedMatricesRowByRow090combinedMatricesRowByRow095Learned MatrixGoapplications/MainWindow$7Fileapplications/MainWindow$8 Load sequenceapplications/MainWindow$9 #YExitapplications/MainWindow$10 #ZEditCutapplications/MainWindow$11Copyapplications/MainWindow$12Pasteapplications/MainWindow$13Deleteapplications/MainWindow$14 Select Allapplications/MainWindow$15Help/images/about.gifapplications/MainWindow$16 Variablesapplications/MainWindow$17 [\ ] ^L _ `a ba cde fg hL%javax/swing/text/BadLocationException i jk l m njavax/swing/JFileChooser op qrjava/lang/StringBuilder > Opening: st uv. wv s#> Open command cancelled by user. x y >Opening: ">Open command cancelled by user. 
java/util/Vector jv z{ |} ~  #   g/scoringMatrices/BLOSUM62 v g java/lang/Integer g  kapplications/Gbm  java/io/File/learnedMatrices/   /scoringMatrices/ 0  > Complete! java/lang/NullPointerException"> Error: Not all fields present! java/net/URISyntaxExceptionBLEHapplications/MainWindowjavax/swing/JFrame$java/awt/datatransfer/ClipboardOwnersetTitle(Ljava/lang/String;)V setResizable(Z)V(III)V setBackground(Ljava/awt/Color;)V setColumns(I)V(Ljava/lang/String;II)VsetFont(Ljava/awt/Font;)V setForeground setLineWrapsetRowssetTextsetWrapStyleWord setBorder(Ljavax/swing/border/Border;)VsetViewportView(Ljava/awt/Component;)VgetContentPane()Ljava/awt/Container;(Ljava/awt/Container;)Vjava/awt/Container setLayout(Ljava/awt/LayoutManager;)VcreateParallelGroup ParallelGroup2(I)Lorg/jdesktop/layout/GroupLayout$ParallelGroup;createSequentialGroupSequentialGroup3()Lorg/jdesktop/layout/GroupLayout$SequentialGroup;/org/jdesktop/layout/GroupLayout$SequentialGroupaddContainerGapaddJ(Ljava/awt/Component;III)Lorg/jdesktop/layout/GroupLayout$SequentialGroup;-org/jdesktop/layout/GroupLayout$ParallelGroupGroupY(ILorg/jdesktop/layout/GroupLayout$Group;)Lorg/jdesktop/layout/GroupLayout$ParallelGroup;setHorizontalGroup*(Lorg/jdesktop/layout/GroupLayout$Group;)VX(Lorg/jdesktop/layout/GroupLayout$Group;)Lorg/jdesktop/layout/GroupLayout$ParallelGroup;setVerticalGroup setEditablesetDefaultCloseOperationjava/lang/ObjectgetClass()Ljava/lang/Class;java/lang/Class getResource"(Ljava/lang/String;)Ljava/net/URL;(Ljava/net/URL;)VsetIcon(Ljavax/swing/Icon;)V setFocusable(Lapplications/MainWindow;)VaddActionListener"(Ljava/awt/event/ActionListener;)V*(Ljava/awt/Component;)Ljava/awt/Component;setDividerLocationsetOrientationsetHorizontalScrollBarPolicyjavax/swing/BorderFactorycreateEmptyBorder!(IIII)Ljavax/swing/border/Border;(IIII)V 
setMargin(Ljava/awt/Insets;)VH(Ljava/awt/Component;III)Lorg/jdesktop/layout/GroupLayout$ParallelGroup;addPreferredGap4(I)Lorg/jdesktop/layout/GroupLayout$SequentialGroup;setTopComponent6(III)Lorg/jdesktop/layout/GroupLayout$SequentialGroup;setRightComponent(II)VsetMaximumSize(Ljava/awt/Dimension;)VsetLeftComponentI(ILjava/awt/Component;III)Lorg/jdesktop/layout/GroupLayout$ParallelGroup;Z(Lorg/jdesktop/layout/GroupLayout$Group;)Lorg/jdesktop/layout/GroupLayout$SequentialGroup;([Ljava/lang/Object;)VsetModel(Ljavax/swing/ComboBoxModel;)VsetSelectedIndexsetHorizontalAlignment5(II)Lorg/jdesktop/layout/GroupLayout$SequentialGroup;3(IZ)Lorg/jdesktop/layout/GroupLayout$ParallelGroup;0(Ljavax/swing/JMenuItem;)Ljavax/swing/JMenuItem;((Ljavax/swing/JMenu;)Ljavax/swing/JMenu; setJMenuBar(Ljavax/swing/JMenuBar;)VpacksetSize setVisible isFocusOwner()Z isSelected getDocument()Ljavax/swing/text/Document;javax/swing/text/Document getLength()IremovepastegetText(II)Ljava/lang/String;copy selectAllcutshowOpenDialog(Ljava/awt/Component;)IgetSelectedFile()Ljava/io/File;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;getName()Ljava/lang/String;toStringjava/lang/SystemexitgetSelectedItem()Ljava/lang/Object;parseInt(Ljava/lang/String;)IindexOf(II)IvalueOf(I)Ljava/lang/Integer;(Ljava/lang/Object;)ZoutLjava/io/PrintStream;java/io/PrintStreamprintlnlength java/net/URLtoExternalFormsizeget(I)Ljava/lang/Object;intValue elementAt substringZ(Ljava/lang/String;Ljavax/swing/JTextPane;Ljavax/swing/JTextPane;Ljavax/swing/JTextArea;)VtoURI()Ljava/net/URI;(Ljava/net/URI;)VreadRNRMatrices(Ljava/io/File;)VreadScoringMatrix_SEANstartt(IIIILjava/lang/String;)V%org/jdesktop/layout/GroupLayout$Group!L     6{-********** '  !#$((,* -?6  *Y*Y 
*!Y"#*Y$*Y%*!Y"&*Y'*!Y"(*)Y*+*,Y-.*,Y-/*,Y-0*,Y-1*,Y-2*,Y-3*,Y-4*5Y67*5Y68*5Y69*Y:*;Y<=*>Y?@*YA*BYCD*YE*;Y<F*>Y?G*YH*BYCI*YJ*;Y<K*>Y?L*YM*!Y"N*YO*;Y<P*>Y?Q*YR*!Y"S*;Y<T*UYVW*XYYZ*;Y<[*\Y]^*XYY_*;Y<`*\Y]a*XYYb*;Y<c*\Y]d*XYYe*;Y<f*UYVg*XYYh*;Y<i*,Y-j*kYlm*nYop*qYrs*qYrt*nYou*qYrv*qYrw*qYrx*qYry*qYrz*nYo{*qYr|*qYr}*~**#Y*#*#Y *#Y*#*#*#*#*#* *#Y*L*++++* +++* *&*&*&*&*&*&*%*&Y*$M*$,,,,*%|,,,*%*(*(*'*(***.Y**.*.Y**+*.W*/Y**/*/Y**+*/W*0Y**0*0Y**+*0W*1Y**1*1Y**+*1W*2Y**2*2Y**+*2W*3Y**3*+*3W*4Y**4*4Y**+*4W*7X*7*8*9*9*:Y*:*:*@Y *@Y*@Ŷ*@Ƕ*@Yʶ*A*DY*=N*=----*@+*A?Ͷ---*@*A*:*=*9*:*EY*E*E*GY *GY*Gж*GǶ*GYʶ*H*IY*F:*F   *G+*H?Ͷ*G*H*E*F*9*E*8*9*JY*J*J*JY%Զ*LY *LY*Lֶ*LǶ*LYʶ*N*N*N*M*NY*K:*K*L*MͶ*L*M7*J*K*8*J*7*8*OY*O*OY*O*QY *QY*Qٶ*QǶ*QYʶ*S*S*S*R*SY*P:*P*R7*Q-۶*Q*R*O*P*7*O*WY *WY@YSYSYSYSYSYSYSYSYSY SY SY SY SY SYSYSYSYSYSYSYSYSYSYSYSYSYSYSYSYSYSYSY SY!SY"SY#SY$SY%SY&SY'SY(SY)SY* SY+ SY, SY- SY. SY/SY0SY1SY2SY3SY4SY5SY6SY7SY8SY9SY:SY;SY<SY=SY>SY?S *W!*ZY "*Z#*Z$%Y*T:*T*Z111*W_)))Ѷ۶2&*Z'*W*^'(*_Y "*_#*_)%Y*[:*[*^%*_L&1&*_(*^*a*(*bY "*b#*b+%Y*`: *`      *a' *bP۶   1&*b(*a*d,(*eY "*e#*e-%Y*c: *c     . *eP *d)۶   2&*e'*d*gY *gYY/SY0SY1SY2SY3SY4SY5SY6S *g!*hY "*h#*h7%Y*f: *f     *g*hͶ۶   4&*h%*g*j89*j:Y*;Y*i: *i    *j1&   R&*j"*p<=*p>Y*?@*sY*A*sBC*sDY*EF*p*sGW*tY*A*tHC*tIY*JF*p*tGW*m*pKW*uL=*vY*A*vMC*vNY*OF*u*vGW*wY*A*wPC*wQY*RF*u*wGW*xY*A*xSC*xTY*UF*u*xGW*yY*A*yVC*yWY*XF*u*yGW*zYC*zZY*[F*u*zGW*m*uKW*{\=*|Y*]A*|^C*|_Y*`F*{*|GW*}Y*]A*}aC*}bY*cF*{*}GW*m*{KW**mdY*e: *e   *+W  *7C *T*[*`*c*f*i۶   *+*7A *[*f*i*c*`*Tڶ۶*f.C DE!F,G7HBIMJXKcLnMyNOPQRSTUVWXYZ[\])^4_?`JaUb`ckdvefghijklmnopqrst&u1v<wGxRy]zh{s|~}~(;NV^gow"1<`6>MYpx&2<DNX` w    (W%b'm)*+,-./02456@>rE}GIKLMNOPQR S U &V .W 6X AZ N[ W\ d k m o q r s t u -v @w Ix Wy i{ r| z} ~      ( 3 F      - 6      oEXk1%m/w0678?GH N"O,P;VGX^YhZw`bdefgmopqwy(z2{AMdn}#2>JR_h 1h   N K! " -l# $% & ' ( _:) *+R*$Xg*$h ,-.+R*g*h ,-/+v2*Di'*Nj*Sj *Ii '122,-0+>*+  ,-1+>*+ ,-2+>*+ ,-3+>*+ ,-4+mM*@k*DlM,,mn*Lk *No*Qk *So*Gk*IlM,,mnNhkp:   ! 
+5?IS[hkl*l56mm,-k789+N*Lk *Nq  ",-:+.M*@k)*DlM*(,,mr*(sf*Lk*Nt*NsK*Qk*St*Ss0*Gk&*IlM*(,,mr*(sNpJ&) ,-(.20<1C2M4W5^6h8r;z<=B@C*56,-78;+^M*@k6*DlM*(,,mr*(s,,mn}*Lk*Nt*Nub*Qk*St*Ss*So=*Gk3*IlM*(,,mr*(s,,mnNpVGJ NO(P/Q?TIUPVZXdYkZr[]abcdigj*56,-78<+QvYwM,*x>7,y:*SzY{|}~}}} *S"noqrtCuFvPx4+=>QQ,-I?@CAB+= {|,-C+QvYwM,*x>7,y:*SzY{}~}}} *S"CFP4+=>QQ,-I?@CAD+=YNo:**N**W޵**^**a**d**g޵*>=#-W*>`=-*dW*:*S6-d*--`:*Y*D*I*S*Y*zY{}*}*Y*zY{}*}*****>*S:*S:*S  (6DR`kpy2a{\ EFG 5H 5I,-J KLMN+5,-OP5,QRS5,TUV5,WX+5,-YP5,QZS5,T[V5,W\+5,-]P5,Q^S5,T_V5,W`+5,-aP5,QbS5,TcV5,Wd+5,-e+= ,- fgJYL+ hijkl:*+mn-ol:*+mn-pl:*+mn-ql:*+mn-rl:*+ mn-sl:*+ mn-tl:*+ mn-ul:*+ mn-vl:*+ mn-wl:*+mn-xl:*+mn-yl:*+mn-zl:*+mn-{l:*+mn-|l:*+mn-}l:*+mn-~l:*+mn- :>DINQTWZ_b  &'PK e}7sF,aaapplications/Masker.class1 ,cd efg h (i (jkl c m no pqr c s t uvw px pyz{|} ~ ( ( (  ( ( (srfLjava/io/RandomAccessFile;prf'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/Masker;seqFileLjava/lang/String;posFile()VgetSeqPosBlocks()Ljava/lang/String;seqLineseqposLinewriteLCR(II)ViIjstartenddiv1div2writeLCRReturn(II)Ljava/lang/String;resultmaskhcr posBlocks indexSpace indexDashcurPosfinishedZ maskReturn8(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String; maskMutiplecloseAllmain([Ljava/lang/String;)Vargs[Ljava/lang/String;ms SourceFile Masker.java 0> java/io/File 0java/io/RandomAccessFiler 0 -. /.java/io/IOExceptionjava/lang/String @  java/lang/StringBuilder @ @*x >x   - N DE MN> P1 >applications/Masker 01 Z> [>java/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)VreadLinejava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringtrimprintindexOf(Ljava/lang/String;)I substringjava/lang/IntegerparseInt(I)Ljava/lang/String;length()I startsWith(Ljava/lang/String;)Zclose (,-./. 
0127*Y+N*Y-Y,N*Y-N25 3" $2!5 6"44 %5667879:7;<7=<0>23*3 %'4 9:?@2 } Y L Y M* L +* L+!Y,+M* L* N* N-NY,-M:,vy 3:+-./'0+1>2I4Q5Y6^7v:y9{;44{78}9:uA<mB<Q,C<DE2 d>6 6<  '6<  6

 3~AB CDE#G+H/J2K8L@MFQIRLSVU\VdWjYpZw[z\}]^_acdefgj4f FG/FG2HG9:IGJGVbFGLlHGKG LGMN2.$:Y:d>6ڻY:6<Y:Y:E6<Y:Y:6

Y:3 op qrst'w=xAzD{J}`~filv|!4p $FGA%FGD"HG$9:$IG$JGvFGlHG KGLG O<P12 6,6,6,6,` 6,`6,` M 6[*!`6,6,6,6,` 6,`6,` M6  *6?BCGHIJKLMNOPSBTMWeZm\]`acefhjnpr%t-u5vAwGxW{f|rx),/=W^eht49JGKG LG (KG LG 9:B<R<Q<O<SGTG&IGKpJG UG VW Z>2: Y L Y M* L* L+q Y M+*+$%!Y,+M* L* N -* N-N*,-& * N* L: 3N $,9LW_fnsy44_3C<789:A<B<[>2_*'*'L 34789: \]2W(Y*2*2)L+*++34^_ `:abPK e}7A(A(%applications/MaxRecallPreGettor.class1K W T T  T T T T  T T T           T    T      T         T T T T T T T   T T!" LCRBlocksRf1Ljava/io/RandomAccessFile; LCRBlocksRf2repeatRfLCRs1[Ljava/lang/String;LCRs2 repeatRegionsLjava/util/Vector; repeatLength indexLCR1I indexLCR29(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this!Lapplications/MaxRecallPreGettor; lcrBlockFile1Ljava/lang/String; lcrBlockFile2 metricsFile printLCRs(I)VimarklimitgetLCRs(Ljava/lang/String;I)Vjlinestr printVector(Ljava/util/Vector;)Vvclean&(Ljava/util/Vector;)Ljava/util/Vector;indexstart1end1start2end2tmpUnionunionlenlcr1lcr2getTotalLength(Ljava/util/Vector;)Istartendlength unionLCRs()Ljava/util/Vector;boundmaxTp intersectLCRs intersectionminFpk lcrStartInt1 lcrEndInt1 lcrStartInt2 lcrEndInt2 lcrStaratStr1 lcrEndStr1 lcrStartStr2 lcrEndStr2foundZ getRepeats()VgetRepeatVector&(Ljava/lang/String;)Ljava/util/Vector;intersectVectors8(Ljava/util/Vector;Ljava/util/Vector;)Ljava/util/Vector;str2str1v1v2len1len2 startInt1endInt1 startInt2endInt2 startStr1endStr1 startStr2endStr2getMaxRecallPretpDfprepeatline1line2 repeatVectorrecPrerecall precisionrecSumpreSummain([Ljava/lang/String;)Vargsmrpg SourceFileMaxRecallPreGettor.java ejava/lang/String \] ^]java/util/Vector _` a` bc dc java/io/File e#java/io/RandomAccessFiler e$ XY ZY [Yjava/io/IOException% &'java/lang/StringBuilder () *+, -# .************************** .# /+ 0yeah 12- 34 35 tu 60 78before cleaning ~9 :2begin1  end1 ;8 (< =>begin2  end2after cleaninglength =?wrongunion of LCRs1 and LCRs2 @A B0%intersections 
between LCRs1 and LCRs2 C+Repeat Infor.: DETotal length of all repeats:: F no repeatrepeats >ID yz intersection to get max TP intersection to get min FP:G HInonsense (Jall recalls and premax of ave recall and precison applications/MaxRecallPreGettor ef java/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)Vjava/lang/SystemoutLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintprintlntrim()IindexOf(Ljava/lang/String;)I substring(I)Ljava/lang/String;(II)Ljava/lang/String;size elementAt(I)Ljava/lang/Object;java/lang/IntegerparseIntremove(I)Ljava/lang/StringBuilder;add(ILjava/lang/Object;)V(Ljava/lang/Object;)ZvalueOf'(Ljava/lang/String;)Ljava/lang/Integer;intValuereadLine startsWith(Ljava/lang/String;)Zclosejava/lang/Double parseDouble(Ljava/lang/String;)D(D)Ljava/lang/StringBuilder; TW XYZY[Y\]^]_`a`bcdc efg****Y*Y* * Y+ :* Y Y, :* Y Y- :* Y:8h6 .8BQ [!j"t#&%(i>BAjklmnopqrqsqtugy= * =* =>M$Y*2!Y*2 h6 ,-.012 3A5_1e7k8p9x:i*Ovcynoywcwxcyzgk+::: !>"!6/** #S** #S:5** $S** $S`#:*Y ` x*Y ` k*%hZ?@ ABCE"F*G3H8I=JOL^MdPiQ|STVWY[\i>*vc3{cno|qwc}q~gAY&M>+'+Y+(M,Ӳh`ab-c4a:e@fi* 0{cAnoA`9}qg +MY&N)*,*6,'6Y&: Y&: dY,(: ,`(:  "!6 $+6 `#+6 "!6 $+6  `#+6  q,Y  - 6 ,.W,.WY /" /: , 0{ q1Y  2  6,.W,.WY/"/: , 0,'63*,*,h(jk lmn p2q;rFsSt\uivwwxyz{|}~>FMQX_zi \5ci(cwcc c no`` }qvc c)wq 2nq g8 6Y&:6+'j+(:"!=$+6`#+6`d`6Y//Y4/h2  #+7DPyiR+Tc7HcD;cpvcno`c }qg <=>6666 Y: * * z*2"!6*2$+6*2`#+6*2"!6*2$+6*2`#+6  *25Wz  *25Wao h Y/"/5W6* d* d*2"!6*2`#+6o h Y/" /5W6* d* d *2"!6*2`#+6v oh Y/" /5W6* d* d- &*2"!6*2`#+6v o h Y/"/5W6* d* d*2"!6*2`#+6Ͳ6| *  *25W*  *25W7* 8:  hA+8I[hy 0>]`jm~2QT^aru   iCc`CcCcTCc8jcnocvc{c c ccc ` g)ۻYL=>666666 Y&: Y&: Y&: Y&: 6* *2"!6*2$:  9:6*2`#:  9:66* -*2"!6*2$:  9:6*2`#:  9:6  5. '+Y/" /5W{ .'+Y/" /5WI 5.'+Y/"/5W6 6. 
'+Y/"/5W6ф˄6|;*+*+h' 0BEMZhr !"#$% &(!)?*E,Z-x.~/02347:;=>?ino` c vc{cccccc 'q 0q 9q Bq EgmY&L**+L+@!=*+`#5W*DGHIJ%K1M:N?OFPUR`TgWjVlXi*Fvcllmmnoe|qgkYM+N- NB-@-!6-"!6,-#5WN,-$5W-`#N²C*,*,hB\] _`acd'e/f5g?hDkPl\odpiqi>'5vc/-{cknok|qc` a}qgbԻYN6+'6,'6666 6 6 6 6 Y&:Y&:Y&:Y&:6{+(:"!6  $:9:6  `#:9:6 0),(:"!6  $:9:6  `#:9:6   5  .  '-Y /" /5W  .  '-Y /" /5WO  5  .  '-Y /" /5W6  6   .  '-Y /" /5W6ׄф6-h(vw xy z,|>}P~SZenx'EK`~iqemqno``` cccvc{c c #c &c )c ,c 5q>qGqPqSgZY&LY&MYNY:Y:Y:69999*D* +F>U+*()VCodeLineNumberTableLocalVariableTablethisLapplications/MergeSort; mergeSort([Ljava/lang/Comparable;)Va[Ljava/lang/Comparable;b segmentSizeI mergePass2([Ljava/lang/Comparable;[Ljava/lang/Comparable;I)Vjxyimerge4([Ljava/lang/Comparable;[Ljava/lang/Comparable;III)Vqcd startOfFirst endOfFirst endOfSecondfirstsecondresult printElementsstrLjava/lang/String;main([Ljava/lang/String;)Vargs[Ljava/lang/String;[Ljava/lang/Integer; SourceFileMergeSort.java java/lang/Comparable )* /0 _`10000 abc def ghjava/lang/StringBuilder ij ikl mn ojava/lang/Integer pThe elements are on :# "#The sorted order isapplications/MergeSortjava/lang/Object compareTo(Ljava/lang/Object;)ItoString()Ljava/lang/String;java/lang/Stringequals(Ljava/lang/Object;)Zjava/lang/SystemoutLjava/io/PrintStream;append-(Ljava/lang/Object;)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;java/io/PrintStreamprint(Ljava/lang/String;)Vprintln(I)V!/*  ! 
"#&*L=**+`=+*`="% &$% &%'( )*Z>*hd*+`dh`dh`>`**+`d*d6*+*2S*  !'%/'A*K+S*Y,4D+(Z,%Z-%Z'(X.( /0G 6`66<5*2*2+*2S+*2S"6+*2S6+*2S>234 78(99;J>P?[@f?oByCBDf T1(s1(2%3%4(5(6(7(8( 9( :#@<*4M,*2 Y *2  ̲ GH IJ3G9M?N  (;<7.(@$% => YY SYYSYYSYYSYYSYYSYY SYYSYYSY YSY YSL ++ +Tabehij?@$ABCPK e}7RXo 0p 0qrst()VCodeLineNumberTableLocalVariableTablethisLapplications/MergeSortDouble; mergeSort([Ljava/lang/Comparable;)Va[Ljava/lang/Comparable;b segmentSizeI mergePass2([Ljava/lang/Comparable;[Ljava/lang/Comparable;I)Vjxyimerge4([Ljava/lang/Comparable;[Ljava/lang/Comparable;III)Vqcd startOfFirst endOfFirst endOfSecondfirstsecondresult printElementsstrLjava/lang/String;main([Ljava/lang/String;)Vargs[Ljava/lang/String;[Ljava/lang/Double; SourceFileMergeSortDouble.java 23java/lang/Comparable @A FG uv10000 wxy z{| }~java/lang/StringBuilder   3java/lang/Double 2The elements are Q: 9:The sorted order isapplications/MergeSortDoublejava/lang/Object compareTo(Ljava/lang/Object;)ItoString()Ljava/lang/String;java/lang/Stringequals(Ljava/lang/Object;)Zjava/lang/SystemoutLjava/io/PrintStream;append-(Ljava/lang/Object;)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;java/io/PrintStreamprintln(Ljava/lang/String;)V(D)V!01234/*56 78 9:4&*L=**+`=+*`=5"%6 &;< =<>? @A4Z>*hd*+`dh`dh`>`**+`d*d6*+*2S5*  !'%/'A*K+S*Y,64DB?ZC<ZD<Z>?XE? FG4G 6`66<5*2*2+*2S+*2S"6+*2S6+*2S5>234 78(99;J>P?[@f?oByCBD6f TH?sH?I<J<K?L?M?N?O? P? Q:4@<*4M,*2 Y *2  ̲ 5GH IJ3G9M?N6  (RS7E?@;< TU4 YYSYYSYYSYYSYYSYYSYY SYY"SYY$SY Y&SY Y(SY Y*SL ,+-+. 
/+-5Tbcfijk6VW;XYZPK e}7 V &applications/NorNRepCDFProcessor.class1 *Z [\ $]^ Z $_ $`a bcd e f gh i j k lmn op q [rst uv wxy z{ Z | }~ $b $ $ $binNumDrepeatsLjava/util/Vector; nonRepeats(Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablethis"Lapplications/NorNRepCDFProcessor;strLjava/lang/String;getRepeatPercentage&(Ljava/lang/String;)Ljava/util/Vector;fLjava/io/File;rfLjava/io/RandomAccessFile;lineindexI intervalLen percentage prePercentageexLjava/io/IOException;filecom1com2com1sgetNonRepeatPercentage'(Ljava/util/Vector;Ljava/lang/String;)VLjava/lang/Exception;vindexVprint()Vimain([Ljava/lang/String;)Vargs[Ljava/lang/String;np SourceFileNorNRepCDFProcessor.java 0Q +,java/util/Vector -. /. java/io/File 01java/io/RandomAccessFiler 0   shoot 1 java/io/IOException??? java/lang/String 1111111java/lang/Exception java/lang/StringBuilder  applications/NorNRepCDFProcessor 9:OOOOOOOOOOOOOOOO KL PQjava/lang/Objectjava/lang/Double parseDouble(Ljava/lang/String;)D#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;trimindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;(I)Ljava/lang/String;java/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnadd(Ljava/lang/Object;)ZtoString(D)Ljava/lang/String; elementAt(I)Ljava/lang/Object;size()I getMessageappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;!$*+,-./.012g#**+*Y*Y3 "4#56#789:2 I9Y:Y+ : Y : :  :  6   9Ig*o9   `:  :(u M(F : :   :  6   9  `: *W(W(( cI:3z"#$&'%(,)3*<+I,L-W.b/f0l1x2345789:?@ABFGH4 ;<%=>,?8 <@A WB, b|C8 fxD8EF56G8H,I,J.KL2%J96Y, : Y :  :  :  6   9+J  `:  : **))C :  :  . 
:  6   9  `: J* W)*+Jvk:3zLMNPQR&S-T6UCVOWZX^YuZ|\]^_`abeghiklnqr4 ;<=> &?8 6@A ZC8 ^D8 EM56N.G8H,I,OAPQ2E<*: Y!*""*"#3u w>uDx4BRAE56 ST2z($Y*2%L+*2&M'+,*2(+)3{ |}~#'4 (UV W6N.XYPK e}7_applications/Normalizer.class1o <= >?@ AB < CD E F GH I JKL < M N O P QR STU < VW()VCodeLineNumberTableLocalVariableTablethisLapplications/Normalizer; normalize(Ljava/lang/String;)VcomDfLjava/io/File;rfLjava/io/RandomAccessFile;lineLjava/lang/String;preLineindexImaxexLjava/io/IOException;filemain([Ljava/lang/String;)Vargs[Ljava/lang/String;nn SourceFileNormalizer.java  java/io/File %java/io/RandomAccessFiler Xjava/lang/String YZ [\ ]^_ `a bcd efjava/lang/StringBuilder gh ]i gj kZl m% njava/io/IOExceptionapplications/Normalizer $%java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;indexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;java/lang/Double parseDouble(Ljava/lang/String;)Dseek(J)Vjava/lang/SystemoutLjava/io/PrintStream;append(D)Ljava/lang/StringBuilder;(I)Ljava/lang/String;-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringjava/io/PrintStreamprintlnclose!3* ! "#$% Y+MY,NY::- ::- : 6 : 9- - :H 6 : o9 Y - :-M Z !',09 B!L"S#X%^&c'l(v),-./12!f %&' ()*+,-!.-Bj/0SY1'23"#4- 56LYL+*2 567!789#:;PK e}7Jn2YY'applications/PfamADomainExtractor.class1 3de 0fg 0h 0ij k 0lmn o 0pq r stu d vw x yz y{| } ~     d  0   y 0 0 0  0k 0fLjava/io/File;rfReaderLjava/io/RandomAccessFile;domains[Ljava/lang/String;domInt[Ljava/lang/Integer; indexDomainI(Ljava/lang/String;)VCodeLineNumberTableLocalVariableTableexLjava/io/IOException;this#Lapplications/PfamADomainExtractor;completeDomainFileLjava/lang/String;assignDomainInt()Vi printDomainsgetDomainStartsstrj sortDomainsstr2str1kfoundZ tmpDomainsgetSortedPfamADomainsnumDom numOfDomseid numOfLetters strDomainmain([Ljava/lang/String;)Vargspade SourceFilePfamADomainExtractor.java >Jjava/lang/String 89java/lang/Integer :; <= java/io/File >? 45java/io/RandomAccessFiler > 67java/io/IOException > java/lang/StringBuilder   ? 
J- > IJ| aPfam-B 1 ? MJ PJ LJ J!applications/PfamADomainExtractor WJjava/lang/Object#(Ljava/io/File;Ljava/lang/String;)V(I)Vjava/lang/SystemoutLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintprintlnindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;valueOf'(Ljava/lang/String;)Ljava/lang/Integer;intValue()I(I)Ljava/lang/String;equals(Ljava/lang/Object;)Z startsWith(Ljava/lang/String;)ZreadLine lastIndexOfapplications/MergeSort mergeSort([Ljava/lang/Comparable;)Vclose 03456789:;<=>?@=**d*d**Y+ * Y*  M8;A&  '8;<B <CD=EF=GHIJ@_<d*Y'SA!"!#BK=EFLJ@}5<*'Y*2ײA'( )(*.,4-B5EF3K=MJ@3=*+*2>*2L*+SӱA23 45"7,82:B*"NHO=3EF1K=PJ@BdN6*]66*2L>*26*2M+,-*2S6ɄÄ6**-2SAN?@ BCDE&F+G9HGIOJZK`NfPlRoSxTUWBRGQH9-O=&FRHWS=TTUEFV9 K=WJ@ YLYMYN--- !* "N-*#66- ! *-$%6-``L-&6- `dM-$6* "N-- !--'!-d(N-! -(N-6-:))-%6**-`(S*Y`H664-%6**-`(S*Y`-N* "N:6*<Y +**+*,*-*.,** /:A2[]^)_4`8a<b?cBdKePfXgfhni|jklmnopqrstuvyz{}!~2<DJUX_x|Bp ;X=8K=uYH?YS=BVO=X@K=ZDEF[H\H]H ^_@L0Y*21L+2A B`9 aFbcPK e}7 8$applications/PfamMatrixLearner.class1 Q LU L L   L L  L s &  & & , , , & & &   ,  & L & L L L L  ,  L L L LalphabetLjava/util/Vector; pfamAMatrix[[FnonDomainMatrixrfSeqLjava/io/RandomAccessFile;rfDm'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this Lapplications/PfamMatrixLearner; sequenceFileLjava/lang/String; domainFileinitializeAlphabet()VgenerateConnectedSeq()Ljava/lang/String;strseq workOnMatrix(Ljava/lang/String;I)Vmatrix subSequencemarkIsubSeqtmpSeq outsideLetter insideLetterrowcollenworkOnSequence9(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Vdomsequencedomains subDomainnonDomainStart nonDomainEnd domainStart domainEndindexij beginWithOneZnormalizeSingle(I)VsumF normalizeBoth learnPatternid closeBoth printMatricesmain([Ljava/lang/String;)Vargs[Ljava/lang/String;pml SourceFilePfamMatrixLearner.java Zi hi TU VU java/io/File 
Zjava/io/RandomAccessFiler Z WX YXjava/io/IOExceptionjava/util/Vector RSA RNDCQEGHLKMPSTWYVjava/lang/String k> kjava/lang/StringBuilder k  outsideLetter:    insideLetter:1--   no  dummy jk {| i iNon-Domain matrix:   iPfam-A matrix: applications/PfamMatrixLearner Z[ i i ijava/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)Vadd(Ljava/lang/Object;)ZreadLine startsWith(Ljava/lang/String;)Ztrimappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringlength()I substring(I)Ljava/lang/String;(II)Ljava/lang/String;indexOf(Ljava/lang/Object;)Ijava/lang/SystemoutLjava/io/PrintStream;(I)Ljava/lang/StringBuilder;java/io/PrintStreamprintln(Ljava/lang/String;)Ijava/lang/IntegerparseIntendsWithclose(F)Ljava/lang/StringBuilder;print LQRSTUVUWXYX Z[\S****Y+N*Y- Y,N*Y- N NQ ].  )7 @!N$Q#R%^4)%_`RabScdSefSgfhi\R*Y*W*W*W*W*W*W*W*W*W*W*W*W*W*W* W*!W*"W*#W*$W*%W]Z) *+,)-3.=/G0Q1[2e3o4y56789:;<=>^ cdjk\N&Y'L&Y'M* (L+/+)*&++L,Y-,.+./M* (LӧN,HK ]& BDE%F*G=HHLKKLM^*LabNcdFlf>mfno\( 0 *N*N+:&Y':&Y':&Y':66 6 01:2:*36 '4,Y-5..6.7/8-2-20 bQ0r2:*36   '4,Y-9..6. 7/8-2 -2 0 bQ- 2- 20 bQ1:1: *-*-]fUV XY/Z8[@\H]Q^\_h`abcdefghikm"n*p/q^z pU0cd0qf0rspUtfuf& vf/wf2xs5ys 8zs {|\+:&Y':,:&Y':66 6 6 6 6 66+::* 66;<66<6   :: 2: `1:+:2=6 P d6 4,Y--.6.76. 7/848d 2:*>6`1=6 4,Y--.6. 76. 7/848 d 2:*> `6-?M-=6 4,Y--.6.76. 7/848d 2:*>]&uv$w-x0{7|A}GJOXagkq{%-:AJSY^cdmf}fzf~f qfffss !}s $zs 'ws *ts -qs0n\+w E *N*N6K E6$-20bE6$ -2-20$nQ *-*-]J)39CIY_eiqv^R pU"s<#sOswcdwrsudpUi\= *@*@] ^  cdi\$&Y'L&Y'MAN&Y':* (W*BM,P-L-0 -;<* (N--+N* (+:*,-C*BM* (N:*Dz} ]B$)1BMQVbjoz}^>abcd|ftmfqfhzfi\_* E* EL ]^abcdi\Q4F8<C=)4,Y-*20GH./Iײ4J4J4J4J4K8<C=)4,Y-*20GH./Iײ4J4J4J]R8>DJPV\dlt^4,s Fsn,sfFscd \_LY*2*2ML+N+O+P]^ dPK e}7))(applications/PfamMetricsCalculator.class1k h c c c c  c c c c c c c c c c c c c c   c cE        + + + +         +E  c c c!"# c$ c% c& c' ( )*+ , -. 
/0123 c4 56 c78 c9 c: c; c<=>?@ AB cC cD cE cFGLCRs[Ljava/lang/String;HCRsdomains nonDomains strDomainLjava/lang/String;indexLCRIindexHCR remainder indexDomainindexNonDomain indexMetric totalLCRsFtotalNonDomains sensitivity[F precisionrecallheadZrf1Ljava/io/RandomAccessFile;rf2domInt[Ljava/lang/Integer;'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTableif1Ljava/io/File;f2exLjava/io/IOException;this$Lapplications/PfamMetricsCalculator; filename1 filename2 getLCRHCRs(Ljava/lang/String;)V numOfLettersjstrstrTmp startLCRStr endLCRStr startHCRStr endHCRStr startHCRInt endHCRInt beginWithOne printLCRs()V printHCRsgetInterLCRNonDomain()Ftotalk lcrStartInt lcrEndInt nonDStartInt nonDEndInt lcrStartStr lcrEndStr nonDStartStr nonDEndStrfoundgetInterHCRDomain hcrStartInt hcrEndInt domStartInt domEndInt hcrStartStr hcrEndStr domStartStr domEndStrcomputeMetrics(FFF)V LCRNonDomain HCRDomaintotalNumgetLCRHCRDomainMetrics strMaskedassignDomainIntprintDomainInt printDomainsprintNonDomainsgetDomainStarts sortDomainsstr2str1 tmpDomains getNonDomains(I)VstartStrendStrstartIntendIntgetSortedDomains()Ljava/lang/String;numDom numOfDomseid printMetricssenAvepreAverecAve closeBothmain([Ljava/lang/String;)Vargsmc SourceFilePfamMetricsCalculator.java java/lang/String ij kj lj mj no pq rq sq tq uq vq wx yx z{ |{ }{ ~java/lang/Integer  java/io/File java/io/RandomAccessFiler H java/io/IOException I J1- KL1 MN- OP OQ RNjava/lang/StringBuilder ST SU V VQ WLX YZLLLLLLLLLLLLLLLLLLLLLLLLLLL[ \ ] \HHHHHHHHHHHHHHHHHHHHHHHHHHH ^_ `a:total length of intersections between LCRs and NonDomains: Sb6total length of intersectsss between HCRs and Domains: > totalNonDomains1:  totalLCR: <<<<<<<<<< \c****************************Non-Domain Infor.: de ^Q| fNa0************************************************XXXXnumDom: g hi The sorted domains are: - sensitivity precision recall  Ave. 
j"applications/PfamMetricsCalculator java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLinetrim startsWith(Ljava/lang/String;)ZindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;(I)Ljava/lang/String;parseIntappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;toStringendsWithjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnprintvalueOf'(Ljava/lang/String;)Ljava/lang/Integer;intValue()I(F)Ljava/lang/StringBuilder;(Ljava/lang/Object;)Vequals(Ljava/lang/Object;)Z lastIndexOfapplications/MergeSort mergeSort([Ljava/lang/Comparable;)Vclose chijkjljmjnopqrqsqtquqvqwxyxz{|{}{~****d*d*Y* * * * * ** * *7*7*7**dY+NY,:*Y-*Y67$*Q*Q*QۧNZ*5DS]uz !"'%(H(qRHoo=>Y:Y:Y:Y:Y:Y: 6 6 6 * :: !:"# $:6 %&='&>(:"** )S`)::(** (S`(:`):***b*f b*d6  2** +Y,-'- ./S*Y ` 6 *`0:*Y ` +!:+Y,'-+-/1.** +Y,-'-+-/S*Y ` CLO%,-.(/:0@1C3L7O5Q8X9b:i<l=q>y?@ABCDGHIKLMNO(R+S7TDVKWfXY[Q oqq o|oso(jo1ao:Xo =Uq @Rq CO E<234* '2+Y,*2-%-/5ײ26234"_` ab0c6e<fDgECqE<274* '2+Y,*2-%-/5ײ26274"kl mn0o6q<rDsECq D=>66666Y: Y: Y: Y: 6 * 6*2'&6*2(:  896*2`):  896 * *2'&6*2(:  896*2`):  896##bf bD#bf bDq##bf bD6 J 6 =#bf bD6 6 Ȳ2+Y,:-#;/4#%wx yz'{9|<~DQ_ix)5;BHVbhnqwxqq q qqq}qto 'ko 0bo 9Yo <V  D=>66666Y: Y: Y: Y: 6 * 6*2'&6*2(:  896*2`):  896 * *2'&6*2(:  896*2`):  896##bf bD#bf bDq##bf bD6 J 6 =#bf bD6 6 Ȳ2+Y,<-#;/4#% '9<DQ_ix)5;BHVbhnqwxqq q qqq}qto 'ko 0bo 9Yo <V **#$b%nQ* **=Q**#*nQ* **=Q**#*nQ2+Y,**0;>-**0;>-**0;/4*Y`* %4=KZ*xxx* L E F 8++?#*@:2+Y,A-*;/4*B*C2+Y,D-*;/42E4*F*GF*H889E*%$I* * * * Lk* L`Lb">D H d l pu{ H"oo x xx_<d*Y'JS$%$&qf"<2*2K2L4*+*,!-q"}5<* '2+Y,*2-%-/5ײ2612 3(4.64753q=2M4<* '2+Y,*2-%-/5ײ26;< =>0?6A<B= 3q3=* +*2'&>*2(L*+8SӱGH IJ"L,M2O*"oq31qBdN6* ]66*290L>*2'&6*2(M+,N-*2S6ɄÄ6* *-2SNTU WXYZ&\+^9_GaOcZd`gfilkolxmnpRGo9-q&FoWqTj q uYMYNY:66666 * /*2"#$M6*29d6N6H6*d2'&6 *d2 `):89`6*29d6OMC<ON* +Y,,-'---/S**bf b 9* 0N** d2:-1d'&6  `):89`6OM** +Y,,-'---/S**bf b*Y ` $tuv"w(x1y6zE{H|K}\beu '39Yjtz u2q Xq uuqmoeo\oYVq"Sq%Pq(Mq v YLYM* * **?#** *>6*?#* 
*PQ>*``(L*R&6* `d(M262S42+Y,+-T-,-/4*P&>** **?#*'&**d)*%#**)*%&6*(:$N/*%Q6** *`)S*Y ` n8962+Y,U-./46@*%Q6** *`)S*Y ` **(** 6*V*W*X2Y4*Z,896*[*\*]N,  5-8?ADPU_ny&0;O\f  p faqDq&oAqDq2qqoo}262^4< E F 8*~2+Y,`._-*0;`-*0;`-*0;/4$*0bE*0= %*0bF*0=*0b8$*nE%*nF*n82+Y,a-$;`-%;`-;/426J_ht}4qxxx_*b*bL    gcY*2*2dL+]+e+f+gjPK e}7JA (applications/PrecisionRecallSorter.class1 +\] ^_` a 'b 'c 'de \ fg hi 'jk 'l 'mno \p \ q r s 't u v w xy z{ | }~ ' ' 'gbaRfInLjava/io/RandomAccessFile;cardRfInsegRfIngbaRfOut cardRfOutsegRfOut9(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this$Lapplications/PrecisionRecallSorter;gbaLjava/lang/String;cardseg readInput.(Ljava/io/RandomAccessFile;)Ljava/util/Vector;rfvcLjava/util/Vector;linesortGba()VgbaVccardVcsegVcvalStrvalDbDmaxiImaxIndexcloseAllmain([Ljava/lang/String;)Vargs[Ljava/lang/String;prs SourceFilePrecisionRecallSorter.java 3I java/io/File 3java/io/RandomAccessFiler 3 ,- .- /-java/lang/StringBuilder Sorted rw 0-Changed 1- 2-java/io/IOExceptionjava/util/Vectorjava/lang/String BC   I"applications/PrecisionRecallSorter 34 HI TIjava/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;readLinetrimadd(Ljava/lang/Object;)ZisEmpty()Zsize()I elementAt(I)Ljava/lang/Object;java/lang/Double parseDouble(Ljava/lang/String;)Dremove writeBytesclose '+,-.-/-0-1-2-345v*Y+:*YY,:*YY-:*Y Y Y +  :*YY Y ,  :*YY Y -  :*Y:6B'6 @!O"k#z$%&'*)+7>89:;<=>?@?A?BC53YMYN+N--N,-W+N:,,/6* /02345$6,:/91;741:;3<=3D-+EF#G?HI5 YLYMYN**L**M** NY:996 6 +6 9 + ++ !:"9 9 6 * Y + # $ %* Y , # $ %* Y - # $ %Z: U6j@ABC!D*E3F<GDHJIQJTKYLbMmNtO|PQSVWX[Z[]7f :; <=JFKFLF<M??NODPOGQR JSR TI5/*&*&* &*&*&*&L*-6& bcdef#g*j-i.k7.:;/<= UV5Z'Y*2*2*2(L+)+*6opqr7WX Y=Z[PK e}7Bd8#applications/RNRComCalculator.class1 5pq rst u 2v 2wxy z{ p| }~   p    p   ' @}@  2 2NUMOFSEQUENCESI 
ConstantValuerepeatRfLjava/io/RandomAccessFile;seqRf'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/RNRComCalculator; repeatFileLjava/lang/String;seqFile calculate()Vcc#Lapplications/ComplexityCalculator; repeatLineseqLineseqrepeats nonRepeatscomDLjava/lang/Exception;iRSCom[DNRSComRComNRComRSComD[Ljava/lang/Double;NRSComDRComDNRComDcomIndexindex1index2startendpreEndtotalmain([Ljava/lang/String;)Vargs[Ljava/lang/String;rcc SourceFileRNRComCalculator.java =L java/io/File =java/io/RandomAccessFiler = :; <;java/io/IOExceptionjava/lang/Double java/lang/String>  REPT_MOUSE  SON_MOUSE DERM_MOUSE HNRH1_MOUSE HNRH2_MOUSE java/lang/StringBuilder  -  !applications/ComplexityCalculator L = Ljava/lang/Exception   H SR SNR NewR NewNRapplications/RNRComCalculator => KLjava/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String; startsWith(Ljava/lang/String;)ZindexOf(Ljava/lang/String;)Itrimappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString substring(II)Ljava/lang/String;java/lang/IntegerparseInt(I)Ljava/lang/String;initializeAlphabetcalculateEntropy(Ljava/lang/String;)D(D)VcalculateModifiedEntropyclosejava/lang/SystemoutLjava/io/PrintStream;(I)Ljava/lang/StringBuilder;java/io/PrintStreamprintln getMessageexit(I)Vapplications/MergeSortDouble mergeSort([Ljava/lang/Comparable;)V 256789:;<;=>?7*Y+N*Y-Y,N*Y-N25 @ $26A4 %BC6DE7FG7HI7JIKL?ԼLԼMԼNԼ:Խ :Խ :Խ :Խ :6 6 6 6 6 6* :* : Y : Y : Y :99# :* :* ::* :6,:Y:* :6 6 6 6 66  6 6   `6 : ` 6  `:Y d : ) "Y d: 6VY: Y!:"#9 Y$S+ R#9 Y$S, R%9 Y$S- R%9 Y$S R  Y : Y : Y :* :*&*&=:(Y) ) )*(+*,----(.*/96 `(Y`)21212121*K'@FQ"# $%&!')(1)9*<+K-T.]/f0o1x2{3456789:;< =#>/A>BECND[EdFjGxHKLNPQRSUXYZ[+\1]:^H_NgWiejkltnoqrstuwyz|}~ ",AMNTpOI]gPIf^QIoURIxLSI{ITU8DV%dW7FGXY ~ZYx[Yq\Y!i]^)a_^1Y`^9Qa^<Nb7 ?Kc7 BHd7 EEe7 HBf7 K?g7"hhU ij?O2Y*2*23L+4@AklmGnoPK e}7I~wMMapplications/Refiner.class1 < 9 9 9 9  9 9            9     A ?  
/ / 9 9 9 9  9 9blockLenThresholdI ConstantValuesimilarityLenThresholdlcrRfLjava/io/RandomAccessFile;seqRftmpLCRs[Ljava/lang/String;LCRs tmpIndexLCRindexLCR'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/Refiner;lcrFileLjava/lang/String;seqFileprintLCRsOReps(IZ)VimarktmpZlimit getLCRsOReps(Ljava/lang/String;I)VjlinestrgetSeq()Ljava/lang/String;seq findAlignment8(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;s1Ljaligner/Sequence;s2 alignmentLjaligner/Alignment; similarLeneLjava/lang/Exception;seq1seq2aliPos refineLCRs(Ljava/lang/String;)Vblock1block2lcr1lcr2indexstart1end1start2end2refine()VlcrLinemain([Ljava/lang/String;)Vargsr SourceFile Refiner.java Kjava/lang/String FG HG I> J> java/io/File Kvjava/io/RandomAccessFile K CD EDjava/io/IOException !! v**java/lang/StringBuilder  f ************************** v f yeah - combined repeats: YZ f>  BLOSUM62   jaligner/formats/Pair java/lang/Exception hi `a ef uv applications/Refiner KL java/lang/Object#(Ljava/io/File;Ljava/lang/String;)Vjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringprintlntrimlength()IindexOf(Ljava/lang/String;)I substring(I)Ljava/lang/String;(II)Ljava/lang/String;readLine startsWith(Ljava/lang/String;)Zjaligner/util/SequenceParserparse'(Ljava/lang/String;)Ljaligner/Sequence;jaligner/matrix/MatrixLoaderload,(Ljava/lang/String;)Ljaligner/matrix/Matrix;jaligner/SmithWatermanGotohalignV(Ljaligner/Sequence;Ljaligner/Sequence;Ljaligner/matrix/Matrix;FF)Ljaligner/Alignment;jaligner/Alignment getSimilarityformat((Ljaligner/Alignment;)Ljava/lang/String;java/lang/IntegerparseIntclose 9<=>?@A>?BCDEDFGHGI>J>KLMU*****Y+N* Y- Y,N* Y- N"PSN* " +!9"B#P(S&T)O4+%PQTRSUTUUVWUXWYZM<> *>"*> 6SH%Y*2"Y*2 NN/01 2468*<2>;?@@DAfC>MNOPQO45V[>TU\>]^_>`aMB+:::d>6** S:"**!S` :*Y`"*#NVWX YZ[]"^*_3`8a=bLfRiWjgnqpvq}O>*Y[>3Pb>TUcW\>dWefMNY$LY$M* %M,/,&'&,MY+,L* 
%MӧN+HKN& %*=HKLO*LRSNTUFgW>cWhiMPY$N+(:,(:)*+,-:.6 +/Y01N:-IL2N* &-<ILNO\ ;jk5lk&#mn-o>NpqPTUPrWPsWHtWuvM6 Y$MY$NY$:Y$:6666 6 6 **2M,6,!36,` 36  d` ***2S*Y`Y+d !:*d2N-6-!36 -` 36 + d !:*4: ***2S*Y`*d*`2N-6-!36 -` 36 + d !:*4:***2S*Y`q*df*`2N-6-!36 -` 36 + d !:*4:***2S*Y`L*#N,"4=EMYfr#/<HRZjwOz TUgWwWxWyW"zW%[>({>+|>.}> 1~> 4> M%Y$LY$M* %L* %M+Q+&'=+* %L*+5*6M*,7*** %L* %L* 8* 8NNV $-4<BGLQV^gryO*RSTU}WugW MO9Y*2*2:L+;NOGUPK e}7 -{{applications/RepeatGettor.class1` 78 9:; < = >?@ 7 AB 7C D E F GH IJ IK LM N OPrepRfLjava/io/RandomAccessFile;idRf'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/RepeatGettor;repFileLjava/lang/String;idFile getRepeats()Vidrepeatsmain([Ljava/lang/String;)Vargs[Ljava/lang/String;rg SourceFileRepeatGettor.java - java/io/File Qjava/io/RandomAccessFiler R  java/io/IOExceptionjava/lang/String STjava/lang/StringBuilder> UV WT XYZ [\] ^Q ^- _-applications/RepeatGettor  ,-java/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString startsWith(Ljava/lang/String;)Zjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnclose  7*Y+N*Y-Y,N*Y-N25 !" 
$256 "4 %#$6%&7'(7)*7+*,-  } Y L Y M* L+Q Y+L,+* M,* M,* L**Nx{ !F$%'()0*8+C,J-R.Y/_0j2q3x6{5|7"*|%&}'(u.*m/* 01 OY*2*2L+!;<="234(56PK e}7|: : &applications/RepeatPerCalculator.class1 *QR STU V 'WXY QZ Q [\ ] ^_ ` a b c d efgh i j k l m nop Q qr s t uvw 'S 'xyinLjava/io/RandomAccessFile;(Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this"Lapplications/RepeatPerCalculator; repeatFileLjava/lang/String; calculatePer()VperFlentotalRepiIjlineidlenStr totalRepStrlenVecLjava/util/Vector;perVecmain([Ljava/lang/String;)Vargs[Ljava/lang/String;rpc SourceFileRepeatPerCalculator.java -; java/io/File -.java/io/RandomAccessFiler -z +,java/io/IOExceptionjava/lang/Stringjava/util/Vector {|ID }~ |  Total length of all repeats: ;  java/lang/StringBuilder  | . applications/RepeatPerCalculator :;java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String; startsWith(Ljava/lang/String;)ZtrimindexOf(Ljava/lang/String;)I substring(I)Ljava/lang/String;(II)Ljava/lang/String; lastIndexOfadd(Ljava/lang/Object;)Zjava/lang/Float parseFloat(Ljava/lang/String;)FvalueOf(F)Ljava/lang/String;closesize()Iremove(I)Ljava/lang/Object;(Ljava/lang/Object;)Ijava/lang/SystemoutLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(F)Ljava/lang/StringBuilder;toStringjava/io/PrintStreamprintln '*+,-./ *Y+M*Y,M0 1* 2345 67 89:;/ i Y L Y M Y N Y : Y : Y : * L+++L+6 + `L+6 + M+6 + `N-W-8+* L+L+6 + `:8n8 W* Lc*6  w  N   86  3-6   W   b n8Ͳ Y!-"#"$%&: 3cf0&!!3$;%?&H'M(U)^*f+n,v-./01234567:<=>?@ B CDE G(HBKcOfMhP1 <=D>=?=U{@A  Z<=~@A  WBA h45 i67aC9YD9QE9!HF9*?GH36IH JK/L'Y*2(L+)0T UV1LM N7OPPK e}72z)applications/SEGCARDLCRBlocksGettor.class1S 12 345 6 789 1 :; <= >? 
@AB @C DE 3 FGrfLjava/io/RandomAccessFile;(Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this%Lapplications/SEGCARDLCRBlocksGettor;fileNameLjava/lang/String;get()Vlinemain([Ljava/lang/String;)Vargs[Ljava/lang/String;sclg SourceFileSEGCARDLCRBlocksGettor.java ( java/io/File java/io/RandomAccessFiler H java/io/IOExceptionjava/lang/String IJID: KLIDM NOP QLLLLLL Q( R(#applications/SEGCARDLCRBlocksGettor '(java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String; startsWith(Ljava/lang/String;)Zjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnclose  *Y+M*Y,M  *  !" #$ %&'( Y L* L+k++ * L++ ڲ+* L+ * L* L+* L*MJ!,9@HQ\ d!k"q#|&)(* !"#$)& *+LY*2L+/ 01,- .$/0PK e}7k)mxxapplications/SeqGettor.class1^ 67 89: ; < =>? 6 @A 6B C D E FG HI JK L MNseqRfLjava/io/RandomAccessFile;idRf'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/SeqGettor;seqFileLjava/lang/String;idFile getSequences()Vidseqmain([Ljava/lang/String;)Vargs[Ljava/lang/String;gs SourceFileSeqGettor.java , java/io/File Ojava/io/RandomAccessFiler P  java/io/IOExceptionjava/lang/String QRjava/lang/StringBuilder> ST UR VWX YZ[ \O ],applications/SeqGettor  +,java/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)VreadLine()Ljava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString startsWith(Ljava/lang/String;)Zjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnclose 7*Y+N*Y-Y,N*Y-N25  " $256!4 %"#6$%7&'7()7*)+, Y L Y M* L+_ Y+L,+* M,* M,,* M* L**N  J"#%&'0(8)C*J+R,[-b.m0x23657!*$%&'-){.) 
/0OY*2*2L+ ;<=!123'45PK e}7@ @ applications/SeqIdGettor.class1 +WX 'YZ[ \]B ^ '_`a Wb W c d e f gh i jkl W mn o p qr qst W uv w x qy z{ 'W '| '}~TOTALNUMI ConstantValue RANDOMNUMdsortedR[Ljava/lang/Integer;rfLjava/io/RandomAccessFile;()VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;thisLapplications/SeqIdGettor; getRandomNumsrtmpIgLjava/lang/Integer;i generatorLjava/util/Random;krandomsLjava/util/Vector; getSeqIDsindexseqIdLjava/lang/String;lineNummain([Ljava/lang/String;)Vargs[Ljava/lang/String;sig SourceFileSeqIdGettor.java 67java/lang/Integer 23 java/io/FileM/cise/research38/tamer/xli/LCR/data/swissprot/repeatPercentage/combinedRepPer 6java/io/RandomAccessFile 6 45java/io/IOExceptionjava/util/Randomjava/util/Vector 6   java/lang/StringBuilder    7java/lang/String :  7applications/SeqIdGettor A7 K7java/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)VnextInt(I)I(I)VindexOf(Ljava/lang/Object;)Iadd(Ljava/lang/Object;)Zapplications/MergeSort mergeSort([Ljava/lang/Comparable;)VintValue()Ijava/lang/SystemoutLjava/io/PrintStream;append(I)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintprintlnreadLine(Ljava/lang/String;)I substring(II)Ljava/lang/String;close '+,-./0-.12345678***dYL*Y+ L %( 9 %(): ;<)=>*?@A78F Y L>Y:d:+۶=Y:W*S*6d-*2=YҲ9J"# %&'!(%)/*:,B-J.P4W5a6l759::R!/B-/!CDlB-Z1E-?@FG H-IJK78[Y L>6d>*2=* !L+"#6+$L+%* &LVY 9>?@ ABC$D,E2G:HBIIAOKVNYMZO:H:L-0B- BE-NMN LO-Z=>[?@ PQ8Q'Y(L+)+*9ST UV:RS T@UVPK e}74E}}applications/ShannonGbm.class1@     !" # $%&  ' ( ) * + , - . / 0 12  3  45 6789:;<=>?@(ABCDEFG 4 H 4IJ 4K 4L MN 4O P ^Q RST UVW B BX BY BZ U[ U\] 4^ _` ^ab Ncd Ne f g h ij N k l 4m n o Np Nqr s ^ tu ^Zv Bwxyz g { N| } g~ i  N N N N g g g         N N N  g g            4 ^ @ I<61      A ?        
}                   comCutD ConstantValuefLjava/io/File;rfLjava/io/RandomAccessFile;verticesLjava/util/Vector; subVerticesedgessubEdges vertexQueuelps repeatMatrix[[FnonRepeatMatrixfVecNor[F fVecUnNor scoringMatrix[[Dalphabetcc#Lapplications/ComplexityCalculator;(Ljava/lang/String;)VCodeLineNumberTableLocalVariableTableexLjava/io/IOException;iIthisLapplications/ShannonGbm;fileNameLjava/lang/String;initializeAlphabet()VreadRNRMatricesindexjrfmrow matricesFilereadScoringMatrixklinescoreprintMatricesRowByRowcreateFirstVectorwindowlen tmpWindowletter printVector(I)Vmark tmpVectorconstructSingleVertex(II)Vstartend startLetter endLettervLapplications/Vertex; dummySourceworkOnFirstWindowletter2letter1colcheckProbablity(II)Z constructZ difference1F difference2 addVertices(Ljava/lang/String;II)VstartPostmpIndexconstructVertices(Ljava/lang/String;I)Vstrth4similarpreviousVecUnNOr oldLetter newLettercheckSimilarity'(Ljava/lang/String;Ljava/lang/String;)Zc1c2findLetterPercentageVer(F)VlFappearedLettersperposInt[Ljava/lang/Integer;computePercentage(FLjava/util/Vector;)VlalconstructEdges(III)Vmth1th2th3pqv1v2eLapplications/Edge;checkConditions (IIIIIII)Z satisfiedfoomodifyVertexQueue'(Ljava/util/Vector;Ljava/util/Vector;)VtmpQueue tmpVerticesprintVertexQueueprevious printVertices printEdgestmpEdges sourceVersinkVerlengthaddDummySourceaddToSubVerticesVertexQueue(Lapplications/Vertex;)V addToSubEdgesfindLongestPath()Ljava/util/Vector;lptraverseSubEdgesverfirstwsourceLPsinkLP findEdges(Lapplications/Vertex;)IfindmodifyVertexInEdgestraverseVerticesmaxVermaxLpconstructLongestPath)(Lapplications/Vertex;)Ljava/util/Vector;printLongestPath(Ljava/util/Vector;)V identifyLCRscheckExistence(Lapplications/Vertex;)Zcopyvt1vt2clusterSubEdges tmpSubEdgesextractConnectedGraphsameexistsisrfoundworkOnSequence(Ljava/lang/String;IIII)IgenerateSequence&(Ljava/lang/String;)Ljava/lang/String;sequencestrTmplastSeqprintPositionsposprintLCRBlocks difference LCRBlocks getPositions(I)Ljava/util/Vector; 
sortPositions&(Ljava/util/Vector;)Ljava/util/Vector; positionscurrent tmpLCRBlocksextend;(IIILjava/lang/String;Ljava/lang/String;)Ljava/util/Vector;decendPoslimit directionseqdecRegspointer startDecPos endDecPoscom1com2extReg shareLetterstr1str2sharedcheckContribution9(Ljava/lang/String;Ljava/util/Vector;Ljava/lang/String;)Z currentBlock contributedregsblock appendLcrs8(Ljava/util/Vector;Ljava/util/Vector;)Ljava/util/Vector;lcrs appendedLcrs lowComRegstmpLcrs pickUpDrop8(Ljava/util/Vector;Ljava/lang/String;)Ljava/util/Vector;comcbStartcbEndcombine lcrBlockStart lcrBlockEnd extendToLeftblocks frontLcrsbackLcrs tmpBLOCKStmpBlock isFirstBlock mergePurgeendIndex startIndex nextBlockcheckCombinedSubBlock((Ljava/lang/String;Ljava/lang/String;D)Zseq1seq2cCutdelete findAlignment8(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;s1Ljaligner/Sequence;s2 alignmentLjaligner/Alignment; similarLenLjava/lang/Exception;aliPos checkLeftRegs+(IIIILjava/lang/String;D)Ljava/util/Vector;aliStartaliEndleft addToResult startResultendLeftresult tmpResult checkAdjBlockM(IILjava/lang/String;Ljava/lang/String;DLjava/lang/String;)Ljava/util/Vector;index2 aliStart2aliEnd2aliSeq1aliSeq2 aliStart1aliEnd1decOrNotstart1end1adjBlockindex1start2end2checkDeletability:(Ljava/util/Vector;ILjava/lang/String;D)Ljava/util/Vector;maxIndexreadSampledLenRepPerfilterdiff1diff2shortestlongestrSizefromBacksampleseqLenrangeprePer preLongest singleCommax postProcess9(ILjava/lang/String;Ljava/lang/String;)Ljava/util/Vector;computeLCRPercentage'(Ljava/util/Vector;Ljava/lang/String;)V printLCRs9(Ljava/util/Vector;Ljava/lang/String;Ljava/lang/String;)VmsLapplications/Masker;LCRs posBlocksstartt(IIIILjava/lang/String;)VidnextIdmain([Ljava/lang/String;)Vargs[Ljava/lang/String;g SourceFileShannonGbm.java   java/io/File java/io/RandomAccessFiler  java/io/IOExceptionjava/util/Vector !applications/ComplexityCalculator A RNCQEGHLKMPSTWYVjava/lang/String         Non-Repeat matrix:  java/lang/StringBuilder     Repeat 
matrix: `  applications/Vertex 0 c  #$    *+       java/lang/Integer    The percentage is:  /  = applications/Edge QR     All vertices in the queue:    All vertices in the graph: All edges in the graph:   lp:  indegree:   weight: bc dc   ! " hc rf no { #; $ qc$This is a vertex with outdegree zero %c &c uv a ef |V "wrong1wrong2wrong3 z{wrong4wrong5  ./ EF  yf> '(!- ) *+ 7 ,- right . /0BLOSUM621 234 567 8jaligner/formats/Pair 9:java/lang/Exceptionfront 2   backknowledge/sampledLenRepPer; < f,: =+         applications/Masker >?   applications/ShannonGbm knowledge/blosum62Matrix  java/lang/Object#(Ljava/io/File;Ljava/lang/String;)Vadd(Ljava/lang/Object;)ZreadLine()Ljava/lang/String;trimindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;java/lang/Float parseFloat(Ljava/lang/String;)F(I)Ljava/lang/String;closeparseIntjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnappend(F)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringprint()I(Ljava/lang/Object;)I)(Ljava/lang/String;Ljava/lang/String;IF)VsetPredecessorjava/lang/Mathabs(F)Fequalssize elementAt(I)Ljava/lang/Object;getStartLetter getEndLettergetapplications/MergeSort mergeSort([Ljava/lang/Comparable;)V(I)Ljava/lang/StringBuilder; incIndegree setElementAt(Ljava/lang/Object;I)V.(Lapplications/Vertex;Lapplications/Vertex;F)V(I)IcleargetPredecessor()Lapplications/Vertex; getIndegree getWeight()FgetLP getSourcegetSink(ILjava/lang/Object;)VisEmpty()ZremovesetLP decIndegree setSourcesetSink startsWith(Ljava/lang/String;)ZintValuecalculateEntropy(Ljava/lang/String;)D lastElement()Ljava/lang/Object;jaligner/util/SequenceParserparse'(Ljava/lang/String;)Ljaligner/Sequence;jaligner/matrix/MatrixLoaderload,(Ljava/lang/String;)Ljaligner/matrix/Matrix;jaligner/SmithWatermanGotohalignV(Ljaligner/Sequence;Ljaligner/Sequence;Ljaligner/matrix/Matrix;FF)Ljaligner/Alignment;jaligner/Alignment 
getSimilarityformat((Ljaligner/Alignment;)Ljava/lang/String;java/lang/Double(D)Ljava/lang/String; parseDoublemask'(Ljava/lang/String;Ljava/lang/String;)V =o***Y+*Y* M* Y * Y * Y * Y * Y * Y *****=* Q*Y*%( ^4689%<(;)=4>??J@UA`BkCwDEFGHIHJKL*)R* Y * W*! W*" W*# W*$ W*% W*& W*' W*( W*) W** W*+ W*, W*- W*. W*/ W*0 W*1 W*2 W*3 WZP QRS)T3U=VGWQX[YeZo[y\]^_`abcde 9Y+MY,N4Y5:6m-6:-6:7:6J896*2:;Q*2;Q`<:-6W-6W-6W6m-6:-6:7:6J896*2:;Q*2;Q`<:-=M  k lmn'o-p3q:rDsMtSukxzzrn}~~z M8=N q8Nq F ~4Y5M4Y5NY+:Y:6E6M66,,`:7N*2->R`6ӄ=:x{ B'17:DRagmsx{}f =0:3 *I^'Q}~~v n  Q?@A<C=)?BYC*20D8EFGײ?H?H?H?H?IA<C=)?BYC*20D8EFGײ?H?H?HR8>DJPV\dlt4, Fn,fF :+J=+N4Y5:62-::*K6**0 bQ-!,<BHR`fp\ ,5KixFM *M*M>$?BYC,0DLEFGܲ?H&  9?E*'FFA>`MN`M:NY- O:NYPP O:Q*  W,3=H>>>7/!,A *+R+J66666dq6+`:M`S+```:N*,K6*-K6*21*S*`T> #. 8 I S ] lw\ I? .`!w ~{"#$6>*20*0fU8*20*0fU8>*24>666"4%& '(* )(*+b+J64Y5:66dF+`::*K6*21*S*``dT. #$%&'*(5)C*M+[&a-RLbbbb,\SP-./+J66NYV::+:: 4Y5: 4Y5: * W6  d :: +  `::  d:: * K6 * * 0 fQ* * 0nQ* K6 * * 0 bQ* * 0nQ*   X rZ23 456 72889;:D;M<Y=d>o?@ABCDEFGz oZ 01 2&3  )4 25 ;, 67b>+,Y>MN O P*89 2&:; * Z= Y NNYV:6O* [N:\:-K6 - W]:-K6 - W-Z=#n8^:6"-_4:^Y`Sa6-2b:?BYCELEFGӲ?HrVWXY"Z0[7\?]E^L_S`[aabhYnesfyhijkimnopnru710?)R0%00<(=ym>(g?@ABr,Z>#n8?BYCcEdeE#DfEDFA6.,[4:?BYCELEFGҲ?H& yz {7|@}K~e|kqHK0:1rrC(rDm g>(EFC 4Y5:4Y5:6666 * Z6 NYV: NYV: gYh: 6 d* _N:  \:>6 ]:>66`6 |* _N:  \:>6 ]:>6 * i. j*  kgY  l: * Wd6~<r'9BN\cjqx{ {&GE   H I J KL 'C 0M 9N BOP QR  `6ddd6  m6  Ed=83- dd 66.  
#3CTZ]f ````K`L`H`I`J]S& ST UVsNYPP ON*n*- W*WXYg?oANYVL+M*Z>6*_NL+pM,e?BYC+\EE+]ELE+qdrE+sDrE+tDrE,\EE,]EFAJ?BYC+\EE+]ELE+qdrE+sDrE+tDFA82 #0594ZC[ Y M *M* MNYVN-:,Z6?BYCuEdFA6,_NN-p:g?BYC-\EE-]ELE-qdrE-sDrE-tDrE\EE]EFAJ?BYC-\EE-]ELE-qdrE-sDrE-tDFA6B !$*DNX^cHGG X!$Z*C\  Y M *M*MgYhNNYV:NYV:4Y5:,Z6?BYCvEdFA6,_gN-w:-x:?BYC\EE]EyEtDzEqd{E\EE]EyEtDzEqd|E-}DFAaB !3<B\fpv|\ _G]!OP*^3_<0B`aYNYPP OL*+~*+ bcZ*+*n*+ W dcLNYVMgYhN*Z6d6**_NMgY+, lN*-ױ&     $1<E K>,LLD<OP3Cef-NYVL**NL*+*M,&+ -%+ghc{3=>gYh:NYV:NYV:4Y5: 8 8 8 *Z6 6 * *+6  =   ** [g:w:+t8 x:*K6 t8 }8 b  b8  Q* k*q6  * W* W*Z6 >E?A=4=/("# $%&('1(=)G*N+T,Y-^/s0123456789:;=>?@ABCDEFG H(I-L2No 33i1&/j& &OP^_( 0+k(.l( 1m( : = no[=*Z>6NYV:gYh:1,*_g:w:+ 6ք=6 R S TUV)W6X=YFZL\R^W_Y`H[[iY QC Np&E<OPqchNYVMgYhN*Z66G*_gN-wM,+ -+-xM,+-+*-k:efgh#i0j5k=lEnJoRpWrahgt>Khh_`XOPOCrfYNYVLNYVM*Z> 886**_NL+t8 8+M*,:2 xyz{(|5};~CGI{OVR"-YQIsAC>`(:t(Vguv5 Y MNYPP ON+:-,p:,")3455s-gwx_=NYVN4Y5:+Z6=:+_NN?BYC-\EE-]ELEFGƲ?H*  !*RX^>__g] UL0F`yfG **L+  gz{\*+K=   |V0+Z>gYh:6+_g:, W")/>00}0~+C"OP\ Y LgYhMNYVNNYV:*x*gM+, W,wN6*Z6K*A*[gM,w:-+, W*W*Z6*+*R!+7=BEN_lr{HEXGNOOP!i c<NYVMgYhN Y : :*gN*- W-wM*, W* , ?A-xM*, W, W* , ?A-wM<66*Z6wp*[gN-w:,E*W*- W-x:* W W* ?A6*Z6NM66*Z6*[gN-w:, 6j-x: , X*6  )* W W*  ?A*gN*- W*Z6k6*Z6t*[gN-w: , R*W*- W-x: * 6  )* W W*  ?A6}6w*NS '+7@ENYafov  %(+4@M S \ b hqy~ !"#$%&' ()*!+&,0-8.D/L1O3R5U7[9^;b<Wi&GyyJ& ha Sv!+& [i (3G+0&4'&caj& YQOPHW J*+6*6*+*n*n**:* W6 @B CEFG&H-I1J7MANDOGP\ 7 gJJ0JHJIJJJ1 @5p&34Y5M+N6-M--?+A* 6N-/-&-7NBYC,E-EFM* 6N-$-BYC-EE,EFM:, JVW X Z[\ ^'_/a<bAcTd_fcglhlkn>0  ~&xF+Z=4Y5N6,+_4N?BYC-ELEFGԲ?H"st uv w9u?yEz4/FFAC 90x t4Y5M+Z>6Z+_4M,96,::,`<:>>d6?BYC,ErEFG?H6 ~  (1;HNgms\ (?16;,H]ttl0 gC Y M4Y5N*Z66 Y :NYV:6l*_ :Z6 6   G _N:\N,-K6 ,- W]N,-K6 ,- W ,V%.7ELVbhou{p OK LN 1o 0C%}g.tU Y M4Y5N+Z666^:6 +_4N^Y-`Sa Y :4Y5: 2d6`M: 6  T 2bN 26`(BYC EEMEF W-: 6,- W BYC EEMEF WZ6 Y : 6  I _4N-96 - :: - `<:  > >d6  - W  ##-7DJOXalu (2?ELR&$xX - 2 ? 
M UUME0?<9Z#2?@Xa S  Y :666 9 9 d::YK6d6dx*9 ::*9   E6`6 946`6 "BYCdE dFvdmgb*9 ::*9   46`6 "BYCdE dF61`6 "BYCdE dFZB6`6`z*9 d::*9   E6d6946d6  "BYCdE dF W}`jd*9 d::*9   46d6  "BYCdE dF W,'d6 BYCdE dFZF #-05DOYdlqt} +1<BKj rux} $%'()-/13'435>6F7I9O:X;w?}ABDGO0B&x<&,     #7K>+:4Y5:J62::,96>J <:. TUVW#X+Y1Z6\>]F^I_H+KKKI&F0=? s6,:4Y5:6Z6SN[4:96  :>6  `<>6 - d ::*+6:defgh%i1j:kGlUmaojpmqprz :3 G& U ssssp&mdaZ~ +N,:-4 W-wx yz4  . Y N Y : Y :+:4Y5:4Y5:6 6 6 6 6 -nn66664:96  `<>64:96  :>6  `<>6  "  d`6 6664:96  :>6  `<>6 6 6 6 *   ,Nd6 *   ,N,J`6 *   ,:996  :>d6 `<>6*,:96-Zt-[4:96  :>6(*,:-,6  d6 6 6BYCdEdF W68/*,:,6  `6-Z6 -Z64:96  `<>61Z6  d4:96  :>6%  6  6  `6BYCdEdF W -Z  WZV/2>FJOUX[cmv #(+.?DJX`or{$'5FKT[ahks}  ( + r|&'&RUX&[p&...&&/2& 5 8 ;, >  +M4Y5N4Y5:,Z66`,[4N-96-`<>6,`[4:96  :>6  d  ABYC-:EE `<EFN,W,W,-,Z6WQ6,Z6,^#,6>KXan~ !$%&(*+;p >Kzad nW  +6*BYC+E,EF9)6@AB%C(D>++++(& U?+A?,A4Y5N+:,::6YN:-NQ2 IJKMN"P4Q;RASNYQWS[\ 2",4;SOUUU?@ Y :9 B*d`d:9  $BYCdE`ddFd`d@*`d:9  #BYC`dEdF W. ` a bc(e0fQh^isk{ln\     +N4Y5:4Y5:66,Z,[4:96`<>6 6  [+ZE+[4:96:>6   6 +`6+ W6 r+^stuv#w.x7yEzH{M|V}a~jw~vz w 7nE` H]&  +] Y :9 d:: -96 - :>6 - `<>6 d::4Y5:Y* : * :J?A96 r96 :>6 `:>64Y5:4Y5:Y `d `d::`d`d::`<:?BYCEEFA96  :>6 `<>6Y`d`d:: `d `d::6YO6BYC`ddE`ddF W*:*:6BYC`ddE`ddF W*:*:BYC `ddE `ddF W* :*:.  +8ENXeow#0>H^sv >PZ  O0*>v&]]]]]]] T Q F > +2 8%ENm Y :4Y5:66 +[4:96  :>6 `<>6  +d[4:* -:Z:+Zd0+`[4:* -:Z  WF "+8FJVgoyf   +~ fB Y LYMY,N-6:+ W-6:-=M+<? . #(/8< ?@ >*# @B:#+N Y :-Z8994Y5: 4Y5: -Z6  R- [4:  96 ,  :>d  `<>:: * 9 W 6 6 699*:,J664Y5:996 Z [4:7:ö96:>6r96`:>6Ķ96`<9gk96A:d6 d6!! gk96gk96 691 gk96  WZ66 9 , [4:  9 9 6 49 6 6 -Z8 Y :   - [4:  96, :>d `<>:: * 9 Y :*- ,:Z6 6! e d[4:  Y 6!- W6"" "[4: -  "! - W-Z8- W -Z8 -` ,4@KTs~)*,-./012345678'95:?;E=L>S?Z@fApBvEFIJKLMNOPQRSTUVWXY[^_`bcd ef/g:hCjblmnuo~pqrstuvwz{|~{ j$T5 7X S0 Z)!JC$ "{ x&!C  }zwqkh&_\>YV  (#0 , W Y : Y :*:Z *:*,:*:*,:*,:*,-.  !)2:CLTHWWWW NE>  ? 
0  r+Z64Y5:6/+_4:BYCErEEF:7:-PY?AY:,ϱ2 $?ELU`iq\ $03i rrrrlc0 4Y5:4Y5:4Y5:6 6 * 6:*:9(96 : :: `<:: ?H?A :6 * n*n*n6  * Q* Q 7:*6 * : ?* =: ! !!&/7<GPT^lpu{  HIJ1 0 !j& B*2><*2>=*2>>6Y*2:*2ֶ*2ر& $,3A>B;H4I-J*1$PK f}7վݼ applications/ShannonGbm.javaCopy/* ** Author: Xuehui Li ** Date: Dec, 2005 ** "gbm" or "GBM" is the abbrevaition of "A graph-based method for detecting low-complexity reions". ** This is program is used to find low-complexity regions in sequences when Shannnon Entropy is used to repleace our new complexity measures in GBA. ** NOTE: all vertices and edges have topological orders ** There are totally six input paramaters. The first one is the sequence file ** name. The second one the learned matrix file (/cise/research/tamer/xli/LCR ** /graphLCR/swissprotLearnedMatrices). The nex three are the threshold 1, ** threshold 2 and threshold 3, respectively. At this time, all LCR Blocks ** (or masked sequences) generated in both /cise/research/tamer/xli/LCR/ ** graphLCR/swissprotLCRBlocks/ (/cise/research/tamer/xli/LCR/graphLCR/ ** swissprotMaskedSeqs) and /cise/research/tamer/xli/LCR/graphLCR/ ** pfamLCRBlocks/ are based on the three thresholds: " 3 15 5". The last ** parameter is used to choose the output format. "0" means LCR blocks will ** be generated. "1" means masked sequences will be generated. ** How to run the program? ** One example (to generate LCR blocks instead of masked sequences) with fixed sampling, i.e., 27 LCRs from five sequences masked by SEG: ** java applications.ShannonGbm ../data/swissprot/sequenceInfor/seqFromFlybase swissprotLearnedMatrices/wForgetRate/normalized/combinedMatricesRowByRow095 3 15 5 knowledge/lcrs 0 > wShannonEntropy/LCRBlocks/mimMaskedBlocks ** if conditional-sampling (i.e., G_t / sqrt(t) <= C * u_t)is used, the "knowledge/lcrs" will not make sense any more. 
*/ package applications; import jaligner.Alignment; import jaligner.Sequence; import jaligner.SmithWatermanGotoh; import jaligner.formats.Pair; import jaligner.matrix.MatrixLoader; import jaligner.util.SequenceParser; import java.io.*; import java.util.*; class ShannonGbm { // vertices and edges are vectors used to keep all the vertices, edges in a graph generated from a sequence, respectively. vertexQueue is a vector used to keep all vertices whose indegree is zero. lps is a vector used to keep all the longest paths in all connected subgraphs of a sequence. Every longest path in lps is a vector of vertices, excluding the dummy source. // subVertices and subEdges are vectors used to keep all the vertices, edges in a connected-graph which is a subgraph of the graph generated from a sequence, respectively. private static final double comCut = 3.7491631225622157; //the complexity cutOff value for extending the longest-path intervals. private File f; private RandomAccessFile rf; private Vector vertices, subVertices, edges, subEdges, vertexQueue, lps; private float[][] repeatMatrix, nonRepeatMatrix; private float[] fVecNor, fVecUnNor; private double[][] scoringMatrix; private Vector alphabet; // private double comCut = 0; // the complexity cut-off value private ComplexityCalculator cc; public ShannonGbm ( String fileName ) { // the graph is given in a file where every line represents an edge and has the fromat of "source sink weight" initializeAlphabet(); try{ f = new File ( fileName ); rf = new RandomAccessFile ( f, "r" ); } catch ( IOException ex ) { } vertices = new Vector(); subVertices = new Vector(); edges = new Vector(); subEdges = new Vector(); vertexQueue = new Vector(); lps = new Vector(); repeatMatrix = new float[20][20]; nonRepeatMatrix = new float[20][20]; fVecNor = new float[20]; fVecUnNor = new float[20]; scoringMatrix = new double[20][20]; for ( int i = 0; i < 20; i++ ) fVecUnNor[i] = 0f; cc = new ComplexityCalculator(); cc.initializeAlphabet(); } public 
void initializeAlphabet() { alphabet = new Vector(); alphabet.add( "A" ); alphabet.add( "R" ); alphabet.add( "N" ); alphabet.add( "D" ); alphabet.add( "C" ); alphabet.add( "Q" ); alphabet.add( "E" ); alphabet.add( "G" ); alphabet.add( "H" ); alphabet.add( "I" ); alphabet.add( "L" ); alphabet.add( "K" ); alphabet.add( "M" ); alphabet.add( "F" ); alphabet.add( "P" ); alphabet.add( "S" ); alphabet.add( "T" ); alphabet.add( "W" ); alphabet.add( "Y" ); alphabet.add( "V" ); } // get repeat/non-repeat matrices public void readRNRMatrices( String matricesFile ) { try { File f = new File( matricesFile ); RandomAccessFile rfm = new RandomAccessFile ( f, "r" ); String row = new String(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { nonRepeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { nonRepeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.readLine(); rfm.readLine(); rfm.readLine(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { repeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { repeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.close(); } catch ( IOException ex ) { } } public void readScoringMatrix( String fileName ){ String line = new String(), score = new String(); try { File f = new File( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); for ( int i = 0; i < 20; i++ ) { line = rf.readLine(); int k = 0; for ( int j = 0; j < 20; j++) { score = line.substring( k, k + 2 ).trim(); scoringMatrix[i][j] = Integer.parseInt( score ); k = k + 3; } } rf.close(); } catch ( IOException ex ) { } } public void printMatricesRowByRow() { System.out.println( "Non-Repeat matrix: " ); for 
( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( nonRepeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); System.out.println(); System.out.println( "Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( repeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); } /* public void getComCut( String fileName ) { ComplexityCalculator cc = new ComplexityCalculator(); cc.initializeAlphabet(); cc.initializeNewAlphabet(); cc.computeNewScoringMatrix(); cc.normalizeNewScoringMatrixPow(); try { File f = new File ( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); String line = new String(); double com, sum = 0, num = 0; line = rf.readLine(); while ( line != null ) { line = line.trim(); com = cc.calculate2LetterEntropyWScoMatrix( line ); //com = cc.calculateModifiedEntropy( line ); sum = sum + com; num = num + 1; line = rf.readLine(); } comCut = sum / num; System.out.println( "The cut-off value is: " + sum + " / " + num + " = " + comCut ); rf.close(); } catch( IOException ex ){ } } */ public void createFirstVector( String window ) { int len = window.length(); String tmpWindow = window, letter = new String(); for ( int i = 0; i < len; i++ ) { letter = tmpWindow.substring( 0, 1 ); int index = alphabet.indexOf( letter ); fVecUnNor[ index ]= fVecUnNor[index] + 1f; tmpWindow = tmpWindow.substring( 1 ); } for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecUnNor[ i ]; for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecNor[ i ] / len; } // o for unNormalized, 1 for normalized public void printVector( int mark ) { float[] tmpVector = new float[ 20 ]; if ( mark == 0 ) tmpVector = fVecUnNor; else tmpVector = fVecNor; for ( int i = 0; i < 20; i++ ) System.out.print( tmpVector[i] + " " ); System.out.println(); } public void constructSingleVertex( int start, int end ) { String startLetter = Integer.toString( start + 1 ); 
String endLetter = Integer.toString( end + 1 ); Vertex v = new Vertex( startLetter, endLetter, 1, 1.0f ); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); v.setPredecessor( dummySource ); vertices.add( v ); } public void workOnFirstWindow( String window ) { String letter1, letter2; createFirstVector( window ); int len = window.length(), j = 0, row = 0, col = 0; for ( int i = 0; i < len - 1; i++ ) { j = 1; letter1 = window.substring( i, i + 1 ); while ( ( i + j ) < len ) { letter2 = window.substring( i + j, i + j + 1 ); row = alphabet.indexOf( letter1 ); col = alphabet.indexOf( letter2 ); if ( scoringMatrix[row][col] > 1 ) if ( checkProbablity( row, col )) constructSingleVertex( i, i + j ); ++j; } } } // at this time, row == col, since we only consider same letters public boolean checkProbablity( int row, int col ) { boolean construct = false; float difference1 = Math.abs( repeatMatrix[ row][col] - fVecNor[row] ); float difference2 = Math.abs( nonRepeatMatrix[row][col] - fVecNor[row] ); if ( difference2 > difference1 ) { construct = true; } return construct; } public void addVertices( String window, int index, int startPos ) { int len = window.length(); String letter = new String(); int tmpIndex = 0; for ( int i = 0; i < len - 1; i++ ) { letter = window.substring( i, i + 1 ); tmpIndex = alphabet.indexOf( letter ); if ( scoringMatrix[index][tmpIndex] > 1 ) if ( checkProbablity( index, tmpIndex )) constructSingleVertex( startPos + i, startPos + len - 1 ); } } public void constructVertices( String str, int th4 ) { char c1, c2; int len = str.length(); boolean similar = false; Vertex v = new Vertex(); float[] previousVecUnNOr = new float[20]; String window = str.substring( 0, th4 ); String oldLetter = new String(), newLetter = new String(); workOnFirstWindow( window ); int startPos = 1; while( startPos <= ( len - th4 ) ) { oldLetter = window.substring( 0, 1 ); window = str.substring( startPos, startPos + th4 ); newLetter = window.substring( th4 - 1, th4 ); int index 
= alphabet.indexOf( oldLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] - 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; index = alphabet.indexOf( newLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] + 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; addVertices( window, index, startPos ); ++startPos; } } // return the actual weight of the vertex ??? /// to be modified later to include similar cases public boolean checkSimilarity( String c1, String c2 ) { boolean similar = false; if ( c1.equals( c2 )) similar = true; return similar; } // find the percentage of letters appearing in vertices public void findLetterPercentageVer( float lF ) { int len = vertices.size(); Vector appearedLetters = new Vector(); Vertex v = new Vertex(); for ( int i = 0; i < len ; i++ ) { v = (Vertex)vertices.elementAt( i ); String str = v.getStartLetter(); int index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); str = v.getEndLetter(); index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); } ///// Sort first len = appearedLetters.size(); float per = len / lF; // System.out.println( "The letter percentage after vertex construction is: " + len + " / " + lF + " = " + per ); Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { String str = (String)appearedLetters.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); for ( int i = 0; i < len; i ++ ) { String str = posInt[i].toString(); System.out.print( str + " " ); } System.out.println(); // computePercentage( lF, appearedLetters ); } public void computePercentage( float l, Vector al ) { int len = al.size(); float per = len / l; System.out.println( "The percentage is: " + len + " / " + l + " = " + per ); for ( int i = 0; i < len; i ++ ) { String str = (String)al.elementAt( i ); System.out.print( str + " " ); } System.out.println(); } public void constructEdges( int th1, int th2, int th3 ) { String startLetter = new String(), endLetter 
= new String(); int i = 0, j = 0, p = 0, q = 0, l = vertices.size(); Vertex v1 = new Vertex(), v2 = new Vertex(); Edge e = new Edge(); for ( int k = 0; k < l-1; k++ ) { v1 = (Vertex)vertices.get( k ); startLetter = v1.getStartLetter(); i = Integer.parseInt ( startLetter ); endLetter = v1.getEndLetter(); j = Integer.parseInt( endLetter ); boolean end = false; int m = k + 1; while (( !end )&& ( m < l )) { v2 = (Vertex)vertices.get( m ); startLetter = v2.getStartLetter(); p = Integer.parseInt ( startLetter ); endLetter = v2.getEndLetter(); q = Integer.parseInt( endLetter ); if ( checkConditions( i, j, p, q, th1, th2, th3 )) { v2.incIndegree(); // modify the vertex in vertices vertices.setElementAt( v2, m ); e = new Edge( v1, v2, 1.0f ); edges.add( e ); } else if ( ( p - i ) > th2 ) // k2 = 25 end = false; ++m; } } } // return the actual weight of the edge ??? public boolean checkConditions ( int i, int j, int p, int q, int th1, int th2, int th3 ) { boolean satisfied = false; int foo = ( j - i ) - ( q - p ); foo = Math.abs( foo ); if ( foo <= th1 ) // condition #1, k1 = 5 if (( p - i ) <= th2 ) // condition #2, k2 = 26 if ((( i <= p ) && ( p <= j ) ) && ( j <= q )) // condition #3 if (( i == p ) || ( p == j ) || ( j == q )) { // condition #4 if ((( j - i ) <= th3) && ( ( q - p ) <= th3 )) satisfied = true; } else satisfied = true; return satisfied; } public void modifyVertexQueue ( Vector tmpQueue, Vector tmpVertices ) { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void printVertexQueue() { System.out.println( "All vertices in the queue:" ); Vertex v = new Vertex(); Vertex previous = v; int l = vertexQueue.size(); for ( int i = 0; i < l; i++ ) { v = (Vertex) vertexQueue.get( i ); previous = v.getPredecessor(); if ( previous != null) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + 
previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printVertices( int m ) { Vector tmpVertices = new Vector(); if ( m == 0 ) tmpVertices = subVertices; else tmpVertices = vertices; Vertex v = new Vertex(); Vertex previous = v; int l = tmpVertices.size(); System.out.println( "All vertices in the graph: " + l ); for ( int i = 0; i < l; i++ ) { v = (Vertex) tmpVertices.get( i ); previous = v.getPredecessor(); if ( previous != null ) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printEdges( int m ) { Vector tmpEdges = new Vector(); if ( m == 0 ) tmpEdges = subEdges; else tmpEdges = edges; Edge e = new Edge(); Vertex sourceVer = new Vertex(), sinkVer = new Vertex(); String str = new String(); int length = tmpEdges.size(); System.out.println( "All edges in the graph: " + length ); for ( int i = 0; i < length; i++ ) { e = (Edge) tmpEdges.get( i ); sourceVer = e.getSource(); sinkVer = e.getSink(); System.out.println( sourceVer.getStartLetter() + "A" + sourceVer.getEndLetter() + " lp: " + sourceVer.getLP()+ " indegree: "+ sourceVer.getIndegree() + " "+ sinkVer.getStartLetter() + "A" + sinkVer.getEndLetter() + " lp: " + sinkVer.getLP() + " indegree: " + sinkVer.getIndegree() + " weight: "+ e.getWeight() ); } } public void addDummySource() { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); addToSubVerticesVertexQueue( dummySource ); addToSubEdges( dummySource ); } public void addToSubVerticesVertexQueue( Vertex dummySource ) { subVertices.add( 0, 
dummySource ); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void addToSubEdges( Vertex dummySource ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subVertices.size(); for ( int i = l-1; i > 0; i-- ) { v = (Vertex)subVertices.get( i ); e = new Edge( dummySource, v, 0.0f ); subEdges.add( 0, e ); } } public Vector findLongestPath() { Vertex v = new Vertex(); while ( !( vertexQueue.isEmpty())) { v = (Vertex) vertexQueue.remove( 0 ); traverseSubEdges( v ); } // find the vertex to which the path from the source is the longest Vector lp = traverseVertices(); return lp; } public void traverseSubEdges( Vertex ver ) { boolean end = false, first = true; Edge e = new Edge(); Vertex sourceVer = new Vertex(); Vertex sinkVer = new Vertex(); String str = new String(); float w = 0, sourceLP = 0, sinkLP = 0; int len = subEdges.size(), j = 0; if (!(subEdges.isEmpty())) j = findEdges( ver ); if ( j == -1 ) end = true; else if ( j != 0 ) { } while (( !end ) && ( j < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( j ); sourceVer = e.getSource(); if ( sourceVer.equals( ver )) { sourceLP = sourceVer.getLP(); sinkVer = e.getSink(); int i = subVertices.indexOf( sinkVer ); sinkLP = sinkVer.getLP(); w = e.getWeight(); if (( sourceLP + w ) > sinkLP ) { sinkLP = sourceLP +w; sinkVer.setLP( sinkLP ); sinkVer.setPredecessor( sourceVer ); } sinkVer.decIndegree(); subVertices.setElementAt( sinkVer, i ); modifyVertexInEdges( sinkVer ); i = sinkVer.getIndegree(); if ( i == 0 ) vertexQueue.add( sinkVer ); subEdges.remove( j ); len = subEdges.size(); first = false; } else if ( first ) { System.out.println( "This is a vertex with outdegree zero" ); end = true; } else end = true; } } public int findEdges( Vertex ver ) { int i = 0 , l = subEdges.size(); boolean find = false; Vertex v = new Vertex(); Edge e = new Edge(); while (( !find ) && ( i < l )) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( ver )) find = true; else ++i; } if ( !find ) i 
= -1; return i; } public void modifyVertexInEdges( Vertex sinkVer ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subEdges.size(); for ( int i = 0; i < l; i++ ) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( sinkVer )) e.setSource( sinkVer ); else { v = e.getSink(); if ( v.equals( sinkVer )) e.setSink( sinkVer ); } subEdges.setElementAt( e, i ); } } public Vector traverseVertices() { Vertex v = new Vertex(), maxVer = new Vertex(); int l = subVertices.size(); float length = 0f, maxLp = -2222.0f; for ( int i = 0; i < l; i++ ) { v = (Vertex)subVertices.get( i ); length = v.getLP(); if ( length > maxLp ) { maxLp = length; maxVer = v; } } Vector lp = constructLongestPath( maxVer ); return lp; } public Vector constructLongestPath( Vertex maxVer ) { Vector lp = new Vector(); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); Vertex v = maxVer; while ( !(v.equals( dummySource ))) { lp.add ( 0, v ); v = v.getPredecessor(); } return lp; } public void printLongestPath( Vector lp ) { int i = 0; Vertex v = new Vertex(); String str = new String(); int length = lp.size(); for ( i = 0; i < length; i++ ) { v = (Vertex) lp.get( i ); System.out.print( v.getStartLetter() + "A" + v.getEndLetter() + " "); } System.out.println(); } public Vector identifyLCRs( ) { addDummySource(); Vector lp = findLongestPath(); return lp; } public boolean checkExistence( Vertex v ) { int index = subVertices.indexOf( v ); if ( index == -1 ) return false; else return true; } public void copy( Vector vt1, Vector vt2 ) { int l = vt1.size(); Edge e = new Edge(); for ( int i = 0; i < l; i++ ) { e = (Edge)vt1.get( i ); vt2.add ( e ); } } // make all edges beginning with the same vertex stay together public void clusterSubEdges() { Vector tmpSubEdges = new Vector(); Edge e = new Edge(); Vertex v = new Vertex(), ver = new Vertex(); while ( ( !subEdges.isEmpty())) { e = (Edge)subEdges.remove( 0 ); tmpSubEdges.add( e ); v = e.getSource(); int m = 0; int len = subEdges.size(); while (( 
m < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( m ); ver = e.getSource(); if ( v.equals( ver )) { tmpSubEdges.add( e ); subEdges.remove( m ); } else ++m; len = subEdges.size(); } } copy( tmpSubEdges, subEdges ); } // assign values to subVertices and subEdges ( BFS ) public void extractConnectedGraph() { boolean first = true; Vertex v = new Vertex(); Edge e = new Edge(); Vector tmpQueue = new Vector(); while(( first ) || (!(tmpQueue.isEmpty()))){ if ( first ) { // start the first edge of a new connected subgraph e = (Edge)edges.remove( 0 ); subEdges.add( e ); v = e.getSource(); subVertices.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong1" ); v = e.getSink(); subVertices.add( v ); tmpQueue.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong2" ); v = e.getSource(); first = false; boolean same = true; int m = 0; int len = edges.size(); while (( same ) &&( m < len )) { // remove all those edges having the same source vertex as the first edge e = (Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong3" ); } else same = false; len = edges.size(); } } else { v = (Vertex)tmpQueue.remove( 0 ); int m = 0; boolean found = false; int len = edges.size(); //find the starting positon of those edges who start at the first vertex from tmpQueue while (( !found ) && ( m < len )) { // skip all edges starting with the vertex from the tmpQueue e = (Edge)edges.elementAt( m ); Vertex sr = e.getSource(); if ( v.equals( sr )) found = true; else { Vertex si = e.getSink(); if ( v.equals( si )) { boolean exist = checkExistence( sr ); if ( !exist ) { subVertices.add( sr ); tmpQueue.add( sr ); // work on edges whose sink vertex is the same as the vertex from tempQueue if ( 
!(vertices.remove( sr ))) System.out.println( "wrong4" ); } e = (Edge)edges.remove( m ); subEdges.add(e ); len = edges.size(); } else ++m; } } //System.out.println( "m = " + m ); boolean same = true; while ( same ) { // remove all those edges starting with the vertex from tmpQueue to subEdges len = edges.size(); if ( m < len ) { e = (Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); boolean exist = checkExistence( ver ); // check whether the sinkVertex is in the subVertices or not if ( !exist ) { subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong5" ); } } else { same = false; } } else same = false; } } } clusterSubEdges(); } public int workOnSequence( String str, int th1, int th2, int th3, int th4 ) { constructVertices( str, th4 ); //printVertices( 1 );////////////////////////////// int i = 0; constructEdges( th1, th2, th3 ); //printEdges(1);///////////////////////// boolean find = true; while ( !(edges.isEmpty())) { subVertices.clear(); subEdges.clear(); extractConnectedGraph(); Vector lp = identifyLCRs(); // the longest path in a connected subgraph //System.out.println("longest path"); ///////////////// //printLongestPath(lp);////////////// lps.add( lp ); ++i; } return i; } // combine all letters from a sequence on different lines( stings) into a single line ( string ) public String generateSequence( String str ) { String sequence = new String(); String strTmp = str; boolean lastSeq = false; try { if ( strTmp == null ) sequence = null; if ( ( strTmp != null ) && ( strTmp.startsWith( ">"))) { //System.out.println( "*******************************************" ); System.out.println( str ); strTmp = rf.readLine(); } while (( strTmp != null ) && (!(strTmp.startsWith( ">" )))) { strTmp = strTmp.trim(); sequence = sequence + strTmp; strTmp = 
rf.readLine(); } if ( strTmp != null) if ( strTmp.startsWith( ">" )) { sequence = strTmp + "!" + sequence ; } } catch ( IOException ex ) { } return sequence; } public void printPositions( Vector pos ) { int l = pos.size(); String str = new String(); for ( int i = 0; i < l; i++ ) { str = (String) pos.get( i ); System.out.print( str + " " ); } System.out.println(); } public void printLCRBlocks( Vector LCRBlocks ) { String str = new String(); int l = LCRBlocks.size(); // System.out.println( "LCR Blocks: " ); for ( int i = 0; i < l; i++ ) { str = (String)LCRBlocks.get( i ); int index = str.indexOf( "-" ); String start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) System.out.print( str + " " ); } System.out.println(); } public Vector getPositions ( int k ) { Vector pos = new Vector(); //Vector posSingleVertexOnly = new Vector(); String str = new String(); int l = lps.size(), index = 0; Vector lp = new Vector(); Vertex v = new Vertex(); // longest path for ( int i = 0; i < k; i++ ) { // get positions from those vertices in lps lp = (Vector)lps.get( i ); int len = lp.size(); for ( int j = 0; j < len; j++ ) { v = (Vertex)lp.get( j ); str = v.getStartLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); str = v.getEndLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); } } return pos; } public Vector sortPositions( Vector pos) { Vector positions = new Vector(); // used to keep the sorted positions String str = new String(); int len = pos.size(), current = 0, previous = 0; Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { str = (String)pos.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); Vector tmpLCRBlocks = new Vector(); String start = new String(); previous = posInt[0].intValue() - 1; start = Integer.toString(( previous + 1 )); for ( int i = 0; i < len; i++ ) { str = 
posInt[i].toString(); current = posInt[i].intValue(); //generate blocks of continuous positions. Say, the sorted integer array is 3,4,5,6 8,9,10,11,12,13,29,30,31. It can be represented as a vector of three strings( blocks ): 3-6, 8-13, 29-31. if ( current != ( previous + 1 )) { tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); start = str; } previous = current; positions.add( str ); } tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); len = tmpLCRBlocks.size(); Vector LCRBlocks = new Vector(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLCRBlocks.get( i ); int index = str.indexOf( "-" ); start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) // the interval is at least 3-letter long LCRBlocks.add( str ); } //printLCRs( LCRBlocks ); return LCRBlocks; } public Vector extend ( int startPos, int endPos, int limit, String direction, String seq ) { Vector decRegs = new Vector(); int pointer = 0, startDecPos = 0, endDecPos = 0; double com1 = 0, com2 = 0; String extReg = seq.substring( startPos - 1, endPos ); if ( direction.equals( "left" )) { // extend to the left( front ) boolean dec = false; pointer = startPos - 2; while ( ( pointer > limit ) && ( pointer > ( startPos - 17 )) ) { com1 = cc.calculateEntropy(extReg ); extReg = seq.substring( pointer, endPos ); com2 = cc.calculateEntropy(extReg ); if ( com1 > com2 ) { if ( !dec ) { dec = true; // System.out.println( "from false to true111111111" ); endDecPos = pointer + 2; } } else if ( dec ) { dec = false; //System.out.println( "from true to false1111111111" ); startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 11111111 " + startDecPos + "-" + endDecPos ); } } -- pointer; } if ( ( dec ) && ( pointer == ( startPos - 17 )) ) { //System.out.println( "keeping decreasing1111111111" ); while 
( ( pointer > limit ) && ( dec ) ) { com1 = cc.calculateEntropy( extReg ); extReg = seq.substring( pointer, endPos ); com2 = cc.calculateEntropy( extReg ); if ( com1 < com2 ) { //System.out.println( "from true to false2222222222" ); dec = false; startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 222222222222 " + startDecPos + "-" + endDecPos ); } } -- pointer; } } // the left extension touches the end of the last block of the current lcr blocks if (( pointer == limit ) && ( dec ) ) { startDecPos = pointer + 2; if ( com2 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 333333333333 " + startDecPos + "-" + endDecPos ); } } if ( decRegs.size() == 0 ) { //System.out.println( "left: Empty" ); } else { //System.out.print( "left: "); //printLCRs( decRegs ); } } else { boolean dec = false; pointer = endPos + 1;//////// // extend to the right( back ) while ( ( pointer < limit ) && ( pointer < ( endPos + 15 ))) { com1 = cc.calculateEntropy( extReg ); extReg = seq.substring( startPos - 1, pointer ); com2 = cc.calculateEntropy(extReg ); if ( com1 > com2 ) { if ( !dec ) { dec = true; //System.out.println( "from false to true33333333333333333" ); startDecPos = pointer -1 ; } } else if ( dec ) { dec = false; //System.out.println( "from true to false3333333333333" ); endDecPos = pointer -1; if ( com1 < comCut ) { decRegs.add( ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 4444444 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if ( ( dec ) && ( pointer == ( endPos + 15 )) ) { // keep extending until the complexity starts increasing, which means that several blocks generated from the longest path can be included into lcrs during one call of the 'extend()' based on a block while (( dec ) && ( pointer < limit )) { //System.out.println( "444444444444extReg:" + extReg ); com1= cc.calculateEntropy( 
extReg ); extReg = seq.substring( startPos - 1, pointer ); com2 = cc.calculateEntropy(extReg ); if ( com1 < com2 ) { dec = false; //System.out.println( "from true to false444444444" ); endDecPos = pointer - 1; if ( com1 < comCut ) { decRegs.add(( startDecPos ) + "-" + endDecPos ); //System.out.println( "decRegs added 5555555555555555555 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if (( pointer == limit ) && ( dec )) { endDecPos = limit - 1; //System.out.println( "decRegs added 66666666666 " + startDecPos + "-" + endDecPos ); decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); } } if ( decRegs.size() == 0 ) { //System.out.println( "right: Empty" ); } else { //System.out.print( "right: "); //printLCRs( decRegs ); } } return decRegs; } public boolean shareLetter( String str1, String str2 ) { boolean shared = false; String str = str1, letter = new String(); while (( str.length() != 0 ) && ( !shared )) { letter = str.substring( 0, 1 ); int index = str2.indexOf( letter ); if ( index != -1 ) shared = true; else if ( str.length() != 0 ) str = str.substring( 1 ); } return shared; } public boolean checkContribution( String currentBlock, Vector decRegs, String seq ) { boolean contributed = false; Vector regs = decRegs; String block = new String(); int i = 0, len = regs.size(); while ( ( i < len ) && ( !contributed )) { block = (String) regs.elementAt( i ); int index = block.indexOf( "-" ); int start= Integer.parseInt( block.substring( 0, index )); int end = Integer.parseInt( block.substring( index + 1 )); block = seq.substring( start - 1, end ); //System.out.println( "block: " + block+ " currentBlock: "+ currentBlock ); contributed = shareLetter( currentBlock, block ); ++i; } return contributed; } public Vector appendLcrs( Vector lcrs, Vector appendedLcrs ) { Vector lowComRegs = lcrs, tmpLcrs = appendedLcrs; while( !( tmpLcrs.isEmpty())) lowComRegs.add( (String)tmpLcrs.remove( 0 ) ); return lowComRegs; } public Vector pickUpDrop ( Vector blocks, String seq ) { 
Vector frontLcrs = new Vector(), backLcrs = new Vector(), lcrs = new Vector(), tmpBLOCKS = blocks; String currentBlock = new String(), tmpBlock = new String(); boolean isFirstBlock = true; int limit = 0, index = 0, startPos = 0, endPos = 0; while (( !tmpBLOCKS.isEmpty() )) { frontLcrs.clear(); backLcrs.clear(); int lcrBlockStart = 0, lcrBlockEnd= 0; boolean extendToLeft = true; // whether to extend towards the left boolean find = false; // find the current extending block // currentBlock can start in the middle of a block, or has the same starting position as a block and it doesn't have to be the block after the previous currentBlock // get the end position of the last block in lcrs if ( !( lcrs.isEmpty())) { tmpBlock = (String)lcrs.lastElement(); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 )); //System.out.println( "lcrBlockEnd: " + lcrBlockEnd ); while (( !find ) && ( !(tmpBLOCKS.isEmpty()) )) { // get the current block currentBlock = (String)tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); //System.out.println( "find currentBlock: "+ startPos + " " + endPos); if ( startPos < lcrBlockEnd ) { if ( endPos > lcrBlockEnd ) if (( endPos - lcrBlockEnd ) >= 3 ) { startPos = lcrBlockEnd + 1; extendToLeft = false; find = true; } } else find = true; } } else { currentBlock = (String) tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); find = true; } if ( find ) { //System.out.println( "currentBlock:" + currentBlock ); if ( isFirstBlock ) { limit = -1; isFirstBlock = false; // extend to the left( front ) frontLcrs = extend( startPos, endPos, limit, "left", seq ); } else if ( extendToLeft ) { limit = lcrBlockEnd - 1; // extend to the left( front ) 
frontLcrs = extend( startPos, endPos, limit, "left", seq ); } limit = seq.length() + 1; // extend to the right( back ) backLcrs = extend( startPos, endPos, limit, "right", seq ); double com = 0; index = currentBlock.indexOf( "-" ); int cbStart = Integer.parseInt( currentBlock.substring( 0, index ))- 1; int cbEnd = Integer.parseInt( currentBlock.substring( index + 1 )) ; com = cc.calculateEntropy(seq.substring( cbStart, cbEnd )); boolean contributed = false; if ( frontLcrs.size() != 0 ) { // get the start position of the first block in frontLcrs as the start position of the block to be added into lcrs tmpBlock = (String)frontLcrs.elementAt( 0 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); if ( com > comCut ) { // check whether the current block contributes to the complexity-decreasing regions or not contributed = checkContribution( seq.substring( cbStart, cbEnd ), frontLcrs, seq ); if ( !contributed ) lcrBlockEnd = startPos - 1; else { lcrBlockEnd = endPos; } } else { lcrBlockEnd = endPos; //System.out.println( "com of currentBlock: " + com ); } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } boolean combine = false; // whether to combine the last block in lcrs from frontLcrs and the block to be added into lcrs from backLcrs if (( !contributed ) && ( com > comCut )){ contributed = checkContribution( seq.substring( cbStart, cbEnd ), backLcrs, seq ); if ( !contributed ) { lcrBlockStart = endPos + 1; } else { if ( frontLcrs.size() != 0 ) combine = true; } } else if ( frontLcrs.size() != 0 ) combine = true; // get the end position of the last block in backLcrs as the end position of the block to be added into lcrs if ( !( backLcrs.isEmpty())) { tmpBlock = (String)backLcrs.lastElement( ); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 ) ); if ( combine ) { //System.out.println( "combine" ); limit = lcrs.size(); tmpBlock = (String) lcrs.remove( limit - 1 ); index = 
tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); } else { if ( com < comCut ) { lcrBlockStart = startPos; //System.out.println( "Here, com" ); } else if ( contributed ) { //System.out.println( "contributed to the back, com > comCut " ); lcrBlockStart = startPos; } else { //System.out.println( "OOOOOOOOOOOOOOOOOOOOOOOO" ); lcrBlockStart = endPos + 1; } } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } else { if((frontLcrs.size() == 0 ) && ( !contributed ) && ( com < comCut )) { lcrs.add( currentBlock ); } } // check whether to combine the last two blocks in the current lcrs //len = lcrs.length(); /* System.out.print( "current lcrs: " ); printLCRs( lcrs ); */ } } return lcrs; } public Vector mergePurge( Vector lcrs ) { Vector tmpLcrs = lcrs; String currentBlock = new String(), nextBlock = new String(); int len = tmpLcrs.size(), i = 0; while ( i < len ) { if (( i + 1 ) < len ){ currentBlock = (String) tmpLcrs.elementAt( i ); int endIndex = currentBlock.indexOf( "-" ); int end = Integer.parseInt( currentBlock.substring( endIndex +1 )); nextBlock = (String) tmpLcrs.elementAt( i + 1 ); int startIndex = nextBlock.indexOf( "-" ); int start = Integer.parseInt( nextBlock.substring( 0, startIndex )); if (( end == ( start - 1 )) || ( end == start )) { //System.out.println( currentBlock + " " + nextBlock ); currentBlock = currentBlock.substring( 0, endIndex ) + "-" + nextBlock.substring( startIndex + 1 ); tmpLcrs.remove( i ); tmpLcrs.remove( i ); tmpLcrs.add( i, currentBlock ); } else ++i; len = tmpLcrs.size(); } else ++i; } i = 0; len = tmpLcrs.size(); // System.out.println("After the extension:"); //////////////////////// //printLCRs( tmpLcrs," ", "0" ); ////////////////// /* while ( i < len ) { currentBlock = (String) tmpLcrs.elementAt( i ); int index = currentBlock.indexOf( "-" ); int start = Integer.parseInt( currentBlock.substring( 0, index )); int end = Integer.parseInt( currentBlock.substring( index + 1 )); if (( end - start 
) < 7 ) tmpLcrs.remove( i ); else ++i; len = tmpLcrs.size(); } */ return tmpLcrs; } public boolean checkCombinedSubBlock( String seq1, String seq2, double cCut ) { boolean delete = true; double com = cc.calculateEntropy( seq1 + seq2 ); if ( com > cCut ) delete = false; return delete; } public String findAlignment( String seq1, String seq2 ) { String aliPos = new String(); try { Sequence s1 = SequenceParser.parse( seq1 ); Sequence s2 = SequenceParser.parse( seq2 ); //System.out.println( "alignment sequences: " + seq1 + "???" + seq2 ); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); int similarLen = alignment.getSimilarity(); // get the length of the same and similar letters; if ( similarLen > 4 ) { // only if the length of similar and same letters is greater than 4 aliPos = new Pair().format( alignment ); //System.out.println( "the alignment: " + aliPos + " " + similarLen ); } } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } // System.out.println(aliPos); return aliPos; } public Vector checkLeftRegs( int aliStart, int aliEnd, int start, int end, String seq, double cCut ) { Vector left = new Vector(); double com = 0; if ( aliStart > 7 ) { // the length of the left region must be longer than 7 com = cc.calculateEntropy( seq.substring( start - 1, start + aliStart - 2 )); //System.out.println( "left1: " + seq.substring( start - 1, start + aliStart - 2 ) + " " + com + " " + start + "-" + ( start + aliStart - 2 )); if ( com <= cCut ) left.add( 0, start + "-" + ( start + aliStart - 2 )); } if ( ( end - start + 1 - aliEnd ) > 7 ) { com = cc.calculateEntropy(seq.substring( start + aliEnd - 1, end )); //System.out.println( "left2:" + seq.substring( start + aliEnd - 1, end ) + " " + com + " " + ( start + aliEnd ) + "-" + end ); if ( com <= cCut ) left.add( ( start + aliEnd ) + "-" + end ); } return left; } public Vector addToResult( Vector result, Vector left ) { Vector 
tmpResult = result; String str1 = new String(), str2 = new String(); int j = 0; for ( int i = 0; i < left.size(); i++ ) { str1 = (String) left.elementAt( i ); int index = str1.indexOf( "-" ); int endLeft = Integer.parseInt( str1.substring( index + 1 )); boolean found = false; while ( !found ) { if ( j < result.size() ) { str2 = (String) result.elementAt( j ); index = str2.indexOf( "-" ); int startResult = Integer.parseInt( str2.substring( 0, index )); if ( endLeft < startResult ) { found = true; // System.out.println( "Insert left into result: " + str1 + " " + str2 ); result.add( j , str1 ); j = j + 2; } else j++; } else { result.add( str1 ); //System.out.println( "append to the end of result" ); found = true; } } } return result; } public Vector checkAdjBlock ( int start1, int end1, String adjBlock, String seq, double cCut, String mark ) { Vector result = new Vector(); double com = 0; String seq1 = seq.substring( start1 - 1, end1 ); //System.out.println( "current block: " +start1 + " " + end1 + " " + seq1 ); int index1 = adjBlock.indexOf( "-" ); int start2 = Integer.parseInt( adjBlock.substring( 0, index1 )); int end2 = Integer.parseInt( adjBlock.substring( index1 + 1 )); String seq2 = seq.substring( start2 - 1, end2 ); String aliPos = new String(); if ( mark.equals( "front" )) aliPos = findAlignment( seq2, seq1 ); else aliPos = findAlignment( seq1,seq2 ); //System.out.println("ASJ " +adjBlock); //System.out.println("1 " + aliPos); if ( aliPos.length() != 0 ) { // format of aliPos: 'a1-a2 b1-b2' index1 = aliPos.indexOf( "-" ); int index2 = aliPos.indexOf( " " ); int aliStart2 = Integer.parseInt ( aliPos.substring( 0, index1 )); int aliEnd2 = Integer.parseInt ( aliPos.substring( index1 + 1, index2 )); String aliSeq1 = new String(), aliSeq2 = new String(); if ( mark.equals( "front" )) { aliSeq2 = seq.substring( start2 + aliStart2 - 2, start2 + aliEnd2 - 1 ); //System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq2 ); } else { 
aliSeq1 = seq.substring( start1 + aliStart2 - 2, start1 + aliEnd2 - 1 ); //System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq1 ); } aliPos = aliPos.substring( index2 + 1 ); //System.out.println("2 " + aliPos); index1 = aliPos.indexOf( "-" ); int aliStart1 = Integer.parseInt( aliPos.substring( 0, index1 )); int aliEnd1 = Integer.parseInt( aliPos.substring( index1 + 1 )); if ( mark.equals( "front" )) { aliSeq1 = seq.substring( start1 + aliStart1 - 2, start1 + aliEnd1 - 1 ); //System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq1 ); } else { aliSeq2 = seq.substring( start2 + aliStart1 - 2, start2 + aliEnd1 - 1 ); //System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq2 ); } boolean decOrNot = true; if ( mark.equals( "front" )) { decOrNot = true; // decOrNot = checkCombinedSubBlock( aliSeq2,aliSeq1, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 ) ); //System.out.println( "added to result1: " + ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 )); Vector left = checkLeftRegs( aliStart1, aliEnd1, start1, end1, seq, cCut ); result = addToResult( result, left ); } } else { //decOrNot = checkCombinedSubBlock( aliSeq1,aliSeq2, cCut ); decOrNot = true; if ( decOrNot ) { result.add( ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1) ); //System.out.println( "added to redult2: " + ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1)); Vector left = checkLeftRegs( aliStart2, aliEnd2, start1, end1, seq, cCut ); result = addToResult( result, left ); result.add( ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 ) ); //System.out.println( "added to redult3: " + ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 )); left = checkLeftRegs( aliStart1, aliEnd1, start2, end2, seq, cCut ); result = addToResult( result, left ); } } } return result; } public Vector checkDeletability ( Vector lcrs, int 
maxIndex, String seq, double cCut ) { //System.out.println(lcrs); Vector result = new Vector(); String block = new String(); int start1 = 0, end1 = 0; block = (String)lcrs.elementAt( maxIndex ); int index = block.indexOf( "-" ); start1 = Integer.parseInt( block.substring( 0, index )); end1 = Integer.parseInt( block.substring( index + 1 )); if ( maxIndex != 0 ) { block = (String)lcrs.elementAt( maxIndex - 1 ); // System.out.println( "front adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "front" ); } if ( result.size() == 0 ) { if ( maxIndex != ( lcrs.size() - 1 ) ) { block = (String)lcrs.elementAt( maxIndex + 1 ); //System.out.println( "back adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "back" ); if ( result.size() != 0 ) result.add( "back" ); } } return result; } public Vector readSampledLenRepPer() { Vector v = new Vector(); try { File f = new File( "knowledge/sampledLenRepPer" ); RandomAccessFile rf = new RandomAccessFile( f, "r" ); String line = rf.readLine(); while ( line != null ) { v.add( line ); line = rf.readLine(); } rf.close(); } catch ( IOException ex ) { } return v; } public Vector filter ( Vector lcrs, String seq ) { Vector tmpLcrs = lcrs, com = new Vector(); float len = tmpLcrs.size(); double singleCom = 0, max = -222222222; String str = new String(), block = new String(); if ( tmpLcrs.size() != 1 ) { for ( int i = 0; i < len; i++ ) { str = (String)tmpLcrs.elementAt( i ); int index = str.indexOf( "-" ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); singleCom = cc.calculateEntropy( str ); com.add( Double.toString( singleCom )); } int i = 0, j = 0, maxIndex = 0; double limit = 0, cCut = 0; /* if ( seq.length() > 500 ) limit = len * 0.6; // 0.6 else limit = len * 0.3; //0.3 */ /* if ( len < 350 ) len = 350; double p = 1 - 350/len; limit = len * p; */ Vector sample = readSampledLenRepPer(); int seqLen = 
seq.length(); // find the right cut percentage boolean found = false; String range = new String(); double per = 0, prePer = 0; int shortest, longest, preLongest = 0; while (( !found ) && (i< sample.size())) { range = (String)sample.elementAt( i ); range = range .trim(); int index = range.indexOf(","); shortest = Integer.parseInt( range.substring( 0, index )); int index2 = range.indexOf( " " ); longest = Integer.parseInt( range.substring( index+1, index2 ) ); index = range.indexOf(":"); per = Double.parseDouble( range.substring( index + 1)); if ( ( seqLen >= shortest ) && ( seqLen <= longest)) { limit = len * ( 1 - per ); found = true; } else if ( seqLen < shortest ) { int diff1 = shortest - seqLen; int diff2 = seqLen - preLongest; if ( ( diff2 > diff1 ) || ( preLongest == 0 )) { limit = len * ( 1- per ); found = true; } else { limit = len * ( 1- prePer ); found = true; } } ++i; preLongest = longest; prePer = per; } if (!found) limit = len *(1-per); i = 0; while ( i < limit ) { int l = com.size(); j = 0; max = -222222222; while ( j < l ) { str = ( String ) com.elementAt( j ); singleCom = Double.parseDouble( str ); if ( singleCom > max ) { max = singleCom; maxIndex = j; } ++ j; } //System.out.println( "com: " + com.elementAt( maxIndex )); cCut = Double.parseDouble( (String)com.remove( maxIndex )); ++i; } //System.out.println( "cCut: " + cCut ); j = 0; i = 0; len = tmpLcrs.size(); Vector result = new Vector(); while (( i < limit ) && ( len != 1 ) && ( j < len )) { str = (String)tmpLcrs.elementAt( j ); int index = str.indexOf( "-" ); //System.out.println( "current block?" + str.substring( 0, index ) + "?" + str.substring( index + 1 ) ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); //System.out.println( "cur subseq???" 
+ str ); singleCom = cc.calculateEntropy( str ); //System.out.println( "singlecom:"+ singleCom + " " + cCut ); if ( singleCom >= cCut ) { result = new Vector(); result = checkDeletability( tmpLcrs, j, seq, cCut ); int rSize =result.size(); boolean fromBack = false; if ( rSize != 0 ) { str = (String)result.elementAt( rSize - 1 ); if ( str.equals( "back")) { --rSize; fromBack = true; } // System.out.println( "remove: "+ (String)tmpLcrs.remove( j )); tmpLcrs.remove( j ); for ( int k = 0; k < rSize; k++ ) { // add 'result' Vector into tmpLcrs in order str = ( String ) result.elementAt( k ); //System.out.print( "*"+ str + "*" ); tmpLcrs.add( j, str ); ++j; } //System.out.println(); // if ( j < tmpLcrs.size()) //System.out.println( "the next one:" + tmpLcrs.elementAt( j )); if ( fromBack ) { //System.out.println( "yes, from back " ); //System.out.println( "removed: "+ tmpLcrs.remove( j )); tmpLcrs.remove( j ); } len = tmpLcrs.size(); } else { tmpLcrs.remove( j ); // System.out.println( "removed coz of high complexity" ); ++i; len = tmpLcrs.size(); } } else ++j; } //System.out.println(i + " " + j ); } return tmpLcrs; } public Vector postProcess( int i, String seq, String mark ) { Vector lcrs = new Vector(); Vector blocks = new Vector(); Vector pos = getPositions( i ); if ( pos.size() != 0 ) { blocks = sortPositions( pos ); // blocks = filter( blocks, seq ); //printLCRs( blocks ); } lcrs = pickUpDrop( blocks, seq ); //printLCRs( lcrs ); lcrs = mergePurge( lcrs ); //printLCRs( lcrs ); lcrs = filter( lcrs, seq ); lcrs = filter( lcrs, seq ); ///////////// filter for a second time //System.out.println( "AFTER************" ); printLCRs( lcrs, seq, mark ); return lcrs; } public void computeLCRPercentage ( Vector lcrs, String str) { } public void printLCRs( Vector LCRs, String seq, String mark ) { int len = LCRs.size(); String posBlocks = new String(); for ( int i = 0; i < len; i ++ ) { String str = (String)LCRs.get( i ); posBlocks = posBlocks + " " + str; } posBlocks = 
posBlocks.trim(); if ( mark.equals("0") ) // generate LCR blocks System.out.println( posBlocks ); else { // generate masked sequences Masker ms = new Masker(); ms.mask( seq, posBlocks ); } } public void startt( int th1, int th2, int th3, int th4, String mark ) { String str = new String(), id = new String(), nextId = new String(); int index = 0; boolean first = true; try { while ( str != null ) { str = rf.readLine(); str = generateSequence( str ); if ( str != null ) { if ( str.indexOf( ">" ) != -1 ) { index = str.indexOf( "!" ); id = nextId; nextId = str.substring( 0, index ); str = str.substring( index + 1 ); } else id = nextId; if (!first ) { System.out.println(); System.out.println( id ); } else { id = nextId; first = false; } vertices.clear(); edges.clear(); lps.clear(); for ( int i = 0; i < 20; i++ ) { fVecNor[i] = 0f; fVecUnNor[i] = 0f; } str = str.trim(); //System.out.println(str);//////////////// int i = workOnSequence( str, th1, th2, th3, th4 ); Vector lcrs = postProcess( i, str, mark );//process all longest paths from every connected subgraph /* computeLCRPercentage( lcrs, str ); printLCRs( lcrs, str ); */ } } rf.close(); } catch ( IOException ex ) { } } public static void main ( String args[] ) { int th1 = Integer.parseInt( args[2] ); int th2 = Integer.parseInt( args[3] ); int th3 = Integer.parseInt( args[4] ); int th4 = th2; ShannonGbm g = new ShannonGbm( args[0]); g.readRNRMatrices( args[1] ); g.readScoringMatrix( "knowledge/blosum62Matrix" ); //g.getComCut( args[5] ); g.startt( th1, th2, th3, th4, args[6] ); /* try { Sequence s1 = SequenceParser.parse("VVVVVV" ); Sequence s2 = SequenceParser.parse( "LAELLAKKSDRDSPKK"); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); System.out.println( "similarity: " + alignment.getSimilarity() ); String result = new Pair().format(alignment); System.out.println( "*****************" ); System.out.println( result ); } catch (Exception e) { //logger.log(Level.SEVERE, 
"Failed running example: " + e.getMessage(), e); } */ } } /* 1. Format of the output: SACACPQTSOP......( 60 letters) XXXX TPQSKAQ..........( 60 letters) */ PK e}7l")applications/SwissprotMatrixLearner.class1/ X R\ R R   R R  R | &  & & , , , & & &     &  < < R & R R R R R   ,  R < R R RalphabetLjava/util/Vector; repeatMatrix[[FnonRepeatMatrixrfSeqLjava/io/RandomAccessFile;rfRep'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTablefLjava/io/File;exLjava/io/IOException;this%Lapplications/SwissprotMatrixLearner; sequenceFileLjava/lang/String; repeatFileinitializeAlphabet()VgenerateConnectedSeq()Ljava/lang/String;strseqcombineRepeats&(Ljava/lang/String;)Ljava/lang/String;repeatsrepeat1repeat2 subRepeatskIijstartend workOnMatrix(Ljava/lang/String;IF)VmatrixLjava/lang/Float;ffF subSequencemark forgetRatesubSeqtmpSeq outsideLetter insideLetterrowcollenworkOnSequence:(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;F)Vrepsequence subRepeatnonRepeatStart nonRepeatEnd repeatStart repeatEndindex beginWithOneZnormalizeSingle(I)Vsum normalizeBoth learnPattern(F)Vid closeBothprintMatricesRowByRowprintMatricesColByColmain([Ljava/lang/String;)Vargs[Ljava/lang/String;swl SourceFileSwissprotMatrixLearner.java ap op [\ ]\ java/io/File ajava/io/RandomAccessFiler a ^_ `_java/io/IOExceptionjava/util/Vector YZA RNDCQEGHLKMPSTWYVjava/lang/String r>    rjava/lang/StringBuilder   r     -     java/lang/Float  a! "#1- $ dummy qr uv p %p& '(Non-Repeat matrix: ) *  + , *pRepeat matrix: #applications/SwissprotMatrixLearner ab -. 
p pjava/lang/Object(Ljava/lang/String;)V#(Ljava/io/File;Ljava/lang/String;)Vadd(Ljava/lang/Object;)ZreadLine startsWith(Ljava/lang/String;)Ztrimappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;(I)Ljava/lang/String;size()I elementAt(I)Ljava/lang/Object;java/lang/IntegerparseIntremove(ILjava/lang/Object;)Vlength(Ljava/lang/Object;)Ijava/lang/Mathpow(DD)D(D)V floatValue()FendsWithclosejava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintln(F)Ljava/lang/StringBuilder;print parseFloat(Ljava/lang/String;)F RXYZ[\]\^_`_ abcS****Y+N*Y- Y,N*Y- N NQ d.  )7 @!N$Q#R%e4)%fgRhiSjkSlmSnmopcR*Y*W*W*W*W*W*W*W*W*W*W*W*W*W*W* W*!W*"W*#W*$W*%WdZ) *+,)-3.=/G0Q1[2e3o4y56789:;<=>e jkqrcN&Y'L&Y'M* (L+/+)*&++L,Y-,.+./M* (LӧN,HK d& BDE%F*G=HHLKKLMe*LhiNjkFsm>tmuvcc Q+M&Y'N&Y':Y:6+M=,+M,016,2W ,W,3M6666 6 4d5&N`5&:-616-`376 616276   `C,Y--2.6.`3./M-8W8W,9jd5&M64,5&N,Y-,.0.-./M,d"RSTU!V'W,X4Y:ZJ\Q]W^a`dapb|cdefghijklm prst&u1vIwOyep QjkQwmOsm Gxm>ym5zZ2{|g}|j~|m| p| c *: *:+:&Y':&Y':&Y':6 6 6 :3:2:*;6 6 :m2:*;6 :  ?8   2  2 0bQ 2  2 0bQ3: 3:_ * *dn 1:BJS^air}e \Bf ;az}| jkm|\mm(m1m4| 7| :| cZ+:&Y':,:&Y':6 6 6 6 6 666+:@* 66 616016 ::2:`3:+:276 " d6  d 2:*A6`376  d 2:*A `6 _-B-76  d 2:*Ad $-07AGJOXagkq{ ejktmmmm mwmm| | !| $| '| *}|-~|0c+w E *N*N6K E6$-20bE6$ -2-20$nQ *-*-dJ)39CIY_eiqveR \"~|<#~|O}|wjkw|ud\pc= *C*Cd   e  jkc4&Y'M&Y'ND:&Y':* (W*EN-UP61* (:+:* (+:*F:*-#G*EN* (::*H dB%*3>JQ]eot"!#$eHhijkmztmvwmmmpc_* I* IL d)*-,.ehijkpcQJKL<C=)J,Y-*20MN./OײJPJPJPJPJQL<C=)J,Y-*20MN./OײJPJPJPdR234584>6D7J3P9V:\;d<l=t>=?@<BCe4,~| F}|n,~|fF}|jkpcQJKL<C=)J,Y-*20MN./OײJPJPJPJPJQL<C=)J,Y-*20MN./OײJPJPJPdRGHIJ8I>KDLJHPNVO\PdQlRtSRTUQWXe4,~| F}|n,~|fF}|jk cu#RY*2*2SL*2TE+$U+V+Wd]^_`a"se #kPK e}7ee)e)-applications/SwissprotMetricsCalculator.class1m h c c c c c  c c c c c c c c c c c c c c c c   c c@           / / / /            /@\  ! "#$%&'()* c+ c, c- c. c/ c0 c123 c4 c5 c6 c78 9:; 9<=>? 
@A cB cC cD cEFLCRs[Ljava/lang/String;HCRsdomains nonDomains tmpNonDomains strDomainLjava/lang/String;indexLCRIindexHCR remainder indexDomainindexNonDomainindexTmpNonDomain indexMetric totalLCRsD totalRepeats sensitivity[D precisionrecalljcmmheadZrf1Ljava/io/RandomAccessFile;rf2'(Ljava/lang/String;Ljava/lang/String;)VCodeLineNumberTableLocalVariableTableif1Ljava/io/File;f2exLjava/io/IOException;this)Lapplications/SwissprotMetricsCalculator; filename1 filename2 getLCRHCRs(Ljava/lang/String;)V numOfLettersjstrstrTmp startLCRStr endLCRStr startHCRStr endHCRStr startHCRInt endHCRInt beginWithOne printLCRs()V printHCRsgetInterLCRRepeat()Dtotalk lcrStartInt lcrEndInt nonDStartInt nonDEndInt lcrStartStr lcrEndStr nonDStartStr nonDEndStrfoundgetInterHCRNonRepeat hcrStartInt hcrEndInt domStartInt domEndInt hcrStartStr hcrEndStr domStartStr domEndStrcomputeMetrics(DDD)V LCRRepeat HCRNonRepeattotalNum getNonRepeats(I)VstartStrendStrstartIntendInt printRepeatsprintNonRepeatscombineRepeatsstr1getRepeatsNonRepeats()Ljava/lang/String;eidgetLCRHCRRepeatNonRepeatMetrics strMasked printMetricssenAvepreAverecAvejcAvemmAvenfLjava/text/NumberFormat; closeBothmain([Ljava/lang/String;)Vargsmc SourceFileSwissprotMetricsCalculator.java java/lang/String ij kj lj mj nj op qr sr tr ur vr wr xr yz {z |} ~} } } }  java/io/File java/io/RandomAccessFiler G java/io/IOException H I1- JK1 LM NO- PQ PRS TOjava/lang/StringBuilder UV UW X XR YKZ [\LLLLLLLLLLLLLLLLLLLLLLLLLLL] ^ _ ^HHHHHHHHHHHHHHHHHHHHHHHHHHH `a bM7total length of intersections between LCRs and repeats: Uc:Total length of intersectsss between HCRs and nonRepteats:d ef `R tmpRepeatsRepeat Infor.:Total length of all repeats:Non-repeat Infor.:>0************************************************ID:  TotalNumOfLetters:  totalLCR: <<<<<<<<<< 0 prec. recall Jac.Co. Min.Mea.g hi jk Ave.  
l'applications/SwissprotMetricsCalculator java/lang/Object#(Ljava/io/File;Ljava/lang/String;)VreadLinetrim startsWith(Ljava/lang/String;)Zlength()IindexOf(Ljava/lang/String;)I substring(II)Ljava/lang/String;(I)Ljava/lang/String;java/lang/IntegerparseIntappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;toStringendsWithjava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintlnprintvalueOf'(Ljava/lang/String;)Ljava/lang/Integer;intValue(D)Ljava/lang/StringBuilder;java/lang/Mathsqrt(D)Djava/text/NumberFormat getInstance()Ljava/text/NumberFormat;format(D)Ljava/lang/String;close chijkjljmjnjopqrsrtrurvrwrxryz{z|}~}}}}****d*d*d*Y * * * * ******ȼ*ȼ*ȼ*ȼ*ȼ*Y+NY,:*Y-*Y67;* R* R* R* R* RħN"^$3>Mak &'()*+,-./ *425H?ri_pp=>Y:Y:Y:Y:Y:Y: 6 6 6 *#:: $:%& ':6 (:)*=+*>,:"** -S`-::(** ,S`,:`-:**.c.gc.d6  2** /Y01+1 23S*Y ` 6 .`4:*Y ` +$:/Y0+1+135.** /Y01+1+13S*Y ` CLO"'9:;(<:=@>C@LDOBQEXFbGiIlJtKwM|NOPQRSTWXY[\]^&_3b6cBdOfVgqhikQ prr pp~p(up1lp:cp =`r @]r CZ E<678* '6/Y0*21)139ײ6:678"op qr0s6u<vDwECrE<6;8* '6/Y0*21)139ײ6:6;8"{| }~06<DECrH>666666 Y: Y: Y: Y: 6* :*2+*6*2,:  <=6*2`-:  <=6**2+*6*2,:  <=6*2`-:  <=6  # ' cgcH ' cgcHm #'cgcH6F 69 'cgcH66IJ6/Y0>1'?38'% (:=ER`jy.:@GM[gmsv|zrr r rrrr xp (op 1fp :]p =ZH>666666 Y: Y: Y: Y: 6* :*2+*6*2,:  <=6*2`-:  <=6* *2+*6*2,:  <=6*2`-:  <=6  # ' cgcH ' cgcHm #'cgcH6F 69 'cgcH66IJ6/Y0@1'?38'% (:=ER`jy.:@GM[gmsv|zrr r rrrr xp (op 1fp :]p =Zi**')coR***AR**'*oR***AR**'*oR**')goR***'g*c'g*oCR6/Y0**1?D1**1?D1**1?D1**1?D1**1?38*Y`2 '6?N]l*zzz YMYNY:66666 *J*2%&5'M6*2+*6 *2N- ,N-<=d6c6]6*d2+*6 *d2 `-:<=`6*2N-+*6 - ,N-<=d6EM1*EN* /Y0,1+1-13S * 4N**d2:-5S+*6  `-:<=`6EM** /Y0,1+1-13S*Y ` )"(16EHKYait w }  !"(#1$:%E&Q'W(w)+ Yr Gr :Gr rzprpipfcr"`r%]r(Zr  =@6F8*'6/Y0*21)139ײ6:Y6G8*'6/Y0*21)139ײ6:6/Y0H1*?38:0123445:7C:K;S<q=w?}@B rr=6I8<* '6/Y0*21)139ײ6:FG HI0J6L<M= 3rYLYM>66**2M,+*6,,M6`*X*2L++*6+`-L+<=`6`*%*`2L++*6+,L+<=6u*d2L++*6+`-L,<=6+<=6**/Y0,1+1+13S**cgc*Y` rRST U'V/W7X;YJZQ[Y\b]m^w_`abdfghijklmnq> pprrr 
YLYM**>* J&**# *** -L6:6K8**# ]** $ * )*>*** ,S*** S*Y`** - *#M6/Y0L1+1M1,138*N*O,<=6*P*QN, "~vwxz{(|6};~@IOWbgr|  >r r  ppd *#LI99++J&t*R:*S*T6/Y0U1*?386V8*W*X9*Y9<=I*(Z* * **#L*#LzL"^#)-IQU[akty~H#hpp z zz- e6:6[8<I9999 \: *6/Y0`2]1 *1^1_1 *1^1_1 *1^1_1 *1^138(*1cI*1A*1c9*1A*1c9*1c9 *1c9 D(*oI*o9*o9*o9 *o9 6/Y0`1 ^1a1 ^1a1 ^1a1  ^1386:^#+^dReUrSzPzMzJzGz #B _*b*bL" _cY*2*2dL+e+f+gj PK e}7. applications/Vertex.class1b QR Q S T U V W X Y Z [ \ ] ^_` startLetterLjava/lang/String; endLetterindegreeI outdegreeweightFlp lookbehindLP predecessorLapplications/Vertex;ancestor()VCodeLineNumberTableLocalVariableTablethis)(Ljava/lang/String;Ljava/lang/String;IF)VstartenddwgetStartLetter()Ljava/lang/String; getEndLetter getIndegree()I getOutdegree getWeight()FgetLPsetLP(F)VlgetLBLPsetLBLPfhasLP()ZgetPredecessor()Lapplications/Vertex;setPredecessor(Lapplications/Vertex;)Vpre getAncestor setAncestoranc incIndegree decIndegree incOutdegree decOutdegreeequals(Lapplications/Vertex;)ZvbZstartVendV SourceFile Vertex.java !java/lang/String          +, -, Haapplications/Vertexjava/lang/Object(Ljava/lang/Object;)Z!  !":**Y*Y** * * * * #* $*/49$ :% &".**+*,*** * * * #*   !"##($-%$4.%.'.(.).*+,"/*#)$ %-,"/*#.$ %./"/*#3$ %0/"/* #7$ %12"/*#<$ %32"/* #A$ %45">*# # FG$%672"/* #K$ %85">*# # OP$%9:;"E* * # ST$ %<="/* #Y$ %>?">*+ # ^_$%@A="/* #c$ %B?">*+ # gh$%CD!"9 *Y`# k l$  %E!"9 *Yd# p q$  %F!"9 *Y ` # u v$  %G!"9 *Y d # z {$  %HI"(=+N+:*-*=#~ $&$4(%(J&KL!M NOPPK f}7z.yjj)applications/gbmCommentAdjBlocks.java.alt/* ** Author: Xuehui Li ** Date: March, 2005 ** "gbm" or "GBM" is the abbrevaition of "A graph-based method for detecting low-complexity reions". ** This is program is used to find low-complexity regions in sequences ** NOTE: all vertices and edges have topological orders ** There are totally five input paramaters. The first one is the sequence file name. The second one the learned matrix file( /cise/research/tamer/xli/LCR/graphLCR/swissprotLearnedMatrices ). 
The remaining three are the threshold 1, threshold 2 and threshold 3, respectively. At this tiem, all LCR Blocks generated in both /cise/research/tamer/xli/LCR/graphLCR/swissprotLCRBlocks/ and /cise/research/tamer/xli/LCR/graphLCR/pfamLCRBlocks/ are based on the three thresholds: " 3 15 5" */ package applications; import jaligner.Alignment; import jaligner.Sequence; import jaligner.SmithWatermanGotoh; import jaligner.formats.Pair; import jaligner.matrix.MatrixLoader; import jaligner.util.SequenceParser; import java.io.*; import java.util.*; class gbm { // vertices and edges are vectors used to keep all the vertices, edges in a graph generated from a sequence, respectively. vertexQueue is a vector used to keep all vertices whose indegree is zero. lps is a vector used to keep all the longest paths in all connected subgraphs of a sequence. Every longest path in lps is a vector of vertices, excluding the dummy source. // subVertices and subEdges are vectors used to keep all the vertices, edges in a connected-graph which is a subgraph of the graph generated from a sequence, respectively. 
private File f; private RandomAccessFile rf; private Vector vertices, subVertices, edges, subEdges, vertexQueue, lps; private float[][] repeatMatrix, nonRepeatMatrix; private float[] fVecNor, fVecUnNor; private Vector alphabet; private double comCut = 0; // the complexity cut-off value private float singleVertexOnly; public gbm ( String fileName ) { // the graph is given in a file where every line represents an edge and has the fromat of "source sink weight" initializeAlphabet(); try{ f = new File ( fileName ); rf = new RandomAccessFile ( f, "r" ); } catch ( IOException ex ) { } vertices = new Vector(); subVertices = new Vector(); edges = new Vector(); subEdges = new Vector(); vertexQueue = new Vector(); lps = new Vector(); repeatMatrix = new float[20][20]; nonRepeatMatrix = new float[20][20]; fVecNor = new float[20]; fVecUnNor = new float[20]; for ( int i = 0; i < 20; i++ ) fVecUnNor[i] = 0f; singleVertexOnly = 0f; } public void initializeAlphabet() { alphabet = new Vector(); alphabet.add( "A" ); alphabet.add( "R" ); alphabet.add( "N" ); alphabet.add( "D" ); alphabet.add( "C" ); alphabet.add( "Q" ); alphabet.add( "E" ); alphabet.add( "G" ); alphabet.add( "H" ); alphabet.add( "I" ); alphabet.add( "L" ); alphabet.add( "K" ); alphabet.add( "M" ); alphabet.add( "F" ); alphabet.add( "P" ); alphabet.add( "S" ); alphabet.add( "T" ); alphabet.add( "W" ); alphabet.add( "Y" ); alphabet.add( "V" ); } public void readMatrices( String matricesFile ) { try { File f = new File( matricesFile ); RandomAccessFile rfm = new RandomAccessFile ( f, "r" ); String row = new String(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { nonRepeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { nonRepeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.readLine(); rfm.readLine(); rfm.readLine(); for ( int i = 
0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { repeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { repeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.close(); } catch ( IOException ex ) { } } public void printMatricesRowByRow() { System.out.println( "Non-Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( nonRepeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); System.out.println(); System.out.println( "Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( repeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); } public void getComCut( String fileName ) { try { File f = new File ( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); String line = new String(); double com, sum = 0, num = 0; line = rf.readLine(); while ( line != null ) { line = line.trim(); complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet(); com = cc.calculateModifiedEntropy( line ); //com = cc.calculateReciprocalPro( line ); //com = cc.calculateRecProWScoringMatrix( line ); sum = sum + com; num = num + 1; line = rf.readLine(); } comCut = sum / num; // System.out.println( "The cut-off value is: " + sum + " / " + num + " = " + comCut ); rf.close(); } catch( IOException ex ){ } } public void createFirstVector( String window ) { int len = window.length(); String tmpWindow = window, letter = new String(); for ( int i = 0; i < len; i++ ) { letter = tmpWindow.substring( 0, 1 ); int index = alphabet.indexOf( letter ); fVecUnNor[ index ]= fVecUnNor[index] + 1f; tmpWindow = tmpWindow.substring( 1 ); } for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecUnNor[ i ]; for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecNor[ i ] / len; } // o for unNormalized, 1 for normalized public void printVector( int mark ) { float[] tmpVector = new float[ 20 ]; if ( mark == 0 ) tmpVector = fVecUnNor; else tmpVector = fVecNor; for ( int i = 0; i < 20; i++ ) System.out.print( tmpVector[i] + " " ); System.out.println(); } public void constructSingleVertex( int start, int end ) { String startLetter = Integer.toString( start + 1 ); String endLetter = Integer.toString( end + 1 ); Vertex v = new Vertex( startLetter, endLetter, 1, 1.0f ); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); v.setPredecessor( dummySource ); vertices.add( v ); } public void workOnFirstWindow( String window ) { String letter1, letter2; createFirstVector( window ); int len = window.length(), j = 0, row = 0, col = 0; for ( int i = 0; i < len - 1; i++ ) { j = 1; letter1 = window.substring( i, i + 1 ); while ( ( i + j ) < len ) { letter2 = window.substring( i + j, i + j + 1 ); boolean similar = checkSimilarity( letter1, letter2 ); if ( similar ) { row = alphabet.indexOf( letter1 ); col = alphabet.indexOf( letter2 ); if ( checkProbablity( row, col )) constructSingleVertex( i, i + j ); } ++j; } } } // at this time, row == col, since we only 
consider same letters public boolean checkProbablity( int row, int col ) { boolean construct = false; float difference1 = Math.abs( repeatMatrix[ row][col] - fVecNor[row] ); float difference2 = Math.abs( nonRepeatMatrix[row][col] - fVecNor[row] ); if ( difference2 > difference1 ) { construct = true; } return construct; } public void addVertices( String window, int index, int startPos ) { // get all letters in the window who have the same index in the alphabet as "index" int len = window.length(); String letter = new String(); int tmpIndex = 0; int[] positions = new int[ len - 1 ]; for ( int i = 0; i < len - 1; i++ ) { positions[ i ] = 0; } int j = 0; for ( int i = 0; i < len - 1; i++ ) { letter = window.substring( i, i + 1 ); tmpIndex = alphabet.indexOf( letter ); if ( index == tmpIndex ) { positions[ j ] = startPos + i; /// get the position # of the letter ++j; } } if ( checkProbablity( index, index )) { j = 0; boolean end = false; while (( !end ) && ( j < ( len - 1 ))) { if ( positions[ j ] != 0 ) constructSingleVertex( positions[ j ], startPos + len - 1 ); else end = true; ++j; } } } public void constructVertices( String str, int th4 ) { char c1, c2; int len = str.length(); boolean similar = false; Vertex v = new Vertex(); float[] previousVecUnNOr = new float[20]; String window = str.substring( 0, th4 ); String oldLetter = new String(), newLetter = new String(); workOnFirstWindow( window ); int startPos = 1; while( startPos <= ( len - th4 ) ) { oldLetter = window.substring( 0, 1 ); window = str.substring( startPos, startPos + th4 ); newLetter = window.substring( th4 - 1, th4 ); int index = alphabet.indexOf( oldLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] - 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; index = alphabet.indexOf( newLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] + 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; addVertices( window, index, startPos ); ++startPos; } } // return the actual weight of the vertex ??? 
/// to be modified later to include similar cases public boolean checkSimilarity( String c1, String c2 ) { boolean similar = false; if ( c1.equals( c2 )) similar = true; return similar; } // find the percentage of letters appearing in vertices public void findLetterPercentageVer( float lF ) { int len = vertices.size(); Vector appearedLetters = new Vector(); Vertex v = new Vertex(); for ( int i = 0; i < len ; i++ ) { v = (Vertex)vertices.elementAt( i ); String str = v.getStartLetter(); int index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); str = v.getEndLetter(); index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); } ///// Sort first len = appearedLetters.size(); float per = len / lF; // System.out.println( "The letter percentage after vertex construction is: " + len + " / " + lF + " = " + per ); Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { String str = (String)appearedLetters.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); for ( int i = 0; i < len; i ++ ) { String str = posInt[i].toString(); System.out.print( str + " " ); } System.out.println(); // computePercentage( lF, appearedLetters ); } public void computePercentage( float l, Vector al ) { int len = al.size(); float per = len / l; System.out.println( "The percentage is: " + len + " / " + l + " = " + per ); for ( int i = 0; i < len; i ++ ) { String str = (String)al.elementAt( i ); System.out.print( str + " " ); } System.out.println(); } public void constructEdges( int th1, int th2, int th3 ) { String startLetter = new String(), endLetter = new String(); int i = 0, j = 0, p = 0, q = 0, l = vertices.size(); Vertex v1 = new Vertex(), v2 = new Vertex(); Edge e = new Edge(); for ( int k = 0; k < l-1; k++ ) { v1 = (Vertex)vertices.get( k ); startLetter = v1.getStartLetter(); i = Integer.parseInt ( startLetter ); endLetter = v1.getEndLetter(); j = Integer.parseInt( endLetter ); boolean end = 
false; int m = k + 1; while (( !end )&& ( m < l )) { v2 = (Vertex)vertices.get( m ); startLetter = v2.getStartLetter(); p = Integer.parseInt ( startLetter ); endLetter = v2.getEndLetter(); q = Integer.parseInt( endLetter ); if ( checkConditions( i, j, p, q, th1, th2, th3 )) { v2.incIndegree(); // modify the vertex in vertices vertices.setElementAt( v2, m ); e = new Edge( v1, v2, 1.0f ); edges.add( e ); } else if ( ( p - i ) > th2 ) // k2 = 25 end = false; ++m; } } } // return the actual weight of the edge ??? public boolean checkConditions ( int i, int j, int p, int q, int th1, int th2, int th3 ) { boolean satisfied = false; int foo = ( j - i ) - ( q - p ); foo = Math.abs( foo ); if ( foo <= th1 ) // condition #1, k1 = 5 if (( p - i ) <= th2 ) // condition #2, k2 = 26 if ((( i <= p ) && ( p <= j ) ) && ( j <= q )) // condition #3 if (( i == p ) || ( p == j ) || ( j == q )) { // condition #4 if ((( j - i ) <= th3) && ( ( q - p ) <= th3 )) satisfied = true; } else satisfied = true; return satisfied; } public void modifyVertexQueue ( Vector tmpQueue, Vector tmpVertices ) { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void printVertexQueue() { System.out.println( "All vertices in the queue:" ); Vertex v = new Vertex(); Vertex previous = v; int l = vertexQueue.size(); for ( int i = 0; i < l; i++ ) { v = (Vertex) vertexQueue.get( i ); previous = v.getPredecessor(); if ( previous != null) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printVertices( int m ) { Vector tmpVertices = new Vector(); if ( m == 0 ) tmpVertices = subVertices; else tmpVertices = 
vertices; Vertex v = new Vertex(); Vertex previous = v; int l = tmpVertices.size(); System.out.println( "All vertices in the graph: " + l ); for ( int i = 0; i < l; i++ ) { v = (Vertex) tmpVertices.get( i ); previous = v.getPredecessor(); if ( previous != null ) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printEdges( int m ) { Vector tmpEdges = new Vector(); if ( m == 0 ) tmpEdges = subEdges; else tmpEdges = edges; Edge e = new Edge(); Vertex sourceVer = new Vertex(), sinkVer = new Vertex(); String str = new String(); int length = tmpEdges.size(); System.out.println( "All edges in the graph: " + length ); for ( int i = 0; i < length; i++ ) { e = (Edge) tmpEdges.get( i ); sourceVer = e.getSource(); sinkVer = e.getSink(); System.out.println( sourceVer.getStartLetter() + "A" + sourceVer.getEndLetter() + " lp: " + sourceVer.getLP()+ " indegree: "+ sourceVer.getIndegree() + " "+ sinkVer.getStartLetter() + "A" + sinkVer.getEndLetter() + " lp: " + sinkVer.getLP() + " indegree: " + sinkVer.getIndegree() + " weight: "+ e.getWeight() ); } } public void addDummySource() { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); addToSubVerticesVertexQueue( dummySource ); addToSubEdges( dummySource ); } public void addToSubVerticesVertexQueue( Vertex dummySource ) { subVertices.add( 0, dummySource ); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void addToSubEdges( Vertex dummySource ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subVertices.size(); for ( int i = l-1; i > 0; i-- ) { v = (Vertex)subVertices.get( i ); e = new Edge( dummySource, v, 0.0f ); subEdges.add( 0, e ); } } public Vector 
findLongestPath() {
    // Processes queued vertices until the queue is empty; traverseSubEdges
    // relaxes their outgoing edges and queues sinks whose indegree hits 0.
    Vertex v = new Vertex();
    while ( !( vertexQueue.isEmpty())) {
      v = (Vertex) vertexQueue.remove( 0 );
      traverseSubEdges( v );
    }
    // find the vertex to which the path from the source is the longest
    Vector lp = traverseVertices();
    return lp;
  }

  // Relaxes the run of consecutive sub-edges whose source equals ver,
  // starting at the first such edge ( findEdges ). For each edge the sink's
  // LP/predecessor is updated when the path through ver is longer, the
  // sink's indegree is decremented, the sink is queued once its indegree is
  // 0, and the edge is removed from subEdges.
  public void traverseSubEdges( Vertex ver ) {
    boolean end = false, first = true;
    Edge e = new Edge();
    Vertex sourceVer = new Vertex();
    Vertex sinkVer = new Vertex();
    String str = new String();
    float w = 0, sourceLP = 0, sinkLP = 0;
    int len = subEdges.size(), j = 0;
    if (!(subEdges.isEmpty()))
      j = findEdges( ver );
    if ( j == -1 )          // ver has no outgoing edge
      end = true;
    else if ( j != 0 ) { }
    while (( !end ) && ( j < len ) && (!(subEdges.isEmpty()))) {
      // j is not advanced: removing the edge at j moves the next one into place
      e = (Edge)subEdges.elementAt( j );
      sourceVer = e.getSource();
      if ( sourceVer.equals( ver )) {
        sourceLP = sourceVer.getLP();
        sinkVer = e.getSink();
        int i = subVertices.indexOf( sinkVer );
        sinkLP = sinkVer.getLP();
        w = e.getWeight();
        if (( sourceLP + w ) > sinkLP ) {
          sinkLP = sourceLP +w;
          sinkVer.setLP( sinkLP );
          sinkVer.setPredecessor( sourceVer );
        }
        sinkVer.decIndegree();
        subVertices.setElementAt( sinkVer, i );
        modifyVertexInEdges( sinkVer );
        i = sinkVer.getIndegree();   // i is reused for the indegree
        if ( i == 0 )
          vertexQueue.add( sinkVer );
        subEdges.remove( j );
        len = subEdges.size();
        first = false;
      }
      else if ( first ) {
        System.out.println( "This is a vertex with outdegree zero" );
        end = true;
      }
      else
        end = true;
    }
  }

  // Returns the index of the first sub-edge whose source equals ver, or -1.
  public int findEdges( Vertex ver ) {
    int i = 0 , l = subEdges.size();
    boolean find = false;
    Vertex v = new Vertex();
    Edge e = new Edge();
    while (( !find ) && ( i < l )) {
      e = (Edge)subEdges.get( i );
      v = e.getSource();
      if ( v.equals( ver ))
        find = true;
      else
        ++i;
    }
    if ( !find )
      i = -1;
    return i;
  }

  // Replaces, in every sub-edge, the source ( or else the sink ) that equals
  // sinkVer with the sinkVer instance itself.
  public void modifyVertexInEdges( Vertex sinkVer ) {
    Vertex v = new Vertex();
    Edge e = new Edge();
    int l = subEdges.size();
    for ( int i = 0; i < l; i++ ) {
      e = (Edge)subEdges.get( i );
      v = e.getSource();
      if ( v.equals( sinkVer ))
        e.setSource( sinkVer );
      else {
        v = e.getSink();
        if ( v.equals( sinkVer ))
          e.setSink( sinkVer
); } subEdges.setElementAt( e, i ); } } public Vector traverseVertices() { Vertex v = new Vertex(), maxVer = new Vertex(); int l = subVertices.size(); float length = 0f, maxLp = -2222.0f; for ( int i = 0; i < l; i++ ) { v = (Vertex)subVertices.get( i ); length = v.getLP(); if ( length > maxLp ) { maxLp = length; maxVer = v; } } Vector lp = constructLongestPath( maxVer ); return lp; } public Vector constructLongestPath( Vertex maxVer ) { Vector lp = new Vector(); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); Vertex v = maxVer; while ( !(v.equals( dummySource ))) { lp.add ( 0, v ); v = v.getPredecessor(); } return lp; } public void printLongestPath( Vector lp ) { int i = 0; Vertex v = new Vertex(); String str = new String(); int length = lp.size(); for ( i = 0; i < length; i++ ) { v = (Vertex) lp.get( i ); System.out.print( v.getStartLetter() + "A" + v.getEndLetter() + " "); } System.out.println(); } public Vector identifyLCRs( ) { addDummySource(); Vector lp = findLongestPath(); return lp; } public boolean checkExistence( Vertex v ) { int index = subVertices.indexOf( v ); if ( index == -1 ) return false; else return true; } public void copy( Vector vt1, Vector vt2 ) { int l = vt1.size(); Edge e = new Edge(); for ( int i = 0; i < l; i++ ) { e = (Edge)vt1.get( i ); vt2.add ( e ); } } // make all edges beginning with the same vertex stay together public void clusterSubEdges() { Vector tmpSubEdges = new Vector(); Edge e = new Edge(); Vertex v = new Vertex(), ver = new Vertex(); while ( ( !subEdges.isEmpty())) { e = (Edge)subEdges.remove( 0 ); tmpSubEdges.add( e ); v = e.getSource(); int m = 0; int len = subEdges.size(); while (( m < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( m ); ver = e.getSource(); if ( v.equals( ver )) { tmpSubEdges.add( e ); subEdges.remove( m ); } else ++m; len = subEdges.size(); } } copy( tmpSubEdges, subEdges ); } // assign values to subVertices and subEdges ( BFS ) public void extractConnectedGraph() { boolean first 
= true;
    Vertex v = new Vertex();
    Edge e = new Edge();
    Vector tmpQueue = new Vector();   // vertices whose edges still have to be collected
    while(( first ) || (!(tmpQueue.isEmpty()))){
      if ( first ) {
        // start the first edge of a new connected subgraph
        e = (Edge)edges.remove( 0 );
        subEdges.add( e );
        v = e.getSource();
        subVertices.add( v );
        // the "wrongN" messages report a vertex that was expected in
        // vertices but could not be removed from it
        if ( !(vertices.remove( v )))
          System.out.println( "wrong1" );
        v = e.getSink();
        subVertices.add( v );
        tmpQueue.add( v );
        if ( !(vertices.remove( v )))
          System.out.println( "wrong2" );
        v = e.getSource();
        first = false;
        boolean same = true;
        int m = 0;
        int len = edges.size();
        while (( same ) &&( m < len )) {
          // remove all those edges having the same source vertex as the first edge
          e = (Edge)edges.elementAt( m );
          Vertex ver = e.getSource();
          if ( v.equals( ver )) {
            edges.remove( m );    // remove the edge who starts with v
            subEdges.add( e );
            ver = e.getSink();
            subVertices.add( ver );
            tmpQueue.add( ver );  // put ver ( the sink of the edge ) into tmpQueue;
            if ( !(vertices.remove( ver )))
              System.out.println( "wrong3" );
          }
          else
            same = false;
          len = edges.size();
        }
      }
      else {
        v = (Vertex)tmpQueue.remove( 0 );
        int m = 0;
        boolean found = false;
        int len = edges.size();
        // find the starting position of those edges who start at the first vertex from tmpQueue
        while (( !found ) && ( m < len )) {
          // skip all edges starting with the vertex from the tmpQueue
          e = (Edge)edges.elementAt( m );
          Vertex sr = e.getSource();
          if ( v.equals( sr ))
            found = true;
          else {
            Vertex si = e.getSink();
            if ( v.equals( si )) {
              // work on edges whose sink vertex is the same as the vertex from tmpQueue
              boolean exist = checkExistence( sr );
              if ( !exist ) {
                subVertices.add( sr );
                tmpQueue.add( sr );
                if ( !(vertices.remove( sr )))
                  System.out.println( "wrong4" );
              }
              e = (Edge)edges.remove( m );
              subEdges.add(e );
              len = edges.size();
            }
            else
              ++m;
          }
        }
        boolean same = true;
        while ( same ) {
          // remove all those edges starting with the vertex from tmpQueue to subEdges
          len = edges.size();
          if ( m < len ) {
            e =
(Edge)edges.elementAt( m );
            Vertex ver = e.getSource();
            if ( v.equals( ver )) {
              edges.remove( m );   // remove the edge who starts with v
              subEdges.add( e );
              ver = e.getSink();
              boolean exist = checkExistence( ver );  // check whether the sinkVertex is in the subVertices or not
              if ( !exist ) {
                subVertices.add( ver );
                tmpQueue.add( ver );   // put ver ( the sink of the edge ) into tmpQueue;
                if ( !(vertices.remove( ver )))
                  System.out.println( "wrong5" );
              }
            }
            else {
              same = false;
            }
          }
          else
            same = false;
        }
      }
    }
    clusterSubEdges();
  }

  // Builds vertices and edges for str, then repeatedly extracts one
  // connected subgraph from the remaining edges and appends its longest path
  // to lps. Returns the number of subgraphs processed.
  public int workOnSequence( String str, int th1, int th2, int th3, int th4 ) {
    constructVertices( str, th4 );
    constructEdges( th1, th2, th3 );
    boolean find = true;
    int i = 0;
    while ( !(edges.isEmpty())) {
      subVertices.clear();
      subEdges.clear();
      extractConnectedGraph();
      Vector lp = identifyLCRs();  // the longest path in a connected subgraph
      lps.add( lp );
      ++i;
    }
    return i;
  }

  // combine all letters from a sequence on different lines ( strings ) into a single line ( string )
  // str is the line read last from rf. A header line ( starting with ">" ) is
  // printed and skipped; the following lines are trimmed and concatenated
  // until the next header or the end of input. When a next header was read,
  // the result is that header + "!" + the sequence. Returns null when str is
  // null and nothing else changes the result.
  public String generateSequence( String str ) {
    String sequence = new String();
    String strTmp = str;
    boolean lastSeq = false;
    try {
      if ( strTmp == null )
        sequence = null;
      if ( ( strTmp != null ) && ( strTmp.startsWith( ">"))) {
        System.out.println( str );
        strTmp = rf.readLine();
      }
      while (( strTmp != null ) && (!(strTmp.startsWith( ">" )))) {
        strTmp = strTmp.trim();
        sequence = sequence + strTmp;
        strTmp = rf.readLine();
      }
      if ( strTmp != null)
        if ( strTmp.startsWith( ">" )) {
          sequence = strTmp + "!"
+ sequence ; } } catch ( IOException ex ) { } return sequence; } public void printPositions( Vector pos ) { int l = pos.size(); String str = new String(); for ( int i = 0; i < l; i++ ) { str = (String) pos.get( i ); System.out.print( str + " " ); } System.out.println(); } public void printLCRBlocks( Vector LCRBlocks ) { String str = new String(); int l = LCRBlocks.size(); // System.out.println( "LCR Blocks: " ); for ( int i = 0; i < l; i++ ) { str = (String)LCRBlocks.get( i ); int index = str.indexOf( "-" ); String start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) System.out.print( str + " " ); } System.out.println(); } public Vector getPositions ( int k ) { Vector pos = new Vector(); //Vector posSingleVertexOnly = new Vector(); String str = new String(); int l = lps.size(), index = 0; Vector lp = new Vector(); Vertex v = new Vertex(); // longest path for ( int i = 0; i < k; i++ ) { // get positions from those vertices in lps lp = (Vector)lps.get( i ); int len = lp.size(); for ( int j = 0; j < len; j++ ) { v = (Vertex)lp.get( j ); str = v.getStartLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); str = v.getEndLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); } } return pos; } public Vector sortPositions( Vector pos) { Vector positions = new Vector(); // used to keep the sorted positions String str = new String(); int len = pos.size(), current = 0, previous = 0; Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { str = (String)pos.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); Vector tmpLCRBlocks = new Vector(); String start = new String(); previous = posInt[0].intValue() - 1; start = Integer.toString(( previous + 1 )); for ( int i = 0; i < len; i++ ) { str = posInt[i].toString(); current = posInt[i].intValue(); //generate blocks of continuous 
      // positions. Say, the sorted integer array is 3,4,5,6 8,9,10,11,12,13,29,30,31.
      // It can be represented as a vector of three strings( blocks ): 3-6, 8-13, 29-31.
      if ( current != ( previous + 1 )) {
        // a gap: close the running block and start a new one at this position
        tmpLCRBlocks.add( start + "-" + Integer.toString( previous ));
        start = str;
      }
      previous = current;
      positions.add( str );
    }
    tmpLCRBlocks.add( start + "-" + Integer.toString( previous ));
    len = tmpLCRBlocks.size();
    Vector LCRBlocks = new Vector();
    for ( int i = 0; i < len; i++ ) {
      // keep only blocks whose end exceeds the start by more than 1
      str = (String)tmpLCRBlocks.get( i );
      int index = str.indexOf( "-" );
      start = str.substring( 0, index );
      String end = str.substring( index + 1 );;
      int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ;
      if ( difference > 1 )
        LCRBlocks.add( str );
    }
    return LCRBlocks;
  }

  // Extends the block [ startPos, endPos ] ( 1-based, inclusive ) one letter
  // at a time to the "left" or, for any other direction value, to the right,
  // and returns the "start-end" regions over which the modified entropy of
  // the extended string kept decreasing and whose entropy was below comCut.
  // limit bounds the scan position in the chosen direction.
  public Vector extend ( int startPos, int endPos, int limit, String direction, String seq ) {
    Vector decRegs = new Vector();
    int pointer = 0, startDecPos = 0, endDecPos = 0;
    double com1 = 0, com2 = 0;   // entropy before / after adding one letter
    String extReg = seq.substring( startPos - 1, endPos );
    complexityCalculator cc = new complexityCalculator();
    cc.
initializeAlphabet();
    if ( direction.equals( "left" )) {
      // extend to the left( front )
      boolean dec = false;   // true while the entropy is in a decreasing run
      pointer = startPos - 2;
      // first phase: at most 15 steps to the left, never reaching limit
      while ( ( pointer > limit ) && ( pointer > ( startPos - 17 )) ) {
        com1 = cc.calculateModifiedEntropy( extReg );
        extReg = seq.substring( pointer, endPos );
        com2 = cc.calculateModifiedEntropy( extReg );
        if ( com1 > com2 ) {
          if ( !dec ) {
            // a decreasing run starts here
            dec = true;
            endDecPos = pointer + 2;
          }
        }
        else if ( dec ) {
          // the run ended: record it when the entropy is below the cut-off
          dec = false;
          startDecPos = pointer + 2;
          if ( com1 < comCut ) {
            decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos ));
          }
        }
        -- pointer;
      }
      if ( ( dec ) && ( pointer == ( startPos - 17 )) ) {
        // still decreasing after the first phase: keep going until the
        // entropy rises or limit is reached
        while ( ( pointer > limit ) && ( dec ) ) {
          com1 = cc.calculateModifiedEntropy( extReg );
          extReg = seq.substring( pointer, endPos );
          com2 = cc.calculateModifiedEntropy( extReg );
          if ( com1 < com2 ) {
            dec = false;
            startDecPos = pointer + 2;
            if ( com1 < comCut ) {
              decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos ));
            }
          }
          -- pointer;
        }
      }
      // the left extension touches the end of the last block of the current lcr blocks
      if (( pointer == limit ) && ( dec ) ) {
        startDecPos = pointer + 2;
        if ( com2 < comCut ) {
          decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos ));
        }
      }
      if ( decRegs.size() == 0 ) {
      }
      else {
      }
    }
    else {
      boolean dec = false;   // true while the entropy is in a decreasing run
      pointer = endPos + 1;
      // extend to the right( back )
      // first phase: at most 14 steps to the right, never reaching limit
      while ( ( pointer < limit ) && ( pointer < ( endPos + 15 ))) {
        com1 = cc.calculateModifiedEntropy( extReg );
        extReg = seq.substring( startPos - 1, pointer );
        com2 = cc.calculateModifiedEntropy( extReg );
        if ( com1 > com2 ) {
          if ( !dec ) {
            // a decreasing run starts here
            dec = true;
            startDecPos = pointer -1 ;
          }
        }
        else if ( dec ) {
          // the run ended: record it when the entropy is below the cut-off
          dec = false;
          endDecPos = pointer -1;
          if ( com1 < comCut ) {
            decRegs.add( ( startDecPos ) + "-" + ( endDecPos ));
          }
        }
        ++ pointer;
      }
      if ( ( dec ) && ( pointer == ( endPos + 15 )) ) {
        // keep extending until the complexity starts increasing, which means
        // that several blocks generated from the longest path can be included
        // into lcrs during one call of the 'extend()' based on a block
        while (( dec ) && ( pointer < limit )) {
          com1 = cc.calculateModifiedEntropy( extReg );
          extReg =
seq.substring( startPos - 1, pointer ); com2 = cc.calculateModifiedEntropy( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 4444444 " + com2); if ( com1 < com2 ) { dec = false; //System.out.println( "from true to false444444444" ); endDecPos = pointer - 1; if ( com1 < comCut ) { decRegs.add(( startDecPos ) + "-" + endDecPos ); //System.out.println( "decRegs added 5555555555555555555 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if (( pointer == limit ) && ( dec )) { endDecPos = limit - 1; //System.out.println( "decRegs added 66666666666 " + startDecPos + "-" + endDecPos ); decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); } } if ( decRegs.size() == 0 ) { //System.out.println( "right: Empty" ); } else { //System.out.print( "right: "); //printLCRs( decRegs ); } } return decRegs; } public boolean shareLetter( String str1, String str2 ) { boolean shared = false; String str = str1, letter = new String(); while (( str.length() != 0 ) && ( !shared )) { letter = str.substring( 0, 1 ); int index = str2.indexOf( letter ); if ( index != -1 ) shared = true; else if ( str.length() != 0 ) str = str.substring( 1 ); } return shared; } public boolean checkContribution( String currentBlock, Vector decRegs, String seq ) { boolean contributed = false; Vector regs = decRegs; String block = new String(); int i = 0, len = regs.size(); while ( ( i < len ) && ( !contributed )) { block = (String) regs.elementAt( i ); int index = block.indexOf( "-" ); int start= Integer.parseInt( block.substring( 0, index )); int end = Integer.parseInt( block.substring( index + 1 )); block = seq.substring( start - 1, end ); //System.out.println( "block: " + block+ " currentBlock: "+ currentBlock ); contributed = shareLetter( currentBlock, block ); ++i; } return contributed; } public Vector appendLcrs( Vector lcrs, Vector appendedLcrs ) { Vector lowComRegs = lcrs, tmpLcrs = appendedLcrs; while( !( 
tmpLcrs.isEmpty()))
      lowComRegs.add( (String)tmpLcrs.remove( 0 ) );
    return lowComRegs;
  }

  // Consumes the "start-end" blocks in `blocks` ( the vector is emptied ),
  // extends each usable block to the left and right with extend() and
  // assembles the resulting low-complexity regions, which are returned as
  // "start-end" strings.
  public Vector pickUpDrop ( Vector blocks, String seq ) {
    Vector frontLcrs = new Vector(), backLcrs = new Vector(), lcrs = new Vector(), tmpBLOCKS = blocks;
    String currentBlock = new String(), tmpBlock = new String();
    boolean isFirstBlock = true;
    int limit = 0, index = 0, startPos = 0, endPos = 0;
    while (( !tmpBLOCKS.isEmpty() )) {
      frontLcrs.clear();
      backLcrs.clear();
      int lcrBlockStart = 0, lcrBlockEnd= 0;
      boolean extendToLeft = true;  // whether to extend towards the left
      boolean find = false;         // find the current extending block
      // currentBlock can start in the middle of a block, or has the same
      // starting position as a block and it doesn't have to be the block
      // after the previous currentBlock
      // get the end position of the last block in lcrs
      if ( !( lcrs.isEmpty())) {
        tmpBlock = (String)lcrs.lastElement();
        index = tmpBlock.indexOf( "-" );
        lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 ));
        while (( !find ) && ( !(tmpBLOCKS.isEmpty()) )) {
          // get the current block
          currentBlock = (String)tmpBLOCKS.remove( 0 );
          index = currentBlock.indexOf( "-" );
          startPos = Integer.parseInt( currentBlock.substring( 0, index ));
          endPos = Integer.parseInt( currentBlock.substring( index + 1 ));
          if ( startPos < lcrBlockEnd ) {
            // the block starts inside the last region: use it only when it
            // sticks out by at least 3 positions, starting right after the
            // region and without a left extension
            if ( endPos > lcrBlockEnd )
              if (( endPos - lcrBlockEnd ) >= 3 ) {
                startPos = lcrBlockEnd + 1;
                extendToLeft = false;
                find = true;
              }
          }
          else
            find = true;
        }
      }
      else {
        currentBlock = (String) tmpBLOCKS.remove( 0 );
        index = currentBlock.indexOf( "-" );
        startPos = Integer.parseInt( currentBlock.substring( 0, index ));
        endPos = Integer.parseInt( currentBlock.substring( index + 1 ));
        find = true;
      }
      if ( find ) {
        if ( isFirstBlock ) {
          limit = -1;
          isFirstBlock = false;
          // extend to the left( front )
          frontLcrs = extend( startPos, endPos, limit, "left", seq );
        }
        else if ( extendToLeft ) {
          limit = lcrBlockEnd - 1;
          // extend to the left( front )
          frontLcrs = extend( startPos, endPos, limit, "left", seq );
        }
        limit = seq.length() + 1;
        // extend to the right( back )
        backLcrs = extend( startPos, endPos, limit, "right", seq );
        double com = 0;
        complexityCalculator cc = new complexityCalculator();
        cc. initializeAlphabet();
        // com is the modified entropy of the current block as originally
        // read ( cbStart/cbEnd come from currentBlock, not from startPos )
        index = currentBlock.indexOf( "-" );
        int cbStart = Integer.parseInt( currentBlock.substring( 0, index ))- 1;
        int cbEnd = Integer.parseInt( currentBlock.substring( index + 1 )) ;
        com = cc.calculateModifiedEntropy( seq.substring( cbStart, cbEnd ) );
        boolean contributed = false;
        if ( frontLcrs.size() != 0 ) {
          // get the start position of the first block in frontLcrs as the
          // start position of the block to be added into lcrs
          tmpBlock = (String)frontLcrs.elementAt( 0 );
          index = tmpBlock.indexOf( "-" );
          lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) );
          if ( com > comCut ) {
            // check whether the current block contributes to the
            // complexity-decreasing regions or not
            contributed = checkContribution( seq.substring( cbStart, cbEnd ), frontLcrs, seq );
            if ( !contributed )
              lcrBlockEnd = startPos - 1;
            else {
              lcrBlockEnd = endPos;
            }
          }
          else {
            lcrBlockEnd = endPos;
          }
          lcrs.add( lcrBlockStart + "-" + lcrBlockEnd );
        }
        boolean combine = false;  // whether to combine the last block in lcrs from frontLcrs and the block to be added into lcrs from backLcrs
        if (( !contributed ) && ( com > comCut )){
          contributed = checkContribution( seq.substring( cbStart, cbEnd ), backLcrs, seq );
          if ( !contributed ) {
            lcrBlockStart = endPos + 1;
          }
          else {
            if ( frontLcrs.size() != 0 )
              combine = true;
          }
        }
        else if ( frontLcrs.size() != 0 )
          combine = true;
        // get the end position of the last block in backLcrs as the end
        // position of the block to be added into lcrs
        if ( !( backLcrs.isEmpty())) {
          tmpBlock = (String)backLcrs.lastElement( );
          index = tmpBlock.indexOf( "-" );
          lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 ) );
          if ( combine ) {
            // reuse the start of the region just added from frontLcrs and
            // drop that region; limit is reused as a scratch variable
            limit = lcrs.size();
            tmpBlock = (String) lcrs.remove( limit - 1 );
            index = tmpBlock.indexOf( "-" );
            lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) );
          }
          else {
            if ( com < comCut ) {
              lcrBlockStart = startPos;
            }
            else if ( contributed ) {
              lcrBlockStart = startPos;
            }
            else {
              lcrBlockStart = endPos + 1;
            }
          }
          lcrs.add( lcrBlockStart + "-" + lcrBlockEnd );
        }
        else {
          if((frontLcrs.size() == 0 ) && ( !contributed ) && ( com < comCut )) {
            lcrs.add( currentBlock );
          }
        }
        // check whether to combine the last two blocks in the current lcrs
      }
    }
    return lcrs;
  }

  // Merges neighbouring "start-end" regions in place: whenever a region ends
  // at, or one position before, the start of the next one, the two are
  // replaced by one region. Returns the same vector that was passed in.
  public Vector mergePurge( Vector lcrs ) {
    Vector tmpLcrs = lcrs;
    String currentBlock = new String(), nextBlock = new String();
    int len = tmpLcrs.size(), i = 0;
    while ( i < len ) {
      if (( i + 1 ) < len ){
        currentBlock = (String) tmpLcrs.elementAt( i );
        int endIndex = currentBlock.indexOf( "-" );
        int end = Integer.parseInt( currentBlock.substring( endIndex +1 ));
        nextBlock = (String) tmpLcrs.elementAt( i + 1 );
        int startIndex = nextBlock.indexOf( "-" );
        int start = Integer.parseInt( nextBlock.substring( 0, startIndex ));
        if (( end == ( start - 1 )) || ( end == start )) {
          // i is not advanced, so the merged region is compared with its
          // new neighbour on the next pass
          currentBlock = currentBlock.substring( 0, endIndex ) + "-" + nextBlock.substring( startIndex + 1 );
          tmpLcrs.remove( i );
          tmpLcrs.remove( i );
          tmpLcrs.add( i, currentBlock );
        }
else ++i; len = tmpLcrs.size(); } else ++i; } i = 0; len = tmpLcrs.size(); //printLCRs( tmpLcrs ); /* while ( i < len ) { currentBlock = (String) tmpLcrs.elementAt( i ); int index = currentBlock.indexOf( "-" ); int start = Integer.parseInt( currentBlock.substring( 0, index )); int end = Integer.parseInt( currentBlock.substring( index + 1 )); if (( end - start ) < 7 ) tmpLcrs.remove( i ); else ++i; len = tmpLcrs.size(); } */ return tmpLcrs; } public boolean checkCombinedSubBlock( String seq1, String seq2, double cCut ) { boolean delete = true; complexityCalculator cc = new complexityCalculator(); cc. initializeAlphabet(); // double com = cc.calculateReciprocalPro( seq1 + seq2 ); String seq = seq1 + seq2; double com = cc.calculateNorModifiedEntropy( seq ); System.out.println( "combined:" + seq1 + seq2 + " " + com ); if ( com > cCut ) delete = false; return delete; } public String findAlignment( String seq1, String seq2 ) { String aliPos = new String(); try { Sequence s1 = SequenceParser.parse( seq1 ); Sequence s2 = SequenceParser.parse( seq2 ); System.out.println( "alignment sequences: " + seq1 + "???" + seq2 ); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); int similarLen = alignment.getSimilarity(); // get the length of the same and similar letters; if ( similarLen > 4 ) { // only if the length of similar and same letters is greater than 4 aliPos = new Pair().format( alignment ); System.out.println( "the alignment: " + aliPos + " " + similarLen ); } } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } return aliPos; } public Vector checkLeftRegs( int aliStart, int aliEnd, int start, int end, String seq, double cCut ) { Vector left = new Vector(); complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet();
    double com = 0;
    if ( aliStart > 7 ) {
      // the length of the left region must be longer than 7
      com = cc.calculateNorModifiedEntropy( seq.substring( start - 1, start + aliStart - 2 ));
      System.out.println( "left1: " + seq.substring( start - 1, start + aliStart - 2 ) + " " + com + " " + start + "-" + ( start + aliStart - 2 ));
      if ( com <= cCut )
        left.add( 0, start + "-" + ( start + aliStart - 2 ));
    }
    if ( ( end - start + 1 - aliEnd ) > 7 ) {
      // same test for the part of the block after the aligned range
      com = cc.calculateNorModifiedEntropy( seq.substring( start + aliEnd - 1, end ));
      System.out.println( "left2:" + seq.substring( start + aliEnd - 1, end ) + " " + com + " " + ( start + aliEnd ) + "-" + end );
      if ( com <= cCut )
        left.add( ( start + aliEnd ) + "-" + end );
    }
    return left;
  }

  // Inserts each "start-end" string of left into result: in front of the
  // first entry, searching onwards from the previous insertion point, whose
  // start is greater than the string's end, or at the end of result when
  // there is none. result is modified in place and returned.
  public Vector addToResult( Vector result, Vector left ) {
    Vector tmpResult = result;
    String str1 = new String(), str2 = new String();
    int j = 0;   // search position in result, kept across the entries of left
    for ( int i = 0; i < left.size(); i++ ) {
      str1 = (String) left.elementAt( i );
      int index = str1.indexOf( "-" );
      int endLeft = Integer.parseInt( str1.substring( index + 1 ));
      boolean found = false;
      while ( !found ) {
        if ( j < result.size() ) {
          str2 = (String) result.elementAt( j );
          index = str2.indexOf( "-" );
          int startResult = Integer.parseInt( str2.substring( 0, index ));
          if ( endLeft < startResult ) {
            found = true;
            System.out.println( "Insert left into result: " + str1 + " " + str2 );
            result.add( j , str1 );
            j = j + 2;   // skip the inserted entry and the one it was put before
          }
          else
            j++;
        }
        else {
          result.add( str1 );
          System.out.println( "append to the end of result" );
          found = true;
        }
      }
    }
    return result;
  }

  // Aligns block [ start1, end1 ] with the adjacent block adjBlock
  // ( "start-end" ); mark is "front" when adjBlock precedes the block, and
  // any other value is handled as the following block. Returns the regions
  // to keep as "start-end" strings, or an empty vector.
  public Vector checkAdjBlock ( int start1, int end1, String adjBlock, String seq, double cCut, String mark ) {
    Vector result = new Vector();
    double com = 0;
    String seq1 = seq.substring( start1 - 1, end1 );
    int index1 = adjBlock.indexOf( "-" );
    int start2 = Integer.parseInt( adjBlock.substring( 0, index1 ));
    int end2 = Integer.parseInt( adjBlock.substring( index1 + 1
)); String seq2 = seq.substring( start2 - 1, end2 ); String aliPos = new String(); if ( mark.equals( "front" )) aliPos = findAlignment( seq2, seq1 ); else aliPos = findAlignment( seq1,seq2 ); if ( aliPos.length() != 0 ) { // format of aliPos: 'a1-a2 b1-b2' index1 = aliPos.indexOf( "-" ); int index2 = aliPos.indexOf( " " ); int aliStart2 = Integer.parseInt ( aliPos.substring( 0, index1 )); int aliEnd2 = Integer.parseInt ( aliPos.substring( index1 + 1, index2 )); String aliSeq1 = new String(), aliSeq2 = new String(); if ( mark.equals( "front" )) { aliSeq2 = seq.substring( start2 + aliStart2 - 2, start2 + aliEnd2 - 1 ); System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq2 ); } else { aliSeq1 = seq.substring( start1 + aliStart2 - 2, start1 + aliEnd2 - 1 ); System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq1 ); } aliPos = aliPos.substring( index2 + 1 ); index1 = aliPos.indexOf( "-" ); //System.out.println( "aliPos:" + aliPos ); int aliStart1 = Integer.parseInt( aliPos.substring( 0, index1 )); int aliEnd1 = Integer.parseInt( aliPos.substring( index1 + 1 )); if ( mark.equals( "front" )) { aliSeq1 = seq.substring( start1 + aliStart1 - 2, start1 + aliEnd1 - 1 ); System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq1 ); } else { aliSeq2 = seq.substring( start2 + aliStart1 - 2, start2 + aliEnd1 - 1 ); System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq2 ); } boolean decOrNot = true; if ( mark.equals( "front" )) { decOrNot = checkCombinedSubBlock( aliSeq2,aliSeq1, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 ) ); System.out.println( "added to result1: " + ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 )); Vector left = checkLeftRegs( aliStart1, aliEnd1, start1, end1, seq, cCut ); result = addToResult( result, left ); } } else { decOrNot = checkCombinedSubBlock( 
aliSeq1,aliSeq2, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1) ); System.out.println( "added to redult2: " + ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1)); Vector left = checkLeftRegs( aliStart2, aliEnd2, start1, end1, seq, cCut ); result = addToResult( result, left ); result.add( ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 ) ); System.out.println( "added to redult3: " + ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 )); left = checkLeftRegs( aliStart1, aliEnd1, start2, end2, seq, cCut ); result = addToResult( result, left ); } } } return result; } public Vector checkDeletability ( Vector lcrs, int maxIndex, String seq, double cCut ) { Vector result = new Vector(); String block = new String(); int start1 = 0, end1 = 0; block = (String)lcrs.elementAt( maxIndex ); //System.out.println( "current block :" + block ); int index = block.indexOf( "-" ); start1 = Integer.parseInt( block.substring( 0, index )); end1 = Integer.parseInt( block.substring( index + 1 )); if ( maxIndex != 0 ) { block = (String)lcrs.elementAt( maxIndex - 1 ); System.out.println( "front adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "front" ); } if ( result.size() == 0 ) { if ( maxIndex != ( lcrs.size() - 1 ) ) { block = (String)lcrs.elementAt( maxIndex + 1 ); System.out.println( "back adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "back" ); if ( result.size() != 0 ) result.add( "back" ); } } return result; } public Vector filter ( Vector lcrs, String seq ) { Vector tmpLcrs = lcrs, com = new Vector(); float len = tmpLcrs.size(); double singleCom = 0, max = -222222222; String str = new String(), block = new String(); if ( tmpLcrs.size() != 1 ) { complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLcrs.elementAt( i ); int index = str.indexOf( "-" ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); //singleCom = cc.calculateReciprocalPro( str ); singleCom= cc.calculateNorModifiedEntropy( str ); com.add( Double.toString( singleCom )); } int i = 0, j = 0, maxIndex = 0; double limit = 0, cCut = 0; if ( seq.length() > 500 ) limit = len * 0.5; else limit = len * 0.2; while ( i < limit ) { int l = com.size(); j = 0; max = -222222222; while ( j < l ) { str = ( String ) com.elementAt( j ); singleCom = Double.parseDouble( str ); if ( singleCom > max ) { max = singleCom; maxIndex = j; } ++ j; } //System.out.println( "com: " + com.elementAt( maxIndex )); cCut = Double.parseDouble( (String)com.remove( maxIndex )); ++i; } System.out.println( "cCut: " + cCut ); j = 0; i = 0; len = tmpLcrs.size(); Vector result = new Vector(); while (( i < limit ) && ( j < len )) { str = (String)tmpLcrs.elementAt( j ); int index = str.indexOf( "-" ); System.out.println( "current block?" + str.substring( 0, index ) + "?" + str.substring( index + 1 ) ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); System.out.println( "cur subseq???" 
+ str ); //singleCom = cc.calculateReciprocalPro( str ); singleCom= cc.calculateNorModifiedEntropy( str ); //System.out.println( "singlecom:"+ singleCom + " " + cCut ); if ( singleCom >= cCut ) { result = new Vector(); result = checkDeletability( tmpLcrs, j, seq, cCut ); int rSize =result.size(); boolean fromBack = false; if ( rSize != 0 ) { str = (String)result.elementAt( rSize - 1 ); if ( str.equals( "back")) { --rSize; fromBack = true; System.out.println( "back" ); } System.out.println( "remove: "+ (String)tmpLcrs.remove( j )); for ( int k = 0; k < rSize; k++ ) { // add 'result' Vector into tmpLcrs in order str = ( String ) result.elementAt( k ); System.out.print( "*"+ str + "*" ); tmpLcrs.add( j, str ); ++j; } System.out.println(); if ( j < tmpLcrs.size()) System.out.println( "the next one:" + tmpLcrs.elementAt( j )); if ( fromBack ) { System.out.println( "yes, from back " ); System.out.println( "removed: "+ tmpLcrs.remove( j )); } len = tmpLcrs.size(); //////////////////////// } else { tmpLcrs.remove( j ); System.out.println( "removed coz of high complexity" ); ++i; len = tmpLcrs.size(); } } else ++j; } } return tmpLcrs; } public Vector postProcess( int i, String seq ) { Vector lcrs = new Vector(); Vector blocks = new Vector(); Vector pos = getPositions( i ); if ( pos.size() != 0 ) { blocks = sortPositions( pos ); // blocks = filter( blocks, seq ); //printLCRs( blocks ); } lcrs = pickUpDrop( blocks, seq ); System.out.println( "previous" ); printLCRs( lcrs ); lcrs = mergePurge( lcrs ); lcrs = filter( lcrs, seq ); //System.out.println( "AFTER************" ); printLCRs( lcrs ); return lcrs; } public void computeLCRPercentage ( Vector lcrs, String str) { } public void printLCRs( Vector LCRs ) { int len = LCRs.size(); for ( int i = 0; i < len; i ++ ) { String str = (String)LCRs.get( i ); System.out.print( str + " " ); } System.out.println( ); } public void printLCRs ( Vector lcrs, String str) { } public void startt( int th1, int th2, int th3, int th4 ) { String str 
= new String(), id = new String(), nextId = new String(); int index = 0; boolean first = true; try { while ( str != null ) { str = rf.readLine(); str = generateSequence( str ); if ( str != null ) { if ( str.indexOf( ">" ) != -1 ) { index = str.indexOf( "!" ); id = nextId; nextId = str.substring( 0, index ); str = str.substring( index + 1 ); } else id = nextId; if (!first ) { System.out.println(); System.out.println( id ); } else { id = nextId; first = false; } vertices.clear(); edges.clear(); lps.clear(); for ( int i = 0; i < 20; i++ ) { fVecNor[i] = 0f; fVecUnNor[i] = 0f; } str = str.trim(); int i = workOnSequence( str, th1, th2, th3, th4 ); Vector lcrs = postProcess( i, str );//process all longest paths from every connected subgraph /****************************** float l = lcrs.size(); float len = str.length(); float per = l / len; System.out.println( "The percentage of LCR letters after the longest path: " + per ); ***************************************/ /* computeLCRPercentage( lcrs, str ); printLCRs( lcrs, str ); */ } } rf.close(); } catch ( IOException ex ) { } } public static void main ( String args[] ) { int th1 = Integer.parseInt( args[2] ); int th2 = Integer.parseInt( args[3] ); int th3 = Integer.parseInt( args[4] ); int th4 = th2; gbm g = new gbm( args[0]); g.readMatrices( args[1] ); g.getComCut( args[5] ); g.startt( th1, th2, th3, th4 ); /* try { Sequence s1 = SequenceParser.parse("VVVVVV" ); Sequence s2 = SequenceParser.parse( "LAELLAKKSDRDSPKK"); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); System.out.println( "similarity: " + alignment.getSimilarity() ); String result = new Pair().format(alignment); System.out.println( "*****************" ); System.out.println( result ); } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } */ } } /* 1. 
Format of the output: SACACPQTSOP......( 60 letters) XXXX TPQSKAQ..........( 60 letters) */ PK f}7ٱ˛0applications/gbmCommentAdjBlocksNCheCom.java.alt/* ** Author: Xuehui Li ** Date: March, 2005 ** "gbm" or "GBM" is the abbrevaition of "A graph-based method for detecting low-complexity reions". ** This is program is used to find low-complexity regions in sequences ** NOTE: all vertices and edges have topological orders ** There are totally five input paramaters. The first one is the sequence file name. The second one the learned matrix file( /cise/research/tamer/xli/LCR/graphLCR/swissprotLearnedMatrices ). The remaining three are the threshold 1, threshold 2 and threshold 3, respectively. At this tiem, all LCR Blocks generated in both /cise/research/tamer/xli/LCR/graphLCR/swissprotLCRBlocks/ and /cise/research/tamer/xli/LCR/graphLCR/pfamLCRBlocks/ are based on the three thresholds: " 3 15 5" */ package applications; import jaligner.Alignment; import jaligner.Sequence; import jaligner.SmithWatermanGotoh; import jaligner.formats.Pair; import jaligner.matrix.MatrixLoader; import jaligner.util.SequenceParser; import java.io.*; import java.util.*; class gbm { // vertices and edges are vectors used to keep all the vertices, edges in a graph generated from a sequence, respectively. vertexQueue is a vector used to keep all vertices whose indegree is zero. lps is a vector used to keep all the longest paths in all connected subgraphs of a sequence. Every longest path in lps is a vector of vertices, excluding the dummy source. // subVertices and subEdges are vectors used to keep all the vertices, edges in a connected-graph which is a subgraph of the graph generated from a sequence, respectively. 
private File f; private RandomAccessFile rf; private Vector vertices, subVertices, edges, subEdges, vertexQueue, lps; private float[][] repeatMatrix, nonRepeatMatrix; private float[] fVecNor, fVecUnNor; private Vector alphabet; private double comCut = 0; // the complexity cut-off value private float singleVertexOnly; public gbm ( String fileName ) { // the graph is given in a file where every line represents an edge and has the fromat of "source sink weight" initializeAlphabet(); try{ f = new File ( fileName ); rf = new RandomAccessFile ( f, "r" ); } catch ( IOException ex ) { } vertices = new Vector(); subVertices = new Vector(); edges = new Vector(); subEdges = new Vector(); vertexQueue = new Vector(); lps = new Vector(); repeatMatrix = new float[20][20]; nonRepeatMatrix = new float[20][20]; fVecNor = new float[20]; fVecUnNor = new float[20]; for ( int i = 0; i < 20; i++ ) fVecUnNor[i] = 0f; singleVertexOnly = 0f; } public void initializeAlphabet() { alphabet = new Vector(); alphabet.add( "A" ); alphabet.add( "R" ); alphabet.add( "N" ); alphabet.add( "D" ); alphabet.add( "C" ); alphabet.add( "Q" ); alphabet.add( "E" ); alphabet.add( "G" ); alphabet.add( "H" ); alphabet.add( "I" ); alphabet.add( "L" ); alphabet.add( "K" ); alphabet.add( "M" ); alphabet.add( "F" ); alphabet.add( "P" ); alphabet.add( "S" ); alphabet.add( "T" ); alphabet.add( "W" ); alphabet.add( "Y" ); alphabet.add( "V" ); } public void readMatrices( String matricesFile ) { try { File f = new File( matricesFile ); RandomAccessFile rfm = new RandomAccessFile ( f, "r" ); String row = new String(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { nonRepeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { nonRepeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.readLine(); rfm.readLine(); rfm.readLine(); for ( int i = 
0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { repeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { repeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.close(); } catch ( IOException ex ) { } } public void printMatricesRowByRow() { System.out.println( "Non-Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( nonRepeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); System.out.println(); System.out.println( "Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( repeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); } public void getComCut( String fileName ) { try { File f = new File ( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); String line = new String(); double com, sum = 0, num = 0; line = rf.readLine(); while ( line != null ) { line = line.trim(); complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet(); com = cc.calculateModifiedEntropy( line ); //com = cc.calculateReciprocalPro( line ); //com = cc.calculateRecProWScoringMatrix( line ); sum = sum + com; num = num + 1; line = rf.readLine(); } comCut = sum / num; // System.out.println( "The cut-off value is: " + sum + " / " + num + " = " + comCut ); rf.close(); } catch( IOException ex ){ } } public void createFirstVector( String window ) { int len = window.length(); String tmpWindow = window, letter = new String(); for ( int i = 0; i < len; i++ ) { letter = tmpWindow.substring( 0, 1 ); int index = alphabet.indexOf( letter ); fVecUnNor[ index ]= fVecUnNor[index] + 1f; tmpWindow = tmpWindow.substring( 1 ); } for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecUnNor[ i ]; for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecNor[ i ] / len; } // o for unNormalized, 1 for normalized public void printVector( int mark ) { float[] tmpVector = new float[ 20 ]; if ( mark == 0 ) tmpVector = fVecUnNor; else tmpVector = fVecNor; for ( int i = 0; i < 20; i++ ) System.out.print( tmpVector[i] + " " ); System.out.println(); } public void constructSingleVertex( int start, int end ) { String startLetter = Integer.toString( start + 1 ); String endLetter = Integer.toString( end + 1 ); Vertex v = new Vertex( startLetter, endLetter, 1, 1.0f ); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); v.setPredecessor( dummySource ); vertices.add( v ); } public void workOnFirstWindow( String window ) { String letter1, letter2; createFirstVector( window ); int len = window.length(), j = 0, row = 0, col = 0; for ( int i = 0; i < len - 1; i++ ) { j = 1; letter1 = window.substring( i, i + 1 ); while ( ( i + j ) < len ) { letter2 = window.substring( i + j, i + j + 1 ); boolean similar = checkSimilarity( letter1, letter2 ); if ( similar ) { row = alphabet.indexOf( letter1 ); col = alphabet.indexOf( letter2 ); if ( checkProbablity( row, col )) constructSingleVertex( i, i + j ); } ++j; } } } // at this time, row == col, since we only 
consider same letters public boolean checkProbablity( int row, int col ) { boolean construct = false; float difference1 = Math.abs( repeatMatrix[ row][col] - fVecNor[row] ); float difference2 = Math.abs( nonRepeatMatrix[row][col] - fVecNor[row] ); if ( difference2 > difference1 ) { construct = true; } return construct; } public void addVertices( String window, int index, int startPos ) { // get all letters in the window who have the same index in the alphabet as "index" int len = window.length(); String letter = new String(); int tmpIndex = 0; int[] positions = new int[ len - 1 ]; for ( int i = 0; i < len - 1; i++ ) { positions[ i ] = 0; } int j = 0; for ( int i = 0; i < len - 1; i++ ) { letter = window.substring( i, i + 1 ); tmpIndex = alphabet.indexOf( letter ); if ( index == tmpIndex ) { positions[ j ] = startPos + i; /// get the position # of the letter ++j; } } if ( checkProbablity( index, index )) { j = 0; boolean end = false; while (( !end ) && ( j < ( len - 1 ))) { if ( positions[ j ] != 0 ) constructSingleVertex( positions[ j ], startPos + len - 1 ); else end = true; ++j; } } } public void constructVertices( String str, int th4 ) { char c1, c2; int len = str.length(); boolean similar = false; Vertex v = new Vertex(); float[] previousVecUnNOr = new float[20]; String window = str.substring( 0, th4 ); String oldLetter = new String(), newLetter = new String(); workOnFirstWindow( window ); int startPos = 1; while( startPos <= ( len - th4 ) ) { oldLetter = window.substring( 0, 1 ); window = str.substring( startPos, startPos + th4 ); newLetter = window.substring( th4 - 1, th4 ); int index = alphabet.indexOf( oldLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] - 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; index = alphabet.indexOf( newLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] + 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; addVertices( window, index, startPos ); ++startPos; } } // return the actual weight of the vertex ??? 
/// to be modified later to include similar cases public boolean checkSimilarity( String c1, String c2 ) { boolean similar = false; if ( c1.equals( c2 )) similar = true; return similar; } // find the percentage of letters appearing in vertices public void findLetterPercentageVer( float lF ) { int len = vertices.size(); Vector appearedLetters = new Vector(); Vertex v = new Vertex(); for ( int i = 0; i < len ; i++ ) { v = (Vertex)vertices.elementAt( i ); String str = v.getStartLetter(); int index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); str = v.getEndLetter(); index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); } ///// Sort first len = appearedLetters.size(); float per = len / lF; // System.out.println( "The letter percentage after vertex construction is: " + len + " / " + lF + " = " + per ); Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { String str = (String)appearedLetters.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); for ( int i = 0; i < len; i ++ ) { String str = posInt[i].toString(); System.out.print( str + " " ); } System.out.println(); // computePercentage( lF, appearedLetters ); } public void computePercentage( float l, Vector al ) { int len = al.size(); float per = len / l; System.out.println( "The percentage is: " + len + " / " + l + " = " + per ); for ( int i = 0; i < len; i ++ ) { String str = (String)al.elementAt( i ); System.out.print( str + " " ); } System.out.println(); } public void constructEdges( int th1, int th2, int th3 ) { String startLetter = new String(), endLetter = new String(); int i = 0, j = 0, p = 0, q = 0, l = vertices.size(); Vertex v1 = new Vertex(), v2 = new Vertex(); Edge e = new Edge(); for ( int k = 0; k < l-1; k++ ) { v1 = (Vertex)vertices.get( k ); startLetter = v1.getStartLetter(); i = Integer.parseInt ( startLetter ); endLetter = v1.getEndLetter(); j = Integer.parseInt( endLetter ); boolean end = 
false; int m = k + 1; while (( !end )&& ( m < l )) { v2 = (Vertex)vertices.get( m ); startLetter = v2.getStartLetter(); p = Integer.parseInt ( startLetter ); endLetter = v2.getEndLetter(); q = Integer.parseInt( endLetter ); if ( checkConditions( i, j, p, q, th1, th2, th3 )) { v2.incIndegree(); // modify the vertex in vertices vertices.setElementAt( v2, m ); e = new Edge( v1, v2, 1.0f ); edges.add( e ); } else if ( ( p - i ) > th2 ) // k2 = 25 end = false; ++m; } } } // return the actual weight of the edge ??? public boolean checkConditions ( int i, int j, int p, int q, int th1, int th2, int th3 ) { boolean satisfied = false; int foo = ( j - i ) - ( q - p ); foo = Math.abs( foo ); if ( foo <= th1 ) // condition #1, k1 = 5 if (( p - i ) <= th2 ) // condition #2, k2 = 26 if ((( i <= p ) && ( p <= j ) ) && ( j <= q )) // condition #3 if (( i == p ) || ( p == j ) || ( j == q )) { // condition #4 if ((( j - i ) <= th3) && ( ( q - p ) <= th3 )) satisfied = true; } else satisfied = true; return satisfied; } public void modifyVertexQueue ( Vector tmpQueue, Vector tmpVertices ) { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void printVertexQueue() { System.out.println( "All vertices in the queue:" ); Vertex v = new Vertex(); Vertex previous = v; int l = vertexQueue.size(); for ( int i = 0; i < l; i++ ) { v = (Vertex) vertexQueue.get( i ); previous = v.getPredecessor(); if ( previous != null) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printVertices( int m ) { Vector tmpVertices = new Vector(); if ( m == 0 ) tmpVertices = subVertices; else tmpVertices = 
vertices; Vertex v = new Vertex(); Vertex previous = v; int l = tmpVertices.size(); System.out.println( "All vertices in the graph: " + l ); for ( int i = 0; i < l; i++ ) { v = (Vertex) tmpVertices.get( i ); previous = v.getPredecessor(); if ( previous != null ) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printEdges( int m ) { Vector tmpEdges = new Vector(); if ( m == 0 ) tmpEdges = subEdges; else tmpEdges = edges; Edge e = new Edge(); Vertex sourceVer = new Vertex(), sinkVer = new Vertex(); String str = new String(); int length = tmpEdges.size(); System.out.println( "All edges in the graph: " + length ); for ( int i = 0; i < length; i++ ) { e = (Edge) tmpEdges.get( i ); sourceVer = e.getSource(); sinkVer = e.getSink(); System.out.println( sourceVer.getStartLetter() + "A" + sourceVer.getEndLetter() + " lp: " + sourceVer.getLP()+ " indegree: "+ sourceVer.getIndegree() + " "+ sinkVer.getStartLetter() + "A" + sinkVer.getEndLetter() + " lp: " + sinkVer.getLP() + " indegree: " + sinkVer.getIndegree() + " weight: "+ e.getWeight() ); } } public void addDummySource() { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); addToSubVerticesVertexQueue( dummySource ); addToSubEdges( dummySource ); } public void addToSubVerticesVertexQueue( Vertex dummySource ) { subVertices.add( 0, dummySource ); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void addToSubEdges( Vertex dummySource ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subVertices.size(); for ( int i = l-1; i > 0; i-- ) { v = (Vertex)subVertices.get( i ); e = new Edge( dummySource, v, 0.0f ); subEdges.add( 0, e ); } } public Vector 
findLongestPath() { Vertex v = new Vertex(); while ( !( vertexQueue.isEmpty())) { v = (Vertex) vertexQueue.remove( 0 ); traverseSubEdges( v ); } // find the vertex to which the path from the source is the longest Vector lp = traverseVertices(); return lp; } public void traverseSubEdges( Vertex ver ) { boolean end = false, first = true; Edge e = new Edge(); Vertex sourceVer = new Vertex(); Vertex sinkVer = new Vertex(); String str = new String(); float w = 0, sourceLP = 0, sinkLP = 0; int len = subEdges.size(), j = 0; if (!(subEdges.isEmpty())) j = findEdges( ver ); if ( j == -1 ) end = true; else if ( j != 0 ) { } while (( !end ) && ( j < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( j ); sourceVer = e.getSource(); if ( sourceVer.equals( ver )) { sourceLP = sourceVer.getLP(); sinkVer = e.getSink(); int i = subVertices.indexOf( sinkVer ); sinkLP = sinkVer.getLP(); w = e.getWeight(); if (( sourceLP + w ) > sinkLP ) { sinkLP = sourceLP +w; sinkVer.setLP( sinkLP ); sinkVer.setPredecessor( sourceVer ); } sinkVer.decIndegree(); subVertices.setElementAt( sinkVer, i ); modifyVertexInEdges( sinkVer ); i = sinkVer.getIndegree(); if ( i == 0 ) vertexQueue.add( sinkVer ); subEdges.remove( j ); len = subEdges.size(); first = false; } else if ( first ) { System.out.println( "This is a vertex with outdegree zero" ); end = true; } else end = true; } } public int findEdges( Vertex ver ) { int i = 0 , l = subEdges.size(); boolean find = false; Vertex v = new Vertex(); Edge e = new Edge(); while (( !find ) && ( i < l )) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( ver )) find = true; else ++i; } if ( !find ) i = -1; return i; } public void modifyVertexInEdges( Vertex sinkVer ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subEdges.size(); for ( int i = 0; i < l; i++ ) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( sinkVer )) e.setSource( sinkVer ); else { v = e.getSink(); if ( v.equals( sinkVer )) e.setSink( sinkVer 
); } subEdges.setElementAt( e, i ); } } public Vector traverseVertices() { Vertex v = new Vertex(), maxVer = new Vertex(); int l = subVertices.size(); float length = 0f, maxLp = -2222.0f; for ( int i = 0; i < l; i++ ) { v = (Vertex)subVertices.get( i ); length = v.getLP(); if ( length > maxLp ) { maxLp = length; maxVer = v; } } Vector lp = constructLongestPath( maxVer ); return lp; } public Vector constructLongestPath( Vertex maxVer ) { Vector lp = new Vector(); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); Vertex v = maxVer; while ( !(v.equals( dummySource ))) { lp.add ( 0, v ); v = v.getPredecessor(); } return lp; } public void printLongestPath( Vector lp ) { int i = 0; Vertex v = new Vertex(); String str = new String(); int length = lp.size(); for ( i = 0; i < length; i++ ) { v = (Vertex) lp.get( i ); System.out.print( v.getStartLetter() + "A" + v.getEndLetter() + " "); } System.out.println(); } public Vector identifyLCRs( ) { addDummySource(); Vector lp = findLongestPath(); return lp; } public boolean checkExistence( Vertex v ) { int index = subVertices.indexOf( v ); if ( index == -1 ) return false; else return true; } public void copy( Vector vt1, Vector vt2 ) { int l = vt1.size(); Edge e = new Edge(); for ( int i = 0; i < l; i++ ) { e = (Edge)vt1.get( i ); vt2.add ( e ); } } // make all edges beginning with the same vertex stay together public void clusterSubEdges() { Vector tmpSubEdges = new Vector(); Edge e = new Edge(); Vertex v = new Vertex(), ver = new Vertex(); while ( ( !subEdges.isEmpty())) { e = (Edge)subEdges.remove( 0 ); tmpSubEdges.add( e ); v = e.getSource(); int m = 0; int len = subEdges.size(); while (( m < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( m ); ver = e.getSource(); if ( v.equals( ver )) { tmpSubEdges.add( e ); subEdges.remove( m ); } else ++m; len = subEdges.size(); } } copy( tmpSubEdges, subEdges ); } // assign values to subVertices and subEdges ( BFS ) public void extractConnectedGraph() { boolean first 
= true; Vertex v = new Vertex(); Edge e = new Edge(); Vector tmpQueue = new Vector(); while(( first ) || (!(tmpQueue.isEmpty()))){ if ( first ) { // start the first edge of a new connected subgraph e = (Edge)edges.remove( 0 ); subEdges.add( e ); v = e.getSource(); subVertices.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong1" ); v = e.getSink(); subVertices.add( v ); tmpQueue.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong2" ); v = e.getSource(); first = false; boolean same = true; int m = 0; int len = edges.size(); while (( same ) &&( m < len )) { // remove all those edges having the same source vertex as the first edge e = (Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong3" ); } else same = false; len = edges.size(); } } else { v = (Vertex)tmpQueue.remove( 0 ); int m = 0; boolean found = false; int len = edges.size(); //find the starting positon of those edges who start at the first vertex from tmpQueue while (( !found ) && ( m < len )) { // skip all edges starting with the vertex from the tmpQueue e = (Edge)edges.elementAt( m ); Vertex sr = e.getSource(); if ( v.equals( sr )) found = true; else { Vertex si = e.getSink(); if ( v.equals( si )) { boolean exist = checkExistence( sr ); if ( !exist ) { subVertices.add( sr ); tmpQueue.add( sr ); // work on edges whose sink vertex is the same as the vertex from tempQueue if ( !(vertices.remove( sr ))) System.out.println( "wrong4" ); } e = (Edge)edges.remove( m ); subEdges.add(e ); len = edges.size(); } else ++m; } } //System.out.println( "m = " + m ); boolean same = true; while ( same ) { // remove all those edges starting with the vertex from tmpQueue to subEdges len = edges.size(); if ( m < len ) { e = 
(Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); boolean exist = checkExistence( ver ); // check whether the sinkVertex is in the subVertices or not if ( !exist ) { subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong5" ); } } else { same = false; } } else same = false; } } } clusterSubEdges(); } public int workOnSequence( String str, int th1, int th2, int th3, int th4 ) { constructVertices( str, th4 ); constructEdges( th1, th2, th3 ); boolean find = true; int i = 0; while ( !(edges.isEmpty())) { subVertices.clear(); subEdges.clear(); extractConnectedGraph(); Vector lp = identifyLCRs(); // the longest path in a connected subgraph lps.add( lp ); ++i; } return i; } // combine all letters from a sequence on different lines( stings) into a single line ( string ) public String generateSequence( String str ) { String sequence = new String(); String strTmp = str; boolean lastSeq = false; try { if ( strTmp == null ) sequence = null; if ( ( strTmp != null ) && ( strTmp.startsWith( ">"))) { //System.out.println( "*******************************************" ); System.out.println( str ); strTmp = rf.readLine(); } while (( strTmp != null ) && (!(strTmp.startsWith( ">" )))) { strTmp = strTmp.trim(); sequence = sequence + strTmp; strTmp = rf.readLine(); } if ( strTmp != null) if ( strTmp.startsWith( ">" )) { sequence = strTmp + "!" 
+ sequence ; } } catch ( IOException ex ) { } return sequence; } public void printPositions( Vector pos ) { int l = pos.size(); String str = new String(); for ( int i = 0; i < l; i++ ) { str = (String) pos.get( i ); System.out.print( str + " " ); } System.out.println(); } public void printLCRBlocks( Vector LCRBlocks ) { String str = new String(); int l = LCRBlocks.size(); // System.out.println( "LCR Blocks: " ); for ( int i = 0; i < l; i++ ) { str = (String)LCRBlocks.get( i ); int index = str.indexOf( "-" ); String start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) System.out.print( str + " " ); } System.out.println(); } public Vector getPositions ( int k ) { Vector pos = new Vector(); //Vector posSingleVertexOnly = new Vector(); String str = new String(); int l = lps.size(), index = 0; Vector lp = new Vector(); Vertex v = new Vertex(); // longest path for ( int i = 0; i < k; i++ ) { // get positions from those vertices in lps lp = (Vector)lps.get( i ); int len = lp.size(); for ( int j = 0; j < len; j++ ) { v = (Vertex)lp.get( j ); str = v.getStartLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); str = v.getEndLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); } } return pos; } public Vector sortPositions( Vector pos) { Vector positions = new Vector(); // used to keep the sorted positions String str = new String(); int len = pos.size(), current = 0, previous = 0; Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { str = (String)pos.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); Vector tmpLCRBlocks = new Vector(); String start = new String(); previous = posInt[0].intValue() - 1; start = Integer.toString(( previous + 1 )); for ( int i = 0; i < len; i++ ) { str = posInt[i].toString(); current = posInt[i].intValue(); //generate blocks of continuous 
positions. Say, the sorted integer array is 3,4,5,6 8,9,10,11,12,13,29,30,31. It can be represented as a vector of three strings( blocks ): 3-6, 8-13, 29-31. if ( current != ( previous + 1 )) { tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); start = str; } previous = current; positions.add( str ); } tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); len = tmpLCRBlocks.size(); Vector LCRBlocks = new Vector(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLCRBlocks.get( i ); int index = str.indexOf( "-" ); start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) LCRBlocks.add( str ); } //printLCRs( LCRBlocks ); return LCRBlocks; } public Vector extend ( int startPos, int endPos, int limit, String direction, String seq ) { Vector decRegs = new Vector(); int pointer = 0, startDecPos = 0, endDecPos = 0; double com1 = 0, com2 = 0; String extReg = seq.substring( startPos - 1, endPos ); complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet(); if ( direction.equals( "left" )) { // extend to the left( front ) boolean dec = false; pointer = startPos - 2; while ( ( pointer > limit ) && ( pointer > ( startPos - 17 )) ) { //System.out.println( "111111111111extReg:" + extReg ); com1 = cc.calculateModifiedEntropy( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( pointer, endPos ); com2 = cc.calculateModifiedEntropy( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 11111 " + com2); if ( com1 > com2 ) { if ( !dec ) { dec = true; // System.out.println( "from false to true111111111" ); endDecPos = pointer + 2; } } else if ( dec ) { dec = false; //System.out.println( "from true to false1111111111" ); startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 11111111 " + startDecPos + "-" + endDecPos ); } } -- pointer; } if ( ( dec ) && ( pointer == ( startPos - 17 )) ) { //System.out.println( "keeping decreasing1111111111" ); while ( ( pointer > limit ) && ( dec ) ) { //System.out.println( "22222222222222extReg:" + extReg ); com1 = cc.calculateModifiedEntropy( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( pointer, endPos ); //System.out.println("keep: " + extReg ); com2 = cc.calculateModifiedEntropy( extReg ); // com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 22222222222 " + com2); if ( com1 < com2 ) { //System.out.println( "from true to false2222222222" ); dec = false; startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 222222222222 " + startDecPos + "-" + endDecPos ); } } -- pointer; 
} } // the left extension touches the end of the last block of the current lcr blocks if (( pointer == limit ) && ( dec ) ) { startDecPos = pointer + 2; if ( com2 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 333333333333 " + startDecPos + "-" + endDecPos ); } } if ( decRegs.size() == 0 ) { //System.out.println( "left: Empty" ); } else { //System.out.print( "left: "); //printLCRs( decRegs ); } } else { boolean dec = false; pointer = endPos + 1;//////// // extend to the right( back ) while ( ( pointer < limit ) && ( pointer < ( endPos + 15 ))) { //System.out.println( "333333333333333333333extReg:" + extReg ); com1 = cc.calculateModifiedEntropy( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( startPos - 1, pointer ); com2 = cc.calculateModifiedEntropy( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 33333333 " + com2); if ( com1 > com2 ) { if ( !dec ) { dec = true; //System.out.println( "from false to true33333333333333333" ); startDecPos = pointer -1 ; } } else if ( dec ) { dec = false; //System.out.println( "from true to false3333333333333" ); endDecPos = pointer -1; if ( com1 < comCut ) { decRegs.add( ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 4444444 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if ( ( dec ) && ( pointer == ( endPos + 15 )) ) { // keep extending until the complexity starts increasing, which means that several blocks generated from the longest path can be included into lcrs during one call of the 'extend()' based on a block while (( dec ) && ( pointer < limit )) { //System.out.println( "444444444444extReg:" + extReg ); com1 = cc.calculateModifiedEntropy( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = 
seq.substring( startPos - 1, pointer ); com2 = cc.calculateModifiedEntropy( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 4444444 " + com2); if ( com1 < com2 ) { dec = false; //System.out.println( "from true to false444444444" ); endDecPos = pointer - 1; if ( com1 < comCut ) { decRegs.add(( startDecPos ) + "-" + endDecPos ); //System.out.println( "decRegs added 5555555555555555555 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if (( pointer == limit ) && ( dec )) { endDecPos = limit - 1; //System.out.println( "decRegs added 66666666666 " + startDecPos + "-" + endDecPos ); decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); } } if ( decRegs.size() == 0 ) { //System.out.println( "right: Empty" ); } else { //System.out.print( "right: "); //printLCRs( decRegs ); } } return decRegs; } public boolean shareLetter( String str1, String str2 ) { boolean shared = false; String str = str1, letter = new String(); while (( str.length() != 0 ) && ( !shared )) { letter = str.substring( 0, 1 ); int index = str2.indexOf( letter ); if ( index != -1 ) shared = true; else if ( str.length() != 0 ) str = str.substring( 1 ); } return shared; } public boolean checkContribution( String currentBlock, Vector decRegs, String seq ) { boolean contributed = false; Vector regs = decRegs; String block = new String(); int i = 0, len = regs.size(); while ( ( i < len ) && ( !contributed )) { block = (String) regs.elementAt( i ); int index = block.indexOf( "-" ); int start= Integer.parseInt( block.substring( 0, index )); int end = Integer.parseInt( block.substring( index + 1 )); block = seq.substring( start - 1, end ); //System.out.println( "block: " + block+ " currentBlock: "+ currentBlock ); contributed = shareLetter( currentBlock, block ); ++i; } return contributed; } public Vector appendLcrs( Vector lcrs, Vector appendedLcrs ) { Vector lowComRegs = lcrs, tmpLcrs = appendedLcrs; while( !( 
tmpLcrs.isEmpty())) lowComRegs.add( (String)tmpLcrs.remove( 0 ) ); return lowComRegs; } public Vector pickUpDrop ( Vector blocks, String seq ) { Vector frontLcrs = new Vector(), backLcrs = new Vector(), lcrs = new Vector(), tmpBLOCKS = blocks; String currentBlock = new String(), tmpBlock = new String(); boolean isFirstBlock = true; int limit = 0, index = 0, startPos = 0, endPos = 0; while (( !tmpBLOCKS.isEmpty() )) { frontLcrs.clear(); backLcrs.clear(); int lcrBlockStart = 0, lcrBlockEnd= 0; boolean extendToLeft = true; // whether to extend towards the left boolean find = false; // find the current extending block // currentBlock can start in the middle of a block, or has the same starting position as a block and it doesn't have to be the block after the previous currentBlock // get the end position of the last block in lcrs if ( !( lcrs.isEmpty())) { tmpBlock = (String)lcrs.lastElement(); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 )); //System.out.println( "lcrBlockEnd: " + lcrBlockEnd ); while (( !find ) && ( !(tmpBLOCKS.isEmpty()) )) { // get the current block currentBlock = (String)tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); //System.out.println( "find currentBlock: "+ startPos + " " + endPos); if ( startPos < lcrBlockEnd ) { if ( endPos > lcrBlockEnd ) if (( endPos - lcrBlockEnd ) >= 3 ) { startPos = lcrBlockEnd + 1; extendToLeft = false; find = true; } } else find = true; } } else { currentBlock = (String) tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); find = true; } if ( find ) { //System.out.println( "currentBlock:" + currentBlock ); if ( isFirstBlock ) { limit = -1; isFirstBlock = false; // extend to the left( front ) 
frontLcrs = extend( startPos, endPos, limit, "left", seq ); } else if ( extendToLeft ) { limit = lcrBlockEnd - 1; // extend to the left( front ) frontLcrs = extend( startPos, endPos, limit, "left", seq ); } limit = seq.length() + 1; // extend to the right( back ) backLcrs = extend( startPos, endPos, limit, "right", seq ); double com = 0; complexityCalculator cc = new complexityCalculator(); cc. initializeAlphabet(); index = currentBlock.indexOf( "-" ); int cbStart = Integer.parseInt( currentBlock.substring( 0, index ))- 1; int cbEnd = Integer.parseInt( currentBlock.substring( index + 1 )) ; //System.out.println("current block String:" + seq.substring( cbStart,cbEnd)); com = cc.calculateModifiedEntropy( seq.substring( cbStart, cbEnd ) ); //com = cc.calculateReciprocalPro( seq.substring( cbStart, cbEnd ) ); //com = cc.calculateRecProWScoringMatrix( seq.substring( cbStart, cbEnd ) ); boolean contributed = false; if ( frontLcrs.size() != 0 ) { // get the start position of the first block in frontLcrs as the start position of the block to be added into lcrs tmpBlock = (String)frontLcrs.elementAt( 0 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); if ( com > comCut ) { // check whether the current block contributes to the complexity-decreasing regions or not contributed = checkContribution( seq.substring( cbStart, cbEnd ), frontLcrs, seq ); if ( !contributed ) lcrBlockEnd = startPos - 1; else { lcrBlockEnd = endPos; } } else { lcrBlockEnd = endPos; //System.out.println( "com of currentBlock: " + com ); } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } boolean combine = false; // whether to combine the last block in lcrs from frontLcrs and the block to be added into lcrs from backLcrs if (( !contributed ) && ( com > comCut )){ contributed = checkContribution( seq.substring( cbStart, cbEnd ), backLcrs, seq ); if ( !contributed ) { lcrBlockStart = endPos + 1; } else { if ( frontLcrs.size() != 0 ) combine = true; } } 
else if ( frontLcrs.size() != 0 ) combine = true; // get the end position of the last block in backLcrs as the end position of the block to be added into lcrs if ( !( backLcrs.isEmpty())) { tmpBlock = (String)backLcrs.lastElement( ); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 ) ); if ( combine ) { //System.out.println( "combine" ); limit = lcrs.size(); tmpBlock = (String) lcrs.remove( limit - 1 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); } else { if ( com < comCut ) { lcrBlockStart = startPos; //System.out.println( "Here, com" ); } else if ( contributed ) { //System.out.println( "contributed to the back, com > comCut " ); lcrBlockStart = startPos; } else { //System.out.println( "OOOOOOOOOOOOOOOOOOOOOOOO" ); lcrBlockStart = endPos + 1; } } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } else { if((frontLcrs.size() == 0 ) && ( !contributed ) && ( com < comCut )) { lcrs.add( currentBlock ); } } // check whether to combine the last two blocks in the current lcrs //len = lcrs.length(); /* System.out.print( "current lcrs: " ); printLCRs( lcrs ); */ } } return lcrs; } public Vector mergePurge( Vector lcrs ) { Vector tmpLcrs = lcrs; String currentBlock = new String(), nextBlock = new String(); int len = tmpLcrs.size(), i = 0; while ( i < len ) { if (( i + 1 ) < len ){ currentBlock = (String) tmpLcrs.elementAt( i ); int endIndex = currentBlock.indexOf( "-" ); int end = Integer.parseInt( currentBlock.substring( endIndex +1 )); nextBlock = (String) tmpLcrs.elementAt( i + 1 ); int startIndex = nextBlock.indexOf( "-" ); int start = Integer.parseInt( nextBlock.substring( 0, startIndex )); if (( end == ( start - 1 )) || ( end == start )) { //System.out.println( currentBlock + " " + nextBlock ); currentBlock = currentBlock.substring( 0, endIndex ) + "-" + nextBlock.substring( startIndex + 1 ); tmpLcrs.remove( i ); tmpLcrs.remove( i ); tmpLcrs.add( i, currentBlock ); } 
else ++i; len = tmpLcrs.size(); } else ++i; } i = 0; len = tmpLcrs.size(); //printLCRs( tmpLcrs ); /* while ( i < len ) { currentBlock = (String) tmpLcrs.elementAt( i ); int index = currentBlock.indexOf( "-" ); int start = Integer.parseInt( currentBlock.substring( 0, index )); int end = Integer.parseInt( currentBlock.substring( index + 1 )); if (( end - start ) < 7 ) tmpLcrs.remove( i ); else ++i; len = tmpLcrs.size(); } */ return tmpLcrs; } public boolean checkCombinedSubBlock( String seq1, String seq2, double cCut ) { boolean delete = true; complexityCalculator cc = new complexityCalculator(); cc. initializeAlphabet(); // double com = cc.calculateReciprocalPro( seq1 + seq2 ); String seq = seq1 + seq2; double com = cc.calculateNorModifiedEntropy( seq ); System.out.println( "combined:" + seq1 + seq2 + " " + com ); if ( com > cCut ) delete = false; return delete; } public String findAlignment( String seq1, String seq2 ) { String aliPos = new String(); try { Sequence s1 = SequenceParser.parse( seq1 ); Sequence s2 = SequenceParser.parse( seq2 ); System.out.println( "alignment sequences: " + seq1 + "???" + seq2 ); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); int similarLen = alignment.getSimilarity(); // get the length of the same and similar letters; if ( similarLen > 4 ) { // only if the length of similar and same letters is greater than 4 aliPos = new Pair().format( alignment ); System.out.println( "the alignment: " + aliPos + " " + similarLen ); } } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } return aliPos; } public Vector checkLeftRegs( int aliStart, int aliEnd, int start, int end, String seq, double cCut ) { Vector left = new Vector(); complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet(); double com = 0; if ( aliStart > 7 ) { // the length of the left region must be longer than 7 com = cc.calculateNorModifiedEntropy( seq.substring( start - 1, start + aliStart - 2 )); System.out.println( "left1: " + seq.substring( start - 1, start + aliStart - 2 ) + " " + com + " " + start + "-" + ( start + aliStart - 2 )); if ( com <= cCut ) left.add( 0, start + "-" + ( start + aliStart - 2 )); } if ( ( end - start + 1 - aliEnd ) > 7 ) { com = cc.calculateNorModifiedEntropy( seq.substring( start + aliEnd - 1, end )); System.out.println( "left2:" + seq.substring( start + aliEnd - 1, end ) + " " + com + " " + ( start + aliEnd ) + "-" + end ); if ( com <= cCut ) left.add( ( start + aliEnd ) + "-" + end ); } return left; } public Vector addToResult( Vector result, Vector left ) { Vector tmpResult = result; String str1 = new String(), str2 = new String(); int j = 0; for ( int i = 0; i < left.size(); i++ ) { str1 = (String) left.elementAt( i ); int index = str1.indexOf( "-" ); int endLeft = Integer.parseInt( str1.substring( index + 1 )); boolean found = false; while ( !found ) { if ( j < result.size() ) { str2 = (String) result.elementAt( j ); index = str2.indexOf( "-" ); int startResult = Integer.parseInt( str2.substring( 0, index )); if ( endLeft < startResult ) { found = true; System.out.println( "Insert left into result: " + str1 + " " + str2 ); result.add( j , str1 ); j = j + 2; } else j++; } else { result.add( str1 ); System.out.println( "append to the end of result" ); found = true; } } } return result; } public Vector checkAdjBlock ( int start1, int end1, String adjBlock, String seq, double cCut, String mark ) { Vector result = new Vector(); double com = 0; String seq1 = seq.substring( start1 - 1, end1 ); //System.out.println( "current block: " +start1 + " " + end1 + " " + seq1 ); int index1 = adjBlock.indexOf( "-" ); int start2 = Integer.parseInt( adjBlock.substring( 0, index1 )); int end2 = Integer.parseInt( adjBlock.substring( index1 + 1 
)); String seq2 = seq.substring( start2 - 1, end2 ); String aliPos = new String(); if ( mark.equals( "front" )) aliPos = findAlignment( seq2, seq1 ); else aliPos = findAlignment( seq1,seq2 ); if ( aliPos.length() != 0 ) { // format of aliPos: 'a1-a2 b1-b2' index1 = aliPos.indexOf( "-" ); int index2 = aliPos.indexOf( " " ); int aliStart2 = Integer.parseInt ( aliPos.substring( 0, index1 )); int aliEnd2 = Integer.parseInt ( aliPos.substring( index1 + 1, index2 )); String aliSeq1 = new String(), aliSeq2 = new String(); if ( mark.equals( "front" )) { aliSeq2 = seq.substring( start2 + aliStart2 - 2, start2 + aliEnd2 - 1 ); System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq2 ); } else { aliSeq1 = seq.substring( start1 + aliStart2 - 2, start1 + aliEnd2 - 1 ); System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq1 ); } aliPos = aliPos.substring( index2 + 1 ); index1 = aliPos.indexOf( "-" ); //System.out.println( "aliPos:" + aliPos ); int aliStart1 = Integer.parseInt( aliPos.substring( 0, index1 )); int aliEnd1 = Integer.parseInt( aliPos.substring( index1 + 1 )); if ( mark.equals( "front" )) { aliSeq1 = seq.substring( start1 + aliStart1 - 2, start1 + aliEnd1 - 1 ); System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq1 ); } else { aliSeq2 = seq.substring( start2 + aliStart1 - 2, start2 + aliEnd1 - 1 ); System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq2 ); } boolean decOrNot = true; if ( mark.equals( "front" )) { decOrNot = true; // decOrNot = checkCombinedSubBlock( aliSeq2,aliSeq1, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 ) ); System.out.println( "added to result1: " + ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 )); Vector left = checkLeftRegs( aliStart1, aliEnd1, start1, end1, seq, cCut ); result = addToResult( result, left ); } } else { decOrNot = true; //decOrNot 
= checkCombinedSubBlock( aliSeq1,aliSeq2, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1) ); System.out.println( "added to redult2: " + ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1)); Vector left = checkLeftRegs( aliStart2, aliEnd2, start1, end1, seq, cCut ); result = addToResult( result, left ); result.add( ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 ) ); System.out.println( "added to redult3: " + ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 )); left = checkLeftRegs( aliStart1, aliEnd1, start2, end2, seq, cCut ); result = addToResult( result, left ); } } } return result; } public Vector checkDeletability ( Vector lcrs, int maxIndex, String seq, double cCut ) { Vector result = new Vector(); String block = new String(); int start1 = 0, end1 = 0; block = (String)lcrs.elementAt( maxIndex ); //System.out.println( "current block :" + block ); int index = block.indexOf( "-" ); start1 = Integer.parseInt( block.substring( 0, index )); end1 = Integer.parseInt( block.substring( index + 1 )); if ( maxIndex != 0 ) { block = (String)lcrs.elementAt( maxIndex - 1 ); System.out.println( "front adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "front" ); } if ( result.size() == 0 ) { if ( maxIndex != ( lcrs.size() - 1 ) ) { block = (String)lcrs.elementAt( maxIndex + 1 ); System.out.println( "back adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "back" ); if ( result.size() != 0 ) result.add( "back" ); } } return result; } public Vector filter ( Vector lcrs, String seq ) { Vector tmpLcrs = lcrs, com = new Vector(); float len = tmpLcrs.size(); double singleCom = 0, max = -222222222; String str = new String(), block = new String(); if ( tmpLcrs.size() != 1 ) { complexityCalculator cc = new complexityCalculator(); cc. 
initializeAlphabet(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLcrs.elementAt( i ); int index = str.indexOf( "-" ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); //singleCom = cc.calculateReciprocalPro( str ); singleCom= cc.calculateNorModifiedEntropy( str ); com.add( Double.toString( singleCom )); } int i = 0, j = 0, maxIndex = 0; double limit = 0, cCut = 0; if ( seq.length() > 500 ) limit = len * 0.6; else limit = len * 0.3; while ( i < limit ) { int l = com.size(); j = 0; max = -222222222; while ( j < l ) { str = ( String ) com.elementAt( j ); singleCom = Double.parseDouble( str ); if ( singleCom > max ) { max = singleCom; maxIndex = j; } ++ j; } //System.out.println( "com: " + com.elementAt( maxIndex )); cCut = Double.parseDouble( (String)com.remove( maxIndex )); ++i; } System.out.println( "cCut: " + cCut ); j = 0; i = 0; len = tmpLcrs.size(); Vector result = new Vector(); while (( i < limit ) && ( j < len )) { str = (String)tmpLcrs.elementAt( j ); int index = str.indexOf( "-" ); System.out.println( "current block?" + str.substring( 0, index ) + "?" + str.substring( index + 1 ) ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); System.out.println( "cur subseq???" 
+ str ); //singleCom = cc.calculateReciprocalPro( str ); singleCom= cc.calculateNorModifiedEntropy( str ); //System.out.println( "singlecom:"+ singleCom + " " + cCut ); if ( singleCom >= cCut ) { result = new Vector(); result = checkDeletability( tmpLcrs, j, seq, cCut ); int rSize =result.size(); boolean fromBack = false; if ( rSize != 0 ) { str = (String)result.elementAt( rSize - 1 ); if ( str.equals( "back")) { --rSize; fromBack = true; System.out.println( "back" ); } System.out.println( "remove: "+ (String)tmpLcrs.remove( j )); for ( int k = 0; k < rSize; k++ ) { // add 'result' Vector into tmpLcrs in order str = ( String ) result.elementAt( k ); System.out.print( "*"+ str + "*" ); tmpLcrs.add( j, str ); ++j; } System.out.println(); if ( j < tmpLcrs.size()) System.out.println( "the next one:" + tmpLcrs.elementAt( j )); if ( fromBack ) { System.out.println( "yes, from back " ); System.out.println( "removed: "+ tmpLcrs.remove( j )); } len = tmpLcrs.size(); //////////////////////// } else { tmpLcrs.remove( j ); System.out.println( "removed coz of high complexity" ); ++i; len = tmpLcrs.size(); } } else ++j; } } return tmpLcrs; } public Vector postProcess( int i, String seq ) { Vector lcrs = new Vector(); Vector blocks = new Vector(); Vector pos = getPositions( i ); if ( pos.size() != 0 ) { blocks = sortPositions( pos ); // blocks = filter( blocks, seq ); printLCRs( blocks ); } lcrs = pickUpDrop( blocks, seq ); System.out.println( "previous" ); printLCRs( lcrs ); lcrs = mergePurge( lcrs ); lcrs = filter( lcrs, seq ); System.out.println( "AFTER************" ); printLCRs( lcrs ); return lcrs; } public void computeLCRPercentage ( Vector lcrs, String str) { } public void printLCRs( Vector LCRs ) { int len = LCRs.size(); for ( int i = 0; i < len; i ++ ) { String str = (String)LCRs.get( i ); System.out.print( str + " " ); } System.out.println( ); } public void printLCRs ( Vector lcrs, String str) { } public void startt( int th1, int th2, int th3, int th4 ) { String str = 
new String(), id = new String(), nextId = new String(); int index = 0; boolean first = true; try { while ( str != null ) { str = rf.readLine(); str = generateSequence( str ); if ( str != null ) { if ( str.indexOf( ">" ) != -1 ) { index = str.indexOf( "!" ); id = nextId; nextId = str.substring( 0, index ); str = str.substring( index + 1 ); } else id = nextId; if (!first ) { System.out.println(); System.out.println( id ); } else { id = nextId; first = false; } vertices.clear(); edges.clear(); lps.clear(); for ( int i = 0; i < 20; i++ ) { fVecNor[i] = 0f; fVecUnNor[i] = 0f; } str = str.trim(); int i = workOnSequence( str, th1, th2, th3, th4 ); Vector lcrs = postProcess( i, str );//process all longest paths from every connected subgraph /****************************** float l = lcrs.size(); float len = str.length(); float per = l / len; System.out.println( "The percentage of LCR letters after the longest path: " + per ); ***************************************/ /* computeLCRPercentage( lcrs, str ); printLCRs( lcrs, str ); */ } } rf.close(); } catch ( IOException ex ) { } } public static void main ( String args[] ) { int th1 = Integer.parseInt( args[2] ); int th2 = Integer.parseInt( args[3] ); int th3 = Integer.parseInt( args[4] ); int th4 = th2; gbm g = new gbm( args[0]); g.readMatrices( args[1] ); g.getComCut( args[5] ); g.startt( th1, th2, th3, th4 ); /* try { Sequence s1 = SequenceParser.parse("VVVVVV" ); Sequence s2 = SequenceParser.parse( "LAELLAKKSDRDSPKK"); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); System.out.println( "similarity: " + alignment.getSimilarity() ); String result = new Pair().format(alignment); System.out.println( "*****************" ); System.out.println( result ); } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } */ } } /* 1. 
Format of the output: SACACPQTSOP......( 60 letters) XXXX TPQSKAQ..........( 60 letters) */ PK f}7applications/gbmCopy1.java.alt/* ** Author: Xuehui Li ** Date: March, 2005 ** "gbm" or "GBM" is the abbrevaition of "A graph-based method for detecting low-complexity reions". ** This is program is used to find low-complexity regions in sequences ** NOTE: all vertices and edges have topological orders ** There are totally five input paramaters. The first one is the sequence file name. The second one the learned matrix file( /cise/research/tamer/xli/LCR/graphLCR/swissprotLearnedMatrices ). The remaining three are the threshold 1, threshold 2 and threshold 3, respectively. At this tiem, all LCR Blocks generated in both /cise/research/tamer/xli/LCR/graphLCR/swissprotLCRBlocks/ and /cise/research/tamer/xli/LCR/graphLCR/pfamLCRBlocks/ are based on the three thresholds: " 3 15 5" */ package applications; import jaligner.Alignment; import jaligner.Sequence; import jaligner.SmithWatermanGotoh; import jaligner.formats.Pair; import jaligner.matrix.MatrixLoader; import jaligner.util.SequenceParser; import java.io.*; import java.util.*; class gbm { // vertices and edges are vectors used to keep all the vertices, edges in a graph generated from a sequence, respectively. vertexQueue is a vector used to keep all vertices whose indegree is zero. lps is a vector used to keep all the longest paths in all connected subgraphs of a sequence. Every longest path in lps is a vector of vertices, excluding the dummy source. // subVertices and subEdges are vectors used to keep all the vertices, edges in a connected-graph which is a subgraph of the graph generated from a sequence, respectively. 
private File f; private RandomAccessFile rf; private Vector vertices, subVertices, edges, subEdges, vertexQueue, lps; private float[][] repeatMatrix, nonRepeatMatrix; private float[] fVecNor, fVecUnNor; private Vector alphabet; private double comCut = 0; // the complexity cut-off value private complexityCalculator cc; public gbm ( String fileName ) { // the graph is given in a file where every line represents an edge and has the fromat of "source sink weight" initializeAlphabet(); try{ f = new File ( fileName ); rf = new RandomAccessFile ( f, "r" ); } catch ( IOException ex ) { } vertices = new Vector(); subVertices = new Vector(); edges = new Vector(); subEdges = new Vector(); vertexQueue = new Vector(); lps = new Vector(); repeatMatrix = new float[20][20]; nonRepeatMatrix = new float[20][20]; fVecNor = new float[20]; fVecUnNor = new float[20]; for ( int i = 0; i < 20; i++ ) fVecUnNor[i] = 0f; cc = new complexityCalculator(); cc. initializeAlphabet(); } public void initializeAlphabet() { alphabet = new Vector(); alphabet.add( "A" ); alphabet.add( "R" ); alphabet.add( "N" ); alphabet.add( "D" ); alphabet.add( "C" ); alphabet.add( "Q" ); alphabet.add( "E" ); alphabet.add( "G" ); alphabet.add( "H" ); alphabet.add( "I" ); alphabet.add( "L" ); alphabet.add( "K" ); alphabet.add( "M" ); alphabet.add( "F" ); alphabet.add( "P" ); alphabet.add( "S" ); alphabet.add( "T" ); alphabet.add( "W" ); alphabet.add( "Y" ); alphabet.add( "V" ); } public void readMatrices( String matricesFile ) { try { File f = new File( matricesFile ); RandomAccessFile rfm = new RandomAccessFile ( f, "r" ); String row = new String(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { nonRepeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { nonRepeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.readLine(); 
rfm.readLine(); rfm.readLine(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { repeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { repeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.close(); } catch ( IOException ex ) { } } public void printMatricesRowByRow() { System.out.println( "Non-Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( nonRepeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); System.out.println(); System.out.println( "Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( repeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); } public void getComCut( String fileName ) { try { File f = new File ( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); String line = new String(); double com, sum = 0, num = 0; line = rf.readLine(); while ( line != null ) { line = line.trim(); com = cc.calculate2LetterEntropyWScoMatrix( line ); //com = cc.calculateModifiedEntropy( line ); sum = sum + com; num = num + 1; line = rf.readLine(); } comCut = sum / num; // System.out.println( "The cut-off value is: " + sum + " / " + num + " = " + comCut ); rf.close(); } catch( IOException ex ){ } } public void createFirstVector( String window ) { int len = window.length(); String tmpWindow = window, letter = new String(); for ( int i = 0; i < len; i++ ) { letter = tmpWindow.substring( 0, 1 ); int index = alphabet.indexOf( letter ); fVecUnNor[ index ]= fVecUnNor[index] + 1f; tmpWindow = tmpWindow.substring( 1 ); } for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecUnNor[ i ]; for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecNor[ i ] / len; } // o for unNormalized, 1 for normalized public void 
printVector( int mark ) { float[] tmpVector = new float[ 20 ]; if ( mark == 0 ) tmpVector = fVecUnNor; else tmpVector = fVecNor; for ( int i = 0; i < 20; i++ ) System.out.print( tmpVector[i] + " " ); System.out.println(); } public void constructSingleVertex( int start, int end ) { String startLetter = Integer.toString( start + 1 ); String endLetter = Integer.toString( end + 1 ); Vertex v = new Vertex( startLetter, endLetter, 1, 1.0f ); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); v.setPredecessor( dummySource ); vertices.add( v ); } public void workOnFirstWindow( String window ) { String letter1, letter2; createFirstVector( window ); int len = window.length(), j = 0, row = 0, col = 0; for ( int i = 0; i < len - 1; i++ ) { j = 1; letter1 = window.substring( i, i + 1 ); while ( ( i + j ) < len ) { letter2 = window.substring( i + j, i + j + 1 ); boolean similar = checkSimilarity( letter1, letter2 ); if ( similar ) { row = alphabet.indexOf( letter1 ); col = alphabet.indexOf( letter2 ); if ( checkProbablity( row, col )) constructSingleVertex( i, i + j ); } ++j; } } } // at this time, row == col, since we only consider same letters public boolean checkProbablity( int row, int col ) { boolean construct = false; float difference1 = Math.abs( repeatMatrix[ row][col] - fVecNor[row] ); float difference2 = Math.abs( nonRepeatMatrix[row][col] - fVecNor[row] ); if ( difference2 > difference1 ) { construct = true; } return construct; } public void addVertices( String window, int index, int startPos ) { // get all letters in the window who have the same index in the alphabet as "index" int len = window.length(); String letter = new String(); int tmpIndex = 0; int[] positions = new int[ len - 1 ]; for ( int i = 0; i < len - 1; i++ ) { positions[ i ] = 0; } int j = 0; for ( int i = 0; i < len - 1; i++ ) { letter = window.substring( i, i + 1 ); tmpIndex = alphabet.indexOf( letter ); if ( index == tmpIndex ) { positions[ j ] = startPos + i; /// get the position # of the letter 
++j; } } if ( checkProbablity( index, index )) { j = 0; boolean end = false; while (( !end ) && ( j < ( len - 1 ))) { if ( positions[ j ] != 0 ) constructSingleVertex( positions[ j ], startPos + len - 1 ); else end = true; ++j; } } } public void constructVertices( String str, int th4 ) { char c1, c2; int len = str.length(); boolean similar = false; Vertex v = new Vertex(); float[] previousVecUnNOr = new float[20]; String window = str.substring( 0, th4 ); String oldLetter = new String(), newLetter = new String(); workOnFirstWindow( window ); int startPos = 1; while( startPos <= ( len - th4 ) ) { oldLetter = window.substring( 0, 1 ); window = str.substring( startPos, startPos + th4 ); newLetter = window.substring( th4 - 1, th4 ); int index = alphabet.indexOf( oldLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] - 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; index = alphabet.indexOf( newLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] + 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; addVertices( window, index, startPos ); ++startPos; } } // return the actual weight of the vertex ??? 
/// to be modified later to include similar cases public boolean checkSimilarity( String c1, String c2 ) { boolean similar = false; if ( c1.equals( c2 )) similar = true; return similar; } // find the percentage of letters appearing in vertices public void findLetterPercentageVer( float lF ) { int len = vertices.size(); Vector appearedLetters = new Vector(); Vertex v = new Vertex(); for ( int i = 0; i < len ; i++ ) { v = (Vertex)vertices.elementAt( i ); String str = v.getStartLetter(); int index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); str = v.getEndLetter(); index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); } ///// Sort first len = appearedLetters.size(); float per = len / lF; // System.out.println( "The letter percentage after vertex construction is: " + len + " / " + lF + " = " + per ); Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { String str = (String)appearedLetters.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); for ( int i = 0; i < len; i ++ ) { String str = posInt[i].toString(); System.out.print( str + " " ); } System.out.println(); // computePercentage( lF, appearedLetters ); } public void computePercentage( float l, Vector al ) { int len = al.size(); float per = len / l; System.out.println( "The percentage is: " + len + " / " + l + " = " + per ); for ( int i = 0; i < len; i ++ ) { String str = (String)al.elementAt( i ); System.out.print( str + " " ); } System.out.println(); } public void constructEdges( int th1, int th2, int th3 ) { String startLetter = new String(), endLetter = new String(); int i = 0, j = 0, p = 0, q = 0, l = vertices.size(); Vertex v1 = new Vertex(), v2 = new Vertex(); Edge e = new Edge(); for ( int k = 0; k < l-1; k++ ) { v1 = (Vertex)vertices.get( k ); startLetter = v1.getStartLetter(); i = Integer.parseInt ( startLetter ); endLetter = v1.getEndLetter(); j = Integer.parseInt( endLetter ); boolean end = 
false; int m = k + 1; while (( !end )&& ( m < l )) { v2 = (Vertex)vertices.get( m ); startLetter = v2.getStartLetter(); p = Integer.parseInt ( startLetter ); endLetter = v2.getEndLetter(); q = Integer.parseInt( endLetter ); if ( checkConditions( i, j, p, q, th1, th2, th3 )) { v2.incIndegree(); // modify the vertex in vertices vertices.setElementAt( v2, m ); e = new Edge( v1, v2, 1.0f ); edges.add( e ); } else if ( ( p - i ) > th2 ) // k2 = 25 end = false; ++m; } } } // return the actual weight of the edge ??? public boolean checkConditions ( int i, int j, int p, int q, int th1, int th2, int th3 ) { boolean satisfied = false; int foo = ( j - i ) - ( q - p ); foo = Math.abs( foo ); if ( foo <= th1 ) // condition #1, k1 = 5 if (( p - i ) <= th2 ) // condition #2, k2 = 26 if ((( i <= p ) && ( p <= j ) ) && ( j <= q )) // condition #3 if (( i == p ) || ( p == j ) || ( j == q )) { // condition #4 if ((( j - i ) <= th3) && ( ( q - p ) <= th3 )) satisfied = true; } else satisfied = true; return satisfied; } public void modifyVertexQueue ( Vector tmpQueue, Vector tmpVertices ) { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void printVertexQueue() { System.out.println( "All vertices in the queue:" ); Vertex v = new Vertex(); Vertex previous = v; int l = vertexQueue.size(); for ( int i = 0; i < l; i++ ) { v = (Vertex) vertexQueue.get( i ); previous = v.getPredecessor(); if ( previous != null) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printVertices( int m ) { Vector tmpVertices = new Vector(); if ( m == 0 ) tmpVertices = subVertices; else tmpVertices = 
vertices; Vertex v = new Vertex(); Vertex previous = v; int l = tmpVertices.size(); System.out.println( "All vertices in the graph: " + l ); for ( int i = 0; i < l; i++ ) { v = (Vertex) tmpVertices.get( i ); previous = v.getPredecessor(); if ( previous != null ) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printEdges( int m ) { Vector tmpEdges = new Vector(); if ( m == 0 ) tmpEdges = subEdges; else tmpEdges = edges; Edge e = new Edge(); Vertex sourceVer = new Vertex(), sinkVer = new Vertex(); String str = new String(); int length = tmpEdges.size(); System.out.println( "All edges in the graph: " + length ); for ( int i = 0; i < length; i++ ) { e = (Edge) tmpEdges.get( i ); sourceVer = e.getSource(); sinkVer = e.getSink(); System.out.println( sourceVer.getStartLetter() + "A" + sourceVer.getEndLetter() + " lp: " + sourceVer.getLP()+ " indegree: "+ sourceVer.getIndegree() + " "+ sinkVer.getStartLetter() + "A" + sinkVer.getEndLetter() + " lp: " + sinkVer.getLP() + " indegree: " + sinkVer.getIndegree() + " weight: "+ e.getWeight() ); } } public void addDummySource() { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); addToSubVerticesVertexQueue( dummySource ); addToSubEdges( dummySource ); } public void addToSubVerticesVertexQueue( Vertex dummySource ) { subVertices.add( 0, dummySource ); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void addToSubEdges( Vertex dummySource ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subVertices.size(); for ( int i = l-1; i > 0; i-- ) { v = (Vertex)subVertices.get( i ); e = new Edge( dummySource, v, 0.0f ); subEdges.add( 0, e ); } } public Vector 
findLongestPath() { Vertex v = new Vertex(); while ( !( vertexQueue.isEmpty())) { v = (Vertex) vertexQueue.remove( 0 ); traverseSubEdges( v ); } // find the vertex to which the path from the source is the longest Vector lp = traverseVertices(); return lp; } public void traverseSubEdges( Vertex ver ) { boolean end = false, first = true; Edge e = new Edge(); Vertex sourceVer = new Vertex(); Vertex sinkVer = new Vertex(); String str = new String(); float w = 0, sourceLP = 0, sinkLP = 0; int len = subEdges.size(), j = 0; if (!(subEdges.isEmpty())) j = findEdges( ver ); if ( j == -1 ) end = true; else if ( j != 0 ) { } while (( !end ) && ( j < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( j ); sourceVer = e.getSource(); if ( sourceVer.equals( ver )) { sourceLP = sourceVer.getLP(); sinkVer = e.getSink(); int i = subVertices.indexOf( sinkVer ); sinkLP = sinkVer.getLP(); w = e.getWeight(); if (( sourceLP + w ) > sinkLP ) { sinkLP = sourceLP +w; sinkVer.setLP( sinkLP ); sinkVer.setPredecessor( sourceVer ); } sinkVer.decIndegree(); subVertices.setElementAt( sinkVer, i ); modifyVertexInEdges( sinkVer ); i = sinkVer.getIndegree(); if ( i == 0 ) vertexQueue.add( sinkVer ); subEdges.remove( j ); len = subEdges.size(); first = false; } else if ( first ) { System.out.println( "This is a vertex with outdegree zero" ); end = true; } else end = true; } } public int findEdges( Vertex ver ) { int i = 0 , l = subEdges.size(); boolean find = false; Vertex v = new Vertex(); Edge e = new Edge(); while (( !find ) && ( i < l )) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( ver )) find = true; else ++i; } if ( !find ) i = -1; return i; } public void modifyVertexInEdges( Vertex sinkVer ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subEdges.size(); for ( int i = 0; i < l; i++ ) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( sinkVer )) e.setSource( sinkVer ); else { v = e.getSink(); if ( v.equals( sinkVer )) e.setSink( sinkVer 
); } subEdges.setElementAt( e, i ); } } public Vector traverseVertices() { Vertex v = new Vertex(), maxVer = new Vertex(); int l = subVertices.size(); float length = 0f, maxLp = -2222.0f; for ( int i = 0; i < l; i++ ) { v = (Vertex)subVertices.get( i ); length = v.getLP(); if ( length > maxLp ) { maxLp = length; maxVer = v; } } Vector lp = constructLongestPath( maxVer ); return lp; } public Vector constructLongestPath( Vertex maxVer ) { Vector lp = new Vector(); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); Vertex v = maxVer; while ( !(v.equals( dummySource ))) { lp.add ( 0, v ); v = v.getPredecessor(); } return lp; } public void printLongestPath( Vector lp ) { int i = 0; Vertex v = new Vertex(); String str = new String(); int length = lp.size(); for ( i = 0; i < length; i++ ) { v = (Vertex) lp.get( i ); System.out.print( v.getStartLetter() + "A" + v.getEndLetter() + " "); } System.out.println(); } public Vector identifyLCRs( ) { addDummySource(); Vector lp = findLongestPath(); return lp; } public boolean checkExistence( Vertex v ) { int index = subVertices.indexOf( v ); if ( index == -1 ) return false; else return true; } public void copy( Vector vt1, Vector vt2 ) { int l = vt1.size(); Edge e = new Edge(); for ( int i = 0; i < l; i++ ) { e = (Edge)vt1.get( i ); vt2.add ( e ); } } // make all edges beginning with the same vertex stay together public void clusterSubEdges() { Vector tmpSubEdges = new Vector(); Edge e = new Edge(); Vertex v = new Vertex(), ver = new Vertex(); while ( ( !subEdges.isEmpty())) { e = (Edge)subEdges.remove( 0 ); tmpSubEdges.add( e ); v = e.getSource(); int m = 0; int len = subEdges.size(); while (( m < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( m ); ver = e.getSource(); if ( v.equals( ver )) { tmpSubEdges.add( e ); subEdges.remove( m ); } else ++m; len = subEdges.size(); } } copy( tmpSubEdges, subEdges ); } // assign values to subVertices and subEdges ( BFS ) public void extractConnectedGraph() { boolean first 
= true; Vertex v = new Vertex(); Edge e = new Edge(); Vector tmpQueue = new Vector(); while(( first ) || (!(tmpQueue.isEmpty()))){ if ( first ) { // start the first edge of a new connected subgraph e = (Edge)edges.remove( 0 ); subEdges.add( e ); v = e.getSource(); subVertices.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong1" ); v = e.getSink(); subVertices.add( v ); tmpQueue.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong2" ); v = e.getSource(); first = false; boolean same = true; int m = 0; int len = edges.size(); while (( same ) &&( m < len )) { // remove all those edges having the same source vertex as the first edge e = (Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong3" ); } else same = false; len = edges.size(); } } else { v = (Vertex)tmpQueue.remove( 0 ); int m = 0; boolean found = false; int len = edges.size(); //find the starting positon of those edges who start at the first vertex from tmpQueue while (( !found ) && ( m < len )) { // skip all edges starting with the vertex from the tmpQueue e = (Edge)edges.elementAt( m ); Vertex sr = e.getSource(); if ( v.equals( sr )) found = true; else { Vertex si = e.getSink(); if ( v.equals( si )) { boolean exist = checkExistence( sr ); if ( !exist ) { subVertices.add( sr ); tmpQueue.add( sr ); // work on edges whose sink vertex is the same as the vertex from tempQueue if ( !(vertices.remove( sr ))) System.out.println( "wrong4" ); } e = (Edge)edges.remove( m ); subEdges.add(e ); len = edges.size(); } else ++m; } } //System.out.println( "m = " + m ); boolean same = true; while ( same ) { // remove all those edges starting with the vertex from tmpQueue to subEdges len = edges.size(); if ( m < len ) { e = 
(Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); boolean exist = checkExistence( ver ); // check whether the sinkVertex is in the subVertices or not if ( !exist ) { subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong5" ); } } else { same = false; } } else same = false; } } } clusterSubEdges(); } public int workOnSequence( String str, int th1, int th2, int th3, int th4 ) { constructVertices( str, th4 ); constructEdges( th1, th2, th3 ); boolean find = true; int i = 0; while ( !(edges.isEmpty())) { subVertices.clear(); subEdges.clear(); extractConnectedGraph(); Vector lp = identifyLCRs(); // the longest path in a connected subgraph lps.add( lp ); ++i; } return i; } // combine all letters from a sequence on different lines( stings) into a single line ( string ) public String generateSequence( String str ) { String sequence = new String(); String strTmp = str; boolean lastSeq = false; try { if ( strTmp == null ) sequence = null; if ( ( strTmp != null ) && ( strTmp.startsWith( ">"))) { //System.out.println( "*******************************************" ); System.out.println( str ); strTmp = rf.readLine(); } while (( strTmp != null ) && (!(strTmp.startsWith( ">" )))) { strTmp = strTmp.trim(); sequence = sequence + strTmp; strTmp = rf.readLine(); } if ( strTmp != null) if ( strTmp.startsWith( ">" )) { sequence = strTmp + "!" 
+ sequence ; } } catch ( IOException ex ) { } return sequence; } public void printPositions( Vector pos ) { int l = pos.size(); String str = new String(); for ( int i = 0; i < l; i++ ) { str = (String) pos.get( i ); System.out.print( str + " " ); } System.out.println(); } public void printLCRBlocks( Vector LCRBlocks ) { String str = new String(); int l = LCRBlocks.size(); // System.out.println( "LCR Blocks: " ); for ( int i = 0; i < l; i++ ) { str = (String)LCRBlocks.get( i ); int index = str.indexOf( "-" ); String start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) System.out.print( str + " " ); } System.out.println(); } public Vector getPositions ( int k ) { Vector pos = new Vector(); //Vector posSingleVertexOnly = new Vector(); String str = new String(); int l = lps.size(), index = 0; Vector lp = new Vector(); Vertex v = new Vertex(); // longest path for ( int i = 0; i < k; i++ ) { // get positions from those vertices in lps lp = (Vector)lps.get( i ); int len = lp.size(); for ( int j = 0; j < len; j++ ) { v = (Vertex)lp.get( j ); str = v.getStartLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); str = v.getEndLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); } } return pos; } public Vector sortPositions( Vector pos) { Vector positions = new Vector(); // used to keep the sorted positions String str = new String(); int len = pos.size(), current = 0, previous = 0; Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { str = (String)pos.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); Vector tmpLCRBlocks = new Vector(); String start = new String(); previous = posInt[0].intValue() - 1; start = Integer.toString(( previous + 1 )); for ( int i = 0; i < len; i++ ) { str = posInt[i].toString(); current = posInt[i].intValue(); //generate blocks of continuous 
positions. Say, the sorted integer array is 3,4,5,6 8,9,10,11,12,13,29,30,31. It can be represented as a vector of three strings( blocks ): 3-6, 8-13, 29-31. if ( current != ( previous + 1 )) { tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); start = str; } previous = current; positions.add( str ); } tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); len = tmpLCRBlocks.size(); Vector LCRBlocks = new Vector(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLCRBlocks.get( i ); int index = str.indexOf( "-" ); start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) LCRBlocks.add( str ); } //printLCRs( LCRBlocks ); return LCRBlocks; } public Vector extend ( int startPos, int endPos, int limit, String direction, String seq ) { Vector decRegs = new Vector(); int pointer = 0, startDecPos = 0, endDecPos = 0; double com1 = 0, com2 = 0; String extReg = seq.substring( startPos - 1, endPos ); if ( direction.equals( "left" )) { // extend to the left( front ) boolean dec = false; pointer = startPos - 2; while ( ( pointer > limit ) && ( pointer > ( startPos - 17 )) ) { //System.out.println( "111111111111extReg:" + extReg ); com1 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); extReg = seq.substring( pointer, endPos ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 11111 " + com2); if ( com1 > com2 ) { if ( !dec ) { dec = true; // System.out.println( "from false to true111111111" ); endDecPos = pointer + 2; } } else if ( dec ) { dec = false; //System.out.println( "from true to false1111111111" ); startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); 
//System.out.println( "decRegs added 11111111 " + startDecPos + "-" + endDecPos ); } } -- pointer; } if ( ( dec ) && ( pointer == ( startPos - 17 )) ) { //System.out.println( "keeping decreasing1111111111" ); while ( ( pointer > limit ) && ( dec ) ) { //System.out.println( "22222222222222extReg:" + extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); com1 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( pointer, endPos ); //System.out.println("keep: " + extReg ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); // com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 22222222222 " + com2); if ( com1 < com2 ) { //System.out.println( "from true to false2222222222" ); dec = false; startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 222222222222 " + startDecPos + "-" + endDecPos ); } } -- pointer; } } // the left extension touches the end of the last block of the current lcr blocks if (( pointer == limit ) && ( dec ) ) { startDecPos = pointer + 2; if ( com2 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 333333333333 " + startDecPos + "-" + endDecPos ); } } if ( decRegs.size() == 0 ) { //System.out.println( "left: Empty" ); } else { //System.out.print( "left: "); //printLCRs( decRegs ); } } else { boolean dec = false; pointer = endPos + 1;//////// // extend to the right( back ) while ( ( pointer < limit ) && ( pointer < ( endPos + 15 ))) { //System.out.println( "333333333333333333333extReg:" + extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); com1 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = 
cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( startPos - 1, pointer ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 33333333 " + com2); if ( com1 > com2 ) { if ( !dec ) { dec = true; //System.out.println( "from false to true33333333333333333" ); startDecPos = pointer -1 ; } } else if ( dec ) { dec = false; //System.out.println( "from true to false3333333333333" ); endDecPos = pointer -1; if ( com1 < comCut ) { decRegs.add( ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 4444444 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if ( ( dec ) && ( pointer == ( endPos + 15 )) ) { // keep extending until the complexity starts increasing, which means that several blocks generated from the longest path can be included into lcrs during one call of the 'extend()' based on a block while (( dec ) && ( pointer < limit )) { //System.out.println( "444444444444extReg:" + extReg ); // com1 = cc.calculateModifiedEntropy( extReg ); com1= cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( startPos - 1, pointer ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com2 = cc.calculateModifiedEntropy( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 4444444 " + com2); if ( com1 < com2 ) { dec = false; //System.out.println( "from true to false444444444" ); endDecPos = pointer - 1; if ( com1 < comCut ) { decRegs.add(( startDecPos ) + "-" + endDecPos ); //System.out.println( "decRegs added 5555555555555555555 " + startDecPos + "-" + endDecPos 
); } } ++ pointer; } if (( pointer == limit ) && ( dec )) { endDecPos = limit - 1; //System.out.println( "decRegs added 66666666666 " + startDecPos + "-" + endDecPos ); decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); } } if ( decRegs.size() == 0 ) { //System.out.println( "right: Empty" ); } else { //System.out.print( "right: "); //printLCRs( decRegs ); } } return decRegs; } public boolean shareLetter( String str1, String str2 ) { boolean shared = false; String str = str1, letter = new String(); while (( str.length() != 0 ) && ( !shared )) { letter = str.substring( 0, 1 ); int index = str2.indexOf( letter ); if ( index != -1 ) shared = true; else if ( str.length() != 0 ) str = str.substring( 1 ); } return shared; } public boolean checkContribution( String currentBlock, Vector decRegs, String seq ) { boolean contributed = false; Vector regs = decRegs; String block = new String(); int i = 0, len = regs.size(); while ( ( i < len ) && ( !contributed )) { block = (String) regs.elementAt( i ); int index = block.indexOf( "-" ); int start= Integer.parseInt( block.substring( 0, index )); int end = Integer.parseInt( block.substring( index + 1 )); block = seq.substring( start - 1, end ); //System.out.println( "block: " + block+ " currentBlock: "+ currentBlock ); contributed = shareLetter( currentBlock, block ); ++i; } return contributed; } public Vector appendLcrs( Vector lcrs, Vector appendedLcrs ) { Vector lowComRegs = lcrs, tmpLcrs = appendedLcrs; while( !( tmpLcrs.isEmpty())) lowComRegs.add( (String)tmpLcrs.remove( 0 ) ); return lowComRegs; } public Vector pickUpDrop ( Vector blocks, String seq ) { Vector frontLcrs = new Vector(), backLcrs = new Vector(), lcrs = new Vector(), tmpBLOCKS = blocks; String currentBlock = new String(), tmpBlock = new String(); boolean isFirstBlock = true; int limit = 0, index = 0, startPos = 0, endPos = 0; while (( !tmpBLOCKS.isEmpty() )) { frontLcrs.clear(); backLcrs.clear(); int lcrBlockStart = 0, lcrBlockEnd= 0; boolean extendToLeft 
= true; // whether to extend towards the left boolean find = false; // find the current extending block // currentBlock can start in the middle of a block, or has the same starting position as a block and it doesn't have to be the block after the previous currentBlock // get the end position of the last block in lcrs if ( !( lcrs.isEmpty())) { tmpBlock = (String)lcrs.lastElement(); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 )); //System.out.println( "lcrBlockEnd: " + lcrBlockEnd ); while (( !find ) && ( !(tmpBLOCKS.isEmpty()) )) { // get the current block currentBlock = (String)tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); //System.out.println( "find currentBlock: "+ startPos + " " + endPos); if ( startPos < lcrBlockEnd ) { if ( endPos > lcrBlockEnd ) if (( endPos - lcrBlockEnd ) >= 3 ) { startPos = lcrBlockEnd + 1; extendToLeft = false; find = true; } } else find = true; } } else { currentBlock = (String) tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); find = true; } if ( find ) { //System.out.println( "currentBlock:" + currentBlock ); if ( isFirstBlock ) { limit = -1; isFirstBlock = false; // extend to the left( front ) frontLcrs = extend( startPos, endPos, limit, "left", seq ); } else if ( extendToLeft ) { limit = lcrBlockEnd - 1; // extend to the left( front ) frontLcrs = extend( startPos, endPos, limit, "left", seq ); } limit = seq.length() + 1; // extend to the right( back ) backLcrs = extend( startPos, endPos, limit, "right", seq ); double com = 0; index = currentBlock.indexOf( "-" ); int cbStart = Integer.parseInt( currentBlock.substring( 0, index ))- 1; int cbEnd = Integer.parseInt( currentBlock.substring( index + 1 )) ; 
//System.out.println("current block String:" + seq.substring( cbStart,cbEnd)); //com = cc.calculateModifiedEntropy( seq.substring( cbStart, cbEnd ) ); com = cc.calculate2LetterEntropyWScoMatrix( seq.substring( cbStart, cbEnd )); //com = cc.calculateModifiedEntropy( seq.substring( cbStart, cbEnd ) ); //com = cc.calculateReciprocalPro( seq.substring( cbStart, cbEnd ) ); //com = cc.calculateRecProWScoringMatrix( seq.substring( cbStart, cbEnd ) ); boolean contributed = false; if ( frontLcrs.size() != 0 ) { // get the start position of the first block in frontLcrs as the start position of the block to be added into lcrs tmpBlock = (String)frontLcrs.elementAt( 0 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); if ( com > comCut ) { // check whether the current block contributes to the complexity-decreasing regions or not contributed = checkContribution( seq.substring( cbStart, cbEnd ), frontLcrs, seq ); if ( !contributed ) lcrBlockEnd = startPos - 1; else { lcrBlockEnd = endPos; } } else { lcrBlockEnd = endPos; //System.out.println( "com of currentBlock: " + com ); } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } boolean combine = false; // whether to combine the last block in lcrs from frontLcrs and the block to be added into lcrs from backLcrs if (( !contributed ) && ( com > comCut )){ contributed = checkContribution( seq.substring( cbStart, cbEnd ), backLcrs, seq ); if ( !contributed ) { lcrBlockStart = endPos + 1; } else { if ( frontLcrs.size() != 0 ) combine = true; } } else if ( frontLcrs.size() != 0 ) combine = true; // get the end position of the last block in backLcrs as the end position of the block to be added into lcrs if ( !( backLcrs.isEmpty())) { tmpBlock = (String)backLcrs.lastElement( ); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 ) ); if ( combine ) { //System.out.println( "combine" ); limit = lcrs.size(); tmpBlock = (String) lcrs.remove( limit - 
1 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); } else { if ( com < comCut ) { lcrBlockStart = startPos; //System.out.println( "Here, com" ); } else if ( contributed ) { //System.out.println( "contributed to the back, com > comCut " ); lcrBlockStart = startPos; } else { //System.out.println( "OOOOOOOOOOOOOOOOOOOOOOOO" ); lcrBlockStart = endPos + 1; } } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } else { if((frontLcrs.size() == 0 ) && ( !contributed ) && ( com < comCut )) { lcrs.add( currentBlock ); } } // check whether to combine the last two blocks in the current lcrs //len = lcrs.length(); /* System.out.print( "current lcrs: " ); printLCRs( lcrs ); */ } } return lcrs; } public Vector mergePurge( Vector lcrs ) { Vector tmpLcrs = lcrs; String currentBlock = new String(), nextBlock = new String(); int len = tmpLcrs.size(), i = 0; while ( i < len ) { if (( i + 1 ) < len ){ currentBlock = (String) tmpLcrs.elementAt( i ); int endIndex = currentBlock.indexOf( "-" ); int end = Integer.parseInt( currentBlock.substring( endIndex +1 )); nextBlock = (String) tmpLcrs.elementAt( i + 1 ); int startIndex = nextBlock.indexOf( "-" ); int start = Integer.parseInt( nextBlock.substring( 0, startIndex )); if (( end == ( start - 1 )) || ( end == start )) { //System.out.println( currentBlock + " " + nextBlock ); currentBlock = currentBlock.substring( 0, endIndex ) + "-" + nextBlock.substring( startIndex + 1 ); tmpLcrs.remove( i ); tmpLcrs.remove( i ); tmpLcrs.add( i, currentBlock ); } else ++i; len = tmpLcrs.size(); } else ++i; } i = 0; len = tmpLcrs.size(); //printLCRs( tmpLcrs ); /* while ( i < len ) { currentBlock = (String) tmpLcrs.elementAt( i ); int index = currentBlock.indexOf( "-" ); int start = Integer.parseInt( currentBlock.substring( 0, index )); int end = Integer.parseInt( currentBlock.substring( index + 1 )); if (( end - start ) < 7 ) tmpLcrs.remove( i ); else ++i; len = tmpLcrs.size(); } */ return tmpLcrs; } 
public boolean checkCombinedSubBlock( String seq1, String seq2, double cCut ) { boolean delete = true; // double com = cc.calculateReciprocalPro( seq1 + seq2 ); double com = cc.calculateNor2LetterEntropyWScoMatrix( seq1 + seq2 ); //double com = cc.calculateNorModifiedEntropy( seq1 + seq2 ); // System.out.println( "combined:" + seq1 + seq2 + " " + com ); if ( com > cCut ) delete = false; return delete; } public String findAlignment( String seq1, String seq2 ) { String aliPos = new String(); try { Sequence s1 = SequenceParser.parse( seq1 ); Sequence s2 = SequenceParser.parse( seq2 ); //System.out.println( "alignment sequences: " + seq1 + "???" + seq2 ); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); int similarLen = alignment.getSimilarity(); // get the length of the same and similar letters; if ( similarLen > 4 ) { // only if the length of similar and same letters is greater than 4 aliPos = new Pair().format( alignment ); //System.out.println( "the alignment: " + aliPos + " " + similarLen ); } } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } return aliPos; } public Vector checkLeftRegs( int aliStart, int aliEnd, int start, int end, String seq, double cCut ) { Vector left = new Vector(); double com = 0; if ( aliStart > 7 ) { // the length of the left region must be longer than 7 com = cc.calculateNor2LetterEntropyWScoMatrix( seq.substring( start - 1, start + aliStart - 2 )); //com = cc.calculateNorModifiedEntropy( seq.substring( start - 1, start + aliStart - 2 )); //System.out.println( "left1: " + seq.substring( start - 1, start + aliStart - 2 ) + " " + com + " " + start + "-" + ( start + aliStart - 2 )); if ( com <= cCut ) left.add( 0, start + "-" + ( start + aliStart - 2 )); } if ( ( end - start + 1 - aliEnd ) > 7 ) { com = cc.calculateNor2LetterEntropyWScoMatrix( seq.substring( start + aliEnd - 1, end )); //com = cc.calculateNorModifiedEntropy( seq.substring( 
start + aliEnd - 1, end )); //System.out.println( "left2:" + seq.substring( start + aliEnd - 1, end ) + " " + com + " " + ( start + aliEnd ) + "-" + end ); if ( com <= cCut ) left.add( ( start + aliEnd ) + "-" + end ); } return left; } public Vector addToResult( Vector result, Vector left ) { Vector tmpResult = result; String str1 = new String(), str2 = new String(); int j = 0; for ( int i = 0; i < left.size(); i++ ) { str1 = (String) left.elementAt( i ); int index = str1.indexOf( "-" ); int endLeft = Integer.parseInt( str1.substring( index + 1 )); boolean found = false; while ( !found ) { if ( j < result.size() ) { str2 = (String) result.elementAt( j ); index = str2.indexOf( "-" ); int startResult = Integer.parseInt( str2.substring( 0, index )); if ( endLeft < startResult ) { found = true; // System.out.println( "Insert left into result: " + str1 + " " + str2 ); result.add( j , str1 ); j = j + 2; } else j++; } else { result.add( str1 ); //System.out.println( "append to the end of result" ); found = true; } } } return result; } public Vector checkAdjBlock ( int start1, int end1, String adjBlock, String seq, double cCut, String mark ) { Vector result = new Vector(); double com = 0; String seq1 = seq.substring( start1 - 1, end1 ); //System.out.println( "current block: " +start1 + " " + end1 + " " + seq1 ); int index1 = adjBlock.indexOf( "-" ); int start2 = Integer.parseInt( adjBlock.substring( 0, index1 )); int end2 = Integer.parseInt( adjBlock.substring( index1 + 1 )); String seq2 = seq.substring( start2 - 1, end2 ); String aliPos = new String(); if ( mark.equals( "front" )) aliPos = findAlignment( seq2, seq1 ); else aliPos = findAlignment( seq1,seq2 ); if ( aliPos.length() != 0 ) { // format of aliPos: 'a1-a2 b1-b2' index1 = aliPos.indexOf( "-" ); int index2 = aliPos.indexOf( " " ); int aliStart2 = Integer.parseInt ( aliPos.substring( 0, index1 )); int aliEnd2 = Integer.parseInt ( aliPos.substring( index1 + 1, index2 )); String aliSeq1 = new String(), aliSeq2 = new 
String(); if ( mark.equals( "front" )) { aliSeq2 = seq.substring( start2 + aliStart2 - 2, start2 + aliEnd2 - 1 ); //System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq2 ); } else { aliSeq1 = seq.substring( start1 + aliStart2 - 2, start1 + aliEnd2 - 1 ); //System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq1 ); } aliPos = aliPos.substring( index2 + 1 ); index1 = aliPos.indexOf( "-" ); int aliStart1 = Integer.parseInt( aliPos.substring( 0, index1 )); int aliEnd1 = Integer.parseInt( aliPos.substring( index1 + 1 )); if ( mark.equals( "front" )) { aliSeq1 = seq.substring( start1 + aliStart1 - 2, start1 + aliEnd1 - 1 ); //System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq1 ); } else { aliSeq2 = seq.substring( start2 + aliStart1 - 2, start2 + aliEnd1 - 1 ); //System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq2 ); } boolean decOrNot = true; if ( mark.equals( "front" )) { decOrNot = true; // decOrNot = checkCombinedSubBlock( aliSeq2,aliSeq1, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 ) ); //System.out.println( "added to result1: " + ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 )); Vector left = checkLeftRegs( aliStart1, aliEnd1, start1, end1, seq, cCut ); result = addToResult( result, left ); } } else { //decOrNot = checkCombinedSubBlock( aliSeq1,aliSeq2, cCut ); decOrNot = true; if ( decOrNot ) { result.add( ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1) ); //System.out.println( "added to redult2: " + ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1)); Vector left = checkLeftRegs( aliStart2, aliEnd2, start1, end1, seq, cCut ); result = addToResult( result, left ); result.add( ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 ) ); //System.out.println( "added to redult3: " + ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 )); 
left = checkLeftRegs( aliStart1, aliEnd1, start2, end2, seq, cCut ); result = addToResult( result, left ); } } } return result; } public Vector checkDeletability ( Vector lcrs, int maxIndex, String seq, double cCut ) { Vector result = new Vector(); String block = new String(); int start1 = 0, end1 = 0; block = (String)lcrs.elementAt( maxIndex ); int index = block.indexOf( "-" ); start1 = Integer.parseInt( block.substring( 0, index )); end1 = Integer.parseInt( block.substring( index + 1 )); if ( maxIndex != 0 ) { block = (String)lcrs.elementAt( maxIndex - 1 ); // System.out.println( "front adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "front" ); } if ( result.size() == 0 ) { if ( maxIndex != ( lcrs.size() - 1 ) ) { block = (String)lcrs.elementAt( maxIndex + 1 ); //System.out.println( "back adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "back" ); if ( result.size() != 0 ) result.add( "back" ); } } return result; } public Vector filter ( Vector lcrs, String seq ) { Vector tmpLcrs = lcrs, com = new Vector(); float len = tmpLcrs.size(); double singleCom = 0, max = -222222222; String str = new String(), block = new String(); if ( tmpLcrs.size() != 1 ) { for ( int i = 0; i < len; i++ ) { str = (String)tmpLcrs.elementAt( i ); int index = str.indexOf( "-" ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); singleCom = cc.calculateNor2LetterEntropyWScoMatrix( str ); //singleCom = cc.calculateReciprocalPro( str ); //singleCom= cc.calculateNorModifiedEntropy( str ); com.add( Double.toString( singleCom )); } int i = 0, j = 0, maxIndex = 0; double limit = 0, cCut = 0; if ( seq.length() > 500 ) limit = len * 0.6; // 0.6 else limit = len * 0.3; //0.3 while ( i < limit ) { int l = com.size(); j = 0; max = -222222222; while ( j < l ) { str = ( String ) com.elementAt( j ); singleCom = Double.parseDouble( str ); if ( singleCom > max ) { 
max = singleCom; maxIndex = j; } ++ j; } //System.out.println( "com: " + com.elementAt( maxIndex )); cCut = Double.parseDouble( (String)com.remove( maxIndex )); ++i; } //System.out.println( "cCut: " + cCut ); j = 0; i = 0; len = tmpLcrs.size(); Vector result = new Vector(); while (( i < limit ) && ( j < len )) { str = (String)tmpLcrs.elementAt( j ); int index = str.indexOf( "-" ); //System.out.println( "current block?" + str.substring( 0, index ) + "?" + str.substring( index + 1 ) ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); //System.out.println( "cur subseq???" + str ); //singleCom = cc.calculateReciprocalPro( str ); //singleCom= cc.calculateNorModifiedEntropy( str ); singleCom = cc.calculateNor2LetterEntropyWScoMatrix( str ); //System.out.println( "singlecom:"+ singleCom + " " + cCut ); if ( singleCom >= cCut ) { result = new Vector(); result = checkDeletability( tmpLcrs, j, seq, cCut ); int rSize =result.size(); boolean fromBack = false; if ( rSize != 0 ) { str = (String)result.elementAt( rSize - 1 ); if ( str.equals( "back")) { --rSize; fromBack = true; } // System.out.println( "remove: "+ (String)tmpLcrs.remove( j )); tmpLcrs.remove( j ); for ( int k = 0; k < rSize; k++ ) { // add 'result' Vector into tmpLcrs in order str = ( String ) result.elementAt( k ); //System.out.print( "*"+ str + "*" ); tmpLcrs.add( j, str ); ++j; } //System.out.println(); // if ( j < tmpLcrs.size()) //System.out.println( "the next one:" + tmpLcrs.elementAt( j )); if ( fromBack ) { //System.out.println( "yes, from back " ); //System.out.println( "removed: "+ tmpLcrs.remove( j )); tmpLcrs.remove( j ); } len = tmpLcrs.size(); } else { tmpLcrs.remove( j ); // System.out.println( "removed coz of high complexity" ); ++i; len = tmpLcrs.size(); } } else ++j; } //System.out.println(i + " " + j ); } return tmpLcrs; } public Vector postProcess( int i, String seq ) { Vector lcrs = new Vector(); Vector blocks = new 
Vector(); Vector pos = getPositions( i ); if ( pos.size() != 0 ) { blocks = sortPositions( pos ); // blocks = filter( blocks, seq ); //printLCRs( blocks ); } lcrs = pickUpDrop( blocks, seq ); //printLCRs( lcrs ); lcrs = mergePurge( lcrs ); //printLCRs( lcrs ); lcrs = filter( lcrs, seq ); //System.out.println( "AFTER************" ); printLCRs( lcrs ); return lcrs; } public void computeLCRPercentage ( Vector lcrs, String str) { } public void printLCRs( Vector LCRs ) { int len = LCRs.size(); for ( int i = 0; i < len; i ++ ) { String str = (String)LCRs.get( i ); System.out.print( str + " " ); } System.out.println( ); } public void printLCRs ( Vector lcrs, String str) { } public void startt( int th1, int th2, int th3, int th4 ) { String str = new String(), id = new String(), nextId = new String(); int index = 0; boolean first = true; try { while ( str != null ) { str = rf.readLine(); str = generateSequence( str ); if ( str != null ) { if ( str.indexOf( ">" ) != -1 ) { index = str.indexOf( "!" 
); id = nextId; nextId = str.substring( 0, index ); str = str.substring( index + 1 ); } else id = nextId; if (!first ) { System.out.println(); System.out.println( id ); } else { id = nextId; first = false; } vertices.clear(); edges.clear(); lps.clear(); for ( int i = 0; i < 20; i++ ) { fVecNor[i] = 0f; fVecUnNor[i] = 0f; } str = str.trim(); int i = workOnSequence( str, th1, th2, th3, th4 ); Vector lcrs = postProcess( i, str );//process all longest paths from every connected subgraph /****************************** float l = lcrs.size(); float len = str.length(); float per = l / len; System.out.println( "The percentage of LCR letters after the longest path: " + per ); ***************************************/ /* computeLCRPercentage( lcrs, str ); printLCRs( lcrs, str ); */ } } rf.close(); } catch ( IOException ex ) { } } public static void main ( String args[] ) { int th1 = Integer.parseInt( args[2] ); int th2 = Integer.parseInt( args[3] ); int th3 = Integer.parseInt( args[4] ); int th4 = th2; gbm g = new gbm( args[0]); g.readMatrices( args[1] ); g.getComCut( args[5] ); g.startt( th1, th2, th3, th4 ); /* try { Sequence s1 = SequenceParser.parse("VVVVVV" ); Sequence s2 = SequenceParser.parse( "LAELLAKKSDRDSPKK"); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); System.out.println( "similarity: " + alignment.getSimilarity() ); String result = new Pair().format(alignment); System.out.println( "*****************" ); System.out.println( result ); } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } */ } } /* 1. Format of the output: SACACPQTSOP......( 60 letters) XXXX TPQSKAQ..........( 60 letters) */ PK f}7O2eeapplications/gbmCopy2.java.alt/* ** Author: Xuehui Li ** Date: March, 2005 ** "gbm" or "GBM" is the abbrevaition of "A graph-based method for detecting low-complexity reions". 
** This is program is used to find low-complexity regions in sequences ** NOTE: all vertices and edges have topological orders ** There are totally five input paramaters. The first one is the sequence file name. The second one the learned matrix file( /cise/research/tamer/xli/LCR/graphLCR/swissprotLearnedMatrices ). The remaining three are the threshold 1, threshold 2 and threshold 3, respectively. At this tiem, all LCR Blocks generated in both /cise/research/tamer/xli/LCR/graphLCR/swissprotLCRBlocks/ and /cise/research/tamer/xli/LCR/graphLCR/pfamLCRBlocks/ are based on the three thresholds: " 3 15 5" */ package applications; import jaligner.Alignment; import jaligner.Sequence; import jaligner.SmithWatermanGotoh; import jaligner.formats.Pair; import jaligner.matrix.MatrixLoader; import jaligner.util.SequenceParser; import java.io.*; import java.util.*; class gbm { // vertices and edges are vectors used to keep all the vertices, edges in a graph generated from a sequence, respectively. vertexQueue is a vector used to keep all vertices whose indegree is zero. lps is a vector used to keep all the longest paths in all connected subgraphs of a sequence. Every longest path in lps is a vector of vertices, excluding the dummy source. // subVertices and subEdges are vectors used to keep all the vertices, edges in a connected-graph which is a subgraph of the graph generated from a sequence, respectively. 
private File f; private RandomAccessFile rf; private Vector vertices, subVertices, edges, subEdges, vertexQueue, lps; private float[][] repeatMatrix, nonRepeatMatrix; private float[] fVecNor, fVecUnNor; private double[][] scoringMatrix; private Vector alphabet; private double comCut = 0; // the complexity cut-off value //private complexityCalculator cc; public gbm ( String fileName ) { // the graph is given in a file where every line represents an edge and has the fromat of "source sink weight" initializeAlphabet(); try{ f = new File ( fileName ); rf = new RandomAccessFile ( f, "r" ); } catch ( IOException ex ) { } vertices = new Vector(); subVertices = new Vector(); edges = new Vector(); subEdges = new Vector(); vertexQueue = new Vector(); lps = new Vector(); repeatMatrix = new float[20][20]; nonRepeatMatrix = new float[20][20]; fVecNor = new float[20]; fVecUnNor = new float[20]; scoringMatrix = new double[20][20]; for ( int i = 0; i < 20; i++ ) fVecUnNor[i] = 0f; //cc = new complexityCalculator(); //cc. 
initializeAlphabet(); } public void initializeAlphabet() { alphabet = new Vector(); alphabet.add( "A" ); alphabet.add( "R" ); alphabet.add( "N" ); alphabet.add( "D" ); alphabet.add( "C" ); alphabet.add( "Q" ); alphabet.add( "E" ); alphabet.add( "G" ); alphabet.add( "H" ); alphabet.add( "I" ); alphabet.add( "L" ); alphabet.add( "K" ); alphabet.add( "M" ); alphabet.add( "F" ); alphabet.add( "P" ); alphabet.add( "S" ); alphabet.add( "T" ); alphabet.add( "W" ); alphabet.add( "Y" ); alphabet.add( "V" ); } // get repeat/non-repeat matrices public void readRNRMatrices( String matricesFile ) { try { File f = new File( matricesFile ); RandomAccessFile rfm = new RandomAccessFile ( f, "r" ); String row = new String(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { nonRepeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { nonRepeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.readLine(); rfm.readLine(); rfm.readLine(); for ( int i = 0; i < 20; i++ ) { row = rfm.readLine(); row = rfm.readLine(); row = row.trim(); for ( int j = 0; j < 20; j++ ) { int index = row.indexOf( " " ); if ( index != -1 ) { repeatMatrix[i][j] = Float.parseFloat( row.substring( 0, index )); } else { repeatMatrix[i][j] = Float.parseFloat( row ); } row = row.substring( index + 3 ); } } rfm.close(); } catch ( IOException ex ) { } } public void readScoringMatrix( String fileName ){ String line = new String(), score = new String(); try { File f = new File( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); for ( int i = 0; i < 20; i++ ) { line = rf.readLine(); int k = 0; for ( int j = 0; j < 20; j++) { score = line.substring( k, k + 2 ).trim(); scoringMatrix[i][j] = Integer.parseInt( score ); k = k + 3; } } rf.close(); } catch ( IOException ex ) { } } public void printMatricesRowByRow() { 
System.out.println( "Non-Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( nonRepeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); System.out.println(); System.out.println( "Repeat matrix: " ); for ( int i = 0; i < 20; i++ ) { for ( int j = 0; j < 20; j++ ) System.out.print( repeatMatrix[i][j] + " " ); System.out.println(); System.out.println(); } System.out.println(); } public void getComCut( String fileName ) { complexityCalculator cc = new complexityCalculator(); cc.initializeAlphabet(); try { File f = new File ( fileName ); RandomAccessFile rf = new RandomAccessFile( f, "r"); String line = new String(); double com, sum = 0, num = 0; line = rf.readLine(); while ( line != null ) { line = line.trim(); com = cc.calculate2LetterEntropyWScoMatrix( line ); //com = cc.calculateModifiedEntropy( line ); sum = sum + com; num = num + 1; line = rf.readLine(); } comCut = sum / num; // System.out.println( "The cut-off value is: " + sum + " / " + num + " = " + comCut ); rf.close(); } catch( IOException ex ){ } } public void createFirstVector( String window ) { int len = window.length(); String tmpWindow = window, letter = new String(); for ( int i = 0; i < len; i++ ) { letter = tmpWindow.substring( 0, 1 ); int index = alphabet.indexOf( letter ); fVecUnNor[ index ]= fVecUnNor[index] + 1f; tmpWindow = tmpWindow.substring( 1 ); } for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecUnNor[ i ]; for ( int i = 0; i < 20; i++ ) fVecNor[ i ] = fVecNor[ i ] / len; } // o for unNormalized, 1 for normalized public void printVector( int mark ) { float[] tmpVector = new float[ 20 ]; if ( mark == 0 ) tmpVector = fVecUnNor; else tmpVector = fVecNor; for ( int i = 0; i < 20; i++ ) System.out.print( tmpVector[i] + " " ); System.out.println(); } public void constructSingleVertex( int start, int end ) { String startLetter = Integer.toString( start + 1 ); String endLetter = Integer.toString( end + 1 ); 
Vertex v = new Vertex( startLetter, endLetter, 1, 1.0f ); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); v.setPredecessor( dummySource ); vertices.add( v ); } public void workOnFirstWindow( String window ) { String letter1, letter2; createFirstVector( window ); int len = window.length(), j = 0, row = 0, col = 0; for ( int i = 0; i < len - 1; i++ ) { j = 1; letter1 = window.substring( i, i + 1 ); while ( ( i + j ) < len ) { letter2 = window.substring( i + j, i + j + 1 ); row = alphabet.indexOf( letter1 ); col = alphabet.indexOf( letter2 ); if ( scoringMatrix[row][col] > 1 ) if ( checkProbablity( row, col )) constructSingleVertex( i, i + j ); ++j; } } } // at this time, row == col, since we only consider same letters public boolean checkProbablity( int row, int col ) { boolean construct = false; float difference1 = Math.abs( repeatMatrix[ row][col] - fVecNor[row] ); float difference2 = Math.abs( nonRepeatMatrix[row][col] - fVecNor[row] ); if ( difference2 > difference1 ) { construct = true; } return construct; } public void addVertices( String window, int index, int startPos ) { int len = window.length(); String letter = new String(); int tmpIndex = 0; for ( int i = 0; i < len - 1; i++ ) { letter = window.substring( i, i + 1 ); tmpIndex = alphabet.indexOf( letter ); if ( scoringMatrix[index][tmpIndex] > 1 ) if ( checkProbablity( index, tmpIndex )) constructSingleVertex( startPos + i, startPos + len - 1 ); } } public void constructVertices( String str, int th4 ) { char c1, c2; int len = str.length(); boolean similar = false; Vertex v = new Vertex(); float[] previousVecUnNOr = new float[20]; String window = str.substring( 0, th4 ); String oldLetter = new String(), newLetter = new String(); workOnFirstWindow( window ); int startPos = 1; while( startPos <= ( len - th4 ) ) { oldLetter = window.substring( 0, 1 ); window = str.substring( startPos, startPos + th4 ); newLetter = window.substring( th4 - 1, th4 ); int index = alphabet.indexOf( oldLetter ); fVecUnNor[ 
index ] = fVecUnNor[ index ] - 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; index = alphabet.indexOf( newLetter ); fVecUnNor[ index ] = fVecUnNor[ index ] + 1; fVecNor[ index ] = fVecUnNor[ index ] / th4; addVertices( window, index, startPos ); ++startPos; } } // return the actual weight of the vertex ??? /// to be modified later to include similar cases public boolean checkSimilarity( String c1, String c2 ) { boolean similar = false; if ( c1.equals( c2 )) similar = true; return similar; } // find the percentage of letters appearing in vertices public void findLetterPercentageVer( float lF ) { int len = vertices.size(); Vector appearedLetters = new Vector(); Vertex v = new Vertex(); for ( int i = 0; i < len ; i++ ) { v = (Vertex)vertices.elementAt( i ); String str = v.getStartLetter(); int index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); str = v.getEndLetter(); index = appearedLetters.indexOf( str ); if ( index == -1 ) appearedLetters.add ( str ); } ///// Sort first len = appearedLetters.size(); float per = len / lF; // System.out.println( "The letter percentage after vertex construction is: " + len + " / " + lF + " = " + per ); Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { String str = (String)appearedLetters.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); for ( int i = 0; i < len; i ++ ) { String str = posInt[i].toString(); System.out.print( str + " " ); } System.out.println(); // computePercentage( lF, appearedLetters ); } public void computePercentage( float l, Vector al ) { int len = al.size(); float per = len / l; System.out.println( "The percentage is: " + len + " / " + l + " = " + per ); for ( int i = 0; i < len; i ++ ) { String str = (String)al.elementAt( i ); System.out.print( str + " " ); } System.out.println(); } public void constructEdges( int th1, int th2, int th3 ) { String startLetter = new String(), endLetter = new String(); int i = 0, j = 0, p = 0, q 
= 0, l = vertices.size(); Vertex v1 = new Vertex(), v2 = new Vertex(); Edge e = new Edge(); for ( int k = 0; k < l-1; k++ ) { v1 = (Vertex)vertices.get( k ); startLetter = v1.getStartLetter(); i = Integer.parseInt ( startLetter ); endLetter = v1.getEndLetter(); j = Integer.parseInt( endLetter ); boolean end = false; int m = k + 1; while (( !end )&& ( m < l )) { v2 = (Vertex)vertices.get( m ); startLetter = v2.getStartLetter(); p = Integer.parseInt ( startLetter ); endLetter = v2.getEndLetter(); q = Integer.parseInt( endLetter ); if ( checkConditions( i, j, p, q, th1, th2, th3 )) { v2.incIndegree(); // modify the vertex in vertices vertices.setElementAt( v2, m ); e = new Edge( v1, v2, 1.0f ); edges.add( e ); } else if ( ( p - i ) > th2 ) // k2 = 25 end = false; ++m; } } } // return the actual weight of the edge ??? public boolean checkConditions ( int i, int j, int p, int q, int th1, int th2, int th3 ) { boolean satisfied = false; int foo = ( j - i ) - ( q - p ); foo = Math.abs( foo ); if ( foo <= th1 ) // condition #1, k1 = 5 if (( p - i ) <= th2 ) // condition #2, k2 = 26 if ((( i <= p ) && ( p <= j ) ) && ( j <= q )) // condition #3 if (( i == p ) || ( p == j ) || ( j == q )) { // condition #4 if ((( j - i ) <= th3) && ( ( q - p ) <= th3 )) satisfied = true; } else satisfied = true; return satisfied; } public void modifyVertexQueue ( Vector tmpQueue, Vector tmpVertices ) { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); vertexQueue.clear(); vertexQueue.add( dummySource ); } public void printVertexQueue() { System.out.println( "All vertices in the queue:" ); Vertex v = new Vertex(); Vertex previous = v; int l = vertexQueue.size(); for ( int i = 0; i < l; i++ ) { v = (Vertex) vertexQueue.get( i ); previous = v.getPredecessor(); if ( previous != null) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else 
System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printVertices( int m ) { Vector tmpVertices = new Vector(); if ( m == 0 ) tmpVertices = subVertices; else tmpVertices = vertices; Vertex v = new Vertex(); Vertex previous = v; int l = tmpVertices.size(); System.out.println( "All vertices in the graph: " + l ); for ( int i = 0; i < l; i++ ) { v = (Vertex) tmpVertices.get( i ); previous = v.getPredecessor(); if ( previous != null ) System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP() + " " + previous.getStartLetter() + "A" + previous.getEndLetter()); else System.out.println( v.getStartLetter() + "A" + v.getEndLetter() + " " + v.getIndegree() + " " + v.getWeight() + " " + v.getLP()); } } // m = 0 for subgraph, m = 1 for the whole sequence public void printEdges( int m ) { Vector tmpEdges = new Vector(); if ( m == 0 ) tmpEdges = subEdges; else tmpEdges = edges; Edge e = new Edge(); Vertex sourceVer = new Vertex(), sinkVer = new Vertex(); String str = new String(); int length = tmpEdges.size(); System.out.println( "All edges in the graph: " + length ); for ( int i = 0; i < length; i++ ) { e = (Edge) tmpEdges.get( i ); sourceVer = e.getSource(); sinkVer = e.getSink(); System.out.println( sourceVer.getStartLetter() + "A" + sourceVer.getEndLetter() + " lp: " + sourceVer.getLP()+ " indegree: "+ sourceVer.getIndegree() + " "+ sinkVer.getStartLetter() + "A" + sinkVer.getEndLetter() + " lp: " + sinkVer.getLP() + " indegree: " + sinkVer.getIndegree() + " weight: "+ e.getWeight() ); } } public void addDummySource() { Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); addToSubVerticesVertexQueue( dummySource ); addToSubEdges( dummySource ); } public void addToSubVerticesVertexQueue( Vertex dummySource ) { subVertices.add( 0, dummySource ); vertexQueue.clear(); 
vertexQueue.add( dummySource ); } public void addToSubEdges( Vertex dummySource ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subVertices.size(); for ( int i = l-1; i > 0; i-- ) { v = (Vertex)subVertices.get( i ); e = new Edge( dummySource, v, 0.0f ); subEdges.add( 0, e ); } } public Vector findLongestPath() { Vertex v = new Vertex(); while ( !( vertexQueue.isEmpty())) { v = (Vertex) vertexQueue.remove( 0 ); traverseSubEdges( v ); } // find the vertex to which the path from the source is the longest Vector lp = traverseVertices(); return lp; } public void traverseSubEdges( Vertex ver ) { boolean end = false, first = true; Edge e = new Edge(); Vertex sourceVer = new Vertex(); Vertex sinkVer = new Vertex(); String str = new String(); float w = 0, sourceLP = 0, sinkLP = 0; int len = subEdges.size(), j = 0; if (!(subEdges.isEmpty())) j = findEdges( ver ); if ( j == -1 ) end = true; else if ( j != 0 ) { } while (( !end ) && ( j < len ) && (!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( j ); sourceVer = e.getSource(); if ( sourceVer.equals( ver )) { sourceLP = sourceVer.getLP(); sinkVer = e.getSink(); int i = subVertices.indexOf( sinkVer ); sinkLP = sinkVer.getLP(); w = e.getWeight(); if (( sourceLP + w ) > sinkLP ) { sinkLP = sourceLP +w; sinkVer.setLP( sinkLP ); sinkVer.setPredecessor( sourceVer ); } sinkVer.decIndegree(); subVertices.setElementAt( sinkVer, i ); modifyVertexInEdges( sinkVer ); i = sinkVer.getIndegree(); if ( i == 0 ) vertexQueue.add( sinkVer ); subEdges.remove( j ); len = subEdges.size(); first = false; } else if ( first ) { System.out.println( "This is a vertex with outdegree zero" ); end = true; } else end = true; } } public int findEdges( Vertex ver ) { int i = 0 , l = subEdges.size(); boolean find = false; Vertex v = new Vertex(); Edge e = new Edge(); while (( !find ) && ( i < l )) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( ver )) find = true; else ++i; } if ( !find ) i = -1; return i; } public void 
modifyVertexInEdges( Vertex sinkVer ) { Vertex v = new Vertex(); Edge e = new Edge(); int l = subEdges.size(); for ( int i = 0; i < l; i++ ) { e = (Edge)subEdges.get( i ); v = e.getSource(); if ( v.equals( sinkVer )) e.setSource( sinkVer ); else { v = e.getSink(); if ( v.equals( sinkVer )) e.setSink( sinkVer ); } subEdges.setElementAt( e, i ); } } public Vector traverseVertices() { Vertex v = new Vertex(), maxVer = new Vertex(); int l = subVertices.size(); float length = 0f, maxLp = -2222.0f; for ( int i = 0; i < l; i++ ) { v = (Vertex)subVertices.get( i ); length = v.getLP(); if ( length > maxLp ) { maxLp = length; maxVer = v; } } Vector lp = constructLongestPath( maxVer ); return lp; } public Vector constructLongestPath( Vertex maxVer ) { Vector lp = new Vector(); Vertex dummySource = new Vertex( "0", "0", 0, 1.0f); Vertex v = maxVer; while ( !(v.equals( dummySource ))) { lp.add ( 0, v ); v = v.getPredecessor(); } return lp; } public void printLongestPath( Vector lp ) { int i = 0; Vertex v = new Vertex(); String str = new String(); int length = lp.size(); for ( i = 0; i < length; i++ ) { v = (Vertex) lp.get( i ); System.out.print( v.getStartLetter() + "A" + v.getEndLetter() + " "); } System.out.println(); } public Vector identifyLCRs( ) { addDummySource(); Vector lp = findLongestPath(); return lp; } public boolean checkExistence( Vertex v ) { int index = subVertices.indexOf( v ); if ( index == -1 ) return false; else return true; } public void copy( Vector vt1, Vector vt2 ) { int l = vt1.size(); Edge e = new Edge(); for ( int i = 0; i < l; i++ ) { e = (Edge)vt1.get( i ); vt2.add ( e ); } } // make all edges beginning with the same vertex stay together public void clusterSubEdges() { Vector tmpSubEdges = new Vector(); Edge e = new Edge(); Vertex v = new Vertex(), ver = new Vertex(); while ( ( !subEdges.isEmpty())) { e = (Edge)subEdges.remove( 0 ); tmpSubEdges.add( e ); v = e.getSource(); int m = 0; int len = subEdges.size(); while (( m < len ) && 
(!(subEdges.isEmpty()))) { e = (Edge)subEdges.elementAt( m ); ver = e.getSource(); if ( v.equals( ver )) { tmpSubEdges.add( e ); subEdges.remove( m ); } else ++m; len = subEdges.size(); } } copy( tmpSubEdges, subEdges ); } // assign values to subVertices and subEdges ( BFS ) public void extractConnectedGraph() { boolean first = true; Vertex v = new Vertex(); Edge e = new Edge(); Vector tmpQueue = new Vector(); while(( first ) || (!(tmpQueue.isEmpty()))){ if ( first ) { // start the first edge of a new connected subgraph e = (Edge)edges.remove( 0 ); subEdges.add( e ); v = e.getSource(); subVertices.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong1" ); v = e.getSink(); subVertices.add( v ); tmpQueue.add( v ); if ( !(vertices.remove( v ))) System.out.println( "wrong2" ); v = e.getSource(); first = false; boolean same = true; int m = 0; int len = edges.size(); while (( same ) &&( m < len )) { // remove all those edges having the same source vertex as the first edge e = (Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong3" ); } else same = false; len = edges.size(); } } else { v = (Vertex)tmpQueue.remove( 0 ); int m = 0; boolean found = false; int len = edges.size(); //find the starting positon of those edges who start at the first vertex from tmpQueue while (( !found ) && ( m < len )) { // skip all edges starting with the vertex from the tmpQueue e = (Edge)edges.elementAt( m ); Vertex sr = e.getSource(); if ( v.equals( sr )) found = true; else { Vertex si = e.getSink(); if ( v.equals( si )) { boolean exist = checkExistence( sr ); if ( !exist ) { subVertices.add( sr ); tmpQueue.add( sr ); // work on edges whose sink vertex is the same as the vertex from tempQueue if ( 
!(vertices.remove( sr ))) System.out.println( "wrong4" ); } e = (Edge)edges.remove( m ); subEdges.add(e ); len = edges.size(); } else ++m; } } //System.out.println( "m = " + m ); boolean same = true; while ( same ) { // remove all those edges starting with the vertex from tmpQueue to subEdges len = edges.size(); if ( m < len ) { e = (Edge)edges.elementAt( m ); Vertex ver = e.getSource(); if ( v.equals( ver )) { edges.remove( m ); // remove the edge who starts with v subEdges.add( e ); ver = e.getSink(); boolean exist = checkExistence( ver ); // check whether the sinkVertex is in the subVertices or not if ( !exist ) { subVertices.add( ver ); tmpQueue.add( ver ); // put ver ( the sink of the edge ) into tmpQueue; if ( !(vertices.remove( ver ))) System.out.println( "wrong5" ); } } else { same = false; } } else same = false; } } } clusterSubEdges(); } public int workOnSequence( String str, int th1, int th2, int th3, int th4 ) { constructVertices( str, th4 ); //printVertices( 1 );////////////////////////////// int i = 0; constructEdges( th1, th2, th3 ); boolean find = true; while ( !(edges.isEmpty())) { subVertices.clear(); subEdges.clear(); extractConnectedGraph(); Vector lp = identifyLCRs(); // the longest path in a connected subgraph lps.add( lp ); ++i; } return i; } // combine all letters from a sequence on different lines( stings) into a single line ( string ) public String generateSequence( String str ) { String sequence = new String(); String strTmp = str; boolean lastSeq = false; try { if ( strTmp == null ) sequence = null; if ( ( strTmp != null ) && ( strTmp.startsWith( ">"))) { //System.out.println( "*******************************************" ); System.out.println( str ); strTmp = rf.readLine(); } while (( strTmp != null ) && (!(strTmp.startsWith( ">" )))) { strTmp = strTmp.trim(); sequence = sequence + strTmp; strTmp = rf.readLine(); } if ( strTmp != null) if ( strTmp.startsWith( ">" )) { sequence = strTmp + "!" 
+ sequence ; } } catch ( IOException ex ) { } return sequence; } public void printPositions( Vector pos ) { int l = pos.size(); String str = new String(); for ( int i = 0; i < l; i++ ) { str = (String) pos.get( i ); System.out.print( str + " " ); } System.out.println(); } public void printLCRBlocks( Vector LCRBlocks ) { String str = new String(); int l = LCRBlocks.size(); // System.out.println( "LCR Blocks: " ); for ( int i = 0; i < l; i++ ) { str = (String)LCRBlocks.get( i ); int index = str.indexOf( "-" ); String start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) System.out.print( str + " " ); } System.out.println(); } public Vector getPositions ( int k ) { Vector pos = new Vector(); //Vector posSingleVertexOnly = new Vector(); String str = new String(); int l = lps.size(), index = 0; Vector lp = new Vector(); Vertex v = new Vertex(); // longest path for ( int i = 0; i < k; i++ ) { // get positions from those vertices in lps lp = (Vector)lps.get( i ); int len = lp.size(); for ( int j = 0; j < len; j++ ) { v = (Vertex)lp.get( j ); str = v.getStartLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); str = v.getEndLetter(); index = pos.indexOf( str ); if ( index == -1 ) pos.add( str ); } } return pos; } public Vector sortPositions( Vector pos) { Vector positions = new Vector(); // used to keep the sorted positions String str = new String(); int len = pos.size(), current = 0, previous = 0; Integer[] posInt = new Integer[ len ]; for ( int i = 0; i < len; i++ ) { str = (String)pos.get( i ); posInt[ i ] = new Integer( str ); } MergeSort.mergeSort( posInt ); Vector tmpLCRBlocks = new Vector(); String start = new String(); previous = posInt[0].intValue() - 1; start = Integer.toString(( previous + 1 )); for ( int i = 0; i < len; i++ ) { str = posInt[i].toString(); current = posInt[i].intValue(); //generate blocks of continuous 
positions. Say, the sorted integer array is 3,4,5,6 8,9,10,11,12,13,29,30,31. It can be represented as a vector of three strings( blocks ): 3-6, 8-13, 29-31. if ( current != ( previous + 1 )) { tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); start = str; } previous = current; positions.add( str ); } tmpLCRBlocks.add( start + "-" + Integer.toString( previous )); len = tmpLCRBlocks.size(); Vector LCRBlocks = new Vector(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLCRBlocks.get( i ); int index = str.indexOf( "-" ); start = str.substring( 0, index ); String end = str.substring( index + 1 );; int difference = Integer.parseInt( end ) - Integer.parseInt( start ) ; if ( difference > 1 ) // the interval is at least 3-letter long LCRBlocks.add( str ); } //printLCRs( LCRBlocks ); return LCRBlocks; } public Vector extend ( int startPos, int endPos, int limit, String direction, String seq ) { Vector decRegs = new Vector(); int pointer = 0, startDecPos = 0, endDecPos = 0; double com1 = 0, com2 = 0; String extReg = seq.substring( startPos - 1, endPos ); complexityCalculator cc = new complexityCalculator(); cc.initializeAlphabet(); if ( direction.equals( "left" )) { // extend to the left( front ) boolean dec = false; pointer = startPos - 2; while ( ( pointer > limit ) && ( pointer > ( startPos - 17 )) ) { //System.out.println( "111111111111extReg:" + extReg ); com1 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); extReg = seq.substring( pointer, endPos ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 11111 " + com2); if ( com1 > com2 ) { if ( !dec ) { dec = true; // System.out.println( "from false to true111111111" ); endDecPos = pointer + 2; } } else if ( dec ) { dec = false; //System.out.println( "from true to 
false1111111111" ); startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 11111111 " + startDecPos + "-" + endDecPos ); } } -- pointer; } if ( ( dec ) && ( pointer == ( startPos - 17 )) ) { //System.out.println( "keeping decreasing1111111111" ); while ( ( pointer > limit ) && ( dec ) ) { //System.out.println( "22222222222222extReg:" + extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); com1 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( pointer, endPos ); //System.out.println("keep: " + extReg ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); // com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 22222222222 " + com2); if ( com1 < com2 ) { //System.out.println( "from true to false2222222222" ); dec = false; startDecPos = pointer + 2; if ( com1 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 222222222222 " + startDecPos + "-" + endDecPos ); } } -- pointer; } } // the left extension touches the end of the last block of the current lcr blocks if (( pointer == limit ) && ( dec ) ) { startDecPos = pointer + 2; if ( com2 < comCut ) { decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 333333333333 " + startDecPos + "-" + endDecPos ); } } if ( decRegs.size() == 0 ) { //System.out.println( "left: Empty" ); } else { //System.out.print( "left: "); //printLCRs( decRegs ); } } else { boolean dec = false; pointer = endPos + 1;//////// // extend to the right( back ) while ( ( pointer < limit ) && ( pointer < ( endPos + 15 ))) { //System.out.println( "333333333333333333333extReg:" + extReg ); //com1 = cc.calculateModifiedEntropy( extReg 
); com1 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( startPos - 1, pointer ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 33333333 " + com2); if ( com1 > com2 ) { if ( !dec ) { dec = true; //System.out.println( "from false to true33333333333333333" ); startDecPos = pointer -1 ; } } else if ( dec ) { dec = false; //System.out.println( "from true to false3333333333333" ); endDecPos = pointer -1; if ( com1 < comCut ) { decRegs.add( ( startDecPos ) + "-" + ( endDecPos )); //System.out.println( "decRegs added 4444444 " + startDecPos + "-" + endDecPos ); } } ++ pointer; } if ( ( dec ) && ( pointer == ( endPos + 15 )) ) { // keep extending until the complexity starts increasing, which means that several blocks generated from the longest path can be included into lcrs during one call of the 'extend()' based on a block while (( dec ) && ( pointer < limit )) { //System.out.println( "444444444444extReg:" + extReg ); com1= cc.calculate2LetterEntropyWScoMatrix( extReg ); //com1 = cc.calculateModifiedEntropy( extReg ); //com1 = cc.calculateReciprocalPro( extReg ); //com1 = cc.calculateRecProWScoringMatrix( extReg ); extReg = seq.substring( startPos - 1, pointer ); //com2 = cc.calculateModifiedEntropy( extReg ); com2 = cc.calculate2LetterEntropyWScoMatrix( extReg ); //com2 = cc.calculateReciprocalPro( extReg ); //com2 = cc.calculateRecProWScoringMatrix( extReg ); //System.out.println( com1 + " 4444444 " + com2); if ( com1 < com2 ) { dec = false; //System.out.println( "from true to false444444444" ); endDecPos = pointer - 1; if ( com1 < comCut ) { decRegs.add(( startDecPos ) + "-" + endDecPos ); //System.out.println( "decRegs added 5555555555555555555 " + startDecPos + 
"-" + endDecPos ); } } ++ pointer; } if (( pointer == limit ) && ( dec )) { endDecPos = limit - 1; //System.out.println( "decRegs added 66666666666 " + startDecPos + "-" + endDecPos ); decRegs.add( 0, ( startDecPos ) + "-" + ( endDecPos )); } } if ( decRegs.size() == 0 ) { //System.out.println( "right: Empty" ); } else { //System.out.print( "right: "); //printLCRs( decRegs ); } } return decRegs; } public boolean shareLetter( String str1, String str2 ) { boolean shared = false; String str = str1, letter = new String(); while (( str.length() != 0 ) && ( !shared )) { letter = str.substring( 0, 1 ); int index = str2.indexOf( letter ); if ( index != -1 ) shared = true; else if ( str.length() != 0 ) str = str.substring( 1 ); } return shared; } public boolean checkContribution( String currentBlock, Vector decRegs, String seq ) { boolean contributed = false; Vector regs = decRegs; String block = new String(); int i = 0, len = regs.size(); while ( ( i < len ) && ( !contributed )) { block = (String) regs.elementAt( i ); int index = block.indexOf( "-" ); int start= Integer.parseInt( block.substring( 0, index )); int end = Integer.parseInt( block.substring( index + 1 )); block = seq.substring( start - 1, end ); //System.out.println( "block: " + block+ " currentBlock: "+ currentBlock ); contributed = shareLetter( currentBlock, block ); ++i; } return contributed; } public Vector appendLcrs( Vector lcrs, Vector appendedLcrs ) { Vector lowComRegs = lcrs, tmpLcrs = appendedLcrs; while( !( tmpLcrs.isEmpty())) lowComRegs.add( (String)tmpLcrs.remove( 0 ) ); return lowComRegs; } public Vector pickUpDrop ( Vector blocks, String seq ) { Vector frontLcrs = new Vector(), backLcrs = new Vector(), lcrs = new Vector(), tmpBLOCKS = blocks; String currentBlock = new String(), tmpBlock = new String(); boolean isFirstBlock = true; int limit = 0, index = 0, startPos = 0, endPos = 0; complexityCalculator cc = new complexityCalculator(); cc.initializeAlphabet(); while (( !tmpBLOCKS.isEmpty() )) { 
frontLcrs.clear(); backLcrs.clear(); int lcrBlockStart = 0, lcrBlockEnd= 0; boolean extendToLeft = true; // whether to extend towards the left boolean find = false; // find the current extending block // currentBlock can start in the middle of a block, or has the same starting position as a block and it doesn't have to be the block after the previous currentBlock // get the end position of the last block in lcrs if ( !( lcrs.isEmpty())) { tmpBlock = (String)lcrs.lastElement(); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 )); //System.out.println( "lcrBlockEnd: " + lcrBlockEnd ); while (( !find ) && ( !(tmpBLOCKS.isEmpty()) )) { // get the current block currentBlock = (String)tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); //System.out.println( "find currentBlock: "+ startPos + " " + endPos); if ( startPos < lcrBlockEnd ) { if ( endPos > lcrBlockEnd ) if (( endPos - lcrBlockEnd ) >= 3 ) { startPos = lcrBlockEnd + 1; extendToLeft = false; find = true; } } else find = true; } } else { currentBlock = (String) tmpBLOCKS.remove( 0 ); index = currentBlock.indexOf( "-" ); startPos = Integer.parseInt( currentBlock.substring( 0, index )); endPos = Integer.parseInt( currentBlock.substring( index + 1 )); find = true; } if ( find ) { //System.out.println( "currentBlock:" + currentBlock ); if ( isFirstBlock ) { limit = -1; isFirstBlock = false; // extend to the left( front ) frontLcrs = extend( startPos, endPos, limit, "left", seq ); } else if ( extendToLeft ) { limit = lcrBlockEnd - 1; // extend to the left( front ) frontLcrs = extend( startPos, endPos, limit, "left", seq ); } limit = seq.length() + 1; // extend to the right( back ) backLcrs = extend( startPos, endPos, limit, "right", seq ); double com = 0; index = currentBlock.indexOf( "-" ); int cbStart = Integer.parseInt( 
currentBlock.substring( 0, index ))- 1; int cbEnd = Integer.parseInt( currentBlock.substring( index + 1 )) ; //System.out.println("current block String:" + seq.substring( cbStart,cbEnd)); //com = cc.calculateModifiedEntropy( seq.substring( cbStart, cbEnd ) ); com = cc.calculate2LetterEntropyWScoMatrix( seq.substring( cbStart, cbEnd )); //com = cc.calculateModifiedEntropy( seq.substring( cbStart, cbEnd ) ); //com = cc.calculateReciprocalPro( seq.substring( cbStart, cbEnd ) ); //com = cc.calculateRecProWScoringMatrix( seq.substring( cbStart, cbEnd ) ); boolean contributed = false; if ( frontLcrs.size() != 0 ) { // get the start position of the first block in frontLcrs as the start position of the block to be added into lcrs tmpBlock = (String)frontLcrs.elementAt( 0 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); if ( com > comCut ) { // check whether the current block contributes to the complexity-decreasing regions or not contributed = checkContribution( seq.substring( cbStart, cbEnd ), frontLcrs, seq ); if ( !contributed ) lcrBlockEnd = startPos - 1; else { lcrBlockEnd = endPos; } } else { lcrBlockEnd = endPos; //System.out.println( "com of currentBlock: " + com ); } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } boolean combine = false; // whether to combine the last block in lcrs from frontLcrs and the block to be added into lcrs from backLcrs if (( !contributed ) && ( com > comCut )){ contributed = checkContribution( seq.substring( cbStart, cbEnd ), backLcrs, seq ); if ( !contributed ) { lcrBlockStart = endPos + 1; } else { if ( frontLcrs.size() != 0 ) combine = true; } } else if ( frontLcrs.size() != 0 ) combine = true; // get the end position of the last block in backLcrs as the end position of the block to be added into lcrs if ( !( backLcrs.isEmpty())) { tmpBlock = (String)backLcrs.lastElement( ); index = tmpBlock.indexOf( "-" ); lcrBlockEnd = Integer.parseInt( tmpBlock.substring( index + 1 ) ); if ( 
combine ) { //System.out.println( "combine" ); limit = lcrs.size(); tmpBlock = (String) lcrs.remove( limit - 1 ); index = tmpBlock.indexOf( "-" ); lcrBlockStart = Integer.parseInt( tmpBlock.substring( 0, index ) ); } else { if ( com < comCut ) { lcrBlockStart = startPos; //System.out.println( "Here, com" ); } else if ( contributed ) { //System.out.println( "contributed to the back, com > comCut " ); lcrBlockStart = startPos; } else { //System.out.println( "OOOOOOOOOOOOOOOOOOOOOOOO" ); lcrBlockStart = endPos + 1; } } lcrs.add( lcrBlockStart + "-" + lcrBlockEnd ); } else { if((frontLcrs.size() == 0 ) && ( !contributed ) && ( com < comCut )) { lcrs.add( currentBlock ); } } // check whether to combine the last two blocks in the current lcrs //len = lcrs.length(); /* System.out.print( "current lcrs: " ); printLCRs( lcrs ); */ } } return lcrs; } public Vector mergePurge( Vector lcrs ) { Vector tmpLcrs = lcrs; String currentBlock = new String(), nextBlock = new String(); int len = tmpLcrs.size(), i = 0; while ( i < len ) { if (( i + 1 ) < len ){ currentBlock = (String) tmpLcrs.elementAt( i ); int endIndex = currentBlock.indexOf( "-" ); int end = Integer.parseInt( currentBlock.substring( endIndex +1 )); nextBlock = (String) tmpLcrs.elementAt( i + 1 ); int startIndex = nextBlock.indexOf( "-" ); int start = Integer.parseInt( nextBlock.substring( 0, startIndex )); if (( end == ( start - 1 )) || ( end == start )) { //System.out.println( currentBlock + " " + nextBlock ); currentBlock = currentBlock.substring( 0, endIndex ) + "-" + nextBlock.substring( startIndex + 1 ); tmpLcrs.remove( i ); tmpLcrs.remove( i ); tmpLcrs.add( i, currentBlock ); } else ++i; len = tmpLcrs.size(); } else ++i; } i = 0; len = tmpLcrs.size(); //printLCRs( tmpLcrs ); /* while ( i < len ) { currentBlock = (String) tmpLcrs.elementAt( i ); int index = currentBlock.indexOf( "-" ); int start = Integer.parseInt( currentBlock.substring( 0, index )); int end = Integer.parseInt( currentBlock.substring( index + 1 
)); if (( end - start ) < 7 ) tmpLcrs.remove( i ); else ++i; len = tmpLcrs.size(); } */ return tmpLcrs; } public boolean checkCombinedSubBlock( String seq1, String seq2, double cCut ) { boolean delete = true; complexityCalculator cc = new complexityCalculator(); cc.initializeAlphabet(); // double com = cc.calculateReciprocalPro( seq1 + seq2 ); double com = cc.calculateNor2LetterEntropyWScoMatrix( seq1 + seq2 ); //double com = cc.calculateNorModifiedEntropy( seq1 + seq2 ); // System.out.println( "combined:" + seq1 + seq2 + " " + com ); if ( com > cCut ) delete = false; return delete; } public String findAlignment( String seq1, String seq2 ) { String aliPos = new String(); try { Sequence s1 = SequenceParser.parse( seq1 ); Sequence s2 = SequenceParser.parse( seq2 ); //System.out.println( "alignment sequences: " + seq1 + "???" + seq2 ); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); int similarLen = alignment.getSimilarity(); // get the length of the same and similar letters; if ( similarLen > 4 ) { // only if the length of similar and same letters is greater than 4 aliPos = new Pair().format( alignment ); //System.out.println( "the alignment: " + aliPos + " " + similarLen ); } } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } return aliPos; } public Vector checkLeftRegs( int aliStart, int aliEnd, int start, int end, String seq, double cCut ) { Vector left = new Vector(); double com = 0; complexityCalculator cc = new complexityCalculator(); cc.initializeAlphabet(); if ( aliStart > 7 ) { // the length of the left region must be longer than 7 com = cc.calculateNor2LetterEntropyWScoMatrix( seq.substring( start - 1, start + aliStart - 2 )); //com = cc.calculateNorModifiedEntropy( seq.substring( start - 1, start + aliStart - 2 )); //System.out.println( "left1: " + seq.substring( start - 1, start + aliStart - 2 ) + " " + com + " " + start + "-" + ( start + aliStart - 2 )); 
if ( com <= cCut ) left.add( 0, start + "-" + ( start + aliStart - 2 )); } if ( ( end - start + 1 - aliEnd ) > 7 ) { com = cc.calculateNor2LetterEntropyWScoMatrix( seq.substring( start + aliEnd - 1, end )); //com = cc.calculateNorModifiedEntropy( seq.substring( start + aliEnd - 1, end )); //System.out.println( "left2:" + seq.substring( start + aliEnd - 1, end ) + " " + com + " " + ( start + aliEnd ) + "-" + end ); if ( com <= cCut ) left.add( ( start + aliEnd ) + "-" + end ); } return left; } public Vector addToResult( Vector result, Vector left ) { Vector tmpResult = result; String str1 = new String(), str2 = new String(); int j = 0; for ( int i = 0; i < left.size(); i++ ) { str1 = (String) left.elementAt( i ); int index = str1.indexOf( "-" ); int endLeft = Integer.parseInt( str1.substring( index + 1 )); boolean found = false; while ( !found ) { if ( j < result.size() ) { str2 = (String) result.elementAt( j ); index = str2.indexOf( "-" ); int startResult = Integer.parseInt( str2.substring( 0, index )); if ( endLeft < startResult ) { found = true; // System.out.println( "Insert left into result: " + str1 + " " + str2 ); result.add( j , str1 ); j = j + 2; } else j++; } else { result.add( str1 ); //System.out.println( "append to the end of result" ); found = true; } } } return result; } public Vector checkAdjBlock ( int start1, int end1, String adjBlock, String seq, double cCut, String mark ) { Vector result = new Vector(); double com = 0; String seq1 = seq.substring( start1 - 1, end1 ); //System.out.println( "current block: " +start1 + " " + end1 + " " + seq1 ); int index1 = adjBlock.indexOf( "-" ); int start2 = Integer.parseInt( adjBlock.substring( 0, index1 )); int end2 = Integer.parseInt( adjBlock.substring( index1 + 1 )); String seq2 = seq.substring( start2 - 1, end2 ); String aliPos = new String(); if ( mark.equals( "front" )) aliPos = findAlignment( seq2, seq1 ); else aliPos = findAlignment( seq1,seq2 ); if ( aliPos.length() != 0 ) { // format of aliPos: 
'a1-a2 b1-b2' index1 = aliPos.indexOf( "-" ); int index2 = aliPos.indexOf( " " ); int aliStart2 = Integer.parseInt ( aliPos.substring( 0, index1 )); int aliEnd2 = Integer.parseInt ( aliPos.substring( index1 + 1, index2 )); String aliSeq1 = new String(), aliSeq2 = new String(); if ( mark.equals( "front" )) { aliSeq2 = seq.substring( start2 + aliStart2 - 2, start2 + aliEnd2 - 1 ); //System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq2 ); } else { aliSeq1 = seq.substring( start1 + aliStart2 - 2, start1 + aliEnd2 - 1 ); //System.out.println( "the first aligned subSeq: " + aliStart2 + " " + aliEnd2 + " " + aliSeq1 ); } aliPos = aliPos.substring( index2 + 1 ); index1 = aliPos.indexOf( "-" ); int aliStart1 = Integer.parseInt( aliPos.substring( 0, index1 )); int aliEnd1 = Integer.parseInt( aliPos.substring( index1 + 1 )); if ( mark.equals( "front" )) { aliSeq1 = seq.substring( start1 + aliStart1 - 2, start1 + aliEnd1 - 1 ); //System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq1 ); } else { aliSeq2 = seq.substring( start2 + aliStart1 - 2, start2 + aliEnd1 - 1 ); //System.out.println( "the second subSeq: " + aliStart1 + " " + aliEnd1 + " " + aliSeq2 ); } boolean decOrNot = true; if ( mark.equals( "front" )) { decOrNot = true; // decOrNot = checkCombinedSubBlock( aliSeq2,aliSeq1, cCut ); if ( decOrNot ) { result.add( ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 ) ); //System.out.println( "added to result1: " + ( start1 + aliStart1 - 1 )+ "-" + ( start1 + aliEnd1 - 1 )); Vector left = checkLeftRegs( aliStart1, aliEnd1, start1, end1, seq, cCut ); result = addToResult( result, left ); } } else { //decOrNot = checkCombinedSubBlock( aliSeq1,aliSeq2, cCut ); decOrNot = true; if ( decOrNot ) { result.add( ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1) ); //System.out.println( "added to redult2: " + ( start1 + aliStart2 - 1) + "-" + ( start1 + aliEnd2 - 1)); Vector left = checkLeftRegs( 
aliStart2, aliEnd2, start1, end1, seq, cCut ); result = addToResult( result, left ); result.add( ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 ) ); //System.out.println( "added to redult3: " + ( start2 + aliStart1 - 1 ) + "-" + ( start2 + aliEnd1 - 1 )); left = checkLeftRegs( aliStart1, aliEnd1, start2, end2, seq, cCut ); result = addToResult( result, left ); } } } return result; } public Vector checkDeletability ( Vector lcrs, int maxIndex, String seq, double cCut ) { Vector result = new Vector(); String block = new String(); int start1 = 0, end1 = 0; block = (String)lcrs.elementAt( maxIndex ); int index = block.indexOf( "-" ); start1 = Integer.parseInt( block.substring( 0, index )); end1 = Integer.parseInt( block.substring( index + 1 )); if ( maxIndex != 0 ) { block = (String)lcrs.elementAt( maxIndex - 1 ); // System.out.println( "front adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "front" ); } if ( result.size() == 0 ) { if ( maxIndex != ( lcrs.size() - 1 ) ) { block = (String)lcrs.elementAt( maxIndex + 1 ); //System.out.println( "back adjacent block: " + block ); result = checkAdjBlock( start1, end1, block, seq, cCut, "back" ); if ( result.size() != 0 ) result.add( "back" ); } } return result; } public Vector filter ( Vector lcrs, String seq ) { Vector tmpLcrs = lcrs, com = new Vector(); float len = tmpLcrs.size(); double singleCom = 0, max = -222222222; String str = new String(), block = new String(); if ( tmpLcrs.size() != 1 ) { complexityCalculator cc = new complexityCalculator(); cc.initializeAlphabet(); for ( int i = 0; i < len; i++ ) { str = (String)tmpLcrs.elementAt( i ); int index = str.indexOf( "-" ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); singleCom = cc.calculateNor2LetterEntropyWScoMatrix( str ); //singleCom = cc.calculateReciprocalPro( str ); //singleCom= cc.calculateNorModifiedEntropy( str ); com.add( Double.toString( 
singleCom )); } int i = 0, j = 0, maxIndex = 0; double limit = 0, cCut = 0; if ( seq.length() > 500 ) limit = len * 0.6; // 0.6 else limit = len * 0.3; //0.3 while ( i < limit ) { int l = com.size(); j = 0; max = -222222222; while ( j < l ) { str = ( String ) com.elementAt( j ); singleCom = Double.parseDouble( str ); if ( singleCom > max ) { max = singleCom; maxIndex = j; } ++ j; } //System.out.println( "com: " + com.elementAt( maxIndex )); cCut = Double.parseDouble( (String)com.remove( maxIndex )); ++i; } //System.out.println( "cCut: " + cCut ); j = 0; i = 0; len = tmpLcrs.size(); Vector result = new Vector(); while (( i < limit ) && ( j < len )) { str = (String)tmpLcrs.elementAt( j ); int index = str.indexOf( "-" ); //System.out.println( "current block?" + str.substring( 0, index ) + "?" + str.substring( index + 1 ) ); str = seq.substring( Integer.parseInt( str.substring( 0, index )) - 1, Integer.parseInt( str.substring( index + 1 ) )); //System.out.println( "cur subseq???" + str ); //singleCom = cc.calculateReciprocalPro( str ); //singleCom= cc.calculateNorModifiedEntropy( str ); singleCom = cc.calculateNor2LetterEntropyWScoMatrix( str ); //System.out.println( "singlecom:"+ singleCom + " " + cCut ); if ( singleCom >= cCut ) { result = new Vector(); result = checkDeletability( tmpLcrs, j, seq, cCut ); int rSize =result.size(); boolean fromBack = false; if ( rSize != 0 ) { str = (String)result.elementAt( rSize - 1 ); if ( str.equals( "back")) { --rSize; fromBack = true; } // System.out.println( "remove: "+ (String)tmpLcrs.remove( j )); tmpLcrs.remove( j ); for ( int k = 0; k < rSize; k++ ) { // add 'result' Vector into tmpLcrs in order str = ( String ) result.elementAt( k ); //System.out.print( "*"+ str + "*" ); tmpLcrs.add( j, str ); ++j; } //System.out.println(); // if ( j < tmpLcrs.size()) //System.out.println( "the next one:" + tmpLcrs.elementAt( j )); if ( fromBack ) { //System.out.println( "yes, from back " ); //System.out.println( "removed: "+ 
tmpLcrs.remove( j )); tmpLcrs.remove( j ); } len = tmpLcrs.size(); } else { tmpLcrs.remove( j ); // System.out.println( "removed coz of high complexity" ); ++i; len = tmpLcrs.size(); } } else ++j; } //System.out.println(i + " " + j ); } return tmpLcrs; } public Vector postProcess( int i, String seq ) { Vector lcrs = new Vector(); Vector blocks = new Vector(); Vector pos = getPositions( i ); if ( pos.size() != 0 ) { blocks = sortPositions( pos ); // blocks = filter( blocks, seq ); //printLCRs( blocks ); } lcrs = pickUpDrop( blocks, seq ); //printLCRs( lcrs ); lcrs = mergePurge( lcrs ); //printLCRs( lcrs ); lcrs = filter( lcrs, seq ); //System.out.println( "AFTER************" ); printLCRs( lcrs ); return lcrs; } public void computeLCRPercentage ( Vector lcrs, String str) { } public void printLCRs( Vector LCRs ) { int len = LCRs.size(); for ( int i = 0; i < len; i ++ ) { String str = (String)LCRs.get( i ); System.out.print( str + " " ); } System.out.println( ); } public void printLCRs ( Vector lcrs, String str) { } public void startt( int th1, int th2, int th3, int th4 ) { String str = new String(), id = new String(), nextId = new String(); int index = 0; boolean first = true; try { while ( str != null ) { str = rf.readLine(); str = generateSequence( str ); if ( str != null ) { if ( str.indexOf( ">" ) != -1 ) { index = str.indexOf( "!" 
); id = nextId; nextId = str.substring( 0, index ); str = str.substring( index + 1 ); } else id = nextId; if (!first ) { System.out.println(); System.out.println( id ); } else { id = nextId; first = false; } vertices.clear(); edges.clear(); lps.clear(); for ( int i = 0; i < 20; i++ ) { fVecNor[i] = 0f; fVecUnNor[i] = 0f; } str = str.trim(); int i = workOnSequence( str, th1, th2, th3, th4 ); Vector lcrs = postProcess( i, str );//process all longest paths from every connected subgraph /* computeLCRPercentage( lcrs, str ); printLCRs( lcrs, str ); */ } } rf.close(); } catch ( IOException ex ) { } } public static void main ( String args[] ) { int th1 = Integer.parseInt( args[2] ); int th2 = Integer.parseInt( args[3] ); int th3 = Integer.parseInt( args[4] ); int th4 = th2; gbm g = new gbm( args[0]); g.readRNRMatrices( args[1] ); g.readScoringMatrix( "knowledge/blosum62Matrix" ); g.getComCut( args[5] ); g.startt( th1, th2, th3, th4 ); /* try { Sequence s1 = SequenceParser.parse("VVVVVV" ); Sequence s2 = SequenceParser.parse( "LAELLAKKSDRDSPKK"); Alignment alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("BLOSUM62"), 10f, 0.5f); System.out.println( "similarity: " + alignment.getSimilarity() ); String result = new Pair().format(alignment); System.out.println( "*****************" ); System.out.println( result ); } catch (Exception e) { //logger.log(Level.SEVERE, "Failed running example: " + e.getMessage(), e); } */ } } /* 1. Format of the output: SACACPQTSOP......( 60 letters) XXXX TPQSKAQ..........( 60 letters) */ PK f}7Uy0 ;applications/gbmWMatricesLCRBlocksAfterLongestPath.java.alt/* ** Author: Xuehui Li ** Date: March, 2005 ** "gbm" or "GBM" is the abbrevaition of "A graph-based method for detecting low-complexity reions". ** This is program is used to find low-complexity regions in sequences ** NOTE: all vertices and edges have topological orders ** There are totally five input paramaters. The first one is the sequence file name. 
The second one the learned matrix file( /cise/research/tamer/xli/LCR/graphLCR/swissprotLearnedMatrices ). The remaining three are the threshold 1, threshold 2 and threshold 3, respectively. At this tiem, all LCR Blocks generated in both /cise/research/tamer/xli/LCR/graphLCR/swissprotLCRBlocks/ and /cise/research/tamer/xli/LCR/graphLCR/pfamLCRBlocks/ are based on the three thresholds: " 3 15 5" */ package applications; import java.io.*; import java.util.*; class gbm { // vertices and edges are vectors used to keep all the vertices, edges in a graph generated from a sequence, respectively. vertexQueue is a vector used to keep all vertices whose indegree is zero. lps is a vector used to keep all the longest paths in all connected subgraphs of a sequence. Every longest path in lps is a vector of vertices, excluding the dummy source. // subVertices and subEdges are vectors used to keep all the vertices, edges in a connected-graph which is a subgraph of the graph generated from a sequence, respectively. 
private File f; private RandomAccessFile rf; private Vector vertices, subVertices, edges, subEdges, vertexQueue, lps; private float[][] repeatMatrix, nonRepeatMatrix; private float[] fVecNor, fVecUnNor; private Vector alphabet; private float singleVertexOnly; public gbm ( String fileName ) { // the graph is given in a file where every line represents an edge and has the fromat of "source sink weight" initializeAlphabet(); try{ f = new File ( fileName ); rf = new RandomAccessFile ( f, "r" ); } catch ( IOException ex ) { } vertices = new Vector(); subVertices = new Vector(); edges = new Vector(); subEdges = new Vector(); vertexQueue = new Vector(); lps = new Vector(); repeatMatrix = new float[20][20]; nonRepeatMatrix = new float[20][20]; fVecNor = new float[20]; fVecUnNor = new float[20]; for ( int i = 0; i < 20; i++ ) fVecUnNor[i] = 0f; singleVertexOnly = 0f; } public void test() { Vertex v = new Vertex("1", "6", 0, 1); vertices.add( v ); if (vertices.contains(v)) System.out.println( "Yes" ); else System.out.println( "No" ); } public void initializeAlphabet() { alphabet = new Vector(); alphabet.add( "A" ); alphabet.add( "R" ); alphabet.add( "N" ); alphabet.add( "D" ); alphabet.add( "C" ); alphabet.add( "Q" ); alphabet.add( "E" ); alphabet.add( "G" ); alphabet.add( "H" ); alphabet.add( "I" ); alphabet.add( "L" ); alphabet.add( "K" ); alphabet.add( "M" ); alphabet.add( "F" ); alphabet.add( "P" ); alphabet.add( "S" ); alphab