@article {Lin:November 2002:0022-0000:570, author = "Lin Y-L.", author = "Jiang T.", author = "Chao K-M.", title = "Efficient algorithms for locating the length-constrained heaviest segments with applications to biomolecular sequence analysis", journal = "Journal of Computer and System Sciences", volume = "65", year = "November 2002", abstract = "

We study two fundamental problems concerning the search for interesting regions in sequences: (i) given a sequence of real numbers of length n and an upper boundU, find a consecutive subsequence of length at most U with the maximum sum and (ii) given a sequence of real numbers of length n and a lower bound L, find a consecutive subsequence of length at least L with the maximum average. We present an O(n)-time algorithm for the first problem and an O(n log L)-time algorithm for the second. The algorithms have potential applications in several areas of biomolecular sequence analysis including locating GC-rich regions in a genomic DNA sequence, post-processing sequence alignments, annotating multiple sequence alignments, and computing length-constrained ungapped local alignment. Our preliminary tests on both simulated and real data demonstrate that the algorithms are very efficient and able to locate useful (such as GC-rich) regions.

", pages = "570-586(17)", url = "http://www.ingentaconnect.com/content/ap/ss/2002/00000065/00000003/art00010" doi = "doi:10.1016/S0022-0000(02)00010-7" }