<?xml version="1.0" encoding="ISO-8859-1"?>

<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns="http://purl.org/rss/1.0/"
 xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
 xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
 xmlns:admin="http://webns.net/mvcb/"
>

<channel rdf:about="http://bioinformatics.oxfordjournals.org">
<title>Bioinformatics - recent issues</title>
<link>http://bioinformatics.oxfordjournals.org</link>
<description>Bioinformatics - RSS feed of recent issues (covers the latest 3 issues, including the current issue)</description>
<prism:eIssn>1460-2059</prism:eIssn>
<prism:publicationName>Bioinformatics</prism:publicationName>
<prism:issn>1367-4803</prism:issn>
<items>
 <rdf:Seq>
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1715?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1722?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1731?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1739?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1746?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1754?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1761?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1768?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1775?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1782?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1789?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1796?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1802?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1805?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1807?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1814?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1822?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1831?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1833?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1836?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1838?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1841?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1843?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1846?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1575?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1587?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1594?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1602?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1609?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1617?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1625?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1632?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1640?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1647?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1655?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1662?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1669?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1680?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1686?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1694?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1702?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1709?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1711?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1713?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i2?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i6?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i15?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i21?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i30?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i39?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i45?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i54?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i63?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i69?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i77?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i85?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i94?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i101?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i110?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i119?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i128?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i137?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i145?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i154?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i161?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i169?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i179?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i187?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i196?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i204?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i213?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i222?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i231?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i240?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i247?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i253?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i259?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i268?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i276?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i281?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i289?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i296?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i305?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i313?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i321?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i330?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i339?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i348?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i356?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i365?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i374?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i383?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1475?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1476?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1484?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1492?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1498?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1506?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1513?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1521?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1528?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1536?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1543?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1550?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1552?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1554?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1556?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1559?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1561?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1564?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1566?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1568?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1570?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1574?rss=1" />
 </rdf:Seq>
</items>
</channel>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1715?rss=1">
<title><![CDATA[Hierarchical hidden Markov model with application to joint analysis of ChIP-chip and ChIP-seq data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1715?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Chromatin immunoprecipitation (ChIP) experiments followed by array hybridization, or ChIP-chip, is a powerful approach for identifying transcription factor binding sites (TFBS) and has been widely used. Recently, massively parallel sequencing coupled with ChIP experiments (ChIP-seq) has been increasingly used as an alternative to ChIP-chip, offering cost-effective genome-wide coverage and resolution up to a single base pair. For many well-studied TFs, both ChIP-seq and ChIP-chip experiments have been applied and their data are publicly available. Previous analyses have revealed substantial technology-specific binding signals despite strong correlation between the two sets of results. Therefore, it is of interest to see whether the two data sources can be combined to enhance the detection of TFBS.</p>
<p><b>Results:</b> In this work, a hierarchical hidden Markov model (HHMM) is proposed for combining data from ChIP-seq and ChIP-chip. In HHMM, inference results from individual HMMs in ChIP-seq and ChIP-chip experiments are summarized by a higher-level HMM. Simulation studies show the advantage of HHMM when data from both technologies co-exist. Analysis of two well-studied TFs, NRSF and CCCTC-binding factor (CTCF), also suggests that HHMM yields improved TFBS identification in comparison to analyses using individual data sources or a simple merger of the two.</p>
<p><b>Availability:</b> Source code for the software ChIPmeta is freely available for download at <inter-ref locator="http://www.umich.edu/~hwchoi/HHMMsoftware.zip" locator-type="url">http://www.umich.edu/~hwchoi/HHMMsoftware.zip</inter-ref>, implemented in C and supported on linux.</p>
<p><b>Contact:</b> <inter-ref locator="ghoshd@psu.edu" locator-type="email">ghoshd@psu.edu</inter-ref>; <inter-ref locator="qin@umich.edu" locator-type="email">qin@umich.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp312/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Choi, H., Nesvizhskii, A. I., Ghosh, D., Qin, Z. S.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp312</dc:identifier>
<dc:title><![CDATA[Hierarchical hidden Markov model with application to joint analysis of ChIP-chip and ChIP-seq data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1721</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1715</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1722?rss=1">
<title><![CDATA[SOrt-ITEMS: Sequence orthology based approach for improved taxonomic estimation of metagenomic sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1722?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> One of the first steps in metagenomic analysis is the assignment of reads/contigs obtained from various sequencing technologies to their correct taxonomic bins. Similarity-based binning methods assign a read to a taxon/clade, based on the pattern of significant BLAST hits generated against sequence databases. Existing methods, which use bit-score as the sole parameter to ascertain the significance of BLAST hits, have limited specificity and accuracy of binning. A new binning algorithm, called SOrt-ITEMS, is introduced, which addresses these limitations. The method uses alignment parameters besides the bit score to first identify an appropriate taxonomic level where the read can be assigned. An orthology-based approach is subsequently used by the method for the final assignment.</p>
<p><b>Results:</b> The performance of SOrt-ITEMS has been validated with reads simulating sequences from 454 and Sanger sequencing technologies. In addition, the taxonomic composition of the Sargasso Sea data set has been analyzed using SOrt-ITEMS. SOrt-ITEMS shows improved specificity and accuracy of assignments especially in simulated scenarios, wherein sequences corresponding to the source organism of the reads are absent in the reference database.</p>
<p><b>Availability:</b> SOrt-ITEMS software is available for download from: <inter-ref locator="http://metagenomics.atc.tcs.com/binning/SOrt-ITEMS" locator-type="url">http://metagenomics.atc.tcs.com/binning/SOrt-ITEMS</inter-ref>. No license is needed for academic and nonprofit use.</p>
<p><b>Contact:</b> <inter-ref locator="sharmila@atc.tcs.com" locator-type="email">sharmila@atc.tcs.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp317/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Monzoorul Haque, M., Ghosh, T. S., Komanduri, D., Mande, S. S.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp317</dc:identifier>
<dc:title><![CDATA[SOrt-ITEMS: Sequence orthology based approach for improved taxonomic estimation of metagenomic sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1730</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1722</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1731?rss=1">
<title><![CDATA[Data structures and compression algorithms for genomic sequence data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1731?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The continuing exponential accumulation of full genome data, including full diploid human genomes, creates new challenges not only for understanding genomic structure, function and evolution, but also for the storage, navigation and privacy of genomic data. Here, we develop data structures and algorithms for the efficient storage of genomic and other sequence data that may also facilitate querying and protecting the data.</p>
<p><b>Results:</b> The general idea is to encode only the differences between a genome sequence and a reference sequence, using absolute or relative coordinates for the location of the differences. These locations and the corresponding differential variants can be encoded into binary strings using various entropy coding methods, from fixed codes such as Golomb and Elias codes, to variable codes, such as Huffman codes. We demonstrate the approach and various tradeoffs using highly variable human mitochondrial genome sequences as a testbed. With only a partial level of optimization, 3615 genome sequences occupying 56 MB in GenBank are compressed down to only 167 KB, achieving a 345-fold compression rate, using the revised Cambridge Reference Sequence as the reference sequence. Using the consensus sequence as the reference sequence, the data can be stored using only 133 KB, corresponding to a 433-fold level of compression, roughly a 23% improvement. Extensions to nuclear genomes and high-throughput sequencing data are discussed.</p>
<p><b>Availability:</b> Data are publicly available from GenBank, the HapMap web site, and the MITOMAP database. Supplementary materials with additional results, statistics, and software implementations are available from <inter-ref locator="http://mammag.web.uci.edu/bin/view/Mitowiki/ProjectDNACompression" locator-type="url">http://mammag.web.uci.edu/bin/view/Mitowiki/ProjectDNACompression</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="pfbaldi@ics.uci.edu" locator-type="email">pfbaldi@ics.uci.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Brandon, M. C., Wallace, D. C., Baldi, P.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp319</dc:identifier>
<dc:title><![CDATA[Data structures and compression algorithms for genomic sequence data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1738</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1731</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1739?rss=1">
<title><![CDATA[ESG: extended similarity group method for automated protein function prediction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1739?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The importance of accurate automatic protein function prediction is ever increasing in the face of a large number of newly sequenced genomes and proteomics data that are awaiting biological interpretation. Conventional methods have focused on high sequence similarity-based annotation transfer which relies on the concept of homology. However, many cases have been reported in which simple transfer of function from top hits of a homology search causes erroneous annotation. New methods are required to handle the sequence similarity in a more robust way to combine together signals from strongly and weakly similar proteins for effectively predicting function for unknown proteins with high reliability.</p>
<p><b>Results:</b> We present the extended similarity group (ESG) method, which performs iterative sequence database searches and annotates a query sequence with Gene Ontology terms. Each annotation is assigned a probability based on its relative similarity score with the multiple-level neighbors in the protein similarity graph. We will depict how the statistical framework of ESG improves the prediction accuracy by iteratively taking into account the neighborhood of query protein in the sequence similarity space. ESG outperforms conventional PSI-BLAST and the protein function prediction (PFP) algorithm. It is found that the iterative search is effective in capturing multiple-domains in a query protein, enabling accurate prediction of several functions which originate from different domains.</p>
<p><b>Availability:</b> ESG web server is available for automated protein function prediction at <inter-ref locator="http://dragon.bio.purdue.edu/ESG/" locator-type="url">http://dragon.bio.purdue.edu/ESG/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="cspark@cau.ac.kr" locator-type="email">cspark@cau.ac.kr</inter-ref>; <inter-ref locator="dkihara@purdue.edu" locator-type="email">dkihara@purdue.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp309/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chitale, M., Hawkins, T., Park, C., Kihara, D.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp309</dc:identifier>
<dc:title><![CDATA[ESG: extended similarity group method for automated protein function prediction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1745</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1739</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1746?rss=1">
<title><![CDATA[Efficient computation of all perfect repeats in genomic sequences of up to half a gigabyte, with a case study on the human genome]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1746?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> There is significant ongoing research to identify the number and types of repetitive DNA sequences. As more genomes are sequenced, efficiency and scalability in computational tools become mandatory. Existing tools fail to find distant repeats because they cannot accommodate whole chromosomes, only segments. Also, a quantitative framework for repetitive elements inside a genome or across genomes is still missing.</p>
<p><b>Results:</b> We present a new efficient algorithm and its implementation as a software tool to compute all perfect repeats in inputs of up to 500 million nucleotide bases, possibly containing many genomes. Our algorithm is based on a suffix array construction and a novel procedure to extract all perfect repeats in the entire input, that can be arbitrarily distant, and with no bound on the repeat length. We tested the software on the <I>Homo sapiens</I> DNA genome NCBI 36.49. We computed all perfect repeats of at least 40 bases occurring in any two chromosomes with exact matching. We found that each <I>H.sapiens</I> chromosome shares ~10% of its full sequence with every other human chromosome, distributed more or less evenly among the chromosome surfaces. We give statistics including a quantification of repeats by diversity, length and number of occurrences. We compared the computed repeats against all biological repeats currently obtainable from Ensembl enlarged with the output of the dust program and all elements identified by TRF and RepeatMasker (<inter-ref locator="ftp://ftp.ebi.ac.uk/pub/databases/ensembl/jherrero/.repeats/all_repeats.txt.bz2" locator-type="url">ftp://ftp.ebi.ac.uk/pub/databases/ensembl/jherrero/.repeats/all_repeats.txt.bz2</inter-ref>). We report novel repeats as well as new occurrences of repeats matching with known biological elements.</p>
<p><b>Availability:</b> The source code, results and visualization of some statistics are accessible from <inter-ref locator="http://kapow.dc.uba.ar/patterns/" locator-type="url">http://kapow.dc.uba.ar/patterns/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="vbecher@dc.uba.ar" locator-type="email">vbecher@dc.uba.ar</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Becher, V., Deymonnaz, A., Heiber, P.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp321</dc:identifier>
<dc:title><![CDATA[Efficient computation of all perfect repeats in genomic sequences of up to half a gigabyte, with a case study on the human genome]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1753</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1746</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1754?rss=1">
<title><![CDATA[Fast and accurate short read alignment with Burrows-Wheeler transform]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1754?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The enormous amount of short reads generated by the new DNA sequencing technologies calls for the development of fast and accurate read alignment programs. A first generation of hash table-based methods has been developed, including MAQ, which is accurate, feature rich and fast enough to align short reads from a single individual. However, MAQ does not support gapped alignment for single-end reads, which makes it unsuitable for alignment of longer reads where indels may occur frequently. The speed of MAQ is also a concern when the alignment is scaled up to the resequencing of hundreds of individuals.</p>
<p><b>Results:</b> We implemented Burrows-Wheeler Alignment tool (BWA), a new read alignment package that is based on backward search with Burrows&ndash;Wheeler Transform (BWT), to efficiently align short sequencing reads against a large reference sequence such as the human genome, allowing mismatches and gaps. BWA supports both base space reads, e.g. from Illumina sequencing machines, and color space reads from AB SOLiD machines. Evaluations on both simulated and real data suggest that BWA is ~10&ndash;20<FONT FACE="arial,helvetica">x</FONT> faster than MAQ, while achieving similar accuracy. In addition, BWA outputs alignment in the new standard SAM (Sequence Alignment/Map) format. Variant calling and other downstream analyses after the alignment can be achieved with the open source SAMtools software package.</p>
<p><b>Availability:</b> <inter-ref locator="http://maq.sourceforge.net" locator-type="url">http://maq.sourceforge.net</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="rd@sanger.ac.uk" locator-type="email">rd@sanger.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Li, H., Durbin, R.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp324</dc:identifier>
<dc:title><![CDATA[Fast and accurate short read alignment with Burrows-Wheeler transform]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1760</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1754</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1761?rss=1">
<title><![CDATA[pGenTHREADER and pDomTHREADER: new methods for improved protein fold recognition and superfamily discrimination]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1761?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Generation of structural models and recognition of homologous relationships for unannotated protein sequences are fundamental problems in bioinformatics. Improving the sensitivity and selectivity of methods designed for these two tasks therefore has downstream benefits for many other bioinformatics applications.</p>
<p><b>Results:</b> We describe the latest implementation of the GenTHREADER method for structure prediction on a genomic scale. The method combines profile&ndash;profile alignments with secondary-structure specific gap-penalties, classic pair- and solvation potentials using a linear combination optimized with a regression SVM model. We find this combination significantly improves both detection of useful templates and accuracy of sequence-structure alignments relative to other competitive approaches. We further present a second implementation of the protocol designed for the task of discriminating superfamilies from one another. This method, pDomTHREADER, is the first to incorporate both sequence and structural data directly in this task and improves sensitivity and selectivity over the standard version of pGenTHREADER and three other standard methods for remote homology detection.</p>
<p><b>Contact:</b> <inter-ref locator="d.jones@cs.ucl.ac.uk" locator-type="email">d.jones@cs.ucl.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp302/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Lobley, A., Sadowski, M. I., Jones, D. T.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp302</dc:identifier>
<dc:title><![CDATA[pGenTHREADER and pDomTHREADER: new methods for improved protein fold recognition and superfamily discrimination]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1767</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1761</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1768?rss=1">
<title><![CDATA[Literature-based priors for gene regulatory networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1768?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The use of prior knowledge to improve gene regulatory network modelling has often been proposed. In this article we present the first research on the massive incorporation of prior knowledge from literature for Bayesian network learning of gene networks. As the publication rate of scientific papers grows, updating online databases, which have been proposed as potential prior knowledge in past research, becomes increasingly challenging. The novelty of our approach lies in the use of gene-pair association scores that describe the overlap in the contexts in which the genes are mentioned, generated from a large database of scientific literature, harnessing the information contained in a huge number of documents into a simple, clear format.</p>
<p><b>Results:</b> We present a method to transform such literature-based gene association scores to network prior probabilities, and apply it to learn gene sub-networks for yeast, <I>Escherichia coli</I> and Human organisms. We also investigate the effect of weighting the influence of the prior knowledge. Our findings show that literature-based priors can improve both the number of true regulatory interactions present in the network and the accuracy of expression value prediction on genes, in comparison to a network learnt solely from expression data. Networks learnt with priors also show an improved biological interpretation, with identified subnetworks that coincide with known biological pathways.</p>
<p><b>Contact:</b> <inter-ref locator="emma.steele@brunel.ac.uk" locator-type="email">emma.steele@brunel.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp277/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Steele, E., Tucker, A., Hoen, P.A.C. t, Schuemie, M.J.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp277</dc:identifier>
<dc:title><![CDATA[Literature-based priors for gene regulatory networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1774</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1768</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1775?rss=1">
<title><![CDATA[Gradient lasso for Cox proportional hazards model]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1775?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> There has been an increasing interest in expressing a survival phenotype (e.g. time to cancer recurrence or death) or its distribution in terms of a subset of the expression data of a subset of genes. Due to high dimensionality of gene expression data, however, there is a serious problem of collinearity in fitting a prediction model, e.g. Cox's proportional hazards model. To avoid the collinearity problem, several methods based on penalized Cox proportional hazards models have been proposed. However, those methods suffer from severe computational problems, such as slow or even failed convergence, because of high-dimensional matrix inversions required for model fitting. We propose to implement the penalized Cox regression with a lasso penalty via the gradient lasso algorithm that yields faster convergence to the global optimum than do other algorithms. Moreover the gradient lasso algorithm is guaranteed to converge to the optimum under mild regularity conditions. Hence, our gradient lasso algorithm can be a useful tool in developing a prediction model based on high-dimensional covariates including gene expression data.</p>
<p><b>Results:</b> Results from simulation studies showed that the prediction model by gradient lasso recovers the prognostic genes. Also results from diffuse large B-cell lymphoma datasets and Norway/Stanford breast cancer dataset indicate that our method is very competitive compared with popular existing methods by Park and Hastie and Goeman in its computational time, prediction and selectivity.</p>
<p><b>Availability:</b> R package <ty>glcoxph</ty> is available at <inter-ref locator="http://datamining.dongguk.ac.kr/R/glcoxph" locator-type="url">http://datamining.dongguk.ac.kr/R/glcoxph</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="park463@uos.ac.kr" locator-type="email">park463@uos.ac.kr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Sohn, I., Kim, J., Jung, S.-H., Park, C.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp322</dc:identifier>
<dc:title><![CDATA[Gradient lasso for Cox proportional hazards model]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1781</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1775</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1782?rss=1">
<title><![CDATA[Relating periodicity of nucleosome organization and gene regulation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1782?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The relationship between nucleosome positioning and gene regulation is fundamental yet complex. Previous studies on genomic nucleosome positions have revealed a correlation between nucleosome occupancy on promoters and gene expression levels. Many of these studies focused on individual nucleosomes, especially those proximal to transcription start sites. To study the collective effect of multiple nucleosomes on the gene expression, we developed a mathematical approach based on autocorrelation to relate genomic nucleosome organization to gene regulation.</p>
<p><b>Results:</b> We found that nucleosome organization in gene promoters can be well described by autocorrelation transformation. Some promoters show obvious periods in their nucleosome organization, while others have no clear periodicity. The genes with periodic nucleosome organization in promoters tend to be lower expressed than the genes without periodic nucleosome organization. These suggest that regular organization of nucleosomes plays a critical role in gene regulation. To quantitatively associate nucleosome organization and gene expression, we predicted gene expression solely based on nucleosome status and found that nucleosome status accounts for ~25% of the observed gene expression variability. Furthermore, we explored the underlying forces that maintain the periodicity in nucleosome organization, namely intrinsic (i.e. DNA sequence) and extrinsic forces (i.e. chromatin remodeling factors). We found that the extrinsic factors play a critical role in maintaining the periodic nucleosome organization.</p>
<p><b>Contact:</b> <inter-ref locator="jiang.qian@jhmi.edu" locator-type="email">jiang.qian@jhmi.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp323/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wan, J., Lin, J., Zack, D. J., Qian, J.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp323</dc:identifier>
<dc:title><![CDATA[Relating periodicity of nucleosome organization and gene regulation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1788</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1782</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1789?rss=1">
<title><![CDATA[Seeing the forest for the trees: using the Gene Ontology to restructure hierarchical clustering]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1789?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> There is a growing interest in improving the cluster analysis of expression data by incorporating into it prior knowledge, such as the Gene Ontology (GO) annotations of genes, in order to improve the biological relevance of the clusters that are subjected to subsequent scrutiny. The structure of the GO is another source of background knowledge that can be exploited through the use of semantic similarity.</p>
<p><b>Results:</b> We propose here a novel algorithm that integrates semantic similarities (derived from the ontology structure) into the procedure of deriving clusters from the dendrogram constructed during expression-based hierarchical clustering. Our approach can handle the multiple annotations, from different levels of the GO hierarchy, which most genes have. Moreover, it treats annotated and unannotated genes in a uniform manner. Consequently, the clusters obtained by our algorithm are characterized by significantly enriched annotations. In both cross-validation tests and when using an external index such as protein&ndash;protein interactions, our algorithm performs better than previous approaches. When applied to human cancer expression data, our algorithm identifies, among others, clusters of genes related to immune response and glucose metabolism. These clusters are also supported by protein&ndash;protein interaction data.</p>
<p><b>Contact:</b> <inter-ref locator="dotna@cs.bgu.ac.il" locator-type="email">dotna@cs.bgu.ac.il</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp327/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dotan-Cohen, D., Kasif, S., Melkman, A. A.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp327</dc:identifier>
<dc:title><![CDATA[Seeing the forest for the trees: using the Gene Ontology to restructure hierarchical clustering]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1795</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1789</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1796?rss=1">
<title><![CDATA[On the inference of spatial structure from population genetics data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1796?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> In a series of recent papers, Tess, a computer program based on the concept of hidden Markov random field, has been proposed to infer the number and locations of panmictic population units from the genotypes and spatial locations of these individuals. The method seems to be of broad appeal as it is conceptually much simpler than other competing methods and it has been reported by its authors to be fast and accurate. However, this methodology is not grounded in a formal statistical inference method and seems to rely to a large extent on arbitrary choices regarding the parameters used. The present article is an investigation of the accuracy of this method and an attempt to assess whether recent results reported on the basis of this method are genuine features of the genetic process or artefacts of the method.</p>
<p><b>Method:</b> I analyse simulated data consisting of populations at Hardy&ndash;Weinberg and linkage equilibrium and also data simulated under a scenario of isolation-by-distance at mutation&ndash;migration&ndash;drift equilibrium. <I>Arabidopsis thaliana</I> data previously analysed with this method are also reconsidered.</p>
<p><b>Results:</b> Using the Tess program under the no-admixture model to analyse data consisting of several genuine HWLE populations with individuals of pure ancestries leads to highly inaccurate results; using the Tess program under the admixture model to analyse data consisting of a continuous isolation-by-distance population leads to the inference of spurious HWLE populations whose number and features depend on the parameters used. Results previously reported about <I>A. thaliana</I> using Tess seem to a large extent to be artefacts of the statistical methodology used. The findings go beyond population clustering models and can be a help in designing more efficient algorithms based on graphs.</p>
<p><b>Availability:</b> The data analysed in the present article are available from <inter-ref locator="http://folk.uio.no/gillesg/Bioinformatics-HMRF" locator-type="url">http://folk.uio.no/gillesg/Bioinformatics-HMRF</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="gilles.guillot@bio.uio.no" locator-type="email">gilles.guillot@bio.uio.no</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp267/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Guillot, G.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp267</dc:identifier>
<dc:title><![CDATA[On the inference of spatial structure from population genetics data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1801</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1796</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1802?rss=1">
<title><![CDATA[Comment on 'On the inference of spatial structure from population genetics data']]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1802?rss=1</link>
<description><![CDATA[
<p><b>Contact:</b> <inter-ref locator="Olivier.francois@imag.fr" locator-type="email">Olivier.francois@imag.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Durand, E., Chen, C., Francois, O.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp337</dc:identifier>
<dc:title><![CDATA[Comment on 'On the inference of spatial structure from population genetics data']]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1804</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1802</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1805?rss=1">
<title><![CDATA[Response to comment on 'On the inference of spatial structure from population genetics data']]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1805?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Guillot, G.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp351</dc:identifier>
<dc:title><![CDATA[Response to comment on 'On the inference of spatial structure from population genetics data']]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1806</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1805</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1807?rss=1">
<title><![CDATA[Estimating the posterior probability that genome-wide association findings are true or false]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1807?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> A limitation of current methods used to declare significance in genome-wide association studies (GWAS) is that they do not provide clear information about the probability that GWAS findings are true or false. This lack of information increases the chance of false discoveries and may result in real effects being missed.</p>
<p><b>Results:</b> We propose a method to estimate the posterior probability that a marker has (no) effect given its test statistic value, also called the local false discovery rate (FDR), in the GWAS. A critical step involves the estimation of the parameters of the distribution of the true alternative tests. For this, we derived and implemented the real maximum likelihood function, which turned out to provide us with significantly more accurate estimates than the widely used mixture model likelihood. Actual GWAS data are used to illustrate properties of the posterior probability estimates empirically. In addition to evaluating individual markers, a variety of applications are conceivable. For instance, posterior probability estimates can be used to control the FDR more precisely than the Benjamini&ndash;Hochberg procedure.</p>
<p><b>Availability:</b> The codes are freely downloadable from the web site <inter-ref locator="http://www.people.vcu.edu/~jbukszar" locator-type="url">http://www.people.vcu.edu/~jbukszar</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="jbukszar@vcu.edu" locator-type="email">jbukszar@vcu.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp305/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Bukszar, J., McClay, J. L., van den Oord, E. J. C. G.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp305</dc:identifier>
<dc:title><![CDATA[Estimating the posterior probability that genome-wide association findings are true or false]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1813</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1807</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1814?rss=1">
<title><![CDATA[Structure discovery in PPI networks using pattern-based network decomposition]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1814?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The large, complex networks of interactions between proteins provide a lens through which one can examine the structure and function of biological systems. Previous analyses of these continually growing networks have primarily followed either of two approaches: large-scale statistical analysis of holistic network properties, or small-scale analysis of local topological features. Meanwhile, investigation of meso-scale network structure (above that of individual functional modules, while maintaining the significance of individual proteins) has been hindered by the computational complexity of structural search in networks. Examining protein&ndash;protein interaction (PPI) networks at the meso-scale may provide insights into the presence and form of relationships between individual protein complexes and functional modules.</p>
<p><b>Results:</b> In this article, we present an efficient algorithm for performing sub-graph isomorphism queries on a network and show its computational advantage over previous methods. We also present a novel application of this form of topological search which permits analysis of a network's structure at a scale between that of individual functional modules and that of network-wide properties. This analysis provides support for the presence of hierarchical modularity in the PPI network of <I>Saccharomyces cerevisiae</I>.</p>
<p><b>Contact:</b> <inter-ref locator="ying.liu@utdallas.edu" locator-type="email">ying.liu@utdallas.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bachman, P., Liu, Y.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp297</dc:identifier>
<dc:title><![CDATA[Structure discovery in PPI networks using pattern-based network decomposition]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1821</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1814</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1822?rss=1">
<title><![CDATA[Robust synthetic biology design: stochastic game theory approach]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1822?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Synthetic biology aims to engineer artificial biological systems to investigate natural biological phenomena and for a variety of applications. However, the development of synthetic gene networks is still difficult and most newly created gene networks are non-functioning due to uncertain initial conditions and disturbances of extra-cellular environments on the host cell. At present, how to design a robust synthetic gene network to work properly under these uncertain factors is the most important topic of synthetic biology.</p>
<p><b>Results:</b> A robust regulation design is proposed for a stochastic synthetic gene network to achieve the prescribed steady states under these uncertain factors from the minimax regulation perspective. This minimax regulation design problem can be transformed to an equivalent stochastic game problem. Since it is not easy to solve the robust regulation design problem of synthetic gene networks by non-linear stochastic game method directly, the Takagi&ndash;Sugeno (T&ndash;S) fuzzy model is proposed to approximate the non-linear synthetic gene network via the linear matrix inequality (LMI) technique through the Robust Control Toolbox in Matlab. Finally, an <I>in silico</I> example is given to illustrate the design procedure and to confirm the efficiency and efficacy of the proposed robust gene design method.</p>
<p><b>Availability:</b> <inter-ref locator="http://www.ee.nthu.edu.tw/bschen/SyntheticBioDesign_supplement.pdf" locator-type="url">http://www.ee.nthu.edu.tw/bschen/SyntheticBioDesign_supplement.pdf</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="bschen@ee.nthu.edu.tw" locator-type="email">bschen@ee.nthu.edu.tw</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp310/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chen, B.-S., Chang, C.-H., Lee, H.-C.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp310</dc:identifier>
<dc:title><![CDATA[Robust synthetic biology design: stochastic game theory approach]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1830</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1822</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1831?rss=1">
<title><![CDATA[Rahnuma: hypergraph-based tool for metabolic pathway prediction and network comparison]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1831?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We present a tool called Rahnuma for prediction and analysis of metabolic pathways and comparison of metabolic networks. Rahnuma represents metabolic networks as hypergraphs and computes all possible pathways between two or more metabolites. It provides an intuitive way to answer biological questions focusing on differences between organisms or the evolution of different species by allowing pathway-based metabolic network comparisons at an organism as well as at a phylogenetic level.</p>
<p><b>Availability:</b> Rahnuma is available online at <inter-ref locator="http://portal.stats.ox.ac.uk:8080/rahnuma/" locator-type="url">http://portal.stats.ox.ac.uk:8080/rahnuma/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="gail.preston@plants.ox.ac.uk" locator-type="email">gail.preston@plants.ox.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp269/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Mithani, A., Preston, G. M., Hein, J.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp269</dc:identifier>
<dc:title><![CDATA[Rahnuma: hypergraph-based tool for metabolic pathway prediction and network comparison]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1832</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1831</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1833?rss=1">
<title><![CDATA[baobabLUNA: the solution space of sorting by reversals]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1833?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Computing the reversal distance and searching for an optimal sequence of reversals to transform a unichromosomal genome into another are useful algorithmic tools to analyse real evolutionary scenarios. Currently, these problems can be solved by at least two available software packages, the most prominent of which are <ty>GRAPPA</ty> and <ty>GRIMM</ty>. However, the number of different optimal sequences is usually huge and taking only the distance and/or one example is often insufficient to do a proper analysis. Here, we offer an alternative and present <ty>baobabLUNA</ty>, a framework that contains an algorithm to give a compact representation of the whole space of solutions for the sorting by reversals problem.</p>
<p><b>Availability and Implementation:</b> Compiled code implemented in Java is freely available for download at <inter-ref locator="http://pbil.univ-lyon1.fr/software/luna/" locator-type="url">http://pbil.univ-lyon1.fr/software/luna/</inter-ref>. Documentation with methodological background, technical aspects, download and setup instructions, interface description and tutorial are available at <inter-ref locator="http://pbil.univ-lyon1.fr/software/luna/doc/luna-doc.pdf" locator-type="url">http://pbil.univ-lyon1.fr/software/luna/doc/luna-doc.pdf</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="mdvbraga@gmail.com" locator-type="email">mdvbraga@gmail.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp285/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Braga, M. D. V.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp285</dc:identifier>
<dc:title><![CDATA[baobabLUNA: the solution space of sorting by reversals]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1835</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1833</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1836?rss=1">
<title><![CDATA[Apollo: a community resource for genome annotation editing]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1836?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Apollo is a genome annotation-editing tool with an easy to use graphical interface. It is a component of the GMOD project, with ongoing development driven by the community. Recent additions to the software include support for the generic feature format version 3 (GFF3), continuous transcriptome data, a full Chado database interface, integration with remote services for on-the-fly BLAST and Primer BLAST analyses, graphical interfaces for configuring user preferences and full undo of all edit operations. Apollo's user community continues to grow, including its use as an educational tool for college and high-school students.</p>
<p><b>Availability:</b> Apollo is a Java application distributed under a free and open source license. Installers for Windows, Linux, Unix, Solaris and Mac OS X are available at <inter-ref locator="http://apollo.berkeleybop.org" locator-type="url">http://apollo.berkeleybop.org</inter-ref>, and the source code is available from the SourceForge CVS repository at <inter-ref locator="http://gmod.cvs.sourceforge.net/gmod/apollo" locator-type="url">http://gmod.cvs.sourceforge.net/gmod/apollo</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="elee@berkeleybop.org" locator-type="email">elee@berkeleybop.org</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ed, L., Nomi, H., Mark, G., Raymond, C., Suzanna, L.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp314</dc:identifier>
<dc:title><![CDATA[Apollo: a community resource for genome annotation editing]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1837</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1836</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1838?rss=1">
<title><![CDATA[NTAP: for NimbleGen tiling array ChIP-chip data analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1838?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> NTAP is designed to analyze ChIP-chip data generated by the NimbleGen tiling array platform and to accomplish various pattern recognition tasks that are useful especially for epigenetic studies. The modular design of NTAP makes the data processing highly customizable. Users can either use NTAP to perform the full process of NimbleGen tiling array data analysis, or choose post-processing modules in NTAP to analyze pre-processed epigenetic data generated by other platforms. The output of NTAP can be saved in standard GFF format files and visualized in GBrowse.</p>
<p><b>Availability and Implementation:</b> The source code of NTAP is freely available at <inter-ref locator="http://ntap.cbi.pku.edu.cn/" locator-type="url">http://ntap.cbi.pku.edu.cn/</inter-ref>. It is implemented in Perl and R and can be used on Linux, Mac and Windows platforms.</p>
<p><b>Contact:</b> <inter-ref locator="ntap@mail.cbi.pku.edu.cn" locator-type="email">ntap@mail.cbi.pku.edu.cn</inter-ref>; <inter-ref locator="luojc@pku.edu.cn" locator-type="email">luojc@pku.edu.cn</inter-ref>; <inter-ref locator="hekun78@gmail.com" locator-type="email">hekun78@gmail.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[He, K., Li, X., Zhou, J., Deng, X.-W., Zhao, H., Luo, J.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp320</dc:identifier>
<dc:title><![CDATA[NTAP: for NimbleGen tiling array ChIP-chip data analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1840</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1838</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1841?rss=1">
<title><![CDATA[rtracklayer: an R package for interfacing with genome browsers]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1841?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The <I>rtracklayer</I> package supports the integration of existing genome browsers with experimental data analyses performed in R. The user may (i) transfer annotation tracks to and from a genome browser and (ii) create and manipulate browser views to focus on a particular set of annotations in a specific genomic region. Currently, the UCSC genome browser is supported.</p>
<p><b>Availability:</b> The package is freely available from <inter-ref locator="http://www.bioconductor.org/" locator-type="url">http://www.bioconductor.org/</inter-ref>. A quick-start vignette is included with the package.</p>
<p><b>Contact:</b> <inter-ref locator="mflawren@fhcrc.org" locator-type="email">mflawren@fhcrc.org</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Lawrence, M., Gentleman, R., Carey, V.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp328</dc:identifier>
<dc:title><![CDATA[rtracklayer: an R package for interfacing with genome browsers]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1842</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1841</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1843?rss=1">
<title><![CDATA[MetaTISA: Metagenomic Translation Initiation Site Annotator for improving gene start prediction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1843?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We proposed a tool named MetaTISA with an aim to improve TIS prediction of current gene-finders for metagenomes. The method employs a two-step strategy to predict translation initiation sites (TISs) by first clustering metagenomic fragments into phylogenetic groups and then predicting TISs independently for each group in an unsupervised manner. As evaluated on experimentally verified TISs, MetaTISA greatly improves the accuracies of TIS prediction of current gene-finders.</p>
<p><b>Availability:</b> The C++ source code is freely available under the GNU GPL license <I>via</I> <inter-ref locator="http://mech.ctb.pku.edu.cn/MetaTISA/" locator-type="url">http://mech.ctb.pku.edu.cn/MetaTISA/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="hqzhu@pku.edu.cn" locator-type="email">hqzhu@pku.edu.cn</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp272/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Hu, G.-Q., Guo, J.-T., Liu, Y.-C., Zhu, H.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp272</dc:identifier>
<dc:title><![CDATA[MetaTISA: Metagenomic Translation Initiation Site Annotator for improving gene start prediction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1845</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1843</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1846?rss=1">
<title><![CDATA[PESTAS: a web server for EST analysis and sequence mining]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/14/1846?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We have developed a web server for the high-throughput annotation of expressed sequence tags (ESTs) called pipeline for EST analysis service (PESTAS). PESTAS processes entire datasets with an automated pipeline of 13 analytic services, then deposits the data into the MySQL database and transforms it into three kinds of reports: preprocessing, assembling and annotation. All annotated information is provided to the scientist and can be downloaded through a web browser. To get more relevant functional annotation results, a curation function was introduced with which biologists can easily change the best-hit annotation information. We included a gene chip module that detects gene expression differences between libraries by comparing accession number counts from BLAST search results. PESTAS also provides access to the pathway information of KEGG, which is useful for mapping the relationships among networks of annotated enzymes, and is especially valuable for those researchers interested in biological pathways.</p>
<p><b>Availability:</b> PESTAS is available at <inter-ref locator="http://pestas.kribb.re.kr/" locator-type="url">http://pestas.kribb.re.kr/</inter-ref></p>
<p><b>Supplementary information:</b> Supplementary data are available at <inter-ref locator="http://pestas.kribb.re.kr/pestas.jsp" locator-type="url">http://pestas.kribb.re.kr/pestas.jsp</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="odysseus@kribb.re.kr" locator-type="email">odysseus@kribb.re.kr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Nam, S.-H., Kim, D.-W., Jung, T.-S., Choi, Y.-S., Kim, D.-W., Choi, H.-S., Choi, S.-H., Park, H.-S.]]></dc:creator>
<dc:date>2009-07-02</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp293</dc:identifier>
<dc:title><![CDATA[PESTAS: a web server for EST analysis and sequence mining]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>14</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1848</prism:endingPage>
<prism:publicationDate>2009-07-15</prism:publicationDate>
<prism:startingPage>1846</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1575?rss=1">
<title><![CDATA[Textual data compression in computational biology: a synopsis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1575?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Textual data compression, and the associated techniques coming from information theory, are often perceived as being of interest for data communication and storage. However, they are also deeply related to classification and data mining and analysis. In recent years, a substantial effort has been made for the application of textual data compression techniques to various computational biology tasks, ranging from storage and indexing of large datasets to comparison and reverse engineering of biological networks.</p>
<p><b>Results:</b> The main focus of this review is on a systematic presentation of the key areas of bioinformatics and computational biology where compression has been used. When possible, a unifying organization of the main ideas and techniques is also provided.</p>
<p><b>Availability:</b> It goes without saying that most of the research results reviewed here offer software prototypes to the bioinformatics community. The <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp117/DC1" locator-type="url">Supplementary Material</inter-ref> provides pointers to software and benchmark datasets for a range of applications of broad interest. In addition to provide reference to software, the <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp117/DC1" locator-type="url">Supplementary Material</inter-ref> also gives a brief presentation of some fundamental results and techniques related to this paper. It is at: <inter-ref locator="http://www.math.unipa.it/~raffaele/suppMaterial/compReview/" locator-type="url">http://www.math.unipa.it/~raffaele/suppMaterial/compReview/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="raffaele@math.unipa.it" locator-type="email">raffaele@math.unipa.it</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Giancarlo, R., Scaturro, D., Utro, F.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp117</dc:identifier>
<dc:title><![CDATA[Textual data compression in computational biology: a synopsis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1586</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1575</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1587?rss=1">
<title><![CDATA[Pairagon: a highly accurate, HMM-based cDNA-to-genome aligner]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1587?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The most accurate way to determine the intron&ndash;exon structures in a genome is to align spliced cDNA sequences to the genome. Thus, cDNA-to-genome alignment programs are a key component of most annotation pipelines. The scoring system used to choose the best alignment is a primary determinant of alignment accuracy, while heuristics that prevent consideration of certain alignments are a primary determinant of runtime and memory usage. Both accuracy and speed are important considerations in choosing an alignment algorithm, but scoring systems have received much less attention than heuristics.</p>
<p><b>Results:</b> We present Pairagon, a pair hidden Markov model based cDNA-to-genome alignment program, as the most accurate aligner for sequences with high- and low-identity levels. We conducted a series of experiments testing alignment accuracy with varying sequence identity. We first created &lsquo;perfect&rsquo; simulated cDNA sequences by splicing the sequences of exons in the reference genome sequences of fly and human. The complete reference genome sequences were then mutated to various degrees using a realistic mutation simulator and the perfect cDNAs were aligned to them using Pairagon and 12 other aligners. To validate these results with natural sequences, we performed cross-species alignment using orthologous transcripts from human, mouse and rat.</p>
<p>We found that aligner accuracy is heavily dependent on sequence identity. For sequences with 100% identity, Pairagon achieved accuracy levels of &gt;99.6%, with one quarter of the errors of any other aligner. Furthermore, for human/mouse alignments, which are only 85% identical, Pairagon achieved 87% accuracy, higher than any other aligner.</p>
<p><b>Availability:</b> Pairagon source and executables are freely available at <inter-ref locator="http://mblab.wustl.edu/software/pairagon/" locator-type="url">http://mblab.wustl.edu/software/pairagon/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="davidlu@wustl.edu" locator-type="email">davidlu@wustl.edu</inter-ref>; <inter-ref locator="brent@cse.wustl.edu" locator-type="email">brent@cse.wustl.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp273/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Lu, D. V., Brown, R. H., Arumugam, M., Brent, M. R.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp273</dc:identifier>
<dc:title><![CDATA[Pairagon: a highly accurate, HMM-based cDNA-to-genome aligner]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1593</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1587</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1594?rss=1">
<title><![CDATA[Approximate Bayesian feature selection on a large meta-dataset offers novel insights on factors that effect siRNA potency]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1594?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Short interfering RNA (siRNA)-induced RNA interference is an endogenous pathway in sequence-specific gene silencing. The potency of different siRNAs to inhibit a common target varies greatly and features affecting inhibition are of high current interest. The limited success in predicting siRNA potency being reported so far could originate in the small number and the heterogeneity of available datasets in addition to the knowledge-driven, empirical basis on which features thought to be affecting siRNA potency are often chosen. We attempt to overcome these problems by first constructing a meta-dataset of 6483 publicly available siRNAs (targeting mammalian mRNA), the largest to date, and then applying a Bayesian analysis which accommodates feature set uncertainty. A stochastic logistic regression-based algorithm is designed to explore a vast model space of 497 compositional, structural and thermodynamic features, identifying associations with siRNA potency.</p>
<p><b>Results:</b> Our algorithm reveals a number of features associated with siRNA potency that are, to the best of our knowledge, either under reported in literature, such as anti-sense 5'&ndash;3' motif &lsquo;UCU&rsquo;, or not reported at all, such as the anti-sense 5'&ndash;3' motif &lsquo;ACGA&rsquo;. These findings should aid in improving future siRNA potency predictions and might offer further insights into the working of the RNA-induced silencing complex (RISC).</p>
<p><b>Contact:</b> <inter-ref locator="cholmes@stats.ox.ac.uk" locator-type="email">cholmes@stats.ox.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp284/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Klingelhoefer, J. W., Moutsianas, L., Holmes, C.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp284</dc:identifier>
<dc:title><![CDATA[Approximate Bayesian feature selection on a large meta-dataset offers novel insights on factors that effect siRNA potency]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1601</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1594</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1602?rss=1">
<title><![CDATA[Augmented training of hidden Markov models to recognize remote homologs via simulated evolution]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1602?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> While profile hidden Markov models (HMMs) are successful and powerful methods to recognize homologous proteins, they can break down when homology becomes too distant due to lack of sufficient training data. We show that we can improve the performance of HMMs in this domain by using a simple simulated model of evolution to create an augmented training set.</p>
<p><b>Results:</b> We show, in two different remote protein homolog tasks, that HMMs whose training is augmented with simulated evolution outperform HMMs trained only on real data. We find that a mutation rate between 15 and 20% performs best for recognizing G-protein coupled receptor proteins in different classes, and for recognizing SCOP super-family proteins from different families.</p>
<p><b>Contacts:</b> <inter-ref locator="anoop.kumar@tufts.edu" locator-type="email">anoop.kumar@tufts.edu</inter-ref>; <inter-ref locator="lenore.cowen@tufts.edu" locator-type="email">lenore.cowen@tufts.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kumar, A., Cowen, L.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp265</dc:identifier>
<dc:title><![CDATA[Augmented training of hidden Markov models to recognize remote homologs via simulated evolution]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1608</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1602</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1609?rss=1">
<title><![CDATA[A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1609?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> High-throughput sequencing technologies place ever increasing demands on existing algorithms for sequence analysis. Algorithms for computing maximal exact matches (MEMs) between sequences appear in two contexts where high-throughput sequencing will vastly increase the volume of sequence data: (i) seeding alignments of high-throughput reads for genome assembly and (ii) designating anchor points for genome&ndash;genome comparisons.</p>
<p><b>Results:</b> We introduce a new algorithm for finding MEMs. The algorithm leverages a sparse suffix array (SA), a text index that stores every <I>K</I>-th position of the text. In contrast to a full text index that stores every position of the text, a sparse SA occupies much less memory. Even though we use a sparse index, the output of our algorithm is the same as a full text index algorithm as long as the space between the indexed suffixes is not greater than a minimum length of a MEM. By relying on partial matches and additional text scanning between indexed positions, the algorithm trades memory for extra computation. The reduced memory usage makes it possible to determine MEMs between significantly longer sequences.</p>
<p><b>Availability:</b> Source code for the algorithm is available under a BSD open source license at <inter-ref locator="http://compbio.cs.princeton.edu/mems" locator-type="url">http://compbio.cs.princeton.edu/mems</inter-ref>. The implementation can serve as a drop-in replacement for the MEMs algorithm in MUMmer 3.</p>
<p><b>Contact:</b> <inter-ref locator="zkhan@cs.princeton.edu" locator-type="email">zkhan@cs.princeton.edu</inter-ref>; <inter-ref locator="mona@cs.princeton.edu" locator-type="email">mona@cs.princeton.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp275/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Khan, Z., Bloom, J. S., Kruglyak, L., Singh, M.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp275</dc:identifier>
<dc:title><![CDATA[A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1616</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1609</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1617?rss=1">
<title><![CDATA[Affinity Density: a novel genomic approach to the identification of transcription factor regulatory targets]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1617?rss=1</link>
<description><![CDATA[
<p><b>Methods:</b> A new method was developed for identifying novel transcription factor regulatory targets based on calculating Local Affinity Density. Techniques from the signal-processing field were used, in particular the Hann digital filter, to calculate the relative binding affinity of different regions based on previously published <I>in vitro</I> binding data. To illustrate this approach, the complete genomes of <I>Drosophila melanogaster</I> and <I>D.pseudoobscura</I> were analyzed for binding sites of the homeodomain protein Tinman, an essential heart development gene in both <I>Drosophila</I> and Mouse. The significant binding regions were identified relative to genomic background and assigned to putative target genes. Valid candidates common to both species of <I>Drosophila</I> were selected as a test of conservation.</p>
<p><b>Results:</b> The new method was more sensitive than cluster searches for conserved binding motifs with respect to positive identification of known Tinman targets. Our Local Affinity Density method also identified a significantly greater proportion of Tinman-coexpressed genes than equivalent, optimized cluster searching. In addition, this new method predicted a significantly greater than expected number of genes with previously published RNAi phenotypes in the heart.</p>
<p><b>Availability:</b> Algorithms were implemented in Python, LISP, R and maxima, using MySQL to access locally mirrored sequence data from Ensembl (<I>D.melanogaster</I> release 4.3) and flybase (<I>D.pseudoobscura</I>). All code is licensed under GPL and freely available at <inter-ref locator="http://www.ohsu.edu/cellbio/dev_biol_prog/affinitydensity/" locator-type="url">http://www.ohsu.edu/cellbio/dev_biol_prog/affinitydensity/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="hazelett@ohsu.edu" locator-type="email">hazelett@ohsu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Hazelett, D. J., Lakeland, D. L., Weiss, J. B.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp282</dc:identifier>
<dc:title><![CDATA[Affinity Density: a novel genomic approach to the identification of transcription factor regulatory targets]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1624</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1617</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1625?rss=1">
<title><![CDATA[Flexible structural protein alignment by a sequence of local transformations]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1625?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Throughout evolution, homologous proteins have common regions that stay semi-rigid relative to each other and other parts that vary in a more noticeable way. In order to compare the increasing number of structures in the PDB, flexible geometrical alignments are needed, that are reliable and easy to use.</p>
<p><b>Results:</b> We present a protein structure alignment method whose main feature is the ability to consider different rigid transformations at different sites, allowing for deformations beyond a global rigid transformation. The performance of the method is comparable with that of the best ones from 10 aligners tested, regarding both the quality of the alignments with respect to hand curated ones, and the classification ability. An analysis of some structure pairs from the literature that need to be matched in a flexible fashion is shown. The use of a series of local transformations can be exported to other classifiers, and a future golden protein similarity measure could benefit from it.</p>
<p><b>Availability:</b> A public server for the program is available at <inter-ref locator="http://dmi.uib.es/ProtDeform/" locator-type="url">http://dmi.uib.es/ProtDeform/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="jairo@uib.es" locator-type="email">jairo@uib.es</inter-ref></p>
<p><b>Supplementary information:</b> All data used, results and examples are available at <inter-ref locator="http://dmi.uib.es/people/jairo/bio/ProtDeform" locator-type="url">http://dmi.uib.es/people/jairo/bio/ProtDeform</inter-ref>. Supplementary data are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Rocha, J., Segura, J., Wilson, R. C., Dasgupta, S.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp296</dc:identifier>
<dc:title><![CDATA[Flexible structural protein alignment by a sequence of local transformations]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1631</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1625</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1632?rss=1">
<title><![CDATA[Domain Interaction Footprint: a multi-classification approach to predict domain-peptide interactions]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1632?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The flow of information within cellular pathways largely relies on specific protein&ndash;protein interactions. Discovering such interactions that are mostly mediated by peptide recognition modules (PRM) is therefore a fundamental step towards unravelling the complexity of varying pathways. Since peptides can be recognized by more than one PRM and high-throughput experiments are both time consuming and expensive, it would be preferable to narrow down all potential peptide ligands for one specific PRM by a computational method. We at first present Domain Interaction Footprint (DIF) a new approach to predict binding peptides to PRMs merely based on the sequence of the peptides. Second, we show that our method is able to create a multi-classification model that assesses the binding specificity of a given peptide to all examined PRMs at once.</p>
<p><b>Results:</b> We first applied our approach to a previously investigated dataset of different SH3 domains and predicted their appropriate peptide ligands with an exceptionally high accuracy. This result outperforms all recent methods trained on the same dataset. Furthermore, we used our technique to build two multi-classification models (SH3 and PDZ domains) to predict the interaction preference between a peptide and every single domain in the corresponding domain family at once. Predicting the domain specificity most reliably, our proposed approach can be seen as a first step towards a complete multi-domain classification model comprised of all domains of one family. Such a comprehensive domain specificity model would benefit the quest for highly specific peptide ligands interacting solely with the domain of choice.</p>
<p><b>Contact:</b> <inter-ref locator="gkrause@fmp-berlin.de" locator-type="email">gkrause@fmp-berlin.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp264/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Schillinger, C., Boisguerin, P., Krause, G.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp264</dc:identifier>
<dc:title><![CDATA[Domain Interaction Footprint: a multi-classification approach to predict domain-peptide interactions]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1639</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1632</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1640?rss=1">
<title><![CDATA[Optimizing static thermodynamic models of transcriptional regulation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1640?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Modeling transcriptional regulation using thermo-dynamic modeling approaches has become increasingly relevant as a way to gain a detailed understanding of transcriptional regulation. Thermodynamic models are able to model the interactions between transcription factors (TFs) and DNA that lead to a specific transcriptional output of the target gene. Such models can be &lsquo;trained&rsquo; by fitting their free parameters to data on the transcription rate of a gene and the concentrations of its regulating factors. However, the parameter fitting process is computationally very expensive and this limits the number of alternative types of model that can be explored.</p>
<p><b>Results:</b> In this study, we evaluate the &lsquo;optimization landscape&rsquo; of a class of static, quantitative models of regulation and explore the efficiency of a range of optimization methods. We evaluate eight optimization methods: two variants of simulated annealing (SA), four variants of gradient descent (GD), a hybrid SA/GD algorithm and a genetic algorithm. We show that the optimization landscape has numerous local optima, resulting in poor performance for the GD methods. SA with a simple geometric cooling schedule performs best among all tested methods. In particular, we see no advantage to using the more sophisticated &lsquo;LAM&rsquo; cooling schedule. Overall, a good approximate solution is achievable in minutes using SA with a simple cooling schedule.</p>
<p><b>Contact:</b> <inter-ref locator="d.bauer@uq.edu.au" locator-type="email">d.bauer@uq.edu.au</inter-ref>; <inter-ref locator="t.bailey@imb.uq.edu.au" locator-type="email">t.bailey@imb.uq.edu.au</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp283/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Bauer, D. C., Bailey, T. L.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp283</dc:identifier>
<dc:title><![CDATA[Optimizing static thermodynamic models of transcriptional regulation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1646</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1640</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1647?rss=1">
<title><![CDATA[A tool for identification of genes expressed in patterns of interest using the Allen Brain Atlas]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1647?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Gene expression patterns can be useful in understanding the structural organization of the brain and the regulatory logic that governs its myriad cell types. A particularly rich source of spatial expression data is the Allen Brain Atlas (ABA), a comprehensive genome-wide <I>in situ</I> hybridization study of the adult mouse brain. Here, we present an open-source program, ALLENMINER, that searches the ABA for genes that are expressed, enriched, patterned or graded in a user-specified region of interest.</p>
<p><b>Results:</b> Regionally enriched genes identified by ALLENMINER accurately reflect the <I>in situ</I> data (95&ndash;99% concordance with manual curation) and compare with regional microarray studies as expected from previous comparisons (61&ndash;80% concordance). We demonstrate the utility of ALLENMINER by identifying genes that exhibit patterned expression in the caudoputamen and neocortex. We discuss general characteristics of gene expression in the mouse brain and the potential application of ALLENMINER to design strategies for specific genetic access to brain regions and cell types.</p>
<p><b>Availability:</b> ALLENMINER is freely available on the Internet at <inter-ref locator="http://research.janelia.org/davis/allenminer" locator-type="url">http://research.janelia.org/davis/allenminer</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="davisf@janelia.hhmi.org" locator-type="email">davisf@janelia.hhmi.org</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp288/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Davis, F. P., Eddy, S. R.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp288</dc:identifier>
<dc:title><![CDATA[A tool for identification of genes expressed in patterns of interest using the Allen Brain Atlas]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1654</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1647</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1655?rss=1">
<title><![CDATA[Ratio adjustment and calibration scheme for gene-wise normalization to enhance microarray inter-study prediction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1655?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Reproducibility analyses of biologically relevant microarray studies have mostly focused on overlap of detected biomarkers or correlation of differential expression evidences across studies. For clinical utility, direct inter-study prediction (i.e. to establish a prediction model in one study and apply to another) for disease diagnosis or prognosis prediction is more important. Normalization plays a key role for such a task. Traditionally, sample-wise normalization has been a standard for inter-array and inter-study normalization. For gene-wise normalization, it has been implemented for intra-study or inter-study predictions in a few papers while its rationale, strategy and effect remain unexplored.</p>
<p><b>Results:</b> In this article, we investigate the effect of gene-wise normalization in microarray inter-study prediction. Gene-specific intensity discrepancies across studies are commonly found even after proper sample-wise normalization. We explore the rationale and necessity of gene-wise normalization. We also show that the ratio of sample sizes in normal versus diseased groups can greatly affect the performance of gene-wise normalization and an analytical method is developed to adjust for the imbalanced ratio effect. Both simulation results and applications to three lung cancer and two prostate cancer data sets, considering both binary classification and survival risk predictions, showed significant and robust improvement of the new adjustment. A calibration scheme is developed to apply the ratio-adjusted gene-wise normalization for prospective clinical trials. The number of calibration samples needed is estimated from existing studies and suggested for future applications. The result has important implication to the translational research of microarray as a practical disease diagnosis and prognosis prediction tool.</p>
<p><b>Contact:</b> <inter-ref locator="ctseng@pitt.edu" locator-type="email">ctseng@pitt.edu</inter-ref></p>
<p><b>Availability:</b> <inter-ref locator="http://www.biostat.pitt.edu/bioinfo/" locator-type="url">http://www.biostat.pitt.edu/bioinfo/</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp292/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Cheng, C., Shen, K., Song, C., Luo, J., Tseng, G. C.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp292</dc:identifier>
<dc:title><![CDATA[Ratio adjustment and calibration scheme for gene-wise normalization to enhance microarray inter-study prediction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1661</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1655</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1662?rss=1">
<title><![CDATA[Evaluating reproducibility of differential expression discoveries in microarray studies by considering correlated molecular changes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1662?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> According to current consistency metrics such as percentage of overlapping genes (POG), lists of differentially expressed genes (DEGs) detected from different microarray studies for a complex disease are often highly inconsistent. This irreproducibility problem also exists in other high-throughput post-genomic areas such as proteomics and metabolism. A complex disease is often characterized with many coordinated molecular changes, which should be considered when evaluating the reproducibility of discovery lists from different studies.</p>
<p><b>Results:</b> We proposed metrics percentage of overlapping genes-related (POGR) and normalized POGR (<I>n</I>POGR) to evaluate the consistency between two DEG lists for a complex disease, considering correlated molecular changes rather than only counting gene overlaps between the lists. Based on microarray datasets of three diseases, we showed that though the POG scores for DEG lists from different studies for each disease are extremely low, the POGR and <I>n</I>POGR scores can be rather high, suggesting that the apparently inconsistent DEG lists may be highly reproducible in the sense that they are actually significantly correlated. Observing different discovery results for a disease by the POGR and <I>n</I>POGR scores will obviously reduce the uncertainty of the microarray studies. The proposed metrics could also be applicable in many other high-throughput post-genomic areas.</p>
<p><b>Contact:</b> <inter-ref locator="guoz@ems.hrbmu.edu.cn" locator-type="email">guoz@ems.hrbmu.edu.cn</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp295/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Zhang, M., Zhang, L., Zou, J., Yao, C., Xiao, H., Liu, Q., Wang, J., Wang, D., Wang, C., Guo, Z.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp295</dc:identifier>
<dc:title><![CDATA[Evaluating reproducibility of differential expression discoveries in microarray studies by considering correlated molecular changes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1668</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1662</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1669?rss=1">
<title><![CDATA[A Bayesian segmentation approach to ascertain copy number variations at the population level]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1669?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Efficient and accurate ascertainment of copy number variations (CNVs) at the population level is essential to understand the evolutionary process and population genetics, and to apply CNVs in population-based genome-wide association studies for complex human diseases. We propose a novel Bayesian segmentation approach to identify CNVs in a defined population of any size. It is computationally efficient and provides statistical evidence for the detected CNVs through the Bayes factor. This approach has the unique feature of carrying out segmentation and assigning copy number status simultaneously&mdash;a desirable property that current segmentation methods do not share.</p>
<p><b>Results:</b> In comparisons with popular two-step segmentation methods for a single individual using benchmark simulation studies, we find the new approach to perform competitively with respect to false discovery rate and sensitivity in breakpoint detection. In a simulation study of multiple samples with recurrent copy numbers, the new approach outperforms two leading single sample methods. We further demonstrate the effectiveness of our approach in population-level analysis of previously published HapMap data. We also apply our approach in studying population genetics of CNVs.</p>
<p><b>Availability:</b> R programs are available at <inter-ref locator="http://www.mshri.on.ca/mitacs/software/SOFTWARE.HTML" locator-type="url">http://www.mshri.on.ca/mitacs/software/SOFTWARE.HTML</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="lwu@math.uwaterloo.ca" locator-type="email">lwu@math.uwaterloo.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp270/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wu, L. Y., Chipman, H. A., Bull, S. B., Briollais, L., Wang, K.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp270</dc:identifier>
<dc:title><![CDATA[A Bayesian segmentation approach to ascertain copy number variations at the population level]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1679</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1669</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1680?rss=1">
<title><![CDATA[Systematic analysis of synchronized oscillatory neuronal networks reveals an enrichment for coupled direct and indirect feedback motifs]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1680?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Synchronized bursting behavior is a remarkable phenomenon in neural dynamics. So, identification of the underlying functional structure is crucial to understand its regulatory mechanism at a system level. On the other hand, we noted that feedback loops (FBLs) are commonly used basic building blocks in engineering circuit design, especially for synchronization, and they have also been considered as important regulatory network motifs in systems biology. From these motivations, we have investigated the relationship between synchronized bursting behavior and feedback motifs in neural networks.</p>
<p><b>Results:</b> Through extensive simulations of synthetic spike oscillation models, we found that a particular structure of FBLs, coupled direct and indirect positive feedback loops (PFLs), can induce robust synchronized bursting behaviors. To further investigate this, we have developed a novel FBL identification method based on sampled time-series data and applied it to synchronized spiking records measured from cultured neural networks of rat by using multi-electrode array. As a result, we have identified coupled direct and indirect PFLs.</p>
<p><b>Conclusion:</b> We therefore conclude that coupled direct and indirect PFLs might be an important design principle that causes the synchronized bursting behavior in neuronal networks although an extrapolation of this result to <I>in vivo</I> brain dynamics still remains an unanswered question.</p>
<p><b>Contact:</b> <inter-ref locator="ckh@kaist.ac.kr" locator-type="email">ckh@kaist.ac.kr</inter-ref>; <inter-ref locator="ynam@kaist.ac.kr" locator-type="email">ynam@kaist.ac.kr</inter-ref></p>
<p><b>Supplementary Material:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp271/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dong, C.-Y., Lim, J., Nam, Y., Cho, K.-H.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp271</dc:identifier>
<dc:title><![CDATA[Systematic analysis of synchronized oscillatory neuronal networks reveals an enrichment for coupled direct and indirect feedback motifs]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1685</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1680</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1686?rss=1">
<title><![CDATA[List-decoding methods for inferring polynomials in finite dynamical gene network models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1686?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The problem of reverse engineering the dynamics of gene expression profiles is of focal importance in systems biology. Due to noise and the inherent lack of sufficiently large datasets generated via high-throughput measurements, known reconstruction frameworks based on dynamical systems models fail to provide adequate settings for network analysis. This motivates the study of new approaches that produce stochastic lists of explanations for the observed network dynamics that can be efficiently inferred from small sample sets and in the presence of errors.</p>
<p><b>Results:</b> We introduce a novel algebraic modeling framework, termed stochastic polynomial dynamical systems (SPDSs) that can capture the dynamics of regulatory networks based on microarray expression data. Here, we refer to <I>dynamics of the network</I> as the trajectories of gene expression profiles over time. The model assumes that the expression data is quantized in a manner that allows for imposing a finite field structure on the observations, and the existence of polynomial update functions for each gene in the network. The underlying reverse engineering algorithm is based on ideas borrowed from coding theory, and in particular, list-decoding methods for so called Reed-Muller codes. The list-decoding method was tested on synthetic data and on microarray expression measurements from the M<sup>3D</sup> database, corresponding to a subnetwork of the <I>Escherichia coli</I> SOS repair system, as well as on the complete transcription factor network, available at RegulonDB. The results show that SPDSs constructed via list-decoders significantly outperform other algebraic reverse engineering methods, and that they also provide good guidelines for estimating the influence of genes on the dynamics of the network.</p>
<p><b>Availability:</b> Software codes for list-decoding algorithms suitable for direct application to quantized expression data will be publicly available at the authors' web-pages.</p>
<p><b>Contact:</b> <inter-ref locator="janis.dingel@tum.de" locator-type="email">janis.dingel@tum.de</inter-ref>; <inter-ref locator="milenkov@uiuc.edu" locator-type="email">milenkov@uiuc.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp281/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dingel, J., Milenkovic, O.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp281</dc:identifier>
<dc:title><![CDATA[List-decoding methods for inferring polynomials in finite dynamical gene network models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1693</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1686</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1694?rss=1">
<title><![CDATA[A global meta-analysis of microarray expression data to predict unknown gene functions and estimate the literature-data divide]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1694?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Approximately 9334 (37%) of Human genes have no publications documenting their function and, for those that are published, the number of publications per gene is highly skewed. Furthermore, for reasons not clear, the entry of new gene names into the literature has slowed in recent years. If we are to better understand human/mammalian biology and complete the catalog of human gene function, it is important to finish predicting putative functions for these genes based upon existing experimental evidence.</p>
<p><b>Results:</b> A global meta-analysis (GMA) of all publicly available GEO two-channel human microarray datasets (3551 experiments total) was conducted to identify genes with recurrent, reproducible patterns of co-regulation across different conditions. Patterns of co-expression were divided into parallel (i.e. genes are up and down-regulated together) and anti-parallel. Several ranking methods to predict a gene's function based on its top 20 co-expressed gene pairs were compared. In the best method, 34% of predicted Gene Ontology (GO) categories matched exactly with the known GO categories for ~5000 genes analyzed versus only 3% for random gene sets. Only 2.4% of co-expressed gene pairs were found as co-occurring gene pairs in MEDLINE.</p>
<p><b>Conclusions:</b> Via a GO enrichment analysis, genes co-expressed in parallel with the query gene were frequently associated with the same GO categories, whereas anti-parallel genes were not. Combining parallel and anti-parallel genes for analysis resulted in fewer significant GO categories, suggesting they are best analyzed separately. Expression databases contain much unexpected genetic knowledge that has not yet been reported in the literature. A total of 1642 Human genes with unknown function were differentially expressed in at least 30 experiments.</p>
<p><b>Availability:</b> Data matrix available upon request.</p>
<p><b>Contact:</b> <inter-ref locator="jdwren@gmail.com" locator-type="email">jdwren@gmail.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp290/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wren, J. D.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp290</dc:identifier>
<dc:title><![CDATA[A global meta-analysis of microarray expression data to predict unknown gene functions and estimate the literature-data divide]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1701</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1694</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1702?rss=1">
<title><![CDATA[Genetic modification of flux for flux prediction of mutants]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1702?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Gene deletion and overexpression are critical technologies for designing or improving the metabolic flux distribution of microbes. Some algorithms including flux balance analysis (FBA) and minimization of metabolic adjustment (MOMA) predict a flux distribution from a stoichiometric matrix in the mutants in which some metabolic genes are deleted or non-functional, but there are few algorithms that predict how a broad range of genetic modifications, such as over- and underexpression of metabolic genes, alters the phenotypes of the mutants at the metabolic flux level.</p>
<p><b>Results:</b> To overcome such existing limitations, we develop a novel algorithm that predicts the flux distribution of the mutants with a broad range of genetic modification, based on elementary mode analysis. It is denoted as genetic modification of flux (GMF), which couples two algorithms that we have developed: modified control effective flux (mCEF) and enzyme control flux (ECF). mCEF is proposed based on CEF to estimate the gene expression patterns in genetically modified mutants in terms of specific biological functions. GMF is demonstrated to predict the flux distribution of not only gene deletion mutants, but also the mutants with underexpressed and overexpressed genes in <I>Escherichia coli</I> and <I>Corynebacterium glutamicum</I>. This achieves breakthrough in the a priori flux prediction of a broad range of genetically modified mutants.</p>
<p><b>Contact:</b> <inter-ref locator="kurata@bio.kyutech.ac.jp" locator-type="email">kurata@bio.kyutech.ac.jp</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp298/DC1" locator-type="url">Supplementary file</inter-ref> and programs are available at <I>Bioinformatics</I> online or <inter-ref locator="http://www.cadlive.jp" locator-type="url">http://www.cadlive.jp</inter-ref>.</p>
]]></description>
<dc:creator><![CDATA[Zhao, Q., Kurata, H.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp298</dc:identifier>
<dc:title><![CDATA[Genetic modification of flux for flux prediction of mutants]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1708</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1702</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1709?rss=1">
<title><![CDATA[FlexServ: an integrated tool for the analysis of protein flexibility]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1709?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> FlexServ is a web-based tool for the analysis of protein flexibility. The server incorporates powerful protocols for the coarse-grained determination of protein dynamics using different versions of <I>Normal Mode Analysis</I> (NMA), <I>Brownian dynamics</I> (BD) and <I>Discrete Dynamics</I> (DMD). It can also analyze user provided trajectories. The server allows a complete analysis of flexibility using a large variety of metrics, including basic geometrical analysis, B-factors, essential dynamics, stiffness analysis, collectivity measures, Lindemann's indexes, residue correlation, chain-correlations, dynamic domain determination, hinge point detections, etc. Data is presented through a web interface as plain text, 2D and 3D graphics.</p>
<p><b>Availability:</b> <inter-ref locator="http://mmb.pcb.ub.es/FlexServ" locator-type="url">http://mmb.pcb.ub.es/FlexServ</inter-ref>; <inter-ref locator="http://www.inab.org" locator-type="url">http://www.inab.org</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="modesto@mmb.pcb.ub.es" locator-type="email">modesto@mmb.pcb.ub.es</inter-ref> or <inter-ref locator="gelpi@mmb.pcb.ub.es" locator-type="email">gelpi@mmb.pcb.ub.es</inter-ref></p>
<p><b>Supplementary information:</b> Additional information and methodology details can be found at <inter-ref locator="http://mmb.pcb.ub.es/FlexServ/help" locator-type="url">http://mmb.pcb.ub.es/FlexServ/help</inter-ref>.</p>
]]></description>
<dc:creator><![CDATA[Camps, J., Carrillo, O., Emperador, A., Orellana, L., Hospital, A., Rueda, M., Cicin-Sain, D., D'Abramo, M., Gelpi, J. L., Orozco, M.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp304</dc:identifier>
<dc:title><![CDATA[FlexServ: an integrated tool for the analysis of protein flexibility]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1710</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1709</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1711?rss=1">
<title><![CDATA[penalizedSVM: a R-package for feature selection SVM classification]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1711?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Support vector machine (SVMs) classification is a widely used and one of the most powerful classification techniques. However, a major limitation is that SVM cannot perform automatic gene selection. To overcome this restriction, a number of penalized feature selection methods have been proposed. In the R package &lsquo;penalizedSVM&rsquo; implemented penalization functions L<SUB>1</SUB> norm and Smoothly Clipped Absolute Deviation (SCAD) provide automatic feature selection for SVM classification tasks.</p>
<p><b>Availability:</b> The R package &lsquo;penalizedSVM&rsquo; is available from the Comprehensive R Archive Network (<inter-ref locator="http://cran.r-project.org/" locator-type="url">http://cran.r-project.org/</inter-ref>) under GPL-2 or later.</p>
<p><b>Contact:</b> <inter-ref locator="natalia.becker@dkfz.de" locator-type="email">natalia.becker@dkfz.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp286/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Becker, N., Werft, W., Toedt, G., Lichter, P., Benner, A.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp286</dc:identifier>
<dc:title><![CDATA[penalizedSVM: a R-package for feature selection SVM classification]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1712</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1711</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1713?rss=1">
<title><![CDATA[Infernal 1.0: inference of RNA alignments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/13/1713?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Nawrocki, E. P., Kolbe, D. L., Eddy, S. R.]]></dc:creator>
<dc:date>2009-06-17</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp326</dc:identifier>
<dc:title><![CDATA[Infernal 1.0: inference of RNA alignments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>13</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1713</prism:endingPage>
<prism:publicationDate>2009-07-01</prism:publicationDate>
<prism:startingPage>1713</prism:startingPage>
<prism:section>CORRIGENDUM</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i1?rss=1">
<title><![CDATA[Editorial]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i1?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Gusfield, D., Tramontano, A.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp234</dc:identifier>
<dc:title><![CDATA[Editorial]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i1</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i1</prism:startingPage>
<prism:section>EDITORIAL</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i2?rss=1">
<title><![CDATA[ISMB/ECCB 2009 Proceedings papers committee]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i2?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp260</dc:identifier>
<dc:title><![CDATA[ISMB/ECCB 2009 Proceedings papers committee]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i5</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i2</prism:startingPage>
<prism:section>EDITORIAL</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i6?rss=1">
<title><![CDATA[Constrained mixture estimation for analysis and robust classification of clinical time series]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i6?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Personalized medicine based on molecular aspects of diseases, such as gene expression profiling, has become increasingly popular. However, one faces multiple challenges when analyzing clinical gene expression data; most of the well-known theoretical issues such as high dimension of feature spaces versus few examples, noise and missing data apply. Special care is needed when designing classification procedures that support personalized diagnosis and choice of treatment. Here, we particularly focus on classification of interferon-&beta; (IFN&beta;) treatment response in Multiple Sclerosis (MS) patients which has attracted substantial attention in the recent past. Half of the patients remain unaffected by IFN&beta; treatment, which is still the standard. For them the treatment should be timely ceased to mitigate the side effects.</p>
<p><b>Results:</b> We propose constrained estimation of mixtures of hidden Markov models as a methodology to classify patient response to IFN&beta; treatment. The advantages of our approach are that it takes the temporal nature of the data into account and that it is robust with respect to noise, missing data and mislabeled samples. Moreover, mixture estimation enables exploration of the presence of response sub-groups of patients on the transcriptional level. We clearly outperformed all prior approaches in terms of prediction accuracy, raising it, for the first time, to &gt;90%. Additionally, we were able to identify potentially mislabeled samples and to sub-divide the good responders into two sub-groups that exhibited different transcriptional response programs. This is supported by recent findings on MS pathology and therefore may raise interesting clinical follow-up questions.</p>
<p><b>Availability:</b> The method is implemented in the GQL framework and is available at <inter-ref locator="http://www.ghmm.org/gql" locator-type="url">http://www.ghmm.org/gql</inter-ref>. Datasets are available at <inter-ref locator="http://www.cin.ufpe.br/~igcf/MSConst" locator-type="url">http://www.cin.ufpe.br/~igcf/MSConst</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="igcf@cin.ufpe.br" locator-type="email">igcf@cin.ufpe.br</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp222/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Costa, I. G., Schonhuth, A., Hafemeister, C., Schliep, A.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp222</dc:identifier>
<dc:title><![CDATA[Constrained mixture estimation for analysis and robust classification of clinical time series]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i14</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i6</prism:startingPage>
<prism:section>BIOINFORMATICS OF DISEASE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i15?rss=1">
<title><![CDATA[Graph theoretical approach to study eQTL: a case study of Plasmodium falciparum]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i15?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Analysis of expression quantitative trait loci (eQTL) significantly contributes to the determination of gene regulation programs. However, the discovery and analysis of associations of gene expression levels and their underlying sequence polymorphisms continue to pose many challenges. Methods are limited in their ability to illuminate the full structure of the eQTL data. Most rely on an exhaustive, genome scale search that considers all possible locus&ndash;gene pairs and tests the linkage between each locus and gene.</p>
<p><b>Results:</b> To analyze eQTLs in a more comprehensive and efficient way, we developed the Graph based eQTL Decomposition method (GeD) that allows us to model genotype and expression data using an eQTL association graph. Through graph-based heuristics, GeD identifies dense subgraphs in the eQTL association graph. By identifying eQTL association cliques that expose the hidden structure of genotype and expression data, GeD effectively filters out most locus&ndash;gene pairs that are unlikely to have significant linkage. We apply GeD on eQTL data from <I>Plasmodium falciparum</I>, the human malaria parasite, and show that GeD reveals the structure of the relationship between all loci and all genes on a whole genome level. Furthermore, GeD allows us to uncover additional eQTLs with lower FDR, providing an important complement to traditional eQTL analysis methods.</p>
<p><b>Contact:</b> <inter-ref locator="przytyck@ncbi.nlm.nih.gov" locator-type="email">przytyck@ncbi.nlm.nih.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Huang, Y., Wuchty, S., Ferdig, M. T., Przytycka, T. M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp189</dc:identifier>
<dc:title><![CDATA[Graph theoretical approach to study eQTL: a case study of Plasmodium falciparum]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i20</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i15</prism:startingPage>
<prism:section>BIOINFORMATICS OF DISEASE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i21?rss=1">
<title><![CDATA[A Classifier-based approach to identify genetic similarities between diseases]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i21?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Genome-wide association studies are commonly used to identify possible associations between genetic variations and diseases. These studies mainly focus on identifying individual single nucleotide polymorphisms (SNPs) potentially linked with one disease of interest. In this work, we introduce a novel methodology that identifies similarities between diseases using information from a large number of SNPs. We separate the diseases for which we have individual genotype data into one reference disease and several query diseases. We train a classifier that distinguishes between individuals that have the reference disease and a set of control individuals. This classifier is then used to classify the individuals that have the query diseases. We can then rank query diseases according to the average classification of the individuals in each disease set, and identify which of the query diseases are more similar to the reference disease. We repeat these classification and comparison steps so that each disease is used once as reference disease.</p>
<p><b>Results:</b> We apply this approach using a decision tree classifier to the genotype data of seven common diseases and two shared control sets provided by the Wellcome Trust Case Control Consortium. We show that this approach identifies the known genetic similarity between type 1 diabetes and rheumatoid arthritis, and identifies a new putative similarity between bipolar disease and hypertension.</p>
<p><b>Contact:</b> <inter-ref locator="serafim@cs.stanford.edu" locator-type="email">serafim@cs.stanford.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Schaub, M. A., Kaplow, I. M., Sirota, M., Do, C. B., Butte, A. J., Batzoglou, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp226</dc:identifier>
<dc:title><![CDATA[A Classifier-based approach to identify genetic similarities between diseases]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i29</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i21</prism:startingPage>
<prism:section>BIOINFORMATICS OF DISEASE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i30?rss=1">
<title><![CDATA[Model-based clustering of array CGH data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i30?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Analysis of array comparative genomic hybridization (aCGH) data for recurrent DNA copy number alterations from a cohort of patients can yield distinct sets of molecular signatures or profiles. This can be due to the presence of heterogeneous cancer subtypes within a supposedly homogeneous population.</p>
<p><b>Results:</b> We propose a novel statistical method for automatically detecting such subtypes or clusters. Our approach is model based: each cluster is defined in terms of a sparse profile, which contains the locations of unusually frequent alterations. The profile is represented as a hidden Markov model. Samples are assigned to clusters based on their similarity to the cluster's profile. We simultaneously infer the cluster assignments and the cluster profiles using an expectation maximization-like algorithm. We show, using a realistic simulation study, that our method is significantly more accurate than standard clustering techniques. We then apply our method to two clinical datasets. In particular, we examine previously reported aCGH data from a cohort of 106 follicular lymphoma patients, and discover clusters that are known to correspond to clinically relevant subgroups. In addition, we examine a cohort of 92 diffuse large B-cell lymphoma patients, and discover previously unreported clusters of biological interest which have inspired followup clinical research on an independent cohort.</p>
<p><b>Availability:</b> Software and synthetic datasets are available at <inter-ref locator="http://www.cs.ubc.ca/~sshah/acgh" locator-type="url">http://www.cs.ubc.ca/~sshah/acgh</inter-ref> as part of the CNA-HMMer package.</p>
<p><b>Contact:</b> <inter-ref locator="sshah@bccrc.ca" locator-type="email">sshah@bccrc.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp205/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Shah, S. P., Cheung, K-J., Johnson, N. A., Alain, G., Gascoyne, R. D., Horsman, D. E., Ng, R. T., Murphy, K. P.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp205</dc:identifier>
<dc:title><![CDATA[Model-based clustering of array CGH data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i38</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i30</prism:startingPage>
<prism:section>BIOINFORMATICS OF DISEASE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i39?rss=1">
<title><![CDATA[Viruses selectively mutate their CD8+ T-cell epitopes--a large-scale immunomic analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i39?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Viruses employ various means to evade immune detection. One common evasion strategy is the removal of CD8+ cytotoxic T-lymphocyte epitopes. We here use a combination of multiple bioinformatic tools and a large amount of genomic data to compute the epitope repertoire presented by over 1300 viruses in many HLA alleles. We define the &lsquo;Size of Immune Repertoire score&rsquo;, which represents the ratio between the epitope density within a protein and the expected density. This score is used to study viral immune evasion.</p>
<p><b>Results:</b> We show that viral proteins in general have a higher epitope density than human proteins. This difference is due to a good fit of the human MHC molecules to the typical amino-acid usage of viruses. Among different viruses, viruses infecting humans present fewer epitopes than non-human viruses. This selection is not at the amino-acid usage level, but through the removal of specific epitopes. Within a single virus, not all proteins express the same epitope density. Proteins expressed early in the viral life cycle have a lower epitope density than late proteins. Such a difference is not observed in non-human viruses. The removal of early epitopes and the targeting of the cellular immune response to late viral proteins allow the virus a time interval to propagate before its host cells are destroyed by T cells.</p>
<p><b>Contact:</b> <inter-ref locator="louzouy@math.biu.ac.il" locator-type="email">louzouy@math.biu.ac.il</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Vider-Shalit, T., Sarid, R., Maman, K., Tsaban, L., Levi, R., Louzoun, Y.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp221</dc:identifier>
<dc:title><![CDATA[Viruses selectively mutate their CD8+ T-cell epitopes--a large-scale immunomic analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i44</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i39</prism:startingPage>
<prism:section>BIOINFORMATICS OF DISEASE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i45?rss=1">
<title><![CDATA[Family classification without domain chaining]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i45?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Classification of gene and protein sequences into homologous families, i.e. sets of sequences that share common ancestry, is an essential step in comparative genomic analyses. This is typically achieved by construction of a sequence homology network, followed by clustering to identify dense subgraphs corresponding to families. Accurate classification of single domain families is now within reach due to major algorithmic advances in remote homology detection and graph clustering. However, classification of multidomain families remains a significant challenge. The presence of the same domain in sequences that do not share common ancestry introduces false edges in the homology network that link unrelated families and stymy clustering algorithms.</p>
<p><b>Results:</b> Here, we investigate a network-rewiring strategy designed to eliminate edges due to promiscuous domains. We show that this strategy can reduce noise in and restore structure to artificial networks with simulated noise, as well as to the yeast genome homology network. We further evaluate this approach on a hand-curated set of multidomain sequences in mouse and human, and demonstrate that classification using the rewired network delivers dramatic improvement in Precision and Recall, compared with current methods. Families in our test set exhibit a broad range of domain architectures and sequence conservation, demonstrating that our method is flexible, robust and suitable for high-throughput, automated processing of heterogeneous, genome-scale data.</p>
<p><b>Contact:</b> <inter-ref locator="jacobmj@cmu.edu" locator-type="email">jacobmj@cmu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Joseph, J. M., Durand, D.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp207</dc:identifier>
<dc:title><![CDATA[Family classification without domain chaining]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i53</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i45</prism:startingPage>
<prism:section>COMPARATIVE GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i54?rss=1">
<title><![CDATA[Identifying novel constrained elements by exploiting biased substitution patterns]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i54?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Comparing the genomes from closely related species provides a powerful tool to identify functional elements in a reference genome. Many methods have been developed to identify conserved sequences across species; however, existing methods only model conservation as a decrease in the <I>rate</I> of mutation and have ignored selection acting on the <I>pattern</I> of mutations.</p>
<p><b>Results:</b> We present a new approach that takes advantage of deeply sequenced clades to identify evolutionary selection by uncovering not only signatures of rate-based conservation but also substitution patterns characteristic of sequence undergoing natural selection. We describe a new statistical method for modeling biased nucleotide substitutions, a learning algorithm for inferring site-specific substitution biases directly from sequence alignments and a hidden Markov model for detecting constrained elements characterized by biased substitutions. We show that the new approach can identify significantly more degenerate constrained sequences than rate-based methods. Applying it to the ENCODE regions, we find that as much as 10.2% of these regions are under selection.</p>
<p><b>Availability:</b> The algorithms are implemented in a Java software package, called SiPhy, freely available at <inter-ref locator="http://www.broadinstitute.org/science/software/" locator-type="url">http://www.broadinstitute.org/science/software/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="xhx@ics.uci.edu" locator-type="email">xhx@ics.uci.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp190/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Garber, M., Guttman, M., Clamp, M., Zody, M. C., Friedman, N., Xie, X.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp190</dc:identifier>
<dc:title><![CDATA[Identifying novel constrained elements by exploiting biased substitution patterns]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i62</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i54</prism:startingPage>
<prism:section>COMPARATIVE GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i63?rss=1">
<title><![CDATA[From disease ontology to disease-ontology lite: statistical methods to adapt a general-purpose ontology for the test of gene-ontology associations]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i63?rss=1</link>
<description><![CDATA[
<p>Subjective methods have been reported to adapt a general-purpose ontology for a specific application. For example, Gene Ontology (GO) Slim was created from GO to generate a highly aggregated report of the human-genome annotation. We propose statistical methods to adapt the general purpose, OBO Foundry Disease Ontology (DO) for the identification of gene-disease associations. Thus, we need a simplified definition of disease categories derived from implicated genes. On the basis of the assumption that the DO terms having similar associated genes are closely related, we group the DO terms based on the similarity of gene-to-DO mapping profiles. Two types of binary distance metrics are defined to measure the overall and subset similarity between DO terms. A compactness-scalable fuzzy clustering method is then applied to group similar DO terms. To reduce false clustering, the semantic similarities between DO terms are also used to constrain clustering results. As such, the DO terms are aggregated and the redundant DO terms are largely removed. Using these methods, we constructed a simplified vocabulary list from the DO called Disease Ontology Lite (DOLite). We demonstrated that DOLite results in more interpretable results than DO for gene-disease association tests. The resultant DOLite has been used in the Functional Disease Ontology (FunDO) Web application at <inter-ref locator="http://www.projects.bioinformatics.northwestern.edu/fundo" locator-type="url">http://www.projects.bioinformatics.northwestern.edu/fundo</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="s-lin2@northwestern.edu" locator-type="email">s-lin2@northwestern.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Du, P., Feng, G., Flatow, J., Song, J., Holko, M., Kibbe, W. A., Lin, S. M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp193</dc:identifier>
<dc:title><![CDATA[From disease ontology to disease-ontology lite: statistical methods to adapt a general-purpose ontology for the test of gene-ontology associations]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i68</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i63</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i69?rss=1">
<title><![CDATA[Alignment of the UMLS semantic network with BioTop: methodology and assessment]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i69?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> For many years, the Unified Medical Language System (UMLS) semantic network (SN) has been used as an upper-level semantic framework for the categorization of terms from terminological resources in biomedicine. BioTop has recently been developed as an upper-level ontology for the biomedical domain. In contrast to the SN, it is founded upon strict ontological principles, using OWL DL as a formal representation language, which has become standard in the semantic Web. In order to make logic-based reasoning available for the resources annotated or categorized with the SN, a mapping ontology was developed aligning the SN with BioTop.</p>
<p><b>Methods:</b> The theoretical foundations and the practical realization of the alignment are being described, with a focus on the design decisions taken, the problems encountered and the adaptations of BioTop that became necessary. For evaluation purposes, UMLS concept pairs obtained from MEDLINE abstracts by a named entity recognition system were tested for possible semantic relationships. Furthermore, all semantic-type combinations that occur in the UMLS Metathesaurus were checked for satisfiability.</p>
<p><b>Results:</b> The effort-intensive alignment process required major design changes and enhancements of BioTop and brought up several design errors that could be fixed. A comparison between a human curator and the ontology yielded only a low agreement. Ontology reasoning was also used to successfully identify 133 inconsistent semantic-type combinations.</p>
<p><b>Availability:</b> BioTop, the OWL DL representation of the UMLS SN, and the mapping ontology are available at <inter-ref locator="http://www.purl.org/biotop/" locator-type="url">http://www.purl.org/biotop/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="stschulz@uni-freiburg.de" locator-type="email">stschulz@uni-freiburg.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Schulz, S., Beisswanger, E., van den Hoek, L., Bodenreider, O., van Mulligen, E. M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp194</dc:identifier>
<dc:title><![CDATA[Alignment of the UMLS semantic network with BioTop: methodology and assessment]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i76</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i69</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i77?rss=1">
<title><![CDATA[Ontology quality assurance through analysis of term transformations]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i77?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> It is important for the quality of biological ontologies that similar concepts be expressed consistently, or <I>univocally</I>. Univocality is relevant for the usability of the ontology for humans, as well as for computational tools that rely on regularity in the structure of terms. However, in practice terms are not always expressed consistently, and we must develop methods for identifying terms that are not univocal so that they can be corrected.</p>
<p><b>Results:</b> We developed an automated transformation-based clustering methodology for detecting terms that use different linguistic conventions for expressing similar semantics. These term sets represent occurrences of univocality violations. Our method was able to identify 67 examples of univocality violations in the Gene Ontology.</p>
<p><b>Availability:</b> The identified univocality violations are available upon request. We are preparing a release of an open source version of the software to be available at <inter-ref locator="http://bionlp.sourceforge.net" locator-type="url">http://bionlp.sourceforge.net</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="karin.verspoor@ucdenver.edu" locator-type="email">karin.verspoor@ucdenver.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Verspoor, K., Dvorkin, D., Cohen, K. B., Hunter, L.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp195</dc:identifier>
<dc:title><![CDATA[Ontology quality assurance through analysis of term transformations]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i84</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i77</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i85?rss=1">
<title><![CDATA[Computing galled networks from real data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i85?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Developing methods for computing phylogenetic networks from biological data is an important problem posed by molecular evolution and much work is currently being undertaken in this area. Although promising approaches exist, there are no tools available that biologists could easily and routinely use to compute rooted phylogenetic networks on real datasets containing tens or hundreds of taxa. Biologists are interested in clades, i.e. groups of monophyletic taxa, and these are usually represented by clusters in a rooted phylogenetic tree. The problem of computing an optimal rooted phylogenetic network from a set of clusters, is hard, in general. Indeed, even the problem of just determining whether a given network contains a given cluster is hard. Hence, some researchers have focused on topologically restricted classes of networks, such as galled trees and level-<I>k</I> networks, that are more tractable, but have the practical draw-back that a given set of clusters will usually not possess such a representation.</p>
<p><b>Results:</b> In this article, we argue that galled networks (a generalization of galled trees) provide a good trade-off between level of generality and tractability. Any set of clusters can be represented by some galled network and the question whether a cluster is contained in such a network is easy to solve. Although the computation of an optimal galled network involves successively solving instances of two different NP-complete problems, in practice our algorithm solves this problem exactly on large datasets containing hundreds of taxa and many reticulations in seconds, as illustrated by a dataset containing 279 prokaryotes.</p>
<p><b>Availability:</b> We provide a fast, robust and easy-to-use implementation of this work in version 2.0 of our tree-handling software Dendroscope, freely available from <inter-ref locator="http://www.dendroscope.org" locator-type="url">http://www.dendroscope.org</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="huson@informatik.uni-tuebingen.de" locator-type="email">huson@informatik.uni-tuebingen.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Huson, D. H., Rupp, R., Berry, V., Gambette, P., Paul, C.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp217</dc:identifier>
<dc:title><![CDATA[Computing galled networks from real data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i93</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i85</prism:startingPage>
<prism:section>EVOLUTION AND PHYLOGENY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i94?rss=1">
<title><![CDATA[Genotype-phenotype associations: substitution models to detect evolutionary associations between phenotypic variables and genotypic evolutionary rate]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i94?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Mapping between genotype and phenotype is one of the primary goals of evolutionary genetics but one that has received little attention at the interspecies level. Recent developments in phylogenetics and statistical modelling have typically been used to examine molecular and phenotypic evolution separately. We have used this background to develop phylogenetic substitution models to test for associations between evolutionary rate of genotype and phenotype. We do this by creating hybrid rate matrices between genotype and phenotype.</p>
<p><b>Results:</b> Simulation results show our models to be accurate in detecting genotype&ndash;phenotype associations and robust for various factors that typically affect maximum likelihood methods, such as number of taxa, level of relevant signal, proportion of sites affected and length of evolutionary divergence. Further, simulations show that our method is robust to homogeneity assumptions. We apply the models to datasets of male reproductive system genes in relation to mating systems of primates. We show that evolution of semenogelin II is significantly associated with mating systems whereas two negative control genes (cytochrome b and peptidase inhibitor 3) show no significant association. This provides the first hybrid substitution model of which we are aware to directly test the association between genotype and phenotype using a phylogenetic framework.</p>
<p><b>Availability:</b> Perl and HYPHY scripts are available upon request from the authors.</p>
<p><b>Contact:</b> <inter-ref locator="to252@cam.ac.uk" locator-type="email">to252@cam.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp231/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[O'Connor, T. D., Mundy, N. I.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp231</dc:identifier>
<dc:title><![CDATA[Genotype-phenotype associations: substitution models to detect evolutionary associations between phenotypic variables and genotypic evolutionary rate]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i100</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i94</prism:startingPage>
<prism:section>EVOLUTION AND PHYLOGENY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i101?rss=1">
<title><![CDATA[Modeling stochasticity and robustness in gene regulatory networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i101?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Understanding gene regulation in biological processes and modeling the robustness of underlying regulatory networks is an important problem that is currently being addressed by computational systems biologists. Lately, there has been a renewed interest in Boolean modeling techniques for gene regulatory networks (GRNs). However, due to their deterministic nature, it is often difficult to identify whether these modeling approaches are robust to the addition of stochastic noise that is widespread in gene regulatory processes. Stochasticity in Boolean models of GRNs has been addressed relatively sparingly in the past, mainly by flipping the expression of genes between different expression levels with a predefined probability. This stochasticity in nodes (SIN) model leads to over representation of noise in GRNs and hence non-correspondence with biological observations.</p>
<p><b>Results:</b> In this article, we introduce the stochasticity in functions (SIF) model for simulating stochasticity in Boolean models of GRNs. By providing biological motivation behind the use of the SIF model and applying it to the T-helper and T-cell activation networks, we show that the SIF model provides more biologically robust results than the existing SIN model of stochasticity in GRNs.</p>
<p><b>Availability:</b> Algorithms are made available under our Boolean modeling toolbox, <I>GenYsis</I>. The software binaries can be downloaded from <inter-ref locator="http://si2.epfl.ch/~garg/genysis.html" locator-type="url">http://si2.epfl.ch/~garg/genysis.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="abhishek.garg@epfl.ch" locator-type="email">abhishek.garg@epfl.ch</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Garg, A., Mohanram, K., Di Cara, A., De Micheli, G., Xenarios, I.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp214</dc:identifier>
<dc:title><![CDATA[Modeling stochasticity and robustness in gene regulatory networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i109</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i101</prism:startingPage>
<prism:section>GENE REGULATION AND TRANSCRIPTOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i110?rss=1">
<title><![CDATA[Grouped graphical Granger modeling for gene expression regulatory networks discovery]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i110?rss=1</link>
<description><![CDATA[
<p>We consider the problem of discovering gene regulatory networks from time-series microarray data. Recently, graphical Granger modeling has gained considerable attention as a promising direction for addressing this problem. These methods apply graphical modeling methods on time-series data and invoke the notion of &lsquo;Granger causality&rsquo; to make assertions on causality through inference on time-lagged effects. Existing algorithms, however, have neglected an important aspect of the problem&mdash;the group structure among the lagged temporal variables naturally imposed by the time series they belong to. Specifically, existing methods in computational biology share this shortcoming, as well as additional computational limitations, prohibiting their effective applications to the large datasets including a large number of genes and many data points. In the present article, we propose a novel methodology which we term &lsquo;grouped graphical Granger modeling method&rsquo;, which overcomes the limitations mentioned above by applying a regression method suited for high-dimensional and large data, and by leveraging the group structure among the lagged temporal variables according to the time series they belong to. We demonstrate the effectiveness of the proposed methodology on both simulated and actual gene expression data, specifically the human cancer cell (HeLa S3) cycle data. The simulation results show that the proposed methodology generally exhibits higher accuracy in recovering the underlying causal structure. Those on the gene expression data demonstrate that it leads to improved accuracy with respect to prediction of known links, and also uncovers additional causal relationships uncaptured by earlier works.</p>
<p><b>Contact:</b> <inter-ref locator="aclozano@us.ibm.com" locator-type="email">aclozano@us.ibm.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Lozano, A. C., Abe, N., Liu, Y., Rosset, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp199</dc:identifier>
<dc:title><![CDATA[Grouped graphical Granger modeling for gene expression regulatory networks discovery]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i118</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i110</prism:startingPage>
<prism:section>GENE REGULATION AND TRANSCRIPTOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i119?rss=1">
<title><![CDATA[Clustered alignments of gene-expression time series data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i119?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Characterizing and comparing temporal gene-expression responses is an important computational task for answering a variety of questions in biological studies. Algorithms for aligning time series represent a valuable approach for such analyses. However, previous approaches to aligning gene-expression time series have assumed that all genes should share the same alignment. Our work is motivated by the need for methods that identify sets of genes that differ in similar ways between two time series, even when their expression profiles are quite different.</p>
<p><b>Results:</b> We present a novel algorithm that calculates <I>clustered alignments</I>; the method finds clusters of genes such that the genes within a cluster share a common alignment, but each cluster is aligned independently of the others. We also present an efficient new segment-based alignment algorithm for time series called SCOW (shorting correlation-optimized warping). We evaluate our methods by assessing the accuracy of alignments computed with sparse time series from a toxicogenomics dataset. The results of our evaluation indicate that our clustered alignment approach and SCOW provide more accurate alignments than previous approaches. Additionally, we apply our clustered alignment approach to characterize the effects of a conditional Mop3 knockout in mouse liver.</p>
<p><b>Availability:</b> Source code is available at <inter-ref locator="http://www.biostat.wisc.edu/~aasmith/catcode" locator-type="url">http://www.biostat.wisc.edu/~aasmith/catcode</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="aasmith@cs.wisc.edu" locator-type="email">aasmith@cs.wisc.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Smith, A. A., Vollrath, A., Bradfield, C. A., Craven, M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp206</dc:identifier>
<dc:title><![CDATA[Clustered alignments of gene-expression time series data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i127</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i119</prism:startingPage>
<prism:section>GENE REGULATION AND TRANSCRIPTOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i128?rss=1">
<title><![CDATA[KELLER: estimating time-varying interactions between genes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i128?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Gene regulatory networks underlying temporal processes, such as the cell cycle or the life cycle of an organism, can exhibit significant topological changes to facilitate the underlying dynamic regulatory functions. Thus, it is essential to develop methods that capture the temporal evolution of the regulatory networks. These methods will be an enabling first step for studying the driving forces underlying the dynamic gene regulation circuitry and predicting the future network structures in response to internal and external stimuli.</p>
<p><b>Results:</b> We introduce a kernel-reweighted logistic regression method (KELLER) for reverse engineering the dynamic interactions between genes based on their time series of expression values. We apply the proposed method to estimate the latent sequence of temporal rewiring networks of 588 genes involved in the developmental process during the life cycle of <I>Drosophila melanogaster</I>. Our results offer the first glimpse into the temporal evolution of gene networks in a living organism during its full developmental course. Our results also show that many genes exhibit distinctive functions at different stages along the developmental cycle.</p>
<p><b>Availability:</b> Source codes and relevant data will be made available at <inter-ref locator="http://www.sailing.cs.cmu.edu/keller" locator-type="url">http://www.sailing.cs.cmu.edu/keller</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="epxing@cs.cmu.edu" locator-type="email">epxing@cs.cmu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Song, L., Kolar, M., Xing, E. P.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp192</dc:identifier>
<dc:title><![CDATA[KELLER: estimating time-varying interactions between genes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i136</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i128</prism:startingPage>
<prism:section>GENE REGULATION AND TRANSCRIPTOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i137?rss=1">
<title><![CDATA[Predicting functionality of protein-DNA interactions by integrating diverse evidence]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i137?rss=1</link>
<description><![CDATA[
<p>Chromatin immunoprecipitation (ChIP-chip) experiments enable capturing physical interactions between regulatory proteins and DNA <I>in vivo</I>. However, measurement of chromatin binding alone is not sufficient to detect regulatory interactions. A detected binding event may not be biologically relevant, or a known regulatory interaction might not be observed under the growth conditions tested so far. To correctly identify physical interactions between transcription factors (TFs) and genes and to determine their regulatory implications under various experimental conditions, we integrated ChIP-chip data with motif binding sites, nucleosome occupancy and mRNA expression datasets within a probabilistic framework. This framework was specifically tailored for the identification of functional and non-functional DNA binding events. Using this, we estimate that only 50% of condition-specific protein&ndash;DNA binding in budding yeast is functional. We further investigated the molecular factors determining the functionality of protein&ndash;DNA interactions under diverse growth conditions. Our analysis suggests that the functionality of binding is highly condition-specific and highly dependent on the presence of specific cofactors. Hence, the joint analysis of both, functional and non-functional DNA binding, may lend important new insights into transcriptional regulation.</p>
<p><b>Contact:</b> <inter-ref locator="workman@cbs.dtu.dk" locator-type="email">workman@cbs.dtu.dk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ucar, D., Beyer, A., Parthasarathy, S., Workman, C. T.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp213</dc:identifier>
<dc:title><![CDATA[Predicting functionality of protein-DNA interactions by integrating diverse evidence]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i144</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i137</prism:startingPage>
<prism:section>GENE REGULATION AND TRANSCRIPTOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i145?rss=1">
<title><![CDATA[Probabilistic retrieval and visualization of biologically relevant microarray experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i145?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> As ArrayExpress and other repositories of genome-wide experiments are reaching a mature size, it is becoming more meaningful to search for related experiments, given a particular study. We introduce methods that allow for the search to be based upon measurement data, instead of the more customary annotation data. The goal is to retrieve experiments in which the same biological processes are activated. This can be due either to experiments targeting the same biological question, or to as yet unknown relationships.</p>
<p><b>Results:</b> We use a combination of existing and new probabilistic machine learning techniques to extract information about the biological processes differentially activated in each experiment, to retrieve earlier experiments where the same processes are activated and to visualize and interpret the retrieval results. Case studies on a subset of ArrayExpress show that, with a sufficient amount of data, our method indeed finds experiments relevant to particular biological questions. Results can be interpreted in terms of biological processes using the visualization techniques.</p>
<p><b>Availability:</b> The code is available from <inter-ref locator="http://www.cis.hut.fi/projects/mi/software/ismb09" locator-type="url">http://www.cis.hut.fi/projects/mi/software/ismb09</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="jose.caldas@tkk.fi" locator-type="email">jose.caldas@tkk.fi</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Caldas, J., Gehlenborg, N., Faisal, A., Brazma, A., Kaski, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp215</dc:identifier>
<dc:title><![CDATA[Probabilistic retrieval and visualization of biologically relevant microarray experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i153</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i145</prism:startingPage>
<prism:section>OTHER BIOINFORMATICS APPLICATIONS AND METHODS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i154?rss=1">
<title><![CDATA[Proteome coverage prediction with infinite Markov models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i154?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Liquid chromatography tandem mass spectrometry (LC-MS/MS) is the predominant method to comprehensively characterize complex protein mixtures such as samples from prefractionated or complete proteomes. In order to maximize proteome coverage for the studied sample, i.e. identify as many traceable proteins as possible, LC-MS/MS experiments are typically repeated extensively and the results combined. Proteome coverage prediction is the task of estimating the number of peptide discoveries of future LC-MS/MS experiments. Proteome coverage prediction is important to enhance the design of efficient proteomics studies. To date, there does not exist any method to reliably estimate the increase of proteome coverage at an early stage.</p>
<p><b>Results:</b> We propose an extended infinite Markov model DiriSim to extrapolate the progression of proteome coverage based on a small number of already performed LC-MS/MS experiments. The method explicitly accounts for the uncertainty of peptide identifications. We tested DiriSim on a set of 37 LC-MS/MS experiments of a complete proteome sample and demonstrated that DiriSim correctly predicts the coverage progression already from a small subset of experiments. The predicted progression enabled us to specify maximal coverage for the test sample. We demonstrated that quality requirements on the final proteome map impose an upper bound on the number of useful experiment repetitions and limit the achievable proteome coverage.</p>
<p><b>Contact:</b> <inter-ref locator="manfredc@inf.ethz.ch" locator-type="email">manfredc@inf.ethz.ch</inter-ref>; <inter-ref locator="jbuhmann@inf.ethz.ch" locator-type="email">jbuhmann@inf.ethz.ch</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Claassen, M., Aebersold, R., Buhmann, J. M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp233</dc:identifier>
<dc:title><![CDATA[Proteome coverage prediction with infinite Markov models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i160</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i154</prism:startingPage>
<prism:section>OTHER BIOINFORMATICS APPLICATIONS AND METHODS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i161?rss=1">
<title><![CDATA[Fewer permutations, more accurate P-values]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i161?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Permutation tests have become a standard tool to assess the statistical significance of an event under investigation. The statistical significance, as expressed in a <I>P</I>-value, is calculated as the fraction of permutation values that are at least as extreme as the original statistic, which was derived from non-permuted data. This empirical method directly couples both the minimal obtainable <I>P</I>-value and the resolution of the <I>P</I>-value to the number of permutations. Thereby, it imposes upon itself the need for a very large number of permutations when small <I>P</I>-values are to be accurately estimated. This is computationally expensive and often infeasible.</p>
<p><b>Results:</b> A method of computing <I>P</I>-values based on tail approximation is presented. The tail of the distribution of permutation values is approximated by a generalized Pareto distribution. A good fit and thus accurate <I>P</I>-value estimates can be obtained with a drastically reduced number of permutations when compared with the standard empirical way of computing <I>P</I>-values.</p>
<p><b>Availability:</b> The Matlab code can be obtained from the corresponding author on request.</p>
<p><b>Contact:</b> <inter-ref locator="tknijnenburg@systemsbiology.org" locator-type="email">tknijnenburg@systemsbiology.org</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp211/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Knijnenburg, T. A., Wessels, L. F. A., Reinders, M. J. T., Shmulevich, I.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp211</dc:identifier>
<dc:title><![CDATA[Fewer permutations, more accurate P-values]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i168</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i161</prism:startingPage>
<prism:section>OTHER BIOINFORMATICS APPLICATIONS AND METHODS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i169?rss=1">
<title><![CDATA[A general computational method for robustness analysis with applications to synthetic gene networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i169?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Robustness is the capacity of a system to maintain a function in the face of perturbations. It is essential for the correct functioning of natural and engineered biological systems. Robustness is generally defined in an <I>ad hoc</I>, problem-dependent manner, thus hampering the fruitful development of a theory of biological robustness, recently advocated by Kitano.</p>
<p><b>Results:</b> In this article, we propose a general definition of robustness that applies to any biological function expressible in temporal logic LTL (linear temporal logic), and to broad model classes and perturbation types. Moreover, we propose a computational approach and an implementation in BIOCHAM 2.8 for the automated estimation of the robustness of a given behavior with respect to a given set of perturbations. The applicability and biological relevance of our approach is demonstrated by testing and improving the robustness of the timed behavior of a synthetic transcriptional cascade that could be used as a biological timer for synthetic biology applications.</p>
<p><b>Availability:</b> Version 2.8 of BIOCHAM and the transcriptional cascade model are available at <inter-ref locator="http://contraintes.inria.fr/BIOCHAM/" locator-type="url">http://contraintes.inria.fr/BIOCHAM/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="gregory.batt@inria.fr" locator-type="email">gregory.batt@inria.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Rizk, A., Batt, G., Fages, F., Soliman, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp200</dc:identifier>
<dc:title><![CDATA[A general computational method for robustness analysis with applications to synthetic gene networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i178</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i169</prism:startingPage>
<prism:section>OTHER BIOINFORMATICS APPLICATIONS AND METHODS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i179?rss=1">
<title><![CDATA[E-zyme: predicting potential EC numbers from the chemical transformation pattern of substrate-product pairs]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i179?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The IUBMB's Enzyme Nomenclature system, commonly known as the Enzyme Commission (EC) numbers, plays key roles in classifying enzymatic reactions and in linking the enzyme genes or proteins to reactions in metabolic pathways. There are numerous reactions known to be present in various pathways but without any official EC numbers, most of which have no hope to be given ones because of the lack of the published articles on enzyme assays.</p>
<p><b>Results:</b> In this article we propose a new method to predict the potential EC numbers to given reactant pairs (substrates and products) or uncharacterized reactions, and a web-server named E-zyme as an application. This technology is based on our original biochemical transformation pattern which we call an &lsquo;RDM pattern&rsquo;, and consists of three steps: (i) graph alignment of a query reactant pair (substrates and products) for computing the query RDM pattern, (ii) multi-layered partial template matching by comparing the query RDM pattern with template patterns related with known EC numbers and (iii) weighted major voting scheme for selecting appropriate EC numbers. As a result, cross-validation experiments show that the proposed method achieves both high coverage and high prediction accuracy at a practical level, and consistently outperforms the previous method.</p>
<p><b>Availability:</b> The E-zyme system is available at <inter-ref locator="http://www.genome.jp/tools/e-zyme/" locator-type="url">http://www.genome.jp/tools/e-zyme/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="kanehisa@kuicr.kyoto-u.ac.jp" locator-type="email">kanehisa@kuicr.kyoto-u.ac.jp</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yamanishi, Y., Hattori, M., Kotera, M., Goto, S., Kanehisa, M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp223</dc:identifier>
<dc:title><![CDATA[E-zyme: predicting potential EC numbers from the chemical transformation pattern of substrate-product pairs]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i186</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i179</prism:startingPage>
<prism:section>OTHER BIOINFORMATICS APPLICATIONS AND METHODS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i187?rss=1">
<title><![CDATA[Multi-locus match probability in a finite population: a fundamental difference between the Moran and Wright-Fisher models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i187?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> A fundamental problem in population genetics, which is also of importance to forensic science, is to compute the match probability (MP) that two individuals randomly chosen from a population have identical alleles at a collection of loci. At present, 11&ndash;13 unlinked autosomal microsatellite loci are typed for forensic use. In a finite population, the genealogical relationships of individuals can create statistical non-independence of alleles at unlinked loci. However, the so-called product rule, which is used in courts in the USA, computes the MP for multiple unlinked loci by assuming statistical independence, multiplying the one-locus MPs at those loci. Analytically testing the accuracy of the product rule for more than five loci has hitherto remained an open problem.</p>
<p><b>Results:</b> In this article, we adopt a flexible graphical framework to compute multi-locus MPs analytically. We consider two standard models of random mating, namely the Wright&ndash;Fisher (WF) and Moran models. We succeed in computing haplotypic MPs for up to 10 loci in the WF model, and up to 13 loci in the Moran model. For a finite population and a large number of loci, we show that the MPs predicted by the product rule are highly sensitive to mutation rates in the range of interest, while the true MPs computed using our graphical framework are not. Furthermore, we show that the WF and Moran models may produce drastically different MPs for a finite population, and that this difference grows with the number of loci and mutation rates. Although the two models converge to the same coalescent or diffusion limit, in which the population size approaches infinity, we demonstrate that, when multiple loci are considered, the rate of convergence in the Moran model is significantly slower than that in the WF model.</p>
<p><b>Availability:</b> A C++ implementation of the algorithms discussed in this article is available at <inter-ref locator="http://www.cs.berkeley.edu/~yss/software.html" locator-type="url">http://www.cs.berkeley.edu/~yss/software.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="yss@eecs.berkeley.edu" locator-type="email">yss@eecs.berkeley.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bhaskar, A., Song, Y. S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp227</dc:identifier>
<dc:title><![CDATA[Multi-locus match probability in a finite population: a fundamental difference between the Moran and Wright-Fisher models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i195</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i187</prism:startingPage>
<prism:section>POPULATION GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i196?rss=1">
<title><![CDATA[Speeding up HMM algorithms for genetic linkage analysis via chain reductions of the state space]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i196?rss=1</link>
<description><![CDATA[
<p>We develop a hidden Markov model (HMM)-based algorithm for computing exact parametric and non-parametric linkage scores in larger pedigrees than was possible before. The algorithm is applicable whenever there are chains of persons in the pedigree with no genetic measurements and with unknown affection status. The algorithm is based on shrinking the state space of the HMM considerably using such chains. In a two g-degree cousins pedigree the reduction drops the state space from being exponential in g to being linear in g. For a Finnish family in which two affected children suffer from a rare cold-inducing sweating syndrome, we were able to reduce the state space by more than five orders of magnitude from 2<sup>50</sup> to 2<sup>32</sup>. In another pedigree of state-space size of 2<sup>27</sup>, used for a study of pituitary adenoma, the state space reduced by a factor of 8.5 and consequently exact linkage scores can now be computed, rather than approximated.</p>
<p><b>Contact:</b> <inter-ref locator="dang@cs.technion.ac.il" locator-type="email">dang@cs.technion.ac.il</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp224/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Geiger, D., Meek, C., Wexler, Y.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp224</dc:identifier>
<dc:title><![CDATA[Speeding up HMM algorithms for genetic linkage analysis via chain reductions of the state space]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i203</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i196</prism:startingPage>
<prism:section>POPULATION GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i204?rss=1">
<title><![CDATA[A multivariate regression approach to association analysis of a quantitative trait network]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i204?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many complex disease syndromes such as asthma consist of a large number of highly related, rather than independent, clinical phenotypes, raising a new technical challenge in identifying genetic variations associated simultaneously with correlated traits. Although a causal genetic variation may influence a group of highly correlated traits jointly, most of the previous association analyses considered each phenotype separately, or combined results from a set of single-phenotype analyses.</p>
<p><b>Results:</b> We propose a new statistical framework called graph-guided fused lasso to address this issue in a principled way. Our approach represents the dependency structure among the quantitative traits explicitly as a network, and leverages this trait network to encode structured regularizations in a multivariate regression model over the genotypes and traits, so that the genetic markers that jointly influence subgroups of highly correlated traits can be detected with high sensitivity and specificity. While most of the traditional methods examined each phenotype independently, our approach analyzes all of the traits jointly in a single statistical method to discover the genetic markers that perturb a subset of correlated traits jointly rather than a single trait. Using simulated datasets based on the HapMap consortium data and an asthma dataset, we compare the performance of our method with the single-marker analysis, and other sparse regression methods that do not use any structural information in the traits. Our results show that there is a significant advantage in detecting the true causal single nucleotide polymorphisms when we incorporate the correlation pattern in traits using our proposed methods.</p>
<p><b>Availability:</b> Software for GFlasso is available at <inter-ref locator="http://www.sailing.cs.cmu.edu/gflasso.html" locator-type="url">http://www.sailing.cs.cmu.edu/gflasso.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="sssykim@cs.cmu.edu" locator-type="email">sssykim@cs.cmu.edu</inter-ref>; <inter-ref locator="ksohn@cs.cmu.edu" locator-type="email">ksohn@cs.cmu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kim, S., Sohn, K.-A., Xing, E. P.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp218</dc:identifier>
<dc:title><![CDATA[A multivariate regression approach to association analysis of a quantitative trait network]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i212</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i204</prism:startingPage>
<prism:section>POPULATION GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i213?rss=1">
<title><![CDATA[Inference of locus-specific ancestry in closely related populations]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i213?rss=1</link>
<description><![CDATA[
<p>A characterization of the genetic variation of recently admixed populations may reveal historical population events, and is useful for the detection of single nucleotide polymorphisms (SNPs) associated with diseases through association studies and admixture mapping. Inference of locus-specific ancestry is key to our understanding of the genetic variation of such populations. While a number of methods for the inference of locus-specific ancestry are accurate when the ancestral populations are quite distant (e.g. African&ndash;Americans), current methods incur a large error rate when inferring the locus-specific ancestry in admixed populations where the ancestral populations are closely related (e.g. Americans of European descent).</p>
<p><b>Results:</b> In this work, we extend previous methods for the inference of locus-specific ancestry by the incorporation of a refined model of recombination events. We present an efficient dynamic programming algorithm to infer the locus-specific ancestries in this model, resulting in a method that attains improved accuracies; the improvement is most significant when the ancestral populations are closely related. An evaluation on a wide range of scenarios, including admixtures of the 52 population groups from the Human Genome Diversity Project demonstrates that locus-specific ancestry can indeed be accurately inferred in these admixtures using our method. Finally, we demonstrate that imputation methods can be improved by the incorporation of locus-specific ancestry, when applied to admixed populations.</p>
<p><b>Availability:</b> The implementation of the WINPOP model is available as part of the LAMP package at <inter-ref locator="http://lamp.icsi.berkeley.edu/lamp" locator-type="url">http://lamp.icsi.berkeley.edu/lamp</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="heran@icsi.berkeley.edu" locator-type="email">heran@icsi.berkeley.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Pasaniuc, B., Sankararaman, S., Kimmel, G., Halperin, E.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp197</dc:identifier>
<dc:title><![CDATA[Inference of locus-specific ancestry in closely related populations]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i221</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i213</prism:startingPage>
<prism:section>POPULATION GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i222?rss=1">
<title><![CDATA[A geometric approach for classification and comparison of structural variants]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i222?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Structural variants, including duplications, insertions, deletions and inversions of large blocks of DNA sequence, are an important contributor to human genome variation. Measuring structural variants in a genome sequence is typically more challenging than measuring single nucleotide changes. Current approaches for structural variant identification, including paired-end DNA sequencing/mapping and array comparative genomic hybridization (aCGH), do not identify the boundaries of variants precisely. Consequently, most reported human structural variants are poorly defined and not readily compared across different studies and measurement techniques.</p>
<p><b>Results:</b> We introduce Geometric Analysis of Structural Variants (GASV), a geometric approach for identification, classification and comparison of structural variants. This approach represents the uncertainty in measurement of a structural variant as a polygon in the plane, and identifies measurements supporting the same variant by computing intersections of polygons. We derive a computational geometry algorithm to efficiently identify all such intersections. We apply GASV to sequencing data from nine individual human genomes and several cancer genomes. We obtain better localization of the boundaries of structural variants, distinguish genetic from putative somatic structural variants in cancer genomes, and integrate aCGH and paired-end sequencing measurements of structural variants. This work presents the first general framework for comparing structural variants across multiple samples and measurement techniques, and will be useful for studies of both genetic structural variants and somatic rearrangements in cancer.</p>
<p><b>Availability:</b> <inter-ref locator="http://cs.brown.edu/people/braphael/software.html" locator-type="url">http://cs.brown.edu/people/braphael/software.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="braphael@brown.edu" locator-type="email">braphael@brown.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Sindi, S., Helman, E., Bashir, A., Raphael, B. J.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp208</dc:identifier>
<dc:title><![CDATA[A geometric approach for classification and comparison of structural variants]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i230</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i222</prism:startingPage>
<prism:section>POPULATION GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i231?rss=1">
<title><![CDATA[Joint estimation of gene conversion rates and mean conversion tract lengths from population SNP data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i231?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Two known types of meiotic recombination are crossovers and gene conversions. Although they leave behind different footprints in the genome, it is a challenging task to tease apart their relative contributions to the observed genetic variation. In particular, for a given population SNP dataset, the joint estimation of the crossover rate, the gene conversion rate and the mean conversion tract length is widely viewed as a very difficult problem.</p>
<p><b>Results:</b> In this article, we devise a likelihood-based method using an interleaved hidden Markov model (HMM) that can jointly estimate the aforementioned three parameters fundamental to recombination. Our method significantly improves upon a recently proposed method based on a factorial HMM. We show that modeling overlapping gene conversions is crucial for improving the joint estimation of the gene conversion rate and the mean conversion tract length. We test the performance of our method on simulated data. We then apply our method to analyze real biological data from the telomere of the <I>X</I> chromosome of <I>Drosophila melanogaster</I>, and show that the ratio of the gene conversion rate to the crossover rate for the region may not be nearly as high as previously claimed.</p>
<p><b>Availability:</b> A software implementation of the algorithms discussed in this article is available at <inter-ref locator="http://www.cs.berkeley.edu/~yss/software.html" locator-type="url">http://www.cs.berkeley.edu/~yss/software.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="yss@eecs.berkeley.edu" locator-type="email">yss@eecs.berkeley.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yin, J., Jordan, M. I., Song, Y. S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp229</dc:identifier>
<dc:title><![CDATA[Joint estimation of gene conversion rates and mean conversion tract lengths from population SNP data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i239</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i231</prism:startingPage>
<prism:section>POPULATION GENOMICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i240?rss=1">
<title><![CDATA[Domain-oriented edge-based alignment of protein interaction networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i240?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Recent advances in high-throughput experimental techniques have yielded a large amount of data on protein&ndash;protein interactions (PPIs). Since these interactions can be organized into networks, and since separate PPI networks can be constructed for different species, a natural research direction is the comparative analysis of such networks across species in order to detect conserved functional modules. This is the task of network alignment.</p>
<p><b>Results:</b> Most conventional network alignment algorithms adopt a <I>node-then-edge-alignment</I> paradigm: they first identify homologous proteins across networks and then consider interactions among them to construct network alignments. In this study, we propose an alternative <I>direct-edge-alignment</I> paradigm. Specifically, instead of explicit identification of homologous proteins, we directly infer plausibly alignable PPIs across species by comparing conservation of their constituent domain interactions. We apply our approach to detect conserved protein complexes in yeast&ndash;fly and yeast&ndash;worm PPI networks, and show that our approach outperforms two recent approaches in most alignment performance metrics.</p>
<p><b>Availability:</b> Supplementary material and source code can be found at <inter-ref locator="http://www.cs.duke.edu/~amink/" locator-type="url">http://www.cs.duke.edu/~amink/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="xinguo@cs.duke.edu" locator-type="email">xinguo@cs.duke.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Guo, X., Hartemink, A. J.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp202</dc:identifier>
<dc:title><![CDATA[Domain-oriented edge-based alignment of protein interaction networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i246</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i240</prism:startingPage>
<prism:section>PROTEIN INTERACTIONS AND MOLECULAR NETWORKS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i247?rss=1">
<title><![CDATA[Network-based prediction of metabolic enzymes' subcellular localization]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i247?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Revealing the subcellular localization of proteins within membrane-bound compartments is of a major importance for inferring protein function. Though current high-throughput localization experiments provide valuable data, they are costly and time-consuming, and due to technical difficulties not readily applicable for many Eukaryotes. Physical characteristics of proteins, such as sequence targeting signals and amino acid composition are commonly used to predict subcellular localizations using computational approaches. Recently it was shown that protein&ndash;protein interaction (PPI) networks can be used to significantly improve the prediction accuracy of protein subcellular localization. However, as high-throughput PPI data depend on costly high-throughput experiments and are currently available for only a few organisms, the scope of such methods is yet limited.</p>
<p><b>Results:</b> This study presents a novel constraint-based method for predicting subcellular localization of enzymes based on their embedding metabolic network, relying on a parsimony principle of a minimal number of cross-membrane metabolite transporters. In a cross-validation test of predicting known subcellular localization of yeast enzymes, the method is shown to be markedly robust, providing accurate localization predictions even when only 20% of the known enzyme localizations are given as input. It is shown to outperform pathway enrichment-based methods both in terms of prediction accuracy and in its ability to predict the subcellular localization of entire metabolic pathways when no a-priori pathway-specific localization data is available (and hence enrichment methods are bound to fail). With the number of available metabolic networks already reaching more than 600 and growing fast, the new method may significantly contribute to the identification of enzyme localizations in many different organisms.</p>
<p><b>Contact:</b> <inter-ref locator="shira.mintz@weizmann.ac.il" locator-type="email">shira.mintz@weizmann.ac.il</inter-ref>; <inter-ref locator="tomersh@cs.technion.ac.il" locator-type="email">tomersh@cs.technion.ac.il</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Mintz-Oron, S., Aharoni, A., Ruppin, E., Shlomi, T.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp209</dc:identifier>
<dc:title><![CDATA[Network-based prediction of metabolic enzymes' subcellular localization]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i252</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i247</prism:startingPage>
<prism:section>PROTEIN INTERACTIONS AND MOLECULAR NETWORKS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i253?rss=1">
<title><![CDATA[IsoRankN: spectral methods for global alignment of multiple protein networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i253?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> With the increasing availability of large protein&ndash;protein interaction networks, the question of protein network alignment is becoming central to systems biology. Network alignment is further delineated into two sub-problems: local alignment, to find small conserved motifs across networks, and global alignment, which attempts to find a best mapping between all nodes of the two networks. In this article, our aim is to improve upon existing global alignment results. Better network alignment will enable, among other things, more accurate identification of functional orthologs across species.</p>
<p><b>Results:</b> We introduce IsoRankN (IsoRank-Nibble) a global multiple-network alignment tool based on spectral clustering on the induced graph of pairwise alignment scores. IsoRankN outperforms existing algorithms for global network alignment in coverage and consistency on multiple alignments of the five available eukaryotic networks. Being based on spectral methods, IsoRankN is both error tolerant and computationally efficient.</p>
<p><b>Availability:</b> Our software is available freely for non-commercial purposes on request from: <inter-ref locator="http://isorank.csail.mit.edu/" locator-type="url">http://isorank.csail.mit.edu/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="bab@mit.edu" locator-type="email">bab@mit.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Liao, C.-S., Lu, K., Baym, M., Singh, R., Berger, B.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp203</dc:identifier>
<dc:title><![CDATA[IsoRankN: spectral methods for global alignment of multiple protein networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i258</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i253</prism:startingPage>
<prism:section>PROTEIN INTERACTIONS AND MOLECULAR NETWORKS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i259?rss=1">
<title><![CDATA[Global alignment of protein-protein interaction networks by graph matching methods]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i259?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Aligning protein&ndash;protein interaction (PPI) networks of different species has drawn a considerable interest recently. This problem is important to investigate evolutionary conserved pathways or protein complexes across species, and to help in the identification of functional orthologs through the detection of conserved interactions. It is, however, a difficult combinatorial problem, for which only heuristic methods have been proposed so far.</p>
<p><b>Results:</b> We reformulate the PPI alignment as a graph matching problem, and investigate how state-of-the-art graph matching algorithms can be used for that purpose. We differentiate between two alignment problems, depending on whether strict constraints on protein matches are given, based on sequence similarity, or whether the goal is instead to find an optimal compromise between sequence similarity and interaction conservation in the alignment. We propose new methods for both cases, and assess their performance on the alignment of the yeast and fly PPI networks. The new methods consistently outperform state-of-the-art algorithms, retrieving in particular 78% more conserved interactions than IsoRank for a given level of sequence similarity.</p>
<p><b>Availability:</b> All data and codes are freely and publicly available upon request.</p>
<p><b>Contact:</b> <inter-ref locator="jean-philippe.vert@mines-paristech.fr" locator-type="email">jean-philippe.vert@mines-paristech.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Zaslavskiy, M., Bach, F., Vert, J.-P.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp196</dc:identifier>
<dc:title><![CDATA[Global alignment of protein-protein interaction networks by graph matching methods]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i267</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i259</prism:startingPage>
<prism:section>PROTEIN INTERACTIONS AND MOLECULAR NETWORKS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i268?rss=1">
<title><![CDATA[PICKY: a novel SVD-based NMR spectra peak picking method]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i268?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Picking peaks from experimental NMR spectra is a key unsolved problem for automated NMR protein structure determination. Such a process is a prerequisite for resonance assignment, nuclear overhauser enhancement (NOE) distance restraint assignment, and structure calculation tasks. Manual or semi-automatic peak picking, which is currently the prominent way used in NMR labs, is tedious, time consuming and costly.</p>
<p><b>Results:</b> We introduce new ideas, including noise-level estimation, component forming and sub-division, singular value decomposition (SVD)-based peak picking and peak pruning and refinement. PICKY is developed as an automated peak picking method. Different from the previous research on peak picking, we provide a systematic study of the proposed method. PICKY is tested on 32 real 2D and 3D spectra of eight target proteins, and achieves an average of 88% <I>recall</I> and 74% <I>precision</I>. PICKY is efficient. It takes PICKY on average 15.7 s to process an NMR spectrum. More important than these numbers, PICKY actually works in practice. We feed peak lists generated by PICKY to IPASS for resonance assignment, feed IPASS assignment to SPARTA for fragments generation, and feed SPARTA fragments to FALCON for structure calculation. This results in high-resolution structures of several proteins, for example, TM1112, at 1.25 &Aring;.</p>
<p><b>Availability:</b> PICKY is available upon request. The peak lists of PICKY can be easily loaded by SPARKY to enable a better interactive strategy for rapid peak picking.</p>
<p><b>Contact:</b> <inter-ref locator="mli@uwaterloo.ca" locator-type="email">mli@uwaterloo.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Alipanahi, B., Gao, X., Karakoc, E., Donaldson, L., Li, M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp225</dc:identifier>
<dc:title><![CDATA[PICKY: a novel SVD-based NMR spectra peak picking method]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i275</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i268</prism:startingPage>
<prism:section>PROTEIN STRUCTURE AND FUNCTION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i276?rss=1">
<title><![CDATA[A framework to refine particle clusters produced by EMAN]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i276?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> EMAN is one of the most popular software packages for single particle reconstruction. But the particle clusters produced during its model refining stage are of low qualities. We attempt to refine the particle clusters by more accurately determining orientations of particles, and thereby achieving higher resolutions of consequent 3D structures.</p>
<p><b>Results:</b> A particle reclustering framework (PRF) is introduced, which consists of three components. Each of them is responsible for one of the basic tasks of PRF: normalization, threshold determination and reclustering. Our implementation is also described and proved to meet the constraints proposed by PRF. Experiments revealed that our implementation improved resolutions of consequent structures for most cases, but only a little extra execution time was incurred. Therefore, it is practical to incorporate PRF in EMAN to improve qualities of generated 3D structures.</p>
<p><b>Availability and Implementation:</b> Implementation of our algorithm is available upon request from the authors.</p>
<p><b>Contact:</b> <inter-ref locator="fanliya@ict.ac.cn" locator-type="email">fanliya@ict.ac.cn</inter-ref>; <inter-ref locator="zf@ncic.ac.cn" locator-type="email">zf@ncic.ac.cn</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Fan, L., Zhang, F., Wang, G., Liu, Z.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp219</dc:identifier>
<dc:title><![CDATA[A framework to refine particle clusters produced by EMAN]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i280</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i276</prism:startingPage>
<prism:section>PROTEIN STRUCTURE AND FUNCTION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i281?rss=1">
<title><![CDATA[Pokefind: a novel topological filter for use with protein structure prediction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i281?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Our focus has been on detecting topological properties that are rare in real proteins, but occur more frequently in models generated by protein structure prediction methods such as Rosetta. We previously created the Knotfind algorithm, successfully decreasing the frequency of knotted Rosetta models during CASP6. We observed an additional class of knot-like loops that appeared to be equally un-protein-like and yet do not contain a mathematical knot. These topological features are commonly referred to as slip-knots and are caused by the same mechanisms that result in knotted models. Slip-knots are undetectable by the original Knotfind algorithm. We have generalized our algorithm to detect them, and analyzed CASP6 models built using the Rosetta loop modeling method.</p>
<p><b>Results:</b> After analyzing known protein structures in the PDB, we found that slip-knots do occur in certain proteins, but are rare and fall into a small number of specific classes. Our group used this new Pokefind algorithm to distinguish between these rare real slip-knots and the numerous classes of slip-knots that we discovered in Rosetta models and models submitted by the various CASP7 servers. The goal of this work is to improve future models created by protein structure prediction methods. Both algorithms are able to detect un-protein-like features that current metrics such as GDT are unable to identify, so these topological filters can also be used as additional assessment tools.</p>
<p><b>Contact:</b> <inter-ref locator="firas@u.washington.edu" locator-type="email">firas@u.washington.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Khatib, F., Rohl, C. A., Karplus, K.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp198</dc:identifier>
<dc:title><![CDATA[Pokefind: a novel topological filter for use with protein structure prediction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i288</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i281</prism:startingPage>
<prism:section>PROTEIN STRUCTURE AND FUNCTION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i289?rss=1">
<title><![CDATA[REPETITA: detection and discrimination of the periodicity of protein solenoid repeats by discrete Fourier transform]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i289?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Proteins with solenoid repeats evolve more quickly than non-repetitive ones and their periodicity may be rapidly hidden at sequence level, while still evident in structure. In order to identify these repeats, we propose here a novel method based on a metric characterizing amino-acid properties (polarity, secondary structure, molecular volume, codon diversity, electric charge) using five previously derived numerical functions.</p>
<p><b>Results:</b> The five spectra of the candidate sequences coding for structural repeats, obtained by Discrete Fourier Transform (DFT), show common features allowing determination of repeat periodicity with excellent results. Moreover it is possible to introduce a phase space parameterized by two quantities related to the Fourier spectra which allow for a clear distinction between a non-homologous set of globular proteins and proteins with solenoid repeats. The DFT method is shown to be competitive with other state of the art methods in the detection of solenoid structures, while improving its performance especially in the identification of periodicities, since it is able to recognize the actual repeat length in most cases. Moreover it highlights the relevance of local structural propensities in determining solenoid repeats.</p>
<p><b>Availability:</b> A web tool implementing the algorithm presented in the article (REPETITA) is available with additional details on the data sets at the URL: <inter-ref locator="http://protein.bio.unipd.it/repetita/" locator-type="url">http://protein.bio.unipd.it/repetita/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="silvio.tosatto@unipd.it" locator-type="email">silvio.tosatto@unipd.it</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Marsella, L., Sirocco, F., Trovato, A., Seno, F., Tosatto, S. C.E.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp232</dc:identifier>
<dc:title><![CDATA[REPETITA: detection and discrimination of the periodicity of protein solenoid repeats by discrete Fourier transform]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i295</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i289</prism:startingPage>
<prism:section>PROTEIN STRUCTURE AND FUNCTION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i296?rss=1">
<title><![CDATA[Prediction of sub-cavity binding preferences using an adaptive physicochemical structure representation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i296?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The ability to predict binding profiles for an arbitrary protein can significantly improve the areas of drug discovery, lead optimization and protein function prediction. At present, there are no successful algorithms capable of predicting binding profiles for novel proteins. Existing methods typically rely on manually curated templates or entire active site comparison. Consequently, they perform best when analyzing proteins sharing significant structural similarity with known proteins (i.e. proteins resulting from divergent evolution). These methods fall short when used to characterize the binding profile of a novel active site or one for which a template is not available. In contrast to previous approaches, our method characterizes the binding preferences of <I>sub-cavities</I> within the active site by exploiting a large set of known protein&ndash;ligand complexes. The uniqueness of our approach lies not only in the consideration of sub-cavities, but also in the more complete structural representation of these sub-cavities, their parametrization and the method by which they are compared. By only requiring local structural similarity, we are able to leverage previously unused structural information and perform binding inference for proteins that do not share significant structural similarity with known systems.</p>
<p><b>Results:</b> Our algorithm demonstrates the ability to accurately cluster similar sub-cavities and to predict binding patterns across a diverse set of protein&ndash;ligand complexes. When applied to two high-profile drug targets, our algorithm successfully generates a binding profile that is consistent with known inhibitors. The results suggest that our algorithm should be useful in structure-based drug discovery and lead optimization.</p>
<p><b>Contact:</b> <inter-ref locator="izharw@cs.toronto.edu" locator-type="email">izharw@cs.toronto.edu</inter-ref>; <inter-ref locator="lilien@cs.toronto.edu" locator-type="email">lilien@cs.toronto.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Wallach, I., Lilien, R. H.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp204</dc:identifier>
<dc:title><![CDATA[Prediction of sub-cavity binding preferences using an adaptive physicochemical structure representation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i304</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i296</prism:startingPage>
<prism:section>PROTEIN STRUCTURE AND FUNCTION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i305?rss=1">
<title><![CDATA[A unified statistical model to support local sequence order independent similarity searching for ligand-binding sites and its application to genome-based drug discovery]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i305?rss=1</link>
<description><![CDATA[
<p>Functional relationships between proteins that do not share global structure similarity can be established by detecting their ligand-binding-site similarity. For a large-scale comparison, it is critical to accurately and efficiently assess the statistical significance of this similarity. Here, we report an efficient statistical model that supports local sequence order independent ligand&ndash;binding-site similarity searching. Most existing statistical models only take into account the matching vertices between two sites that are defined by a fixed number of points. In reality, the boundary of the binding site is not known or is dependent on the bound ligand making these approaches limited. To address these shortcomings and to perform binding-site mapping on a genome-wide scale, we developed a sequence-order independent profile&ndash;profile alignment (SOIPPA) algorithm that is able to detect local similarity between unknown binding sites a priori. The SOIPPA scoring integrates geometric, evolutionary and physical information into a unified framework. However, this imposes a significant challenge in assessing the statistical significance of the similarity because the conventional probability model that is based on fixed-point matching cannot be applied. Here we find that scores for binding-site matching by SOIPPA follow an extreme value distribution (EVD). Benchmark studies show that the EVD model performs at least two-orders faster and is more accurate than the non-parametric statistical method in the previous SOIPPA version. Efficient statistical analysis makes it possible to apply SOIPPA to genome-based drug discovery. Consequently, we have applied the approach to the structural genome of <I>Mycobacterium tuberculosis</I> to construct a protein&ndash;ligand interaction network. The network reveals highly connected proteins, which represent suitable targets for promiscuous drugs.</p>
<p><b>Contact:</b> <inter-ref locator="lxie@sdsc.edu" locator-type="email">lxie@sdsc.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Xie, L., Xie, L., Bourne, P. E.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp220</dc:identifier>
<dc:title><![CDATA[A unified statistical model to support local sequence order independent similarity searching for ligand-binding sites and its application to genome-based drug discovery]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i312</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i305</prism:startingPage>
<prism:section>PROTEIN STRUCTURE AND FUNCTION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i313?rss=1">
<title><![CDATA[Toward a gold standard for promoter prediction evaluation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i313?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Promoter prediction is an important task in genome annotation projects, and during the past years many new promoter prediction programs (PPPs) have emerged. However, many of these programs are compared inadequately to other programs. In most cases, only a small portion of the genome is used to evaluate the program, which is not a realistic setting for whole genome annotation projects. In addition, a common evaluation design to properly compare PPPs is still lacking.</p>
<p><b>Results:</b> We present a large-scale benchmarking study of 17 state-of-the-art PPPs. A multi-faceted evaluation strategy is proposed that can be used as a gold standard for promoter prediction evaluation, allowing authors of promoter prediction software to compare their method to existing methods in a proper way. This evaluation strategy is subsequently used to compare the chosen promoter predictors, and an in-depth analysis on predictive performance, promoter class specificity, overlap between predictors and positional bias of the predictions is conducted.</p>
<p><b>Availability:</b> We provide the implementations of the four protocols, as well as the datasets required to perform the benchmarks to the academic community free of charge on request.</p>
<p><b>Contact:</b> <inter-ref locator="yves.vandepeer@psb.ugent.be" locator-type="email">yves.vandepeer@psb.ugent.be</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp191/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Abeel, T., Van de Peer, Y., Saeys, Y.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp191</dc:identifier>
<dc:title><![CDATA[Toward a gold standard for promoter prediction evaluation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i320</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i313</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i321?rss=1">
<title><![CDATA[DISCOVER: a feature-based discriminative method for motif search in complex genomes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i321?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Identifying transcription factor binding sites (TFBSs) encoding complex regulatory signals in metazoan genomes remains a challenging problem in computational genomics. Due to degeneracy of nucleotide content among binding site instances or motifs, and intricate &lsquo;grammatical organization&rsquo; of motifs within <I>cis</I>-regulatory modules (CRMs), extant pattern matching-based <I>in silico</I> motif search methods often suffer from impractically high false positive rates, especially in the context of analyzing large genomic datasets, and noisy position weight matrices which characterize binding sites. Here, we try to address this problem by using a framework to maximally utilize the information content of the genomic DNA in the region of query, taking cues from values of various biologically meaningful genetic and epigenetic factors in the query region such as clade-specific evolutionary parameters, presence/absence of nearby coding regions, etc. We present a new method for TFBS prediction in metazoan genomes that utilizes both the CRM architecture of sequences and a variety of features of individual motifs. Our proposed approach is based on a discriminative probabilistic model known as conditional random fields that explicitly optimizes the predictive probability of motif presence in large sequences, based on the joint effect of all such features.</p>
<p><b>Results:</b> This model overcomes weaknesses in earlier methods based on less effective statistical formalisms that are sensitive to spurious signals in the data. We evaluate our method on both simulated CRMs and real <I>Drosophila</I> sequences in comparison with a wide spectrum of existing models, and outperform the state of the art by 22% in F1 score.</p>
<p><b>Availability and Implementation:</b> The code is publicly available at <inter-ref locator="http://www.sailing.cs.cmu.edu/discover.html" locator-type="url">http://www.sailing.cs.cmu.edu/discover.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="epxing@cs.cmu.edu" locator-type="email">epxing@cs.cmu.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp230/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Fu, W., Ray, P., Xing, E. P.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp230</dc:identifier>
<dc:title><![CDATA[DISCOVER: a feature-based discriminative method for motif search in complex genomes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i329</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i321</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i330?rss=1">
<title><![CDATA[Predictions of RNA secondary structure by combining homologous sequence information]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i330?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Secondary structure prediction of RNA sequences is an important problem. There have been progresses in this area, but the accuracy of prediction from an RNA sequence is still limited. In many cases, however, homologous RNA sequences are available with the target RNA sequence whose secondary structure is to be predicted.</p>
<p><b>Results:</b> In this article, we propose a new method for secondary structure predictions of individual RNA sequences by taking the information of their homologous sequences into account without assuming the common secondary structure of the entire sequences. The proposed method is based on posterior decoding techniques, which consider all the suboptimal secondary structures of the target and homologous sequences and all the suboptimal alignments between the target sequence and each of the homologous sequences. In our computational experiments, the proposed method provides better predictions than those performed only on the basis of the formation of individual RNA sequences and those performed by using methods for predicting the common secondary structure of the homologous sequences. Remarkably, we found that the common secondary predictions sometimes give worse predictions for the secondary structure of a target sequence than the predictions from the individual target sequence, while the proposed method always gives good predictions for the secondary structure of target sequences in all tested cases.</p>
<p><b>Availability:</b> Supporting information and software are available online at: <inter-ref locator="http://www.ncrna.org/software/centroidfold/ismb2009/" locator-type="url">http://www.ncrna.org/software/centroidfold/ismb2009/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="hamada-michiaki@aist.go.jp" locator-type="email">hamada-michiaki@aist.go.jp</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp228/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Hamada, M., Sato, K., Kiryu, H., Mituyama, T., Asai, K.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp228</dc:identifier>
<dc:title><![CDATA[Predictions of RNA secondary structure by combining homologous sequence information]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i338</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i330</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i339?rss=1">
<title><![CDATA[Assessing phylogenetic motif models for predicting transcription factor binding sites]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i339?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> A variety of algorithms have been developed to predict transcription factor binding sites (TFBSs) within the genome by exploiting the evolutionary information implicit in multiple alignments of the genomes of related species. One such approach uses an extension of the standard position-specific motif model that incorporates phylogenetic information via a phylogenetic tree and a model of evolution. However, these phylogenetic motif models (PMMs) have never been rigorously benchmarked in order to determine whether they lead to better prediction of TFBSs than obtained using simple position weight matrix scanning.</p>
<p><b>Results:</b> We evaluate three PMM-based prediction algorithms, each of which uses a different treatment of gapped alignments, and we compare their prediction accuracy with that of a non-phylogenetic motif scanning approach. Surprisingly, all of these algorithms appear to be inferior to simple motif scanning, when accuracy is measured using a gold standard of validated yeast TFBSs. However, the PMM scanners perform much better than simple motif scanning when we abandon the gold standard and consider the number of statistically significant sites predicted, using column-shuffled &lsquo;random&rsquo; motifs to measure significance. These results suggest that the common practice of measuring the accuracy of binding site predictors using collections of known sites may be dangerously misleading since such collections may be missing &lsquo;weak&rsquo; sites, which are exactly the type of sites needed to discriminate among predictors. We then extend our previous theoretical model of the statistical power of PMM-based prediction algorithms to allow for loss of binding sites during evolution, and show that it gives a more accurate upper bound on scanner accuracy. Finally, utilizing our theoretical model, we introduce a new method for predicting the number of real binding sites in a genome. The results suggest that the number of true sites for a yeast TF is in general several times greater than the number of known sites listed in the <I>Saccharomyces cerevisiae</I> Database (SCPD). Among the three scanning algorithms that we test, the MONKEY algorithm has the highest accuracy for predicting yeast TFBSs.</p>
<p><b>Contact:</b> <inter-ref locator="j.hawkins@imb.uq.edu.au" locator-type="email">j.hawkins@imb.uq.edu.au</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Hawkins, J., Grant, C., Noble, W. S., Bailey, T. L.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp201</dc:identifier>
<dc:title><![CDATA[Assessing phylogenetic motif models for predicting transcription factor binding sites]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i347</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i339</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i348?rss=1">
<title><![CDATA[Modeling interactions between adjacent nucleosomes improves genome-wide predictions of nucleosome occupancy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i348?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Understanding the mechanisms that govern nucleosome positioning over genomes <I>in vivo</I> is essential for unraveling the role of chromatin organization in transcriptional regulation. Until now, models for predicting genome-wide nucleosome occupancy have assumed that the DNA associations of neighboring nucleosomes on the genome are independent. We present a new model that relaxes this independence assumption by modeling interactions between adjacent nucleosomes.</p>
<p><b>Results:</b> We show that modeling interactions between adjacent nucleosomes improves genome-wide nucleosome occupancy predictions in an <I>in vitro</I> system that includes only nucleosomes and purified DNA, where the resulting model has a preference for short spacings (linkers) of less than 20 bp in length between neighboring nucleosomes. Since nucleosome occupancy <I>in vitro</I> depends only on properties intrinsic to nucleosomes, these results suggest that the interactions we find are intrinsic to nucleosomes and do not depend on other factors, such as transcription factors and chromatin remodelers. We also show that modeling these intrinsic interactions significantly improves genome-wide predictions of nucleosome occupancy <I>in vivo</I>.</p>
<p><b>Contact:</b> <inter-ref locator="eran.segal@weizmann.ac.il" locator-type="email">eran.segal@weizmann.ac.il</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp216/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Lubliner, S., Segal, E.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp216</dc:identifier>
<dc:title><![CDATA[Modeling interactions between adjacent nucleosomes improves genome-wide predictions of nucleosome occupancy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i355</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i348</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i356?rss=1">
<title><![CDATA[Efficient exact motif discovery]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i356?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The <I>motif discovery</I> problem consists of finding over-represented patterns in a collection of biosequences. It is one of the classical sequence analysis problems, but still has not been satisfactorily solved in an exact and efficient manner. This is partly due to the large number of possibilities of defining the motif search space and the notion of over-representation. Even for well-defined formalizations, the problem is frequently solved in an <I>ad hoc</I> manner with heuristics that do not guarantee to find the best motif.</p>
<p><b>Results:</b> We show how to solve the motif discovery problem (almost) exactly on a practically relevant space of IUPAC generalized string patterns, using the <I>p</I>-value with respect to an i.i.d. model or a Markov model as the measure of over-representation. In particular, (i) we use a highly accurate compound Poisson approximation for the null distribution of the number of motif occurrences. We show how to compute the exact clump size distribution using a recently introduced device called probabilistic arithmetic automaton (PAA). (ii) We define two <I>p</I>-value scores for over-representation, the first one based on the total number of motif occurrences, the second one based on the number of sequences in a collection with at least one occurrence. (iii) We describe an algorithm to discover the optimal pattern with respect to either of the scores. The method exploits monotonicity properties of the compound Poisson approximation and is by orders of magnitude faster than exhaustive enumeration of IUPAC strings (11.8 h compared with an extrapolated runtime of 4.8 years). (iv) We justify the use of the proposed scores for motif discovery by showing our method to outperform other motif discovery algorithms (e.g. MEME, Weeder) on benchmark datasets. We also propose new motifs on <I>Mycobacterium tuberculosis</I>.</p>
<p><b>Availability and Implementation:</b> The method has been implemented in Java. It can be obtained from <ty><inter-ref locator="http://ls11-www.cs.tu-dortmund.de/people/marschal/paa_md/" locator-type="url">http://ls11-www.cs.tu-dortmund.de/people/marschal/paa_md/</inter-ref></ty></p>
<p><b>Contact:</b> <inter-ref locator="tobias.marschall@tu-dortmund.de" locator-type="email">tobias.marschall@tu-dortmund.de</inter-ref>; <inter-ref locator="sven.rahmann@tu-dortmund.de" locator-type="email">sven.rahmann@tu-dortmund.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Marschall, T., Rahmann, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp188</dc:identifier>
<dc:title><![CDATA[Efficient exact motif discovery]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i364</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i356</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i365?rss=1">
<title><![CDATA[A partition function algorithm for interacting nucleic acid strands]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i365?rss=1</link>
<description><![CDATA[
<p>Recent interests, such as RNA interference and antisense RNA regulation, strongly motivate the problem of predicting whether two nucleic acid strands interact.</p>
<p><b>Motivation:</b> Regulatory non-coding RNAs (ncRNAs) such as microRNAs play an important role in gene regulation. Studies on both prokaryotic and eukaryotic cells show that such ncRNAs usually bind to their target mRNA to regulate the translation of corresponding genes. The specificity of these interactions depends on the stability of intermolecular and intramolecular base pairing. While methods like deep sequencing allow to discover an ever increasing set of ncRNAs, there are no high-throughput methods available to detect their associated targets. Hence, there is an increasing need for precise computational target prediction. In order to predict base-pairing probability of any two bases in interacting nucleic acids, it is necessary to compute the interaction partition function over the whole ensemble. The partition function is a scalar value from which various thermodynamic quantities can be derived. For example, the equilibrium concentration of each complex nucleic acid species and also the melting temperature of interacting nucleic acids can be calculated based on the partition function of the complex.</p>
<p><b>Results:</b> We present a model for analyzing the thermodynamics of two interacting nucleic acid strands considering the most general type of interactions studied in the literature. We also present a corresponding dynamic programming algorithm that computes the partition function over (almost) all physically possible joint secondary structures formed by two interacting nucleic acids in <I>O</I>(<I>n</I><sup>6</sup>) time. We verify the predictive power of our algorithm by computing (i) the melting temperature for interacting RNA pairs studied in the literature and (ii) the equilibrium concentration for several variants of the OxyS&ndash;fhlA complex. In both experiments, our algorithm shows high accuracy and outperforms competitors.</p>
<p><b>Availability:</b> Software and web server is available at <inter-ref locator="http://compbio.cs.sfu.ca/taverna/pirna/" locator-type="url">http://compbio.cs.sfu.ca/taverna/pirna/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="cenk@cs.sfu.ca" locator-type="email">cenk@cs.sfu.ca</inter-ref>; <inter-ref locator="backofen@informatik.uni-freiburg.de" locator-type="email">backofen@informatik.uni-freiburg.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp212/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chitsaz, H., Salari, R., Sahinalp, S. C., Backofen, R.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp212</dc:identifier>
<dc:title><![CDATA[A partition function algorithm for interacting nucleic acid strands]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i373</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i365</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i374?rss=1">
<title><![CDATA[Predicting and understanding the stability of G-quadruplexes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i374?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> G-quadruplexes are stable four-stranded guanine-rich structures that can form in DNA and RNA. They are an important component of human telomeres and play a role in the regulation of transcription and translation. The biological significance of a G-quadruplex is crucially linked with its thermodynamic stability. Hence the prediction of G-quadruplex stability is of vital interest.</p>
<p><b>Results:</b> In this article, we present a novel Bayesian prediction framework based on Gaussian process regression to determine the thermodynamic stability of previously unmeasured G-quadruplexes from the sequence information alone. We benchmark our approach on a large G-quadruplex dataset and compare our method to alternative approaches. Furthermore, we propose an active learning procedure which can be used to iteratively acquire data in an optimal fashion. Lastly, we demonstrate the usefulness of our procedure on a genome-wide study of quadruplexes in the human genome.</p>
<p><b>Availability:</b> A data table with the training sequences is available as <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp210/DC1" locator-type="url">supplementary material</inter-ref>. Source code is available online at <inter-ref locator="http://www.inference.phy.cam.ac.uk/os252/projects/quadruplexes" locator-type="url">http://www.inference.phy.cam.ac.uk/os252/projects/quadruplexes</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="os252@cam.ac.uk" locator-type="email">os252@cam.ac.uk</inter-ref>; <inter-ref locator="jlh29@cam.ac.uk" locator-type="email">jlh29@cam.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp210/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Stegle, O., Payet, L., Mergny, J.-L., MacKay, D. J. C., Huppert, J. L.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp210</dc:identifier>
<dc:title><![CDATA[Predicting and understanding the stability of G-quadruplexes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i382</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i374</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS AND ALIGNMENT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i383?rss=1">
<title><![CDATA[Author Index]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/i383?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp315</dc:identifier>
<dc:title><![CDATA[Author Index]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>i384</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>i383</prism:startingPage>
<prism:section>AUTHOR INDEX</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1475?rss=1">
<title><![CDATA[Cloud computing]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1475?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Bateman, A., Wood, M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp274</dc:identifier>
<dc:title><![CDATA[Cloud computing]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1475</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1475</prism:startingPage>
<prism:section>EDITORIAL</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1476?rss=1">
<title><![CDATA[Cross species analysis of microarray expression data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1476?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many biological systems operate in a similar manner across a large number of species or conditions. Cross-species analysis of sequence and interaction data is often applied to determine the function of new genes. In contrast to these static measurements, microarrays measure the dynamic, condition-specific response of complex biological systems. The recent exponential growth in microarray expression datasets allows researchers to combine expression experiments from multiple species to identify genes that are not only conserved in sequence but also operated in a similar way in the different species studied.</p>
<p><b>Results:</b> In this review we discuss the computational and technical challenges associated with these studies, the approaches that have been developed to address these challenges and the advantages of cross-species analysis of microarray data. We show how successful application of these methods leads to insights that cannot be obtained when analyzing data from a single species. We also highlight current open problems and discuss possible ways to address them.</p>
<p><b>Contact:</b> <inter-ref locator="zivbj@cs.cmu.edu" locator-type="email">zivbj@cs.cmu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Lu, Y., Huggins, P., Bar-Joseph, Z.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp247</dc:identifier>
<dc:title><![CDATA[Cross species analysis of microarray expression data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1483</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1476</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1484?rss=1">
<title><![CDATA[Gene regulation in the intraerythrocytic cycle of Plasmodium falciparum]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1484?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> To date, there is little knowledge about one of the processes fundamental to the biology of <I>Plasmodium falciparum</I>, gene regulation including transcriptional control. We use noisy threshold models to identify regulatory sequence elements explaining membership to a gene expression cluster where each cluster consists of genes active during the part of the developmental cycle inside a red blood cell. Our approach is both able to capture the combinatorial nature of gene regulation and to incorporate uncertainty about the functionality of putative regulatory sequence elements.</p>
<p><b>Results:</b> We find a characteristic pattern where the most common motifs tend to be absent upstream of genes active in the first half of the cycle and present upstream of genes active in the second half. We find no evidence that motif's score, orientation, location and multiplicity improves prediction of gene expression. Through comparative genome analysis, we find a list of potential transcription factors and their associated motifs.</p>
<p><b>Contact:</b> <inter-ref locator="r.jurgelenaite@cmbi.ru.nl" locator-type="email">r.jurgelenaite@cmbi.ru.nl</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp179/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Jurgelenaite, R., Dijkstra, T. M. H., Kocken, C. H. M., Heskes, T.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp179</dc:identifier>
<dc:title><![CDATA[Gene regulation in the intraerythrocytic cycle of Plasmodium falciparum]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1491</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1484</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1492?rss=1">
<title><![CDATA[Protein function annotation from sequence: prediction of residues interacting with RNA]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1492?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> All eukaryotic proteomes are characterized by a significant percentage of proteins of unknown function. Computational function prediction methods are therefore essential as initial steps in the function annotation process. This article describes an annotation method (PiRaNhA) for the prediction of RNA-binding residues (RBRs) from protein sequence information. A series of sequence properties (position specific scoring matrices, interface propensities, predicted accessibility and hydrophobicity) are used to train a support vector machine. This method is then evaluated for its potential to be applied to RNA-binding function prediction at the level of the complete protein.</p>
<p><b>Results:</b> The 5-fold cross-validation of PiRaNhA on a dataset of 81 RNA-binding proteins achieves a Matthews Correlation Coefficient (MCC) of 0.50 and accuracy of 87.2%. When used to predict RBRs in 42 proteins not used in training, PiRaNhA achieves an MCC of 0.41 and accuracy of 84.5%. Decision values from the PiRaNhA predictions were used in a second SVM to make predictions of RNA-binding function at the protein level, achieving an MCC of 0.53 and accuracy of 76.1%. The PiRaNhA RBR predictions allow experimentalists to perform more targeted experiments for function annotation; and the prediction of RNA-binding function at the protein level shows promise for proteome-wide annotations.</p>
<p><b>Availability and Implementation:</b> Freely available on the web at <inter-ref locator="http://www.bioinformatics.sussex.ac.uk/PIRANHA" locator-type="url">www.bioinformatics.sussex.ac.uk/PIRANHA</inter-ref> or <inter-ref locator="http://piranha.protein.osaka-u.ac.jp" locator-type="url">http://piranha.protein.osaka-u.ac.jp</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="s.jones@sussex.ac.uk" locator-type="email">s.jones@sussex.ac.uk</inter-ref>.</p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp257/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Spriggs, R. V., Murakami, Y., Nakamura, H., Jones, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp257</dc:identifier>
<dc:title><![CDATA[Protein function annotation from sequence: prediction of residues interacting with RNA]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1497</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1492</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1498?rss=1">
<title><![CDATA[A local multiple alignment method for detection of non-coding RNA sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1498?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Non-coding RNAs (ncRNAs) show a unique evolutionary process in which the substitutions of distant bases are correlated in order to conserve the secondary structure of the ncRNA molecule. Therefore, the multiple alignment method for the detection of ncRNAs should take into account both the primary sequence and the secondary structure. Recently, there has been intense focus on multiple alignment investigations for the detection of ncRNAs; however, most of the proposed methods are designed for global multiple alignments. For this reason, these methods are not appropriate to identify locally conserved ncRNAs among genomic sequences. A more efficient local multiple alignment method for the detection of ncRNAs is required.</p>
<p><b>Results:</b> We propose a new local multiple alignment method for the detection of ncRNAs. This method uses a local multiple alignment construction procedure inspired by ProDA, which is a local multiple aligner program for protein sequences with repeated and shuffled elements. To align sequences based on secondary structure information, we propose a new alignment model which incorporates secondary structure features. We define the conditional probability of an alignment via a conditional random field and use a &gamma;-centroid estimator to align sequences. The locally aligned subsequences are clustered into blocks of approximately globally alignable subsequences between pairwise alignments. Finally, these blocks are multiply aligned via MXSCARNA. In benchmark experiments, we demonstrate the high ability of the implemented software, SCARNA_LM, for local multiple alignment for the detection of ncRNAs.</p>
<p><b>Availability:</b> The C++ source code for SCARNA_LM and its experimental datasets are available at <inter-ref locator="http://www.ncrna.org/software/scarna_lm/download" locator-type="url">http://www.ncrna.org/software/scarna_lm/download</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="scarna@m.aist.go.jp" locator-type="email">scarna@m.aist.go.jp</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp261/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tabei, Y., Asai, K.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp261</dc:identifier>
<dc:title><![CDATA[A local multiple alignment method for detection of non-coding RNA sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1505</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1498</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1506?rss=1">
<title><![CDATA[ModLink+: improving fold recognition by using protein-protein interactions]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1506?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Several strategies have been developed to predict the fold of a target protein sequence, most of which are based on aligning the target sequence to other sequences of known structure. Previously, we demonstrated that the consideration of protein&ndash;protein interactions significantly increases the accuracy of fold assignment compared with PSI-BLAST sequence comparisons. A drawback of our method was the low number of proteins to which a fold could be assigned. Here, we present an improved version of the method that addresses this limitation. We also compare our method to other state-of-the-art fold assignment methodologies.</p>
<p><b>Results:</b> Our approach (ModLink+) has been tested on 3716 proteins with domain folds classified in the Structural Classification Of Proteins (SCOP) as well as known interacting partners in the Database of Interacting Proteins (DIP). For this test set, the ratio of success [positive predictive value (PPV)] on fold assignment increases from 75% for PSI-BLAST, 83% for HHSearch and 81% for PRC to &gt;90% for ModLink+ at the <I>e</I>-value cutoff of 10<sup>&ndash;3</sup>. Under this <I>e</I>-value, ModLink+ can assign a fold to 30&ndash;45% of the proteins in the test set, while our previous method could cover &lt;25%. When applied to 6384 proteins with unknown fold in the yeast proteome, ModLink+ combined with PSI-BLAST assigns a fold for domains in 3738 proteins, while PSI-BLAST alone covers only 2122 proteins, HHSearch 2969 and PRC 2826 proteins, using a threshold <I>e</I>-value that would represent a PPV &gt;82% for each method in the test set.</p>
<p><b>Availability:</b> The ModLink+ server is freely accessible in the World Wide Web at <inter-ref locator="http://sbi.imim.es/modlink/" locator-type="url">http://sbi.imim.es/modlink/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="boliva@imim.es" locator-type="email">boliva@imim.es</inter-ref>.</p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp238/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Fornes, O., Aragues, R., Espadaler, J., Marti-Renom, M. A., Sali, A., Oliva, B.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp238</dc:identifier>
<dc:title><![CDATA[ModLink+: improving fold recognition by using protein-protein interactions]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1512</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1506</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1513?rss=1">
<title><![CDATA[Identification of computational hot spots in protein interfaces: combining solvent accessibility and inter-residue potentials improves the accuracy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1513?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Hot spots are residues comprising only a small fraction of interfaces yet accounting for the majority of the binding energy. These residues are critical in understanding the principles of protein interactions. Experimental studies like alanine scanning mutagenesis require significant effort; therefore, there is a need for computational methods to predict hot spots in protein interfaces.</p>
<p><b>Results:</b> We present a new intuitive efficient method to determine computational hot spots based on conservation (C), solvent accessibility [accessible surface area (ASA)] and statistical pairwise residue potentials (PP) of the interface residues. Combination of these features is examined in a comprehensive way to study their effect in hot spot detection. The predicted hot spots are observed to match with the experimental hot spots with an accuracy of 70% and a precision of 64% in Alanine Scanning Energetics Database (ASEdb), and accuracy of 70% and a precision of 73% in Binding Interface Database (BID). Several machine learning methods are also applied to predict hot spots. Performance of our empirical approach exceeds learning-based methods and other existing hot spot prediction methods. Residue occlusion from solvent in the complexes and pairwise potentials are found to be the main discriminative features in hot spot prediction.</p>
<p><b>Conclusion:</b> Our empirical method is a simple approach in hot spot prediction yet with its high accuracy and computational effectiveness. We believe that this method provides insights for the researchers working on characterization of protein binding sites and design of specific therapeutic agents for protein interactions.</p>
<p><b>Availability:</b> The list of training and test sets are available as <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp240/DC1" locator-type="url">Supplementary Data</inter-ref> at <inter-ref locator="http://prism.ccbb.ku.edu.tr/hotpoint/supplement.doc" locator-type="url">http://prism.ccbb.ku.edu.tr/hotpoint/supplement.doc</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="agursoy@ku.edu.tr" locator-type="email">agursoy@ku.edu.tr</inter-ref>; <inter-ref locator="okeskin@ku.edu.tr" locator-type="email">okeskin@ku.edu.tr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp240/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tuncbag, N., Gursoy, A., Keskin, O.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp240</dc:identifier>
<dc:title><![CDATA[Identification of computational hot spots in protein interfaces: combining solvent accessibility and inter-residue potentials improves the accuracy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1520</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1513</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1521?rss=1">
<title><![CDATA[Enrichment constrained time-dependent clustering analysis for finding meaningful temporal transcription modules]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1521?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Clustering is a popular data exploration technique widely used in microarray data analysis. When dealing with time-series data, most conventional clustering algorithms, however, either use one-way clustering methods, which fail to consider the heterogeneity of temporary domain, or use two-way clustering methods that do not take into account the time dependency between samples, thus producing less informative results. Furthermore, enrichment analysis is often performed independent of and after clustering and such practice, though capable of revealing biological significant clusters, cannot guide the clustering to produce biologically significant result.</p>
<p><b>Result:</b> We present a new enrichment constrained framework (ECF) coupled with a time-dependent iterative signature algorithm (TDISA), which, by applying a sliding time window to incorporate the time dependency of samples and imposing an enrichment constraint to parameters of clustering, allows supervised identification of temporal transcription modules (TTMs) that are biologically meaningful. Rigorous mathematical definitions of TTM as well as the enrichment constraint framework are also provided that serve as objective functions for retrieving biologically significant modules. We applied the enrichment constrained time-dependent iterative signature algorithm (ECTDISA) to human gene expression time-series data of Kaposi's sarcoma-associated herpesvirus (KSHV) infection of human primary endothelial cells; the result not only confirms known biological facts, but also reveals new insight into the molecular mechanism of KSHV infection.</p>
<p><b>Availability:</b> Data and Matlab code are available at <inter-ref locator="http://engineering.utsa.edu/~yfhuang/ECTDISA.html" locator-type="url">http://engineering.utsa.edu/~yfhuang/ECTDISA.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="yufei.huang@utsa.edu" locator-type="email">yufei.huang@utsa.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp235/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Meng, J., Gao, S.-J., Huang, Y.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp235</dc:identifier>
<dc:title><![CDATA[Enrichment constrained time-dependent clustering analysis for finding meaningful temporal transcription modules]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1527</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1521</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1528?rss=1">
<title><![CDATA[A stochastic model for the evolution of metabolic networks with neighbor dependence]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1528?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Most current research in network evolution focuses on networks that follow a Duplication Attachment model where the network is only allowed to grow. The evolution of metabolic networks, however, is characterized by gain as well as loss of reactions. It would be desirable to have a biologically relevant model of network evolution that could be used to calculate the likelihood of homologous metabolic networks.</p>
<p><b>Results:</b> We describe metabolic network evolution as a discrete space continuous time Markov process and introduce a neighbor-dependent model for the evolution of metabolic networks where the rates with which reactions are added or removed depend on the fraction of neighboring reactions present in the network. We also present a Gibbs sampler for estimating the parameters of evolution without exploring the whole search space by iteratively sampling from the conditional distributions of the paths and parameters. A Metropolis&ndash;Hastings algorithm for sampling paths between two networks and calculating the likelihood of evolution is also presented. The sampler is used to estimate the parameters of evolution of metabolic networks in the genus <I>Pseudomonas</I>.</p>
<p><b>Availability:</b> An implementation of the Gibbs sampler in Java is available at <inter-ref locator="http://www.stats.ox.ac.uk/~mithani/networkGibbs/" locator-type="url">http://www.stats.ox.ac.uk/~mithani/networkGibbs/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="mithani@stats.ox.ac.uk" locator-type="email">mithani@stats.ox.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp262/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Mithani, A., Preston, G. M., Hein, J.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp262</dc:identifier>
<dc:title><![CDATA[A stochastic model for the evolution of metabolic networks with neighbor dependence]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1535</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1528</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1536?rss=1">
<title><![CDATA[Bayesian inference of protein-protein interactions from biological literature]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1536?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Protein&ndash;protein interaction (PPI) extraction from published biological articles has attracted much attention because of the importance of protein interactions in biological processes. Despite significant progress, mining PPIs from the literature still relies heavily on time- and resource-consuming manual annotations.</p>
<p><b>Results:</b> In this study, we developed a novel methodology based on Bayesian networks (BNs) for extracting PPI triplets (a PPI triplet consists of two protein names and the corresponding interaction word) from unstructured text. The method achieved an overall accuracy of 87% on a cross-validation test using manually annotated dataset. We also showed, through extracting PPI triplets from a large number of PubMed abstracts, that our method was able to complement human annotations to extract large number of new PPIs from literature.</p>
<p><b>Availability:</b> Programs/scripts we developed/used in the study are available at <inter-ref locator="http://stat.fsu.edu/~jinfeng/datasets/Bio-SI-programs-Bayesian-chowdhary-zhang-liu.zip" locator-type="url">http://stat.fsu.edu/~jinfeng/datasets/Bio-SI-programs-Bayesian-chowdhary-zhang-liu.zip</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="jliu@stat.harvard.edu" locator-type="email">jliu@stat.harvard.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp245/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chowdhary, R., Zhang, J., Liu, J. S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp245</dc:identifier>
<dc:title><![CDATA[Bayesian inference of protein-protein interactions from biological literature]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1542</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1536</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1543?rss=1">
<title><![CDATA[Application and evaluation of automated semantic annotation of gene expression experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1543?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many microarray datasets are available online with formalized standards describing the probe sequences and expression values. Unfortunately, the description, conditions and parameters of the experiments are less commonly formalized and often occur as natural language text. This hinders searching, high-throughput analysis, organization and integration of the datasets.</p>
<p><b>Results:</b> We use the lexical resources and software tools from the Unified Medical Language System (UMLS) to extract concepts from text. We then link the UMLS concepts to classes in open biomedical ontologies. The result is accessible and clear semantic annotations of gene expression experiments. We applied the method to 595 expression experiments from Gemma, a resource for re-use and meta-analysis of gene expression profiling data. We evaluated and corrected all stages of the annotation process. The majority of missed annotations were due to a lack of cross-references. The most error-prone stage was the extraction of concepts from phrases. Final review of the annotations in context of the experiments revealed 89% precision. A naive system, lacking the phrase to concept corrections is 68% precise. We have integrated this annotation pipeline into Gemma.</p>
<p><b>Availability:</b> The source code, documentation and <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp259/DC1" locator-type="url">Supplementary Materials</inter-ref> are available at <inter-ref locator="http://www.chibi.ubc.ca/GEOMMTX" locator-type="url">http://www.chibi.ubc.ca/GEOMMTX</inter-ref>. The results of the manual evaluations are provided as <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp259/DC1" locator-type="url">Supplementary Material</inter-ref>. Both manual and predicted annotations can be viewed and searched via the Gemma website at <inter-ref locator="http://www.chibi.ubc.ca/Gemma" locator-type="url">http://www.chibi.ubc.ca/Gemma</inter-ref>. The complete set of predicted annotations is available as a machine readable resource description framework graph.</p>
<p><b>Contact:</b> <inter-ref locator="paul@chibi.ubc.ca" locator-type="email">paul@chibi.ubc.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[French, L., Lane, S., Law, T., Xu, L., Pavlidis, P.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp259</dc:identifier>
<dc:title><![CDATA[Application and evaluation of automated semantic annotation of gene expression experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1549</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1543</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1550?rss=1">
<title><![CDATA[WebGBrowse--a web server for GBrowse]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1550?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The Generic Genome Browser (GBrowse) is one of the most widely used tools for visualizing genomic features along a reference sequence. However, the installation and configuration of GBrowse is not trivial for biologists. We have developed a web server, WebGBrowse that allows users to upload genome annotation in the GFF3 format, configure the display of each genomic feature by simply using a web browser and visualize the configured genomic features with the integrated GBrowse software.</p>
<p><b>Availability:</b> WebGBrowse is accessible via <inter-ref locator="http://webgbrowse.cgb.indiana.edu/" locator-type="url">http://webgbrowse.cgb.indiana.edu/</inter-ref> and the system is also freely available for local installations.</p>
<p><b>Contact:</b> <inter-ref locator="dongq@indiana.edu" locator-type="email">dongq@indiana.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Podicheti, R., Gollapudi, R., Dong, Q.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp239</dc:identifier>
<dc:title><![CDATA[WebGBrowse--a web server for GBrowse]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1551</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1550</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1552?rss=1">
<title><![CDATA[CROC: finding chromosomal clusters in eukaryotic genomes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1552?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> There is increasing evidence showing that co-expression of genes that cluster along the genome is a common characteristic of eukaryotic transcriptomes. Several algorithms have been used to date in the identification of these kinds of gene organization. Here, we present a web tool called CROC that aims to help in the identification and analysis of genomic gene clusters. This method has been successfully used before in the identification of chromosomal clusters in different eukaryotic species.</p>
<p><b>Availability:</b> The web server is freely available to non-commercial users at the following address: <inter-ref locator="http://metagenomics.uv.es/CROC/" locator-type="url">http://metagenomics.uv.es/CROC/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="miguel.pignatelli@uv.es" locator-type="email">miguel.pignatelli@uv.es</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Pignatelli, M., Serras, F., Moya, A., Guigo, R., Corominas, M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp248</dc:identifier>
<dc:title><![CDATA[CROC: finding chromosomal clusters in eukaryotic genomes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1553</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1552</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1554?rss=1">
<title><![CDATA[MapView: visualization of short reads alignment on a desktop computer]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1554?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We introduce a new visual analytics tool named MapView to facilitate the representation of large-scale short reads alignment data and genetic variation analysis. MapView can handle hundreds of millions of short reads on a desktop computer with limited memory. It supports a compact alignment view for both single-end and paired end short reads, multiple navigation and zoom modes and multi-thread processing. Moreover, MapView offers automated genetic variation detection. MapView has been used in our lab and by over 10 research labs worldwide.</p>
<p><b>Availability:</b> <inter-ref locator="http://evolution.sysu.edu.cn/mapview/" locator-type="url">http://evolution.sysu.edu.cn/mapview/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="baohua100@hotmail.com" locator-type="email">baohua100@hotmail.com</inter-ref>; <inter-ref locator="lssssh@mail.sysu.edu.cn" locator-type="email">lssssh@mail.sysu.edu.cn</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://evolution.sysu.edu.cn/mapview/MVF.pdf" locator-type="url">Supplementary data</inter-ref> are available at <inter-ref locator="http://evolution.sysu.edu.cn/mapview/MVF.pdf" locator-type="url">http://evolution.sysu.edu.cn/mapview/MVF.pdf</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bao, H., Guo, H., Wang, J., Zhou, R., Lu, X., Shi, S.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp255</dc:identifier>
<dc:title><![CDATA[MapView: visualization of short reads alignment on a desktop computer]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1555</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1554</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1556?rss=1">
<title><![CDATA[BEsTRF: a tool for optimal resolution of terminal-restriction fragment length polymorphism analysis based on user-defined primer-enzyme-sequence databases]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1556?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> BEsTRF (Best Estimated T-RF) provides a standalone environment for analyzing primers-enzymes-gene section combinations used in terminal-restriction fragment length polymorphism (T-RFLP) for its optimal resolution. User-defined sequence databases of several hundred thousand DNA sequences can be explored and the resolution of user-specified sets of primers and restriction endonucleases can be analyzed on either forward or reverse terminal fragments. Sequence quality, primer mismatches, insertions and deletions can be controlled and each primer pair-specific sequence collections can be exported for downstream analyses. The configuration for a novel T-RFLP population profiling using <I>rpoB</I> gene (DNA-directed RNA polymerase, beta subunit) on forward fluorescently labeled primer are presented.</p>
<p><b>Availability:</b> BEsTRF is freely available at <inter-ref locator="http://lie.fe.uni-lj.si/bestrf" locator-type="url">http://lie.fe.uni-lj.si/bestrf</inter-ref> and can be downloaded from the same site. The online protocol, numerous primer and enzyme dictionaries, sequence collections and results generated during this work for various genes are available at our website <inter-ref locator="http://lie.fe.uni-lj.si/bestrf" locator-type="url">http://lie.fe.uni-lj.si/bestrf</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="blaz.stres@bfro.uni-lj.si" locator-type="email">blaz.stres@bfro.uni-lj.si</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Stres, B., Tiedje, J. M., Murovec, B.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp254</dc:identifier>
<dc:title><![CDATA[BEsTRF: a tool for optimal resolution of terminal-restriction fragment length polymorphism analysis based on user-defined primer-enzyme-sequence databases]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1558</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1556</prism:startingPage>
<prism:section>PHYLOGENETICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1559?rss=1">
<title><![CDATA[BIPA: a database for protein-nucleic acid interaction in 3D structures]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1559?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> BIPA is a database for protein&ndash;nucleic acid interactions in 3D structures. The database provides various physicochemical features of protein&ndash;nucleic acid interface such as size, shape, residue propensity, secondary structure composition and intermolecular interactions. The database also contains multiple structural alignments of nucleic acid-binding protein families with annotations of local environments in order to allow definition of features that influence acceptability of mutations at a particular position in a protein family. A web interface has been designed to present the results of these analyses and facilitate navigation of protein&ndash;nucleic acid interfaces.</p>
<p><b>Availability:</b> <inter-ref locator="http://www-cryst.bioc.cam.ac.uk/bipa" locator-type="url">http://www-cryst.bioc.cam.ac.uk/bipa</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="semin@cryst.bioc.cam.ac.uk" locator-type="email">semin@cryst.bioc.cam.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp243/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Lee, S., Blundell, T. L.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp243</dc:identifier>
<dc:title><![CDATA[BIPA: a database for protein-nucleic acid interaction in 3D structures]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1560</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1559</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1561?rss=1">
<title><![CDATA[A graphical algorithm for fast computation of identity coefficients and generalized kinship coefficients]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1561?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Computing the probability of identity by descent sharing among <I>n</I> genes given only the pedigree of those genes is a computationally challenging problem, if <I>n</I> or the pedigree size is large. Here, I present a novel graphical algorithm for efficiently computing all generalized kinship coefficients for <I>n</I> genes. The graphical description transforms the problem from doing many recursions on the pedigree to doing a single traversal of a structure referred to as the kinship graph.</p>
<p><b>Availability:</b> The algorithm is implemented for <I>n</I> = 4 in the software package IdCoefs at <inter-ref locator="http://home.uchicago.edu/abney/Software.html" locator-type="url">http://home.uchicago.edu/abney/Software.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="abney@bsd.uchicago.edu" locator-type="email">abney@bsd.uchicago.edu</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp185/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Abney, M.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp185</dc:identifier>
<dc:title><![CDATA[A graphical algorithm for fast computation of identity coefficients and generalized kinship coefficients]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1563</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1561</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1564?rss=1">
<title><![CDATA[Fluctuation AnaLysis CalculatOR: a web tool for the determination of mutation rate using Luria-Delbruck fluctuation analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1564?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The program Fluctuation AnaLysis CalculatOR (FALCOR) is a web tool designed for use with Luria&ndash;Delbr&uuml;ck fluctuation analysis to calculate the frequency and rate from various mutation assays in bacteria and yeast. Three calculation methods are available through this program: (i) Ma-Sandri-Sarkar Maximum Likelihood Estimator (MSS-MLE) method, (ii) Lea-Coulson method of the median (LC) and (iii) frequency.</p>
<p><b>Availability:</b> The FALCOR rate calculator is currently accessible at <inter-ref locator="http://www.mitochondria.org/protocols/FALCOR.html" locator-type="url">http://www.mitochondria.org/protocols/FALCOR.html</inter-ref>. This program is written as a Java<SUP><SMALL><SMALL>TM</SMALL></SMALL></SUP> Applet, requiring a web browser enabled with Sun MicroSystems' Java Virtual Machine.</p>
<p><b>Contact:</b> <inter-ref locator="brandon.hall@roswellpark.org" locator-type="email">brandon.hall@roswellpark.org</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Hall, B. M., Ma, C.-X., Liang, P., Singh, K. K.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp253</dc:identifier>
<dc:title><![CDATA[Fluctuation AnaLysis CalculatOR: a web tool for the determination of mutation rate using Luria-Delbruck fluctuation analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1565</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1564</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1566?rss=1">
<title><![CDATA[ELISA-BASE: an integrated bioinformatics tool for analyzing and tracking ELISA microarray data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1566?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> ELISA-BASE is an open source database for capturing, organizing and analyzing enzyme-linked immunosorbent assay (ELISA) microarray data. ELISA-BASE is an extension of the BioArray Software Environment (BASE) database system.</p>
<p><b>Availability:</b> <inter-ref locator="http://www.pnl.gov/statistics/ProMAT/ELISA-BASE.stm" locator-type="url">http://www.pnl.gov/statistics/ProMAT/ELISA-BASE.stm</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="amanda.white@pnl.gov" locator-type="email">amanda.white@pnl.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[White, A. M., Collett, J. R., Seurynck-Servoss, S. L., Daly, D. S., Zangar, R. C.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp182</dc:identifier>
<dc:title><![CDATA[ELISA-BASE: an integrated bioinformatics tool for analyzing and tracking ELISA microarray data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1567</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1566</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1568?rss=1">
<title><![CDATA[ProtVirDB: a database of protozoan virulent proteins]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1568?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> ProtVirDB is a comprehensive and user-friendly web-based knowledgebase of virulent proteins belonging to protozoan species. The database will facilitate research and provide an integrated platform for comparative studies of virulent proteins in different parasitic protozoans and organize them under a unifying classification schema with functional categories. Remarkably, one-third of the protein sequences in the database showed presence of either mono- or hetero-repeats, or both concomitantly&mdash;hence reiterating the importance of repeats in parasite virulence mechanisms. A number of useful bioinformatics tools including BLAST and tools for phylogenetic analysis are integrated with the database. With the rapidly burgeoning interest in the pathogenesis mechanisms of protozoans and ongoing genome sequencing projects, we anticipate that the database will be a useful tool for the research community.</p>
<p><b>Availability:</b> <inter-ref locator="http://bioinfo.icgeb.res.in/protvirdb" locator-type="url">http://bioinfo.icgeb.res.in/protvirdb</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="dinesh@icgeb.res.in" locator-type="email">dinesh@icgeb.res.in</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp258/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Ramana, J., Gupta, D.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp258</dc:identifier>
<dc:title><![CDATA[ProtVirDB: a database of protozoan virulent proteins]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1569</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1568</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1570?rss=1">
<title><![CDATA[ISMB/ECCB 2009 Stockholm]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1570?rss=1</link>
<description><![CDATA[
<p>The International Society for Computational Biology (ISCB; <inter-ref locator="http://www.iscb.org" locator-type="url">http://www.iscb.org</inter-ref>) presents the Seventeenth Annual International Conference on Intelligent Systems for Molecular Biology (ISMB), organized jointly with the Eighth Annual European Conference on Computational Biology (ECCB; <inter-ref locator="http://bioinf.mpi-inf.mpg.de/conferences/eccb/eccb.htm" locator-type="url">http://bioinf.mpi-inf.mpg.de/conferences/eccb/eccb.htm</inter-ref>), in Stockholm, Sweden, 27 June to 2 July 2009. The organizers are putting the finishing touches on the year's premier computational biology conference, with an expected attendance of 1400 computer scientists, mathematicians, statisticians, biologists and scientists from other disciplines related to and reliant on this multi-disciplinary science. ISMB/ECCB 2009 (<inter-ref locator="http://www.iscb.org/ismbeccb2009/" locator-type="url">http://www.iscb.org/ismbeccb2009/</inter-ref>) follows the framework introduced at the ISMB/ECCB 2007 (<inter-ref locator="http://www.iscb.org/ismbeccb2007/" locator-type="url">http://www.iscb.org/ismbeccb2007/</inter-ref>) in Vienna, and further refined at the ISMB 2008 (<inter-ref locator="http://www.iscb.org/ismb2008/" locator-type="url">http://www.iscb.org/ismb2008/</inter-ref>) in Toronto; a framework developed to specifically encourage increased participation from often under-represented disciplines at conferences on computational biology. During the main ISMB conference dates of 29 June to 2 July, keynote talks from highly regarded scientists, including ISCB Award winners, are the featured presentations that bring all attendees together twice a day. 
The remainder of each day offers a carefully balanced selection of parallel sessions to choose from: proceedings papers, special sessions on emerging topics, highlights of the past year's published research, special interest group meetings, technology demonstrations, workshops and several unique sessions of value to the broad audience of students, faculty and industry researchers. Several hundred posters displayed for the duration of the conference has become a standard of the ISMB and ECCB conference series, and an extensive commercial exhibition showcases the latest bioinformatics publications, software, hardware and services available on the market today. The main conference is preceded by 2 days of Special Interest Group (SIG) and Satellite meetings running in parallel to the fifth Student Council Symposium on 27 June, and in parallel to Tutorials on 28 June. All scientific sessions take place at the Stockholmsm&auml;ssan/Stockholm International Fairs conference and exposition facility.</p>
<p><b>Contact:</b> <inter-ref locator="bj@iscb.org" locator-type="email">bj@iscb.org</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Sagot, M.-F., McKay, B.J. M., Myers, G.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp280</dc:identifier>
<dc:title><![CDATA[ISMB/ECCB 2009 Stockholm]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1573</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1570</prism:startingPage>
<prism:section>ISCB NEWS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1574?rss=1">
<title><![CDATA[Edge-based scoring and searching method for identifying condition-responsive protein-protein interaction sub-network]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/12/1574?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Guo, Z., Li, Y., Gong, X., Yao, C., Ma, W., Wang, D., Li, Y., Zhu, J., Zhang, M., Yang, D., Wang, J.]]></dc:creator>
<dc:date>2009-05-28</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp308</dc:identifier>
<dc:title><![CDATA[Edge-based scoring and searching method for identifying condition-responsive protein-protein interaction sub-network]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>12</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>1574</prism:endingPage>
<prism:publicationDate>2009-06-15</prism:publicationDate>
<prism:startingPage>1574</prism:startingPage>
<prism:section>ERRATUM</prism:section>
</item>

</rdf:RDF>