<?xml version="1.0" encoding="ISO-8859-1"?>

<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns="http://purl.org/rss/1.0/"
 xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
 xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
 xmlns:admin="http://webns.net/mvcb/"
>

<channel rdf:about="http://bioinformatics.oxfordjournals.org">
<title>Bioinformatics - Advance Access</title>
<link>http://bioinformatics.oxfordjournals.org</link>
<description>Bioinformatics - RSS feed of articles</description>
<prism:eIssn>1460-2059</prism:eIssn>
<prism:publicationName>Bioinformatics</prism:publicationName>
<prism:issn>1367-4803</prism:issn>
<items>
 <rdf:Seq>
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp629v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp598v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp628v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp610v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp627v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp626v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp617v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp625v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp622v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp620v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp618v2?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp615v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp621v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp619v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp601v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp581v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp614v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp607v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp584v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp613v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp612v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp611v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp609v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp608v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp604v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp602v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp600v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp606v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp605v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp603v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp599v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp594v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp586v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp597v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp596v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp595v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp593v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp592v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp587v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp591v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp590v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp589v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp588v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp577v2?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp585v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp583v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp582v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp576v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp580v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp579v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp578v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp575v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp574v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp571v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp568v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp567v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp572v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp570v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp569v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp564v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp566v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp565v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp563v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp562v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp561v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp560v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp559v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp558v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp549v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp548v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp557v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp553v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp555v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp554v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp547v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp552v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp551v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp550v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp545v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp546v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp544v1?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/btm094v2?rss=1" />
 </rdf:Seq>
</items>
</channel>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp629v1?rss=1">
<title><![CDATA[Rapid model quality assessment for protein structure predictions using the comparison of multiple models without structural alignments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp629v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The accurate prediction of the quality of 3D models is a key component of successful protein tertiary structure prediction methods. Currently, clustering or consensus based Model Quality Assessment Programs (MQAPs) are the most accurate methods for predicting 3D model quality; however they are often CPU intensive as they carry out multiple structural alignments in order to compare numerous models. In this study, we describe ModFOLDclustQ - a novel MQAP that compares 3D models of proteins without the need for CPU intensive structural alignments by utilising the Q measure for model comparisons. The ModFOLDclustQ method is benchmarked against the top established methods in terms of both accuracy and speed. In addition, the ModFOLDclustQ scores are combined with those from our older ModFOLDclust method to form a new method, ModFOLDclust2, that aims to provide increased prediction accuracy with negligible computational overhead. </p>
<p><b>Results:</b> The ModFOLDclustQ method is competitive with leading clustering based MQAPs for the prediction of global model quality, yet it is up to 150 times faster than the previous version of the ModFOLDclust method at comparing models of small proteins (&lt;60 residues) and over 5 times faster at comparing models of large proteins (&gt;800 residues). Furthermore, a significant improvement in accuracy can be gained over the previous clustering based MQAPs by combining the scores from ModFOLDclustQ and ModFOLDclust to form the new ModFOLDclust2 method, with little impact on the overall time taken for each prediction.</p>
<p><b>Availability:</b> The ModFOLDclustQ and ModFOLDclust2 methods are available to download from: <inter-ref locator="http://www.reading.ac.uk/bioinf/downloads/" locator-type="url">http://www.reading.ac.uk/bioinf/downloads/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="l.j.mcguffin@reading.ac.uk" locator-type="email">l.j.mcguffin@reading.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[McGuffin, L. J., Roche, D. B.]]></dc:creator>
<dc:date>Fri, 06 Nov 2009 05:52:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp629</dc:identifier>
<dc:title><![CDATA[Rapid model quality assessment for protein structure predictions using the comparison of multiple models without structural alignments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp598v1?rss=1">
<title><![CDATA[Identification of microRNA activity by Targets' Reverse EXpression]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp598v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Non-coding miRNAs act as regulators of global protein output. While their major effect is on protein levels of target genes, it has been proven that they also specifically impact on the messenger RNA level of targets. Prominent interest in microRNAs strongly motivates the need for increasing the options available to detect their cellular activity.</p>
<p><b>Results:</b> We used the effect of miRNAs over their targets for the detection of miRNA activity using mRNAs expression profiles. Here we describe the method, called T-REX (from Targets' Reverse EXpression), compare it to other similar applications, show its effectiveness and apply it to build activity maps. We used six different target predictions from each of four algorithms: TargetScan, PicTar, DIANA-microT and DIANA Union.</p>
<p>First, we proved the sensitivity and specificity of our technique in miRNA over-expression and knock-out animal models. Then, we used whole transcriptome data from acute myeloid leukemia to show that we could identify critical miRNAs in a real life, complex, clinically relevant dataset. Finally, we studied sixty-six different cellular conditions to confirm and extend the current knowledge on the role of miRNAs in cellular physiology and in cancer. </p>
]]></description>
<dc:creator><![CDATA[Volinia, S., Visone, R., Galasso, M., Rossi, E., Croce, C. M.]]></dc:creator>
<dc:date>Fri, 06 Nov 2009 05:52:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp598</dc:identifier>
<dc:title><![CDATA[Identification of microRNA activity by Targets' Reverse EXpression]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp628v1?rss=1">
<title><![CDATA[GATE: Software for the Analysis and Visualization of High-Dimensional Time-series Expression Data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp628v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We present Grid Analysis of Time-series Expression (GATE), an integrated computational software platform for the analysis and visualization of high-dimensional bio-molecular time-series. GATE uses a correlation-based clustering algorithm to arrange molecular time-series on a two-dimensional hexagonal array and dynamically colors individual hexagons according to the expression level of the molecular component to which they are assigned, to create animated movies of systems-level molecular regulatory dynamics. In order to infer potential regulatory control mechanisms from patterns of correlation, GATE also allows interactive interrogation of movies against a wide variety of prior knowledge datasets. GATE movies can be paused and are interactive, allowing users to reconstruct networks and perform functional enrichment analyses. Movies created with GATE can be saved in Flash format and can be inserted directly into PDF manuscript files as interactive figures.</p>
<p><b>Availability:</b> GATE is available for download and is free for academic use from <inter-ref locator="http://amp.pharm.mssm.edu/maayan-lab/gate.htm" locator-type="url">http://amp.pharm.mssm.edu/maayan-lab/gate.htm</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="avi.maayan@mssm.edu" locator-type="email">avi.maayan@mssm.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[MacArthur, B. D., Lachmann, A., Lemischka, I. R., Ma'ayan, A.]]></dc:creator>
<dc:date>Thu, 05 Nov 2009 05:57:16 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp628</dc:identifier>
<dc:title><![CDATA[GATE: Software for the Analysis and Visualization of High-Dimensional Time-series Expression Data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-05</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp610v1?rss=1">
<title><![CDATA[Methods for combining peptide intensities to estimate relative protein abundance]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp610v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Labeling techniques are being used increasingly to estimate relative protein abundances in quantitative proteomic studies. These techniques require the accurate measurement of correspondingly labeled peptide peak intensities to produce high-quality estimates of differential expression ratios. In mass spectrometers with counting detectors, the measurement noise varies with intensity and consequently accuracy increases with the number of ions detected. Consequently, the relative variability of peptide intensity measurements varies with intensity. This effect must be accounted for when combining information from multiple peptides to estimate relative protein abundance.</p>
<p><b>Results:</b> We examined a variety of algorithms that estimate protein differential expression ratios from multiple peptide intensity measurements. Algorithms that account for the variation of measurement error with intensity were found to provide the most accurate estimates of differential abundance. A simple Sum-of-Intensities algorithm provided the best estimates of true protein ratios of all algorithms tested.</p>
]]></description>
<dc:creator><![CDATA[Carrillo, B., Yanofsky, C., Laboissiere, S., Nadon, R., Kearney, R. E.]]></dc:creator>
<dc:date>Thu, 05 Nov 2009 05:57:16 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp610</dc:identifier>
<dc:title><![CDATA[Methods for combining peptide intensities to estimate relative protein abundance]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-05</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp627v1?rss=1">
<title><![CDATA[Modeling the interplay of single-stranded binding proteins and nucleic acid secondary structure]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp627v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> There are many important proteins which bind single-stranded nucleic acids, such as the nucleocapsid protein in HIV and the RecA DNA repair protein in bacteria. The presence of such proteins can strongly alter the secondary structure of the nucleic acid molecules. Therefore, accurate modeling of the interaction between single-stranded nucleic acids and such proteins is essential to fully understanding many biological processes.</p>
<p><b>Results:</b> We develop a model for predicting nucleic acid secondary structure in the presence of single stranded binding proteins, and implement it as an extension of the Vienna RNA Package. All parameters needed to model nucleic acid secondary structures in the absence of proteins have been previously determined. This leaves the footprint and sequence dependent binding affinity of the protein as adjustable parameters of our model. Using this model we are able to predict the probability of the protein binding at any position in the nucleic acid sequence, the impact of the protein on nucleic acid base pairing, the end-to-end distance distribution for the nucleic acid, and FRET distributions for fluorophores attached to the nucleic acid.</p>
<p><b>Availability:</b> Source code for our modified version of the Vienna RNA package is freely available at <inter-ref locator="http://bioserv.mps.ohio-state.edu/Vienna+P" locator-type="url">http://bioserv.mps.ohio-state.edu/Vienna+P</inter-ref>, implemented in C and running on Linux.</p>
<p><b>Contact:</b> <inter-ref locator="bundschuh@mps.ohio-state.edu" locator-type="email">bundschuh@mps.ohio-state.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Forties, R. A., Bundschuh, R.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 06:06:12 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp627</dc:identifier>
<dc:title><![CDATA[Modeling the interplay of single-stranded binding proteins and nucleic acid secondary structure]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-04</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp626v1?rss=1">
<title><![CDATA[ARH: Predicting Splice Variants from Genome-wide Data with Modified Entropy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp626v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Exon arrays allow the quantitative study of alternative splicing on a genome-wide scale. A variety of splicing prediction methods has been proposed for Affymetrix exon arrays mainly focusing on geometric correlation measures or analysis of variance. In this paper we introduce an information theoretic concept that is based on modification of the well-known entropy function.</p>
<p><b>Results:</b> We have developed an alternative splicing robust prediction method based on entropy (ARH). We can show that this measure copes with bias inherent in the analysis of alternative splicing such as the dependency of prediction performance on the number of exons or variable exon expression. In order to judge the performance of ARH, we have compared it with eight existing splicing prediction methods using experimental benchmark data and demonstrate that ARH is a well-performing new method for the prediction of splice variants.</p>
<p><b>Availability and Implementation:</b> ARH is implemented in R and provided in the supplementary material.</p>
<p><b>Contact:</b> <inter-ref locator="rasche@molgen.mpg.de" locator-type="email">rasche@molgen.mpg.de</inter-ref></p>
<p><b>Supplementary Information:</b> The supplementary material provides additional figures and tables, the R implementation of ARH, a basic implementation for the method comparison and the AEdb true positive set.</p>
]]></description>
<dc:creator><![CDATA[Rasche, A., Herwig, R.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 06:06:11 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp626</dc:identifier>
<dc:title><![CDATA[ARH: Predicting Splice Variants from Genome-wide Data with Modified Entropy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-04</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp617v1?rss=1">
<title><![CDATA[SimBoolNet - A Cytoscape Plugin for Dynamic Simulation of Signaling Networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp617v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> SimBoolNet is an open source Cytoscape plugin that simulates the dynamics of signaling transduction using Boolean networks. Given a user-specified level of stimulation to signal receptors, SimBoolNet simulates the response of downstream molecules and visualizes with animation and records the dynamic changes of the network. It can be used to generate hypotheses and facilitate experimental studies about causal relations and crosstalk among cellular signaling pathways.</p>
<p><b>Availability:</b> SimBoolNet package (with manual) is freely available at <inter-ref locator="http://www.ncbi.nlm.nih.gov/CBBresearch/Przytycka/SimBoolNet" locator-type="url">http://www.ncbi.nlm.nih.gov/CBBresearch/Przytycka/SimBoolNet</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="przytyck@ncbi.nlm.nih.gov" locator-type="email">przytyck@ncbi.nlm.nih.gov</inter-ref> or <inter-ref locator="zhengj@ncbi.nlm.nih.gov" locator-type="email">zhengj@ncbi.nlm.nih.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Zheng, J., Zhang, D., Przytycki, P. F., Zielinski, R., Capala, J., Przytycka, T. M.]]></dc:creator>
<dc:date>Tue, 03 Nov 2009 04:48:29 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp617</dc:identifier>
<dc:title><![CDATA[SimBoolNet - A Cytoscape Plugin for Dynamic Simulation of Signaling Networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-11-03</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp625v1?rss=1">
<title><![CDATA[MATICCE: mapping transitions in continuous character evolution]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp625v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> MATICCE is a new software package in the R language for mapping phylogenetic transitions in organismal traits that have continuous distributions. MATICCE integrates over phylogenetic and model uncertainty and provides simulation functions for visualizing evolutionary scenarios based on estimated parameter values.</p>
<p><b>Availability and Implementation:</b> MATICCE is written in the open-source R language and freely available through the Comprehensive R Archive Network (<inter-ref locator="http://cran.r-project.org/web/packages/maticce" locator-type="url">http://cran.r-project.org/web/packages/maticce</inter-ref>).</p>
<p><b>Contact:</b> <inter-ref locator="ahipp@mortonarb.org" locator-type="email">ahipp@mortonarb.org</inter-ref>.</p>
]]></description>
<dc:creator><![CDATA[Hipp, A. L., Escudero, M.]]></dc:creator>
<dc:date>Fri, 30 Oct 2009 06:40:01 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp625</dc:identifier>
<dc:title><![CDATA[MATICCE: mapping transitions in continuous character evolution]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-30</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp622v1?rss=1">
<title><![CDATA[Predictive rule inference for epistatic interaction detection in genome-wide association studies]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp622v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> In the current era of genome-wide association studies (GWAS), finding epistatic interactions in the large volume of SNP data is a challenging and unsolved issue. Few previous studies could handle genome-wide data due to the difficulties in searching the combinatorially explosive search space and statistically evaluating high-order epistatic interactions given the limited number of samples. In this work, we propose a novel learning approach (SNPRuler) based on the predictive rule inference to find disease-associated epistatic interactions.</p>
<p><b>Results:</b> Our extensive experiments on both simulated data and real genome-wide data from Wellcome Trust Case Control Consortium (WTCCC) show that SNPRuler significantly outperforms its recent competitor. To our knowledge, SNPRuler is the first method that guarantees to find the epistatic interactions without exhaustive search. Our results indicate that finding epistatic interactions in GWAS is computationally attainable in practice.</p>
<p><b>Availability:</b> <inter-ref locator="http://bioinformatics.ust.hk/SNPRuler.zip" locator-type="url">http://bioinformatics.ust.hk/SNPRuler.zip</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="eexiangw@ust.hk" locator-type="email">eexiangw@ust.hk</inter-ref>, <inter-ref locator="eeyu@ust.hk" locator-type="email">eeyu@ust.hk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Wan, X., Yang, C., Yang, Q., Xue, H., Tang, N. L.S., Yu, W.]]></dc:creator>
<dc:date>Fri, 30 Oct 2009 06:39:59 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp622</dc:identifier>
<dc:title><![CDATA[Predictive rule inference for epistatic interaction detection in genome-wide association studies]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-30</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp620v1?rss=1">
<title><![CDATA[Bayesian Model Selection for Characterizing Genomic Imprinting Effects and Patterns]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp620v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Although imprinted genes have been ubiquitously observed in nature, statistical methodology still has not been systematically developed for jointly characterizing genomic imprinting effects and patterns. To detect imprinting genes influencing quantitative traits, the least square and maximum likelihood approaches for fitting a single QTL and Bayesian method for simultaneously modeling multiple QTLs have been adopted in various studies.</p>
<p><b>Results:</b> In a widely used F<SUB>2</SUB> reciprocal mating population for mapping imprinting genes, we herein propose a genomic imprinting model which describes additive, dominance, and imprinting effects of multiple imprinted quantitative trait loci (iQTL) for traits of interest. Depending upon the estimates of the above genetic effects, we categorized imprinting patterns into seven types, which provides a complete classification scheme for describing imprinting patterns. Bayesian model selection was employed to identify iQTL along with many genetic parameters in a computationally efficient manner. To make statistical inference on the imprinting types of iQTL detected, a set of Bayes factors were formulated using the posterior probabilities for the genetic effects being compared. We demonstrated the performance of the proposed method by computer simulation experiments and then applied this method to two real data sets. Our approach can be generally used to identify inheritance modes and determine the contribution of major genes for quantitative variations.</p>
<p><b>Contact: </b> <inter-ref locator="annie.lin@duke.edu" locator-type="email">annie.lin@duke.edu</inter-ref>; <inter-ref locator="runqingyang@sjtu.edu.cn" locator-type="email">runqingyang@sjtu.edu.cn</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yang, R., Wang, X., Wu, Z., Prows, D. R., Lin, M.]]></dc:creator>
<dc:date>Fri, 30 Oct 2009 06:39:57 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp620</dc:identifier>
<dc:title><![CDATA[Bayesian Model Selection for Characterizing Genomic Imprinting Effects and Patterns]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-30</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp618v2?rss=1">
<title><![CDATA[Phenopedia and Genopedia: Disease-centered and Gene-centered Views of the Evolving Knowledge of Human Genetic Associations]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp618v2?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We developed Web-based applications that encourage the exploration of the literature on human genetic associations by using a database that is continuously updated from PubMed. These applications provide user-friendly interfaces for searching summarized information on human genetic associations, using either genes or diseases as the starting point.</p>
<p><b>Availability:</b> Phenopedia and Genopedia can be freely accessed at <inter-ref locator="http://www.hugenavigator.net/HuGENavigator/startPagePhenoPedia.do" locator-type="url">http://www.hugenavigator.net/HuGENavigator/startPagePhenoPedia.do</inter-ref> and <inter-ref locator="http://www.hugenavigator.net/HuGENavigator/startPagePedia.do" locator-type="url">http://www.hugenavigator.net/HuGENavigator/startPagePedia.do</inter-ref>, respectively.</p>
<p><b>Contact:</b> <inter-ref locator="wby0@cdc.gov" locator-type="email">wby0@cdc.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yu, W., Clyne, M., Khoury, M.J., Gwinn, M.]]></dc:creator>
<dc:date>Fri, 30 Oct 2009 23:47:40 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp618</dc:identifier>
<dc:title><![CDATA[Phenopedia and Genopedia: Disease-centered and Gene-centered Views of the Evolving Knowledge of Human Genetic Associations]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-30</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp615v1?rss=1">
<title><![CDATA[inGAP: an integrated next-generation genome analysis pipeline]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp615v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We develop a novel mining pipeline, inGAP, guided by a Bayesian principle to detect single nucleotide polymorphisms (SNPs) and insertion/deletions (indels) by comparing high-throughput pyrosequencing reads with a reference genome of related organisms. inGAP can be applied to the mapping of both Roche/454 and Illumina reads with no restriction of read length. Experiments on simulated and experimental data show that this pipeline can achieve overall 97% accuracy in SNP detection and 94% in the finding of indels. All the detected SNPs/indels can be further evaluated by a graphical editor in our pipeline. inGAP also provides functions of multiple genomes comparison and assistance of bacterial genome assembly.</p>
<p><b>Availability:</b> inGAP is available at <inter-ref locator="http://sites.google.com/site/nextgengenomics/ingap" locator-type="url">http://sites.google.com/site/nextgengenomics/ingap</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="scs@bx.psu.edu" locator-type="email">scs@bx.psu.edu</inter-ref></p>
<p><b>Supplementary information:</b> Supplementary data are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Qi, J., Zhao, F., Buboltz, A., Schuster, S. C.]]></dc:creator>
<dc:date>Fri, 30 Oct 2009 06:39:56 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp615</dc:identifier>
<dc:title><![CDATA[inGAP: an integrated next-generation genome analysis pipeline]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-30</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp621v1?rss=1">
<title><![CDATA[Pitfalls of supervised feature selection]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp621v1?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Smialowski, P., Frishman, D., Kramer, S.]]></dc:creator>
<dc:date>Thu, 29 Oct 2009 21:16:31 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp621</dc:identifier>
<dc:title><![CDATA[Pitfalls of supervised feature selection]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-29</prism:publicationDate>
<prism:section>LETTER TO THE EDITOR</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp619v1?rss=1">
<title><![CDATA[Simulation-based model selection for dynamical systems in systems and population biology]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp619v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Computer simulations have become an important tool across the biomedical sciences and beyond. For many important problems several different models or hypotheses exist and choosing which one best describes reality or observed data is not straightforward. We therefore require suitable statistical tools that allow us to choose rationally between different mechanistic models of e.g. signal transduction or gene regulation networks. This is particularly challenging in systems biology where only a small number of molecular species can be assayed at any given time and all measurements are subject to measurement uncertainty.</p>
<p><b>Results:</b> Here we develop such a model selection framework based on approximate Bayesian computation and employing sequential Monte Carlo sampling. We show that our approach can be applied across a wide range of biological scenarios, and we illustrate its use on real data describing influenza dynamics and the JAK-STAT signalling pathway. Bayesian model selection strikes a balance between the complexity of the simulation models and their ability to describe observed data. The present approach enables us to employ the whole formal apparatus to any system that can be (efficiently) simulated, even when exact likelihoods are computationally intractable.</p>
<p><b>Contact:</b> <inter-ref locator="ttoni@imperial.ac.uk" locator-type="email">ttoni@imperial.ac.uk</inter-ref>, <inter-ref locator="m.stumpf@imperial.ac.uk" locator-type="email">m.stumpf@imperial.ac.uk</inter-ref></p>
<p><b>Supplementary Information:</b> Tutorial on ABC rejection and ABC SMC for parameter estimation and model selection. Derivation of ABC SMC model selection algorithms. Supplementary figures and datasets.</p>
]]></description>
<dc:creator><![CDATA[Toni, T., Stumpf, M. P. H.]]></dc:creator>
<dc:date>Thu, 29 Oct 2009 21:16:30 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp619</dc:identifier>
<dc:title><![CDATA[Simulation-based model selection for dynamical systems in systems and population biology]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-29</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp601v1?rss=1">
<title><![CDATA[MicroRazerS: Rapid alignment of small RNA reads]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp601v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation</b>: Deep sequencing has become the method of choice for determining the small RNA content of a cell. Mapping the sequenced reads onto their reference genome serves as the basis for all further analyses, namely for identification and quantification. A method frequently used is Mega BLAST followed by several filtering steps, even though it is slow and inefficient for this task. Also, none of the currently available short read aligners has established itself for the particular task of small RNA mapping.</p>
<p><b>Results</b>: We present MicroRazerS, a tool optimized for mapping small RNAs onto a reference genome. It is an order of magnitude faster than Mega BLAST and comparable in speed to other short read mapping tools. In addition, it is more sensitive and easy to handle and adjust.</p>
<p><b>Availability</b>: MicroRazerS is part of the SeqAn C++ library and can be downloaded from <inter-ref locator="http://www.seqan.de/projects/MicroRazerS.html" locator-type="url">http://www.seqan.de/projects/MicroRazerS.html</inter-ref>.</p>
<p><b>Contact</b>: <inter-ref locator="emde@inf.fu-berlin.de" locator-type="email">emde@inf.fu-berlin.de</inter-ref>, <inter-ref locator="grunert@molgen.mpg.de" locator-type="email">grunert@molgen.mpg.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Emde, A.-K., Grunert, M., Weese, D., Reinert, K., Sperling, S. R.]]></dc:creator>
<dc:date>Thu, 29 Oct 2009 21:16:29 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp601</dc:identifier>
<dc:title><![CDATA[MicroRazerS: Rapid alignment of small RNA reads]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-29</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp581v1?rss=1">
<title><![CDATA[Model Aggregation: a building-block approach to creating large macromolecular regulatory networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp581v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Models of regulatory networks become more difficult to construct and understand as they grow in size and complexity. Modelers naturally build large models from smaller components that each represent subsets of reactions within the larger network. To assist modelers in this process, we present model aggregation, which defines models in terms of components that are designed for the purpose of being combined.</p>
<p><b>Results:</b> We have implemented a model editor that incorporates model aggregation, and we suggest supporting extensions to the Systems Biology Markup Language (SBML) Level 3. We illustrate aggregation with a model of the eukaryotic cell cycle &lsquo;engine&rsquo; created from smaller pieces.</p>
<p><b>Availability:</b> Java implementations of our SBML proposal are available in the JigCell Aggregation Connector. See <ty><inter-ref locator="http://jigcell.biol.vt.edu" locator-type="url">http://jigcell.biol.vt.edu</inter-ref></ty>.</p>
<p><b>Contact:</b> <inter-ref locator="shaffer@vt.edu" locator-type="email">shaffer@vt.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Randhawa, R., Shaffer, C. A., Tyson, J. J.]]></dc:creator>
<dc:date>Thu, 29 Oct 2009 21:16:28 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp581</dc:identifier>
<dc:title><![CDATA[Model Aggregation: a building-block approach to creating large macromolecular regulatory networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-29</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp614v1?rss=1">
<title><![CDATA[The GNUMAP Algorithm: Unbiased Probabilistic Mapping of Oligonucleotides from Next-Generation Sequencing]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp614v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The advent of next-generation sequencing technologies has increased the accuracy and quantity of sequence data, opening the door to greater opportunities in genomic research.</p>
<p><b>Results:</b> In this paper, we present GNUMAP (Genomic Next-generation Universal MAPper), a program capable of overcoming two major obstacles in the mapping of reads from next-generation sequencing runs. First, we have created an algorithm that probabilistically maps reads to repeat regions in the genome on a quantitative basis. Second, we have developed a probabilistic Needleman-Wunsch algorithm which utilizes _prb.txt and _int.txt files produced in the Solexa/Illumina pipeline to improve the mapping accuracy for lower quality reads and increase the amount of usable data produced in a given experiment.</p>
<p><b>Availability:</b> The source code for the software can be downloaded from <inter-ref locator="http://dna.cs.byu.edu/gnumap" locator-type="url">http://dna.cs.byu.edu/gnumap</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="nathanlclement@gmail.com" locator-type="email">nathanlclement@gmail.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Clement, N. L., Snell, Q., Clement, M. J., Hollenhorst, P. C., Purwar, J., Graves, B. J., Cairns, B. R., Johnson, W. E.]]></dc:creator>
<dc:date>Tue, 27 Oct 2009 04:13:57 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp614</dc:identifier>
<dc:title><![CDATA[The GNUMAP Algorithm: Unbiased Probabilistic Mapping of Oligonucleotides from Next-Generation Sequencing]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-27</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp607v1?rss=1">
<title><![CDATA[Computational Protein Profile Similarity Screening for Quantitative Mass Spectrometry Experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp607v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The qualitative and quantitative characterization of protein abundance profiles over a series of time points or a set of environmental conditions is becoming increasingly important. Using isobaric mass tagging experiments, mass spectrometry-based quantitative proteomics deliver accurate peptide abundance profiles for relative quantitation. Associated data analysis workflows need to provide tailored statistical treatment that (i) takes the correlation structure of the normalized peptide abundance profiles into account and (ii) allows inference of protein-level similarity. We introduce a suitable distance measure for relative abundance profiles, derive a statistical test for equality and propose a protein-level representation of peptide-level measurements. This yields a workflow that delivers a similarity ranking of protein abundance profiles with respect to a defined reference. All procedures have in common that they operate based on the true correlation structure that underlies the measurements. This optimizes power and delivers more intuitive and efficient results than existing methods that do not take these circumstances into account.</p>
<p><b>Results:</b> We use protein profile similarity screening to identify candidate proteins whose abundances are post-transcriptionally controlled by the Anaphase Promoting Complex (APC/C), a specific E3 ubiquitin ligase that is a master regulator of the cell cycle. Results are compared with an established protein correlation profiling method. The proposed procedure yields a 50.9-fold enrichment of co-regulated protein candidates and a 2.5-fold improvement over the previous method.</p>
<p><b>Availability:</b> A MATLAB toolbox is available from <inter-ref locator="http://hci.iwr.uni-heidelberg.de/mip/proteomics" locator-type="url">http://hci.iwr.uni-heidelberg.de/mip/proteomics</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="hanno.steen@childrens.harvard.edu" locator-type="email">hanno.steen@childrens.harvard.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kirchner, M., Renard, B. Y., Kothe, U., Pappin, D. J., Hamprecht, F. A., Steen, H., Steen, J. A. J.]]></dc:creator>
<dc:date>Tue, 27 Oct 2009 04:13:55 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp607</dc:identifier>
<dc:title><![CDATA[Computational Protein Profile Similarity Screening for Quantitative Mass Spectrometry Experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-27</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp584v1?rss=1">
<title><![CDATA[Estimating population haplotype frequencies from pooled SNP data using incomplete database information]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp584v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Information about haplotype structures gives a more detailed picture of genetic variation between individuals than single-locus analyses. Databases that contain the most frequent haplotypes of certain populations are developing rapidly (e.g. the HapMap database for single-nucleotide polymorphisms in humans). Utilisation of such prior information about the prevailing haplotype structures makes it possible to estimate the haplotype frequencies also from large DNA pools. When genetic material from dozens of individuals is pooled together and analysed in a single genotyping, the overall number of genotypings and the costs of the genetic studies are reduced.</p>
<p>Results: A Bayesian model for estimating the haplotypes and their frequencies from pooled allelic observations is introduced. The model combines an idea of using database information for haplotype estimation with a computationally efficient multinormal approximation. In addition, the model treats the number and structures of the unknown haplotypes as random variables whose joint posterior distribution is estimated. The results on real human data from the HapMap database show that the proposed method provides significant improvements over the existing methods.</p>
<p>Availability: A reversible-jump Markov chain Monte Carlo algorithm for analysing the model is implemented in a program called Hippo. For comparisons, an approximate EM-algorithm that utilises database information about the existing haplotypes is implemented in a program called AEML. The source codes written in C (using Gnu Scientific Library) are available at <inter-ref locator="http://www.iki.fi/~mpirinen" locator-type="url">www.iki.fi/~mpirinen</inter-ref>.</p>
<p>Contact: <inter-ref locator="matti.pirinen@iki.fi" locator-type="email">matti.pirinen@iki.fi</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Pirinen, M.]]></dc:creator>
<dc:date>Tue, 27 Oct 2009 04:13:54 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp584</dc:identifier>
<dc:title><![CDATA[Estimating population haplotype frequencies from pooled SNP data using incomplete database information]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-27</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp613v1?rss=1">
<title><![CDATA[Annotation Confidence Score for Genome Annotation: A Genome Comparison Approach]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp613v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The massively parallel sequencing technology can be used by small research labs to generate genome sequences of their research interest. However, annotation of genomes still relies on the manual process, which becomes a serious bottleneck to the high throughput genome projects. Recently, automatic annotation methods are increasingly more accurate, but there are several issues. One important challenge in using automatic annotation methods is to distinguish annotation quality of ORFs or genes. The availability of such annotation quality of genes can reduce the human labor cost dramatically since manual inspection can focus only on genes with low annotation quality scores.</p>
<p><b>Results:</b> In this paper, we propose a novel annotation quality or confidence scoring scheme, called Annotation Confidence Score (ACS), using a genome comparison approach. The scoring scheme is computed by combining sequence and textual annotation similarity using a modified version of a logistic curve. The most important feature of the proposed scoring scheme is to generate a score that reflects the excellence in annotation quality of genes by automatically adjusting the number of genomes used to compute the score and their phylogenetic distance. Extensive experiments with bacterial genomes showed that the proposed scoring scheme generated scores for annotation quality according to the quality of annotation regardless of the number of reference genomes and their phylogenetic distance.</p>
<p><b>Availability:</b> <ty><inter-ref locator="http://microbial.informatics.indiana.edu/acs" locator-type="url">http://microbial.informatics.indiana.edu/acs</inter-ref></ty>.</p>
<p><b>Contact:</b> <inter-ref locator="sunkim2@indiana.edu" locator-type="email">sunkim2@indiana.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yang, Y., Gilbert, D., Kim, S.]]></dc:creator>
<dc:date>Sat, 24 Oct 2009 03:58:56 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp613</dc:identifier>
<dc:title><![CDATA[Annotation Confidence Score for Genome Annotation: A Genome Comparison Approach]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-24</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp612v1?rss=1">
<title><![CDATA[DEGseq: an R package for identifying differentially expressed genes from RNA-seq data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp612v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> High-throughput RNA sequencing (RNA-seq) is rapidly emerging as a major quantitative transcriptome profiling platform. Here we present DEGseq, an R package to identify differentially expressed genes or isoforms for RNA-seq data from different samples. In this package, we integrated three existing methods, and introduced two novel methods based on MA-plot to detect and visualize gene expression difference.</p>
<p><b>Availability:</b> The R package and a quick-start vignette are available at <inter-ref locator="http://bioinfo.au.tsinghua.edu.cn/software/degseq" locator-type="url">http://bioinfo.au.tsinghua.edu.cn/software/degseq</inter-ref></p>
<p><b>Contact:</b> XW: <inter-ref locator="xwwang@tsinghua.edu.cn" locator-type="email">xwwang@tsinghua.edu.cn</inter-ref>; XZ: <inter-ref locator="zhangxg@tsinghua.edu.cn" locator-type="email">zhangxg@tsinghua.edu.cn</inter-ref>.</p>
<p><b>Supplementary information:</b> Supplementary data are available at <I>Bioinformatics</I> online. </p>
]]></description>
<dc:creator><![CDATA[Wang, L., Feng, Z., Wang, X., Wang, X., Zhang, X.]]></dc:creator>
<dc:date>Sat, 24 Oct 2009 03:58:55 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp612</dc:identifier>
<dc:title><![CDATA[DEGseq: an R package for identifying differentially expressed genes from RNA-seq data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-24</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp611v1?rss=1">
<title><![CDATA[NGSView: an extensible open source editor for next-generation sequencing data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp611v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> High-throughput sequencing technologies introduce novel demands on tools available for data analysis. We have developed NGSView, a generally applicable, flexible and extensible next-generation sequence alignment editor. The software allows for visualization and manipulation of millions of sequences simultaneously on a desktop computer, through a graphical interface. NGSView is available under an open source license and can be extended through a well documented API.</p>
<p><b>Availability:</b> <inter-ref locator="http://ngsview.sourceforge.net" locator-type="url">http://ngsview.sourceforge.net</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="arner@gsc.riken.jp" locator-type="email">arner@gsc.riken.jp</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Arner, E., Hayashizaki, Y., Daub, C. O.]]></dc:creator>
<dc:date>Sat, 24 Oct 2009 03:58:54 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp611</dc:identifier>
<dc:title><![CDATA[NGSView: an extensible open source editor for next-generation sequencing data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-24</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp609v1?rss=1">
<title><![CDATA[Seed-based INTARNA prediction combined with GFP-reporter system identifies mRNA targets of the small RNA Yfr1]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp609v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> <I>Prochlorococcus</I> possesses the smallest genome of all sequenced photoautotrophs. Although the number of regulatory proteins in the genome is very small, the relative number of small regulatory RNAs is comparable to that of other bacteria. The compact genome size of <I>Prochlorococcus</I> offers an ideal system to search for targets of small RNAs and to refine existing target prediction algorithms.</p>
<p><b>Results:</b> Target predictions for the cyanobacterial small RNA Yfr1 were carried out with I<scp>nta</scp>RNA in <I>Prochlorococcus</I> MED4. The ultraconserved Yfr1 sequence motif was defined as the putative interaction seed. To study the impact of Yfr1 on its predicted mRNA targets, a reporter system based on green fluorescent protein (GFP) was applied. We show that Yfr1 inhibits the translation of two predicted targets. We used mutation analysis to confirm that Yfr1 directly regulates its targets by an antisense interaction sequestering the ribosome binding site, and to assess the importance of interaction site accessibility.</p>
<p><b>Contact:</b> <inter-ref locator="backofen@informatik.uni-freiburg.de" locator-type="email">backofen@informatik.uni-freiburg.de</inter-ref>, <inter-ref locator="claudia.steglich@biologie.uni-freiburg.de" locator-type="email">claudia.steglich@biologie.uni-freiburg.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Richter, A. S., Schleberger, C., Backofen, R., Steglich, C.]]></dc:creator>
<dc:date>Thu, 22 Oct 2009 09:22:12 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp609</dc:identifier>
<dc:title><![CDATA[Seed-based INTARNA prediction combined with GFP-reporter system identifies mRNA targets of the small RNA Yfr1]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-22</prism:publicationDate>
<prism:section>DISCOVERY NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp608v1?rss=1">
<title><![CDATA[The gputools package enables GPU computing in R]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp608v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> By default, the R statistical environment does not make use of parallelism. Researchers may resort to expensive solutions such as cluster hardware for large analysis tasks. Graphics processing units (GPUs) provide an inexpensive and computationally powerful alternative. Using R and the CUDA toolkit from Nvidia, we have implemented several functions commonly used in microarray gene expression analysis for GPU equipped computers.</p>
<p><b>Results:</b> R users can take advantage of the better performance provided by an Nvidia GPU.</p>
<p><b>Availability:</b> The package is available from CRAN, the R project's repository of packages, at <inter-ref locator="http://cran.r-project.org/web/packages/gputools" locator-type="url">http://cran.r-project.org/web/packages/gputools</inter-ref></p>
<p>More information about our gputools R package is available at <inter-ref locator="http://brainarray.mbni.med.umich.edu/brainarray/Rgpgpu" locator-type="url">http://brainarray.mbni.med.umich.edu/brainarray/Rgpgpu</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="bucknerj@umich.edu" locator-type="email">bucknerj@umich.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Buckner, J., Wilson, J., Seligman, M., Athey, B., Watson, S., Meng, F.]]></dc:creator>
<dc:date>Thu, 22 Oct 2009 09:22:12 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp608</dc:identifier>
<dc:title><![CDATA[The gputools package enables GPU computing in R]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-22</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp604v1?rss=1">
<title><![CDATA[Identification of Non-Hodgkin's Lymphoma Prognosis Signatures Using the CTGDR Method]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp604v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Although NHL (Non-Hodgkin's lymphoma) is the fifth leading cause of cancer incidence and mortality in the US, it remains poorly understood and is largely incurable. Biomedical studies have shown that genomic variations, measured with SNPs (single nucleotide polymorphisms) in genes, may have independent predictive power for disease free survival in NHL patients beyond clinical measurements.</p>
<p><b>Results:</b> We apply the CTGDR (Clustering Threshold Gradient Directed Regularization) method to genetic association studies using SNPs, analyze data from an association study of NHL, and identify prognosis signatures for diffuse large B cell lymphoma (DLBCL) and follicular lymphoma (FL), the two most common subtypes of NHL. With the CTGDR method, we are able to account for the <I>joint effects of multiple genes/SNPs</I>, whereas most existing studies are single-marker based. In addition, we are able to account for the "gene and SNP-within-gene" hierarchical structure and identify <I>not only predictive genes but also predictive SNPs within identified genes</I>. In contrast, existing studies are limited to either gene or SNP identification, but not both. We propose using resampling methods to evaluate the predictive power and reproducibility of identified genes and SNPs. Simulation study and data analysis suggest satisfactory performance of the CTGDR method.</p>
<p><b>Contact:</b> <inter-ref locator="shuangge.ma@yale.edu" locator-type="email">shuangge.ma@yale.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ma, S., Zhang, Y., Huang, J., Han, X., Holford, T., Lan, Q., Rothman, N., Boyle, P., Zheng, T.]]></dc:creator>
<dc:date>Thu, 22 Oct 2009 09:22:11 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp604</dc:identifier>
<dc:title><![CDATA[Identification of Non-Hodgkin's Lymphoma Prognosis Signatures Using the CTGDR Method]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-22</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp602v1?rss=1">
<title><![CDATA[Evaluation of linguistic features useful in extraction of interactions from PubMed; Application to annotating known, high-throughput and predicted interactions in I2D]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp602v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Identification and characterization of protein-protein interactions (PPIs) is one of the key aims in biological research. While previous research in text mining has made substantial progress in automatic PPI detection from literature, the need to improve the precision and recall of the process remains. More accurate PPI detection will also improve the ability to extract experimental data related to PPIs and provide multiple evidence for each interaction.</p>
<p><b>Results:</b> We developed an interaction detection method and explored usefulness of various features in automatically identifying PPIs in text. The results show that our approach outperforms other systems using the <I>AImed</I> dataset. In the tests where our system achieves better precision with reduced recall, we discuss possible approaches for improvement. In addition to test datasets, we evaluated performance on interactions from five human-curated databases&mdash;BIND, DIP, HPRD, IntAct and MINT&mdash;where our system consistently identified evidence for about 60% of interactions when both proteins appear in at least one sentence in the PubMed abstract. We then applied the system to extract articles from PubMed to annotate known, high-throughput and interologous interactions in I<sup>2</sup>D.</p>
<p><b>Availability:</b> The data and software are available at: <ty><inter-ref locator="http://www.cs.utoronto.ca/~juris/data/BI09/" locator-type="url">http://www.cs.utoronto.ca/~juris/data/BI09/</inter-ref></ty>.</p>
<p><b>Contact:</b> <inter-ref locator="yniu@uhnres.utoronto.ca" locator-type="email">yniu@uhnres.utoronto.ca</inter-ref>, <inter-ref locator="juris@ai.utoronto.ca" locator-type="email">juris@ai.utoronto.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Niu, Y., Otasek, D., Jurisica, I.]]></dc:creator>
<dc:date>Thu, 22 Oct 2009 09:22:11 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp602</dc:identifier>
<dc:title><![CDATA[Evaluation of linguistic features useful in extraction of interactions from PubMed; Application to annotating known, high-throughput and predicted interactions in I2D]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-22</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp600v1?rss=1">
<title><![CDATA[Ancestors 1.0: A Web Server For Ancestral Sequence Reconstruction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp600v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The computational inference of ancestral genomes consists of five difficult steps: identifying syntenic regions, inferring ancestral arrangement of syntenic regions, aligning multiple sequences, reconstructing the insertion and deletion history, and finally inferring substitutions. Each of these steps has received a lot of attention in the past years. However, there currently exists no framework that integrates all of the different steps in an easy workflow. Here, we introduce Ancestors 1.0, a web server allowing one to easily and quickly perform the last three steps of the ancestral genome reconstruction procedure. It implements several alignment algorithms, an indel maximum likelihood solver and a context-dependent maximum likelihood substitution inference algorithm. The results presented by the server include the posterior probabilities for the last two steps of the ancestral genome reconstruction and the expected error rate of each ancestral base prediction.</p>
<p><b>Availability:</b> The Ancestors 1.0 is available at &lt;<inter-ref locator="http://ancestors.bioinfo.uqam.ca/ancestorWeb/" locator-type="url">http://ancestors.bioinfo.uqam.ca/ancestorWeb/</inter-ref>&gt;.</p>
<p><b>Contact:</b> <inter-ref locator="diallo.abdoulaye@uqam.ca" locator-type="email">diallo.abdoulaye@uqam.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Diallo, A. B., Makarenkov, V., Blanchette, M.]]></dc:creator>
<dc:date>Thu, 22 Oct 2009 09:22:10 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp600</dc:identifier>
<dc:title><![CDATA[Ancestors 1.0: A Web Server For Ancestral Sequence Reconstruction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-22</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp606v1?rss=1">
<title><![CDATA[Targeted Interrogation of Copy Number Variation using SCIMMkit]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp606v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Copy-number variants (CNVs) contribute substantially to human genomic diversity, and development of accurate and efficient methods for CNV genotyping is a central problem in exploring human genotype-phenotype associations.  SCIMMkit provides a robust, integrated implementation of three previously validated algorithms (SCIMM, SCIMM-Search, and SCOUT) for targeted interrogation of CNVs using Illumina Infinium II and GoldenGate SNP assays. SCIMMkit is applicable to standardized genome-wide SNP arrays and customized multiplexed SNP panels, providing economy, efficiency, and flexibility in experimental design.</p>
<p><b>Availability:</b> Source code and documentation are available for noncommercial use at <inter-ref locator="http://droog.gs.washington.edu/scimmkit" locator-type="url">http://droog.gs.washington.edu/scimmkit</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="troyz@u.washington.edu" locator-type="email">troyz@u.washington.edu</inter-ref></p>
<p><b>Supplementary Information:</b> Supplementary data are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Zerr, T., Cooper, G. M., Eichler, E. E., Nickerson, D. A.]]></dc:creator>
<dc:date>Wed, 21 Oct 2009 00:06:45 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp606</dc:identifier>
<dc:title><![CDATA[Targeted Interrogation of Copy Number Variation using SCIMMkit]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-21</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp605v1?rss=1">
<title><![CDATA[Reporting bias when using real data sets to analyze classification performance]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp605v1?rss=1</link>
<description><![CDATA[
<p>Motivation: It is commonplace for authors to propose a new classification rule, either the operator construction part or feature selection, and demonstrate its performance on real data sets, which often come from high-dimensional studies, such as from gene-expression microarrays, with small samples. Owing to the variability in feature selection and error estimation, individual reported performances are highly imprecise. Hence, if only the best test results are reported, then these will be biased relative to the overall performance of the proposed procedure.</p>
<p>Results: This paper characterizes reporting bias with several statistics and computes these statistics in a large simulation study using both modeled and real data. The results appear as curves giving the different reporting biases as functions of the number of samples tested when reporting only the best or second best performance. It does this for two classification rules, linear discriminant analysis (LDA) and 3 nearest-neighbor (3NN), and for filter and wrapper feature selection, <I>t</I>-test and sequential forward search. These were chosen on account of their well-studied properties and because they were amenable to the extremely large amount of processing required for the simulations. The results across all the experiments are consistent: there is generally large bias overriding what would be considered a significant performance differential, when reporting the best or second best performing data set. We conclude that there needs to be a database of data sets and that, for those studies depending on real data, results should be reported for all data sets in the database.</p>
<p>Availability: Companion website at <inter-ref locator="http://gsp.tamu.edu/Publications/supplementary/yousefi09a/" locator-type="url">http://gsp.tamu.edu/Publications/supplementary/yousefi09a/</inter-ref></p>
<p>Contact: <inter-ref locator="edward@ece.tamu.edu" locator-type="email">edward@ece.tamu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yousefi, M. R., Hua, J., Sima, C., Dougherty, E. R.]]></dc:creator>
<dc:date>Wed, 21 Oct 2009 03:09:29 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp605</dc:identifier>
<dc:title><![CDATA[Reporting bias when using real data sets to analyze classification performance]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-21</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp603v1?rss=1">
<title><![CDATA[Accurate confidence aware clustering of array CGH tumor profiles]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp603v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Chromosomal aberrations tend to be characteristic for given (sub) types of cancer. Such aberrations can be detected with array Comparative Genomic Hybridization (aCGH). Clustering aCGH tumor profiles aids in identifying chromosomal regions of interest and provides useful diagnostic information on the cancer type. An important issue here is to what extent individual aCGH tumor profiles can be reliably assigned to clusters associated with a given cancer type.</p>
<p><b>Results:</b> We introduce a novel evolutionary fuzzy clustering (EFC) algorithm, which is able to deal with overlapping clusterings. Our method assesses these overlaps by using cluster membership degrees, which we use here as a confidence measure for individual samples to be assigned to a given tumor type. We first demonstrate the usefulness of our method using a synthetic aCGH dataset and subsequently show that EFC outperforms existing methods on four real datasets of aCGH tumor profiles involving four different cancer types. We also show that in general best performance is obtained using 1 &minus; Pearson correlation coefficient as a distance measure and that extra preprocessing steps, such as segmentation and calling, lead to decreased clustering performance.</p>
<p><b>Availability:</b> The source code of the program is available from <inter-ref locator="http://ibi.vu.nl/programs/efcwww" locator-type="url">http://ibi.vu.nl/programs/efcwww</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="heringa@few.vu.nl" locator-type="email">heringa@few.vu.nl</inter-ref></p>
]]></description>
<dc:creator><![CDATA[van Houte, B. P.P., Heringa, J.]]></dc:creator>
<dc:date>Wed, 21 Oct 2009 03:09:29 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp603</dc:identifier>
<dc:title><![CDATA[Accurate confidence aware clustering of array CGH tumor profiles]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-21</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp599v1?rss=1">
<title><![CDATA[Roll: A new algorithm for the detection of protein pockets and cavities with a rolling probe sphere]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp599v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Prediction of ligand binding sites of proteins is significant as it can provide insight into biological functions and reaction mechanisms of proteins. It is also a prerequisite for protein - ligand docking and an important step in structure-based drug design.</p>
<p><b>Results:</b> We present a new algorithm, Roll, implemented in a program named POCASA, which can predict binding sites by detecting pockets and cavities of proteins with a rolling sphere. To evaluate the performance of POCASA, a test with the same data set as used in several existing methods was carried out. POCASA achieved a high success rate of 77%. In addition, the test results indicated that POCASA can predict good shapes of ligand binding sites.</p>
<p><b>Availability:</b> A web version of POCASA is freely available at <inter-ref locator="http://altair.sci.hokudai.ac.jp/g6/Research/POCASA_e.html" locator-type="url">http://altair.sci.hokudai.ac.jp/g6/Research/POCASA_e.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="yao@castor.sci.hokudai.ac.jp" locator-type="email">yao@castor.sci.hokudai.ac.jp</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yu, J., Zhou, Y., Tanaka, I., Yao, M.]]></dc:creator>
<dc:date>Wed, 21 Oct 2009 00:06:44 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp599</dc:identifier>
<dc:title><![CDATA[Roll: A new algorithm for the detection of protein pockets and cavities with a rolling probe sphere]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-21</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp594v1?rss=1">
<title><![CDATA[G-compass: A web-based comparative genome browser between human and other vertebrate genomes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp594v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> G-compass is designed for efficient comparative genome analysis between human and other vertebrate genomes. The current version of G-compass allows us to browse two corresponding genomic regions between human and another species in parallel. One-to-one evolutionarily conserved regions (i.e., orthologous regions) between species are highlighted along the genomes. Information such as locations of duplicated regions, copy number variations, and mammalian ultra-conserved elements is also provided. These features of G-compass enable us to easily determine patterns of genomic rearrangements and changes in gene orders through evolutionary time. Since G-compass is a satellite database of H-InvDB, which is a comprehensive annotation resource for human genes and transcripts, users can easily refer to manually curated functional annotations and other abundant biological information for each human transcript. G-compass is expected to be a valuable tool for comparing human and model organisms and promoting the exchange of functional information.</p>
<p><b>Availability:</b> G-compass is freely available at <inter-ref locator="http://www.h-invitational.jp/g-compass/" locator-type="url">http://www.h-invitational.jp/g-compass/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="t.imanishi@aist.go.jp" locator-type="email">t.imanishi@aist.go.jp</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kawahara, Y., Sakate, R., Matsuya, A., Murakami, K., Sato, Y., Zhang, H., Gojobori, T., Itoh, T., Imanishi, T.]]></dc:creator>
<dc:date>Wed, 21 Oct 2009 00:06:44 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp594</dc:identifier>
<dc:title><![CDATA[G-compass: A web-based comparative genome browser between human and other vertebrate genomes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-21</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp586v1?rss=1">
<title><![CDATA[CRISPI: a CRISPR Interactive database]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp586v1?rss=1</link>
<description><![CDATA[
<p>Summary: The CRISPR genomic structures (Clustered Regularly Interspaced Short Palindromic Repeats) form a family of repeats that is largely present in archaea and frequent in bacteria. On the basis of a formal model of CRISPR using very few parameters, a systematic study of all their occurrences in all available genomes of Archaea and Bacteria has been carried out. This has resulted in a relational database, CRISPI, which also includes a complete repertory of associated CRISPR-associated genes (CAS). A user-friendly web interface with many graphical tools and functions allows users to extract results, find CRISPR in personal sequences or calculate sequence similarity with spacers.</p>
<p>Availability: CRISPI free access at <inter-ref locator="http://crispi.genouest.org" locator-type="url">http://crispi.genouest.org</inter-ref></p>
<p>Contact: <inter-ref locator="jnicolas@irisa.fr" locator-type="email">jnicolas@irisa.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Rousseau, C., Nicolas, J., Gonnet, M.]]></dc:creator>
<dc:date>Wed, 21 Oct 2009 03:09:28 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp586</dc:identifier>
<dc:title><![CDATA[CRISPI: a CRISPR Interactive database]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-21</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp597v1?rss=1">
<title><![CDATA[GeneE: Gene and protein query expansion with disambiguation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp597v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> When referring to genes, authors often use synonyms instead of the official gene symbols. In order to accurately retrieve as many relevant documents as possible, we have developed GeneE, a web application that expands a gene query to include all known synonyms, and adds disambiguation information for ambiguous terms, before forwarding the query to either PubMed, Google, or Jane. The query expansion algorithm is also available as a web service.</p>
<p><b>Availability:</b> <inter-ref locator="http://biosemantics.org/geneE" locator-type="url">http://biosemantics.org/geneE</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="m.schuemie@erasmusmc.nl" locator-type="email">m.schuemie@erasmusmc.nl</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Schuemie, M. J., Kang, N., Hekkelman, M. L., Kors, J. A.]]></dc:creator>
<dc:date>Fri, 16 Oct 2009 02:07:39 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp597</dc:identifier>
<dc:title><![CDATA[GeneE: Gene and protein query expansion with disambiguation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-16</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp596v1?rss=1">
<title><![CDATA[INTERSNP: Genome-wide Interaction Analysis Guided by A Priori Information]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp596v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Genome-wide association studies (GWAS) have led to the identification of hundreds of genomic regions associated with complex diseases. Nevertheless, a large fraction of their heritability remains unexplained. Interaction between genetic variants is one of several putative explanations for the "case of missing heritability" and, therefore, a compelling next analysis step. However, genome-wide interaction analysis (GWIA) of all pairs of SNPs from a standard marker panel is computationally unfeasible without massive parallelization. Furthermore, GWIA of all SNP triples is utopian. In order to overcome these computational constraints, we present a GWIA approach that selects combinations of SNPs for interaction analysis based on a priori information. Sources of information are statistical evidence (single marker association at a moderate level), genetic relevance (genomic location) and biologic relevance (SNP function class and pathway information). We introduce the software package INTERSNP that implements a logistic regression framework as well as log-linear models for joint analysis of multiple SNPs. Automatic handling of SNP annotation and pathways from the KEGG database is provided. In addition, Monte-Carlo simulations to judge genome-wide significance are implemented. We introduce various meaningful GWIA strategies that can be conducted using INTERSNP. Typical examples are, for instance, the analysis of all pairs of nonsynonymous SNPs, or, the analysis of all combinations of three SNPs that lie in a common pathway and that are among the top 50,000 single-marker results. We demonstrate the feasibility of these and other GWIA strategies by application to a GWAS data set and discuss promising results.</p>
<p><b>Availability:</b> The software is available at <inter-ref locator="http://intersnp.meb.uni-bonn.de" locator-type="url">http://intersnp.meb.uni-bonn.de</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="herold@imbie.meb.uni-bonn.de" locator-type="email">herold@imbie.meb.uni-bonn.de</inter-ref>; <inter-ref locator="becker@imbie.meb.uni-bonn.de" locator-type="email">becker@imbie.meb.uni-bonn.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Herold, C., Steffens, M., Brockschmidt, F. F., Baur, M. P., Becker, T.]]></dc:creator>
<dc:date>Fri, 16 Oct 2009 02:07:38 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp596</dc:identifier>
<dc:title><![CDATA[INTERSNP: Genome-wide Interaction Analysis Guided by A Priori Information]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-16</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp595v1?rss=1">
<title><![CDATA[NAViGaTOR: Network Analysis, Visualization & Graphing Toronto]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp595v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> NAViGaTOR is a powerful graphing application for the 2D and 3D visualization of biological networks. NAViGaTOR includes a rich suite of visual mark-up tools for manual and automated annotation, fast and scalable layout algorithms, and OpenGL hardware acceleration to facilitate the visualization of large graphs. Publication-quality images can be rendered through SVG graphics export. NAViGaTOR supports community-developed data formats (PSI-XML, BioPax, and GML), is platform-independent, and is extensible through a plug-in architecture.</p>
<p><b>Availability:</b> NAViGaTOR is freely available to the research community from <inter-ref locator="http://ophid.utoronto.ca/navigator/" locator-type="url">http://ophid.utoronto.ca/navigator/</inter-ref>. Installers and documentation are provided for 32- and 64-bit Windows, Mac, Linux, and Unix.</p>
<p><b>Contact:</b> <inter-ref locator="juris@ai.utoronto.ca" locator-type="email">juris@ai.utoronto.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Brown, K. R., Otasek, D., Ali, M., McGuffin, M., Xie, W., Devani, B., van Toch, I. L., Jurisica, I.]]></dc:creator>
<dc:date>Fri, 16 Oct 2009 02:07:38 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp595</dc:identifier>
<dc:title><![CDATA[NAViGaTOR: Network Analysis, Visualization & Graphing Toronto]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-16</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp593v1?rss=1">
<title><![CDATA[Significant speedup of database searches with HMMs by search space reduction with PSSM family models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp593v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Profile Hidden Markov models (pHMMs) are currently the most popular modeling concept for protein families. They provide sensitive family descriptors, and sequence database searching with pHMMs has become a standard task in today's genome annotation pipelines. On the downside, searching with pHMMs is computationally expensive.</p>
<p>Results: We propose a new method for efficient protein family classification and for speeding up database searches with pHMMs as is necessary for large scale analysis scenarios. We employ simpler models of protein families called PSSM family models. For fast database search, we combine full text indexing, efficient exact p-value computation of PSSM match scores, and fast fragment chaining. The resulting method is well suited to pre-filter the set of sequences to be searched for subsequent database searches with pHMMs.</p>
<p>We achieved a classification performance only marginally inferior to hmmsearch, yet, results could be obtained in a fraction of runtime with a speedup of more than 64 fold. In experiments addressing the method's ability to pre-filter the sequence space for subsequent database searches with pHMMs, our method reduces the number of sequences to be searched with hmmsearch to only 0.80% of all sequences. The filter is very fast and leads to a total speedup of factor 43 over the unfiltered search while retaining more than 99.5% of the original results. In a lossless filter setup for hmmsearch on UniProtKB/Swiss-Prot, we observed a speedup of factor 92.</p>
<p>Availability: The presented algorithms are implemented in the program PoSSuMsearch2, available for download at <inter-ref locator="http://bibiserv.techfak.uni-bielefeld.de/possumsearch2/" locator-type="url">http://bibiserv.techfak.uni-bielefeld.de/possumsearch2/</inter-ref>.</p>
<p>Contact: <inter-ref locator="beckstette@zbh.uni-hamburg.de" locator-type="email">beckstette@zbh.uni-hamburg.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Beckstette, M., Homann, R., Giegerich, R., Kurtz, S.]]></dc:creator>
<dc:date>Wed, 14 Oct 2009 02:39:55 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp593</dc:identifier>
<dc:title><![CDATA[Significant speedup of database searches with HMMs by search space reduction with PSSM family models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-14</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp592v1?rss=1">
<title><![CDATA[Integrated Analysis of Copy Number Alterations and Gene Expression: A Bivariate Assessment of Equally Directed Abnormalities]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp592v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The analysis of a number of different genetic features like copy number variation, gene expression or loss of heterozygosity has considerably increased in recent years, as well as the number of available data sets. This is particularly due to the success of microarray technology. Thus, to understand mechanisms of disease pathogenesis on a molecular basis, e.g., in cancer research, the challenge of analyzing such different data types in an integrated way has become increasingly important. In order to tackle this problem, we propose a new procedure for an integrated analysis of two different data types that searches for genes and genetic regions which for both inputs display strong equally directed deviations from the reference median. We employ this approach, based on a modified correlation coefficient and an explorative Wilcoxon test, to find DNA regions of such abnormalities in gene expression and copy number (e.g., underexpressed genes accompanied by a loss of DNA material).</p>
<p><b>Results:</b> In an application to acute myeloid leukemia, our procedure is able to identify various regions on different chromosomes with characteristic abnormalities in gene expression and copy number data and shows a higher sensitivity to differences in abnormalities than standard approaches. While the results support various findings of previous studies, some new interesting DNA regions can be identified. In a simulation study, our procedure also shows more reliable results than standard approaches.</p>
<p><b>Availability:</b> Code and data available as R packages edira and ediraAMLdata from <ty><inter-ref locator="http://www.statistik.tu-dortmund.de/~schaefer/" locator-type="url">http://www.statistik.tu-dortmund.de/~schaefer/</inter-ref></ty></p>
<p><b>Contact:</b> <inter-ref locator="martin.schaefer@udo.edu" locator-type="email">martin.schaefer@udo.edu</inter-ref></p>
<p><b>Supplementary Information:</b> Supplementary material is available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Schafer, M., Schwender, H., Merk, S., Haferlach, C., Ickstadt, K., Dugas, M.]]></dc:creator>
<dc:date>Wed, 14 Oct 2009 02:39:54 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp592</dc:identifier>
<dc:title><![CDATA[Integrated Analysis of Copy Number Alterations and Gene Expression: A Bivariate Assessment of Equally Directed Abnormalities]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-14</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp587v1?rss=1">
<title><![CDATA[ABWGAT: Anchor Based Whole Genome Analysis Tool.]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp587v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Large numbers of genomes are being sequenced regularly and the rate will go up in future due to availability of new genome sequencing techniques. In order to understand genotype to phenotype relationships it is necessary to identify sequence variations at the genomic level. Alignment of a pair of genomes and parsing the alignment data is an accepted approach for identification of variations. Though there are a number of tools available for whole genome alignment none of these allows automatic parsing of the alignment and identification of different kinds of genomic variants with high degree of sensitivity. Here we present a simple web based interface for whole genome comparison named ABWGAT (Anchor Based Whole Genome Analysis Tool) that is simple to use. The output is a list of variations such as SNVs, INDELs, Repeat expansion and Inversion.</p>
<p><b>Availability:</b> The web server is freely available to non-commercial users at the following address: <inter-ref locator="http://abwgc.jnu.ac.in/~sarba/" locator-type="url">http://abwgc.jnu.ac.in/~sarba/</inter-ref> </p>
<p><b>Contact:</b> <inter-ref locator="dsarbashis@gmail.com" locator-type="email">dsarbashis@gmail.com</inter-ref></p>
<p><b>Supplementary Information:</b> Supplementary data are available at <inter-ref locator="http://abwgc.jnu.ac.in/~sarba/cgi-bin/abwgc_retrival.cgi" locator-type="url">http://abwgc.jnu.ac.in/~sarba/cgi-bin/abwgc_retrival.cgi</inter-ref> using job id 524, 526 and 528.</p>
]]></description>
<dc:creator><![CDATA[Das, S., Vishnoi, A., Bhattacharya, A.]]></dc:creator>
<dc:date>Wed, 14 Oct 2009 02:39:53 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp587</dc:identifier>
<dc:title><![CDATA[ABWGAT: Anchor Based Whole Genome Analysis Tool.]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-14</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp591v1?rss=1">
<title><![CDATA[Response to comment on 'Can sugars be produced from fatty acids? A test case for pathway analysis tools']]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp591v1?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[de Figueiredo, L. F., Schuster, S., Kaleta, C., Fell, D. A.]]></dc:creator>
<dc:date>Tue, 13 Oct 2009 08:07:54 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp591</dc:identifier>
<dc:title><![CDATA[Response to comment on 'Can sugars be produced from fatty acids? A test case for pathway analysis tools']]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-13</prism:publicationDate>
<prism:section>LETTER TO THE EDITOR</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp590v1?rss=1">
<title><![CDATA[Efficient Estimation of Pairwise Distances between Genomes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp590v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Genome comparison is central to contemporary genomics and it typically relies on sequence alignment. However, genome-wide alignments are difficult to compute. We have therefore recently developed an accurate alignment-free estimator of the number of substitutions per site based on the lengths of exact matches between pairs of sequences. The previous implementation of this measure requires <I>n</I>(<I>n</I> &ndash; 1) suffix tree constructions and traversals, where <I>n</I> is the number of sequences analyzed. This does not scale well for large <I>n</I>.</p>
<p>Results: We present an algorithm to extract (<f><sup><I>n</I></sup></f> <f><SUB>2</SUB></f>) pairwise distances in a single traversal of a single suffix tree containing <I>n</I> sequences. As a result, the run time of the suffix tree construction phase of our algorithm is reduced from <I>O</I>(<I>n</I><sup>2</sup><I>L</I>) to <I>O</I>(<I>n</I><I>L</I>), where <I>L</I> is the length of each sequence. We implement this algorithm in the program kr version 2 and apply it to 825 HIV genomes, 13 genomes of enterobacteria and the complete genomes of 12 Drosophila species. We show that, depending on the input data set, the new program is at least 10 times faster than its predecessor.</p>
<p>Availability: Version 2 of kr can be tested via a web interface at <inter-ref locator="http://guanine.evolbio.mpg.de/kr2/" locator-type="url">http://guanine.evolbio.mpg.de/kr2/</inter-ref></p>
<p>It is written in standard C and its source code is available under the GNU General Public License from the same web site.</p>
<p>Contact: <inter-ref locator="mdomazet@evolbio.mpg.de" locator-type="email">mdomazet@evolbio.mpg.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Domazet-Loso, M., Haubold, B.]]></dc:creator>
<dc:date>Tue, 13 Oct 2009 08:07:52 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp590</dc:identifier>
<dc:title><![CDATA[Efficient Estimation of Pairwise Distances between Genomes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-13</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp589v1?rss=1">
<title><![CDATA[A novel method for mining highly imbalanced high-throughput screening data in PubChem]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp589v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The comprehensive information of small molecules and their biological activities in PubChem brings great opportunities for academic researchers. However, mining high throughput screening (HTS) assay data remains a great challenge given the very large data volume and the highly imbalanced nature with only small number of active compounds compared to inactive compounds. Therefore, there is currently a need for better strategies to work with HTS assay data. Moreover, as luciferase-based HTS technology is frequently exploited in the assays deposited in PubChem, constructing a computational model to distinguish and filter out potential interference compounds for these assays is another motivation.</p>
<p><b>Results:</b> We used the granular support vector machines (SVMs) repetitive undersampling method (GSVM-RU) to construct an SVM from luciferase inhibition bioassay data in which the imbalance ratio of active/inactive is high (1/377). The best model recognized the active and inactive compounds at the accuracies of 86.60% and 88.89% with a total accuracy of 87.74%, by cross-validation test and blind test. These results demonstrate the robustness of the model in handling the intrinsic imbalance problem in HTS data and it can be used as a virtual screening tool to identify potential interference compounds in luciferase-based HTS experiments. Additionally, this method has also proved computationally efficient by greatly reducing the computational cost and can be easily adopted in the analysis of HTS data for other biological systems.</p>
<p><b>Availability:</b> Data are publicly available in PubChem with AIDs of 773, 1006 and 1379.</p>
<p><b>Contact:</b> <inter-ref locator="ywang@ncbi.nlm.nih.gov" locator-type="email">ywang@ncbi.nlm.nih.gov</inter-ref>; <inter-ref locator="bryant@ncbi.nlm.nih.gov" locator-type="email">bryant@ncbi.nlm.nih.gov</inter-ref></p>
<p><b>Supplementary information:</b> Supplementary data are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Li, Q., Wang, Y., Bryant, S. H.]]></dc:creator>
<dc:date>Tue, 13 Oct 2009 08:07:48 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp589</dc:identifier>
<dc:title><![CDATA[A novel method for mining highly imbalanced high-throughput screening data in PubChem]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-13</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp588v1?rss=1">
<title><![CDATA[Detailing regulatory networks through large scale data integration]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp588v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Much of a cell's regulatory response to changing environments occurs at the transcriptional level. Particularly in higher organisms, transcription factors, microRNAs, and epigenetic modifications can combine to form a complex regulatory network. Part of this system can be modeled as a collection of regulatory modules: coregulated genes, the conditions under which they're coregulated, and sequence-level regulatory motifs.</p>
<p><b>Results:</b> We present the COALESCE system for regulatory module prediction. The algorithm is efficient enough to discover expression biclusters and putative regulatory motifs in metazoan genomes (&gt;20,000 genes) and very large microarray compendia (&gt;10,000 conditions). Using Bayesian data integration, it can also include diverse supporting data types such as evolutionary conservation or nucleosome placement. We validate its performance using a functional evaluation of coclustered genes, known yeast and <I>E. coli</I> transcription factor targets, synthetic data, and various metazoan data compendia. In all cases, COALESCE performs as well as or better than current biclustering and motif prediction tools, with high accuracy in functional and TF/target assignments and zero false positives on synthetic data. COALESCE provides an efficient and flexible platform within which large, diverse data collections can be integrated to predict metazoan regulatory networks.</p>
<p><b>Availability:</b> Source code (C++) is available at <inter-ref locator="http://function.princeton.edu/sleipnir" locator-type="url">http://function.princeton.edu/sleipnir</inter-ref>, and supporting data and a web interface are provided at <inter-ref locator="http://function.princeton.edu/coalesce" locator-type="url">http://function.princeton.edu/coalesce</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="hcoller@princeton.edu" locator-type="email">hcoller@princeton.edu</inter-ref>, <inter-ref locator="ogt@cs.princeton.edu" locator-type="email">ogt@cs.princeton.edu</inter-ref>.</p>
]]></description>
<dc:creator><![CDATA[Huttenhower, C., Mutungu, K. T., Indik, N., Yang, W., Schroeder, M., Forman, J. J., Troyanskaya, O. G., Coller, H. A.]]></dc:creator>
<dc:date>Tue, 13 Oct 2009 08:07:47 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp588</dc:identifier>
<dc:title><![CDATA[Detailing regulatory networks through large scale data integration]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-13</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp577v2?rss=1">
<title><![CDATA[Exon Array Analyzer: a web interface for Affymetrix exon array analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp577v2?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The Exon Array Analyzer (EAA) is a web server, which provides a user-friendly interface to identify alternative splicing events analyzed with Affymetrix Exon Arrays. The EAA implements the Splice Index algorithm to identify differentially expressed exons. The use of various filters allows reduction of the number of false positive hits. Results are presented with detailed annotation information and graphics to identify splice events and to facilitate biological validations. To demonstrate the versatility of the EAA we analyzed exon arrays of 11 different murine tissues using sample data provided by Affymetrix (<inter-ref locator="http://www.affymetrix.com" locator-type="url">http://www.affymetrix.com</inter-ref>). Data from the heart were compared to other tissues to identify exons that undergo heart-specific alternative splicing, resulting in the identification of 885 differentially expressed probe sets in 649 genes.</p>
<p><b>Availability:</b> The web interface is available at <inter-ref locator="http://EAA.mpi-bn.mpg.de/" locator-type="url">http://EAA.mpi-bn.mpg.de/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="Thomas.Braun@mpi-bn.mpg.de" locator-type="email">Thomas.Braun@mpi-bn.mpg.de</inter-ref></p>
<p><b>Supplementary information:</b> Detailed documentation is available on the Exon Array Analyzer web site (<inter-ref locator="http://EAA.mpi-bn.mpg.de/supp.php" locator-type="url">http://EAA.mpi-bn.mpg.de/supp.php</inter-ref>) including screen shots, example analyses and step-by-step instructions.</p>
]]></description>
<dc:creator><![CDATA[Gellert, P., Uchida, S., Braun, T.]]></dc:creator>
<dc:date>Mon, 12 Oct 2009 21:25:43 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp577</dc:identifier>
<dc:title><![CDATA[Exon Array Analyzer: a web interface for Affymetrix exon array analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-12</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp585v1?rss=1">
<title><![CDATA[Predicting citation count of Bioinformatics papers within four years of publication]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp585v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Nowadays, publishers of scientific journals face the tough task of selecting high quality articles that will attract as many readers as possible from a pool of articles. This is due to the growth of scientific output and literature. The possibility of a journal having a tool capable of predicting the citation count of an article within the first few years after publication would pave the way for new assessment systems.</p>
<p><b>Results:</b> This paper presents a new approach based on building several prediction models for the <I>Bioinformatics</I> journal. These models predict the citation count of an article within four years after publication (global models). To build these models, tokens found in the abstracts of <I>Bioinformatics</I> papers have been used as predictive features, along with other features like the journal sections and two-week post publication periods. To improve the accuracy of the global models, specific models have been built for each <I>Bioinformatics</I> journal section (<I>Data and Text Mining</I>, <I>Databases and Ontologies</I>, <I>Gene Expression</I>, <I>Genetics and Population Analysis</I>, <I>Genome Analysis</I>, <I>Phylogenetics</I>, <I>Sequence Analysis</I>, <I>Structural Bioinformatics</I> and <I>Systems Biology</I>). In these new models, the average success rate for predictions using the naive Bayes and logistic regression supervised classification methods was 89.4% and 91.5%, respectively, within the nine sections and for a four-year time horizon.</p>
<p><b>Availability:</b> Supplementary material on this experimental survey is available at <inter-ref locator="http://www.dia.fi.upm.es/~concha/bioinformatics.html" locator-type="url">http://www.dia.fi.upm.es/~concha/bioinformatics.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="aibanez@fi.upm.es" locator-type="email">aibanez@fi.upm.es</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ibanez, A., Larranaga, P., Bielza, C.]]></dc:creator>
<dc:date>Fri, 09 Oct 2009 08:37:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp585</dc:identifier>
<dc:title><![CDATA[Predicting citation count of Bioinformatics papers within four years of publication]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-09</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp583v1?rss=1">
<title><![CDATA[Inferring relative proportions of DNA variants from sequencing electropherograms]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp583v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Determination of the relative copy number of single-nucleotide sequence variants (SNVs) within a DNA sample is a frequent experimental goal. Various methods can be applied to this problem, although hybridization-based approaches tend to suffer from high setup cost and poor adaptability, while others (such as pyrosequencing) may not be accessible to all laboratories. The potential to extract relative copy number information from standard dye-terminator electropherograms has been little explored, yet this technology is cheap and widely accessible. Since several biologically important loci have paralogous copies that interfere with genotyping, and which may also display copy number variation (CNV), there are many situations in which determination of the relative copy number of SNVs is desirable.</p>
<p><b>Results:</b> We have developed a desktop application, QSVanalyser, which allows high throughput quantification of the proportions of DNA sequences containing single-nucleotide variants. In reconstruction experiments, QSVanalyser accurately estimated the known relative proportions of SNVs. By analysing a large panel of genomic DNA samples, we demonstrate the ability of the software to analyse not only common biallelic SNVs, but also SNVs within a locus at which gene conversion between 4 genomic paralogues operates, and within another that is subject to copy number variation.</p>
<p><b>Availability and Implementation:</b> QSVanalyser is freely available at <inter-ref locator="http://dna.leeds.ac.uk/qsv/" locator-type="url">http://dna.leeds.ac.uk/qsv/</inter-ref>. It requires the Microsoft .NET framework version 2.0, which can be installed on all Microsoft operating systems from Windows 98 onwards.</p>
]]></description>
<dc:creator><![CDATA[Carr, I. M., Robinson, J. I., Dimitriou, R., Markham, A. F., Morgan, A. W., Bonthron, D. T.]]></dc:creator>
<dc:date>Fri, 09 Oct 2009 08:37:19 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp583</dc:identifier>
<dc:title><![CDATA[Inferring relative proportions of DNA variants from sequencing electropherograms]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-09</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp582v1?rss=1">
<title><![CDATA[A Bayesian approach to the alignment of mass spectra]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp582v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The need to align spectra to correct for mass-to-charge experimental variation is a problem that arises in mass spectrometry (MS). Most MS-based proteomic data analysis methods involve a two-step approach, identify peaks first and then do the alignment and statistical inference on these identified peaks only. However, the peak identification step relies on prior information on the proteins of interest or a peak detection model, which are subject to error. Also numerous additional features such as peak shape and peak width are lost in simple peak detection, and these are informative for correcting mass variation in the alignment step.</p>
<p><b>Results:</b> Here we present a novel Bayesian approach to align the complete spectra. The approach is based on a parametric model which assumes the spectrum and alignment function are Gaussian processes, but the alignment function is monotone. We show how to use the expectation-maximization algorithm to find the posterior mode of the set of alignment functions and the mean spectrum for a patient population. After alignment, we conduct tests while controlling for error attributable to multiple comparisons on the level of the peaks identified from the absolute mean spectra difference of two patient populations.</p>
<p><b>Contact:</b> <inter-ref locator="cavanr@biostat.umn.edu" locator-type="email">cavanr@biostat.umn.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kong, X., Reilly, C.]]></dc:creator>
<dc:date>Fri, 09 Oct 2009 08:37:18 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp582</dc:identifier>
<dc:title><![CDATA[A Bayesian approach to the alignment of mass spectra]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-09</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp576v1?rss=1">
<title><![CDATA[Knowledge-Based Instantiation of Full Atomic Detail into Coarse Grain RNA 3D Structural Models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp576v1?rss=1</link>
<description><![CDATA[
<p>Motivation: The recent development of methods for modeling RNA 3D structures using coarse-grain approaches creates a need to bridge low and high resolution modeling methods. Although they contain topological information, coarse-grain models lack atomic detail, which limits their utility for some applications.</p>
<p>Results: We have developed a method for adding full atomic detail to coarse-grain models of RNA 3D structures. Our method (C2A) uses geometries observed in known RNA crystal structures. Our method rebuilds full atomic detail from ideal coarse-grain backbones taken from crystal structures to within 1.87 to 3.31 &Aring; RMSd of the full atomic crystal structure. When starting from coarse-grain models generated by the modeling tool NAST, our method builds full atomic structures that are within 1.00 &Aring; RMSd of the starting structure. The resulting full atomic structures can be used as starting points for higher resolution modeling, thus bridging high and low resolution approaches to modeling RNA 3D structure.</p>
<p>Availability: Code for the C2A method, as well as the examples discussed in this paper, are freely available at <inter-ref locator="www.simtk.org/home/c2a" locator-type="url">www.simtk.org/home/c2a</inter-ref>.</p>
<p>Contact: <inter-ref locator="russ.altman@stanford.edu" locator-type="email">russ.altman@stanford.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Jonikas, M. A., Radmer, R. J., Altman, R. B.]]></dc:creator>
<dc:date>Wed, 07 Oct 2009 01:34:56 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp576</dc:identifier>
<dc:title><![CDATA[Knowledge-Based Instantiation of Full Atomic Detail into Coarse Grain RNA 3D Structural Models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-07</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp580v1?rss=1">
<title><![CDATA[CentroidAlign: Fast and Accurate Aligner for Structured RNAs by Maximizing Expected Sum-of-pairs Score]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp580v1?rss=1</link>
<description><![CDATA[
<p>Motivation: The importance of accurate and fast predictions of multiple alignments for RNA sequences has increased due to recent findings about functional non-coding RNAs. Recent studies suggest that maximizing the expected accuracy of predictions will be useful for many problems in bioinformatics.</p>
<p>Results: We designed a novel estimator for multiple alignments of structured RNAs, based on maximizing the expected accuracy of predictions. First, we define the maximum expected accuracy (MEA) estimator for pairwise alignment of RNA sequences. This maximizes the expected sum-of-pairs score (SPS) of a predicted alignment under a probability distribution of alignments given by marginalizing the Sankoff model. Then, by approximating the MEA estimator, we obtain an estimator whose time complexity is <I>O</I>(<I>L</I><sup>3</sup> + <I>c</I><sup>2</sup><I>dL</I><sup>2</sup>) where <I>L</I> is the length of input sequences and both c and d are constants independent of <I>L</I>. The proposed estimator can handle uncertainty of secondary structures and alignments that are obstacles in Bioinformatics because it considers all the secondary structures and all the pairwise alignments as input sequences. Moreover, we integrate the probabilistic consistency transformation (PCT) on alignments into the proposed estimator. Computational experiments using six benchmark datasets indicate that the proposed method achieved a favorable SPS and was the fastest of many state&ndash;of&ndash;the&ndash;art tools for multiple alignments of structured RNAs.</p>
<p>Availability: The software called CentroidAlign, which is an implementation of the algorithm in this paper, is freely available on our website: <inter-ref locator="http://www.ncrna.org/software/centroidalign/" locator-type="url">http://www.ncrna.org/software/centroidalign/</inter-ref>.</p>
<p>Contact: <inter-ref locator="hamada-michiaki@aist.go.jp" locator-type="email">hamada-michiaki@aist.go.jp</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Hamada, M., Sato, K., Kiryu, H., Mituyama, T., Asai, K.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 08:37:32 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp580</dc:identifier>
<dc:title><![CDATA[CentroidAlign: Fast and Accurate Aligner for Structured RNAs by Maximizing Expected Sum-of-pairs Score]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp579v1?rss=1">
<title><![CDATA[Effect of read-mapping biases on detecting allele-specific expression from RNA-sequencing data.]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp579v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Next-generation sequencing has become an important tool for genome-wide quantification of DNA and RNA. However, a major technical hurdle lies in the need to map short sequence reads back to their correct locations in a reference genome. Here we investigate the impact of SNP variation on the reliability of read-mapping in the context of detecting allele-specific expression (ASE).</p>
<p><b>Results:</b> We generated sixteen million 35 bp reads from mRNA of each of two HapMap Yoruba individuals. When we mapped these reads to the human genome we found that, at heterozygous SNPs, there was a significant bias towards higher mapping rates of the allele in the reference sequence, compared to the alternative allele. Masking known SNP positions in the genome sequence eliminated the reference bias but, surprisingly, did not lead to more reliable results overall. We find that even after masking, ~5-10% of SNPs still have an inherent bias towards more effective mapping of one allele. Filtering out inherently biased SNPs removes 40% of the top signals of ASE. The remaining SNPs showing ASE are enriched in genes previously known to harbor cis-regulatory variation or known to show uniparental imprinting. Our results have implications for a variety of applications involving detection of alternate alleles from short-read sequence data.</p>
<p><b>Availability:</b> Scripts, written in Perl and R, for simulating short reads, masking SNP variation in a reference genome, and analyzing the simulation output are available upon request from JFD. Raw short read data were deposited in GEO (<inter-ref locator="http://www.ncbi.nlm.nih.gov/geo/" locator-type="url">http://www.ncbi.nlm.nih.gov/geo/</inter-ref>) under accession number GSE18156.</p>
<p><b>Contact:</b> <inter-ref locator="jdegner@uchicago.edu" locator-type="email">jdegner@uchicago.edu</inter-ref>, <inter-ref locator="marioni@uchicago.edu" locator-type="email">marioni@uchicago.edu</inter-ref>, <inter-ref locator="gilad@uchicago.edu" locator-type="email">gilad@uchicago.edu</inter-ref>, <inter-ref locator="pritch@uchicago.edu" locator-type="email">pritch@uchicago.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Degner, J. F., Marioni, J. C., Pai, A. A., Pickrell, J. K., Nkadori, E., Gilad, Y., Pritchard, J. K.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 08:37:31 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp579</dc:identifier>
<dc:title><![CDATA[Effect of read-mapping biases on detecting allele-specific expression from RNA-sequencing data.]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp578v1?rss=1">
<title><![CDATA[HTqPCR: High-throughput analysis and visualization of quantitative real-time PCR data in R]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp578v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Quantitative real-time polymerase chain reaction (qPCR) is routinely used for RNA expression profiling, validation of microarray hybridization data, and clinical diagnostic assays. Although numerous statistical tools are available in the public domain for the analysis of microarray experiments, this is not the case for qPCR. Proprietary software is typically provided by instrument manufacturers, but these solutions are not amenable to the tandem analysis of multiple assays. This is problematic when an experiment involves more than a simple comparison between a control and treatment sample, or when many qPCR datasets are to be analyzed in a high-throughput facility.</p>
<p><b>Results:</b> We have developed <I>HTqPCR</I>, a package for the R statistical computing environment, to enable the processing and analysis of qPCR data across multiple conditions and replicates.</p>
<p><b>Availability:</b> <I>HTqPCR</I> and user documentation can be obtained from the Bioconductor project, or at <inter-ref locator="http://www.ebi.ac.uk/bertone/software" locator-type="url">http://www.ebi.ac.uk/bertone/software</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="bertone@ebi.ac.uk" locator-type="email">bertone@ebi.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Dvinge, H., Bertone, P.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 07:38:05 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp578</dc:identifier>
<dc:title><![CDATA[HTqPCR: High-throughput analysis and visualization of quantitative real-time PCR data in R]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp575v1?rss=1">
<title><![CDATA[Pathway identification by network pruning in the metabolic network of Escherichia coli]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp575v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> All metabolic networks contain metabolites which take part in many reactions, such as ATP and NAD, known as currency metabolites. These are often removed in the study of these networks, but no consensus exists on what actually constitutes a currency metabolite, and it is also unclear how these highly connected nodes contribute to the global structure of the network.</p>
<p><b>Results:</b> In this paper we analyse how the <I>Escherichia coli</I> metabolic network responds to pruning in the form of sequential removal of metabolites with highest degree. As expected this leads to network fragmentation, but the process by which it occurs suggests modularity and long range correlations within the network. We find that the pruned networks contain longer paths than the random expectation, and that the paths that survive the pruning also exhibit a lower cost (no. of involved metabolites) compared to random paths in the full metabolic network. Finally we confirm that paths detected by pruning overlap with known metabolic pathways. We conclude that pruning reveals functional pathways in metabolic networks, where currency metabolites may be seen as ingredients in a well-balanced soup in which main metabolic production lines are immersed.</p>
<p><b>Contact:</b> <inter-ref locator="gerlee@nbi.dk" locator-type="email">gerlee@nbi.dk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Gerlee, P., Lizana, L., Sneppen, K.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 07:38:03 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp575</dc:identifier>
<dc:title><![CDATA[Pathway identification by network pruning in the metabolic network of Escherichia coli]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp574v1?rss=1">
<title><![CDATA[ncRNAppi - A tool for identifying disease-related miRNA and siRNA targeting pathways]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp574v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Currently, there are a number of databases which store microRNA (miRNA) information, and tools available which provide miRNA target prediction. In this paper we describe a novel web-based tool that integrates the miRNA-targeted mRNA data, protein-protein interactions (PPI) records, tissues, biochemical pathways, human disease and gene function information to establish a disease-related miRNA target pathway database. This database is unique in the sense that it links miRNA target genes with their PPI partners according to being tissue-specific, disease-specific or both. The same approach is also applied to siRNA data. This database provides two types of searches; (i) tissue-specific, and (ii) disease-specific miRNA (or siRNA) targeting pathways. The search allows one to identify tissue-specific or disease-specific miRNA (or siRNA) target gene's PPI partners two levels beyond.</p>
<p>The release version 1.0 is a freely accessible database available at <inter-ref locator="http://ncrnappi.cs.nthu.edu.tw" locator-type="url">http://ncrnappi.cs.nthu.edu.tw</inter-ref>, <inter-ref locator="http://ncRNAppi.bioinfo.asia.edu.tw/" locator-type="url">http://ncRNAppi.bioinfo.asia.edu.tw/</inter-ref> </p>
<p><b>Contact: </b> <sup>1</sup>*<inter-ref locator="ppiddi@gmail.com" locator-type="email">ppiddi@gmail.com</inter-ref>, <sup>2</sup>*<inter-ref locator="o2snow@gmail.com" locator-type="email">o2snow@gmail.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ng, K.-L., Liu, H.-C., Lee, S.-C.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 07:38:02 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp574</dc:identifier>
<dc:title><![CDATA[ncRNAppi - A tool for identifying disease-related miRNA and siRNA targeting pathways]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp571v1?rss=1">
<title><![CDATA[Quantitative measurement of aging using image texture entropy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp571v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> A key element in understanding the aging of <I>C. elegans</I> is objective quantification of the morphological differences between younger and older animals. Here we propose to use the image texture entropy as an objective measurement that reflects the structural deterioration of the <I>C. elegans</I> muscle tissues during aging.</p>
<p><b>Results:</b> The texture entropy and directionality of the muscle microscopy images were measured using 50 animals on day 0, 2, 4, 6, 8, 10, and 12 of adulthood. Results show that the entropy of the <I>C. elegans</I> pharynx tissues increases as the animal ages, but a sharper increase was measured between day two and day four, and between day eight and day 10. These results are in agreement with gene expression findings, and support the contention that the process of <I>C. elegans</I> aging has several distinct stages. This can indicate that <I>C. elegans</I> aging is driven by developmental pathways, rather than stochastic accumulation of damage.</p>
<p><b>Availability:</b> The image data are freely available on the internet at <inter-ref locator="http://ome.grc.nia.nih.gov/iicbu2008/celegans" locator-type="url">http://ome.grc.nia.nih.gov/iicbu2008/celegans</inter-ref>, and the Haralick and Tamura texture analysis source code can be downloaded at <inter-ref locator="http://ome.grc.nia.nih.gov/wnd-charm" locator-type="url">http://ome.grc.nia.nih.gov/wnd-charm</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="shamirl@mail.nih.gov" locator-type="email">shamirl@mail.nih.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Shamir, L., Wolkow, C. A., Goldberg, I. G.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 07:38:01 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp571</dc:identifier>
<dc:title><![CDATA[Quantitative measurement of aging using image texture entropy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>DISCOVERY NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp568v1?rss=1">
<title><![CDATA[Qupe - a Rich Internet Application to take a Step Forward in the Analysis of Mass Spectrometry-Based Quantitative Proteomics Experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp568v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The goal of present -omics sciences is to understand biological systems as a whole in terms of interactions of the individual cellular components. One of the main building blocks in this field of study is proteomics where tandem mass spectrometry (LC-MS/MS) in combination with isotopic labelling techniques provides a common way to obtain a direct insight into regulation at the protein level. Methods to identify and quantify the peptides contained in a sample are well-established, and their output usually results in lists of identified proteins and calculated relative abundance values. The next step is to move ahead from these abstract lists and apply statistical inference methods to compare measurements, to identify genes that are significantly up- or down-regulated, or to detect clusters of proteins with similar expression profiles.</p>
<p><b>Results:</b> We introduce the rich internet application Qupe providing comprehensive data management and analysis functions for LC-MS/MS experiments. Starting with the import of mass spectra data the system guides the experimenter through the process of protein identification by database search, the calculation of protein abundance ratios, and, in particular, the statistical evaluation of the quantification results including multivariate analysis methods such as analysis of variance or hierarchical cluster analysis. While a data model to store these results has been developed, a well-defined programming interface facilitates the integration of novel approaches. A compute cluster is utilised to distribute computationally intensive calculations, and a web service allows to interchange information with other &ndash;omics software applications. To demonstrate that Qupe represents a step forward in quantitative proteomics analysis an application study on <I>Corynebacterium glutamicum</I> has been carried out.</p>
<p><b>Availability and Implementation:</b> Qupe is implemented in Java utilising Hibernate, Echo2, R and the Spring framework. We encourage the usage of the rich internet application in the sense of the "software as a service" concept, maintained on our servers and accessible at the following location: <inter-ref locator="http://qupe.cebitec.uni-bielefeld.de" locator-type="url">http://qupe.cebitec.uni-bielefeld.de</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="Stefan.Albaum@CeBiTec.Uni-Bielefeld.DE" locator-type="email">Stefan.Albaum@CeBiTec.Uni-Bielefeld.DE</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Albaum, S. P., Neuweger, H., Franzel, B., Lange, S., Mertens, D., Trotschel, C., Wolters, D., Kalinowski, J., Nattkemper, T. W., Goesmann, A.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 08:37:31 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp568</dc:identifier>
<dc:title><![CDATA[Qupe - a Rich Internet Application to take a Step Forward in the Analysis of Mass Spectrometry-Based Quantitative Proteomics Experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp567v1?rss=1">
<title><![CDATA[Adaptive multi-agent architecture for functional sequence motifs recognition]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp567v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Accurate genome annotation or protein function prediction requires precise recognition of functional sequence motifs. Many computational motif prediction models have been proposed. Due to the complexity of the biological data, it may be desirable to apply an integrated approach that uses multiple models for analysis.</p>
<p>Results: In this paper, we propose a novel multi-agent architecture for the general purpose of functional sequence motif recognition. The approach takes advantage of the synergy provided by multiple agents through the employment of different agents equipped with distinctive problem solving skills and promotes the collaborations among them through decision maker (DM) agents that work as classifier ensembles. A genetic algorithm based fusion strategy is applied which offers evolutionary property to the DM agents. The consistency and robustness of the system are maintained by an evolvable agent that mediates the team of the ensemble agents. The combined effort of a recommendation system (Seer) and the self-learning mediator agent yields a successful identification of the most efficient agent deployment scheme at an early stage of the experimentation process, which has the potential of greatly reducing the computational cost of the system. Two concrete systems are constructed which aim at predicting two important sequence motifs &ndash; the translational initiation sites (TISs) and the core promoters. With the incorporation of three distinctive problem solver agents, the TIS predictor consistently outperforms most of the state-of-the-art approaches under investigation. Integrating three existing promoter predictors, our system is able to yield consistently good performance.</p>
<p>Availability: The program (MotifMAS) and the data sets are available upon request.</p>
<p>Contact: <inter-ref locator="jzeng@ucalgary.ca" locator-type="email">jzeng@ucalgary.ca</inter-ref>,<inter-ref locator="alhajj@ucalgary.ca" locator-type="email">alhajj@ucalgary.ca</inter-ref>,<inter-ref locator="demetric@ucalgary.ca" locator-type="email">demetric@ucalgary.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Zeng, J., Alhajj, R., Demetrick, D.]]></dc:creator>
<dc:date>Tue, 06 Oct 2009 07:38:01 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp567</dc:identifier>
<dc:title><![CDATA[Adaptive multi-agent architecture for functional sequence motifs recognition]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-06</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp572v1?rss=1">
<title><![CDATA[Processing and population genetic analysis of multigenic datasets with ProSeq3 software]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp572v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The current tendency in molecular population genetics is to use increasing numbers of genes in the analysis. Here I describe a program for handling and population genetic analysis of DNA polymorphism data collected from multiple genes. The program includes a sequence/alignment editor and an internal relational database that simplify the preparation and manipulation of multigenic DNA polymorphism datasets. The most commonly used DNA polymorphism analyses are implemented in ProSeq3, facilitating population genetic analysis of large multigenic datasets. Extensive input/output options make ProSeq3 a convenient hub for sequence data processing and analysis.</p>
<p><b>Availability:</b> The program is available free of charge from <inter-ref locator="http://dps.plants.ox.ac.uk/sequencing/proseq.htm" locator-type="url">http://dps.plants.ox.ac.uk/sequencing/proseq.htm</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="dmitry.filatov@plants.ox.ac.uk" locator-type="email">dmitry.filatov@plants.ox.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Filatov, D. A.]]></dc:creator>
<dc:date>Thu, 01 Oct 2009 05:47:16 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp572</dc:identifier>
<dc:title><![CDATA[Processing and population genetic analysis of multigenic datasets with ProSeq3 software]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-01</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp570v1?rss=1">
<title><![CDATA[W-ChIPMotifs: a web application tool for de novo motif discovery from ChIP-based high throughput data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp570v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> <b>W-ChIPMotifs</b> is a web application tool that provides a user friendly interface for <I>de novo</I> motif discovery. The web tool is based on our previous <b>ChIPMotifs</b> program which is a <I>de novo</I> motif finding tool developed for ChIP-based high throughput data and incorporated various <I>ab initio</I> motif discovery tools such as MEME, MaMF, Weeder and optimized the significance of the detected motifs by using a bootstrap resampling statistic method and a Fisher test. Use of a randomized statistical model like bootstrap resampling can significantly increase the accuracy of the detected motifs. In our web tool, we have modified the program in two aspects: 1) we have refined the <I>p</I>-value with a Bonferroni correction; 2) we have incorporated the STAMP tool to infer phylogenetic information and to determine the detected motifs if they are novel and known using the TRANSFAC and JASPAR databases. A comprehensive result file is mailed to users.</p>
<p><b>Availability:</b> <inter-ref locator="http://motif.bmi.ohio-state.edu/ChIPMotifs" locator-type="url">http://motif.bmi.ohio-state.edu/ChIPMotifs</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="victor.jin@osumc.edu" locator-type="email">victor.jin@osumc.edu</inter-ref></p>
<p><b>Supplementary information:</b> Data used in the paper may be downloaded from <inter-ref locator="http://motif.bmi.ohio-state.edu/ChIPMotifs/examples.shtml" locator-type="url">http://motif.bmi.ohio-state.edu/ChIPMotifs/examples.shtml</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Jin, V. X., Apostolos, J., Nagisetty, N. S. V. R., Farnham, P. J.]]></dc:creator>
<dc:date>Thu, 01 Oct 2009 05:47:16 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp570</dc:identifier>
<dc:title><![CDATA[W-ChIPMotifs: a web application tool for de novo motif discovery from ChIP-based high throughput data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-01</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp569v1?rss=1">
<title><![CDATA[Functionally guided alignment of protein interaction networks for module detection]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp569v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Functional module detection within protein interaction networks is a challenging problem due to the sparsity of data and presence of errors. Computational techniques for this task range from purely graph theoretical approaches involving single networks to alignment of multiple networks from several species. Current network alignment methods all rely on protein sequence similarity to map proteins across species.</p>
<p><b>Results:</b> Here we carry out network alignment using a protein functional similarity measure. We show that using functional similarity to map proteins across species improves network alignment in terms of functional coherence and overlap with experimentally verified protein complexes. Moreover, the results from functional similarity based network alignment display little overlap (&lt;15%) with sequence similarity based alignment. Our combined approach integrating sequence and function based network alignment alongside graph clustering properties offers a 200% increase in coverage of experimental datasets and comparable accuracy to current network alignment methods.</p>
<p><b>Availability:</b> Program binaries and source code is freely available at: <inter-ref locator="http://www.stats.ox.ac.uk/research/bioinfo/resources" locator-type="url">http://www.stats.ox.ac.uk/research/bioinfo/resources</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="ali@stats.ox.ac.uk" locator-type="email">ali@stats.ox.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> Supplementary data are available at <I>Bioinformatics</I> online</p>
]]></description>
<dc:creator><![CDATA[Ali, W., Deane, C. M.]]></dc:creator>
<dc:date>Thu, 01 Oct 2009 05:47:15 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp569</dc:identifier>
<dc:title><![CDATA[Functionally guided alignment of protein interaction networks for module detection]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-10-01</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp564v1?rss=1">
<title><![CDATA[Computing the shortest elementary flux modes in genome-scale metabolic networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp564v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Elementary flux modes (EFMs) represent a key concept to analyze metabolic networks from a pathway-oriented perspective. In spite of considerable work in this field, the computation of the full set of elementary flux modes in large-scale metabolic networks still constitutes a challenging issue due to its underlying combinatorial complexity. </p>
<p><b>Results:</b> In this paper we illustrate that the full set of EFMs can be enumerated in increasing order of number of reactions via integer linear programming. In this light, we present a novel procedure to efficiently determine the <I>K</I>-shortest EFMs in large-scale metabolic networks. Our method was applied to find the <I>K</I>-shortest EFMs that produce lysine in the genome-scale metabolic networks of <I>Escherichia coli</I> and <I>Corynebacterium glutamicum</I>. A detailed analysis of the biological significance of the <I>K</I>-shortest EFMs was conducted, finding that glucose catabolism, ammonium assimilation, lysine anabolism and cofactor balancing were correctly predicted. The work presented here represents an important step forward in the analysis and computation of EFMs for large-scale metabolic networks, where traditional methods fail for networks of even moderate size.</p>
]]></description>
<dc:creator><![CDATA[de Figueiredo, L. F., Podhorski, A., Rubio, A., Kaleta, C., Beasley, J. E., Schuster, S., Planes, F. J.]]></dc:creator>
<dc:date>Wed, 30 Sep 2009 07:38:31 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp564</dc:identifier>
<dc:title><![CDATA[Computing the shortest elementary flux modes in genome-scale metabolic networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-30</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp566v1?rss=1">
<title><![CDATA[VDNA: The Virtual DNA plug-in for VMD]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp566v1?rss=1</link>
<description><![CDATA[
<p>Summary: The DNA inter base pair step parameters (Tilt, Roll, Twist, Shift, Slide, Rise) are a standard internal coordinate representation of DNA. In the absence of bend and shear, it is relatively easy to mentally visualize how Twist and Rise generate the familiar  double helix. More complex structures do not readily yield to such intuition. For this reason we developed a plug-in for VMD that accepts a set of mathematical expressions as input and generates a coarse-grained model of DNA as output. This feature of VDNA appears to provide a unique approach to DNA modeling. Predefined expressions include: linear, sheared, bent, and circular DNA and models of the nucleosome superhelix, chromatin, thermal motion, and nucleosome unwrapping. Availability: VDNA is preinstalled in VMD, <inter-ref locator="http://www.ks.uiuc.edu/Research/vmd" locator-type="url">http://www.ks.uiuc.edu/Research/vmd</inter-ref>. Updates are at <inter-ref locator="http://dna.ccs.tulane.edu" locator-type="url">http://dna.ccs.tulane.edu</inter-ref>. Contact: <inter-ref locator="bishop@tulane.edu" locator-type="email">bishop@tulane.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bishop, T. C.]]></dc:creator>
<dc:date>Tue, 29 Sep 2009 07:51:06 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp566</dc:identifier>
<dc:title><![CDATA[VDNA: The Virtual DNA plug-in for VMD]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-29</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp565v1?rss=1">
<title><![CDATA[Lost in translation: an assessment and perspective for computational microRNA target identification.]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp565v1?rss=1</link>
<description><![CDATA[
<p>MicroRNAs (miRNAs) are a class of short endogenously expressed RNA molecules that regulate gene expression by binding directly to the messenger RNA of protein coding genes. They have been found to confer a novel layer of genetic regulation in a wide range of biological processes. Computational miRNA target prediction remains one of the key means used to decipher the role of miRNAs in development and disease. Here we introduce the basic idea behind the experimental identification of miRNA targets and present some of the most widely used computational miRNA target identification programs. The review includes an assessment of the prediction quality of these programs and their combinations. </p>
]]></description>
<dc:creator><![CDATA[Alexiou, P., Maragkakis, M., Papadopoulos, G. L, Reczko, M., Hatzigeorgiou, A. G]]></dc:creator>
<dc:date>Tue, 29 Sep 2009 07:51:06 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp565</dc:identifier>
<dc:title><![CDATA[Lost in translation: an assessment and perspective for computational microRNA target identification.]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-29</prism:publicationDate>
<prism:section>REVIEW</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp563v1?rss=1">
<title><![CDATA[The interwinding nature of protein-protein interfaces and its implication for protein complex formation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp563v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Structural features at protein-protein interfaces can be studied to understand protein-protein interactions. It was noticed that in a dataset of 45 multimeric proteins the interface could either be described as flat against flat or protruding/interwound. In the latter, residues within one chain were surrounded by those in other chains, whereas in the former they were not.</p>
<p><b>Results:</b> A simple method was developed that could distinguish between these two types with results that matched those made by eye.  Applying this automatic method to a large dataset of 888 structures, chains at interfaces were categorized as non-surrounded or surrounded.  It was found that the surrounded set had a significantly lower folding tendency using a sequence based measure, than the non-surrounded set.  This suggests that before complexation, surrounded chains are relatively unstable and may be involved in "fly-casting". This is supported by the finding that terminal regions are overrepresented in the surrounded set.</p>
<p><b>Availability:</b> <inter-ref locator="http://cib.cf.ocha.ac.jp/DACSIS/" locator-type="url">http://cib.cf.ocha.ac.jp/DACSIS/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="yura.kei@ocha.ac.jp" locator-type="email">yura.kei@ocha.ac.jp</inter-ref>; <inter-ref locator="sjh@cmp.uea.ac.uk" locator-type="email">sjh@cmp.uea.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Yura, K., Hayward, S.]]></dc:creator>
<dc:date>Tue, 29 Sep 2009 07:51:05 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp563</dc:identifier>
<dc:title><![CDATA[The interwinding nature of protein-protein interfaces and its implication for protein complex formation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-29</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp562v1?rss=1">
<title><![CDATA[EASYMIFS & SITEHOUND: a toolkit for the identification of ligand-binding sites in protein structures]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp562v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> S<scp>ite</scp>H<scp>ound</scp> uses Molecular Interaction Fields (MIF<scp>s</scp>) produced by E<scp>asy</scp>MIF<scp>s</scp> to identify protein structure regions that show a high propensity for interaction with ligands. The type of binding site identified depends on the probe atom used in the MIF calculation. The input to E<scp>asy</scp>MIF<scp>s</scp> is a PDB file of a protein structure; the output MIF serves as input to S<scp>ite</scp>H<scp>ound</scp>, which in turn produces a list of putative binding sites. Extensive testing of S<scp>ite</scp>H<scp>ound</scp> for the detection of binding sites for drug-like molecules and phosphorylated ligands has been carried out.</p>
<p><b>Availability:</b> E<scp>asy</scp>MIF<scp>s</scp> and S<scp>ite</scp>H<scp>ound</scp> executables for Linux, Mac OS X, and MS Windows operating systems are freely available for download from <inter-ref locator="http://sitehound.sanchezlab.org/download.html" locator-type="url">http://sitehound.sanchezlab.org/download.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="roberto@sanchezlab.org" locator-type="email">roberto@sanchezlab.org</inter-ref> or <inter-ref locator="roberto.sanchez@mssm.edu" locator-type="email">roberto.sanchez@mssm.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ghersi, D., Sanchez, R.]]></dc:creator>
<dc:date>Tue, 29 Sep 2009 07:51:04 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp562</dc:identifier>
<dc:title><![CDATA[EASYMIFS & SITEHOUND: a toolkit for the identification of ligand-binding sites in protein structures]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-29</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp561v1?rss=1">
<title><![CDATA[LIBRUS: Combined Machine Learning and Homology Information for Sequence-based Ligand-Binding Residue Prediction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp561v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Identifying residues that interact with ligands is useful as a first step to understanding protein function and as an aid to designing small molecules that target the protein for interaction. Several studies have shown sequence features are very informative for this type of prediction while structure features have also been useful when structure is available. We develop a sequence-based method, called LIBRUS, that combines homology-based transfer and direct prediction using machine learning and compare it to previous sequence-based work and current structure-based methods.</p>
<p><b>Results:</b> Our analysis shows that homology-based transfer is slightly more discriminating than a support vector machine learner using profiles and predicted secondary structure. We combine these two approaches in a method called LIBRUS. On a benchmark of 885 sequence independent proteins, it achieves an area under the <I>ROC</I> curve (<I>ROC</I>) of 0.83 with 45% precision at 50% recall, a significant improvement over previous sequence-based efforts. On an independent benchmark set, a current method, FINDSITE, based on structure features achieves a 0.81 <I>ROC</I> with 54% precision at 50% recall while LIBRUS achieves a <I>ROC</I> of 0.82 with 39% precision at 50% recall at a smaller computational cost. When LIBRUS and FINDSITE predictions are combined, performance is increased beyond either reaching an <I>ROC</I> of 0.86 and 59% precision at 50% recall.</p>
<p><b>Availability:</b> Software developed for this study is available at <ty><inter-ref locator="http://bioinfo.cs.umn.edu/supplements/binf2009" locator-type="url">http://bioinfo.cs.umn.edu/supplements/binf2009</inter-ref></ty> along with supplemental data on the study.</p>
<p><b>Contact:</b> <inter-ref locator="kauffman@cs.umn.edu" locator-type="email">kauffman@cs.umn.edu</inter-ref>, <inter-ref locator="karypis@cs.umn.edu" locator-type="email">karypis@cs.umn.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kauffman, C., Karypis, G.]]></dc:creator>
<dc:date>Mon, 28 Sep 2009 04:39:17 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp561</dc:identifier>
<dc:title><![CDATA[LIBRUS: Combined Machine Learning and Homology Information for Sequence-based Ligand-Binding Residue Prediction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-28</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp560v1?rss=1">
<title><![CDATA[Detection of new protein domains using co-occurrence: application to Plasmodium falciparum]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp560v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Hidden Markov Models (HMMs) have proved to be a powerful tool for protein domain identification in newly sequenced organisms. However, numerous domains may be missed in highly divergent proteins. This is the case for Plasmodium falciparum proteins, the main causal agent of human malaria.</p>
<p>Results: We propose a method to improve the sensitivity of HMM domain detection by exploiting the tendency of the domains to appear preferentially with a few other favorite domains in a protein. When sequence information alone is not sufficient to warrant the presence of a particular domain, our method enables its detection on the basis of the presence of other Pfam or InterPro domains. Moreover, a shuffling procedure allows us to estimate the false discovery rate associated with the results. Applied to P. falciparum, our method identifies 585 new Pfam domains (versus the 3 683 already known domains in the Pfam database) with an estimated error rate below 20%. These new domains provide 387 new Gene Ontology annotations to the P. falciparum proteome. Analogous and congruent results are obtained when applying the method to related Plasmodium species (P. vivax and P. yoelii).</p>
<p>Availability: Supplementary Material and a database of the new domains and GO predictions achieved on Plasmodium proteins are available at <inter-ref locator="http://www.lirmm.fr/~terrapon/codd/" locator-type="url">http://www.lirmm.fr/~terrapon/codd/</inter-ref></p>
<p>Contact: <inter-ref locator="brehelin@lirmm.fr" locator-type="email">brehelin@lirmm.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Terrapon, N., Gascuel, O., Marechal, E., Brehelin, L.]]></dc:creator>
<dc:date>Mon, 28 Sep 2009 04:39:16 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp560</dc:identifier>
<dc:title><![CDATA[Detection of new protein domains using co-occurrence: application to Plasmodium falciparum]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-28</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp559v1?rss=1">
<title><![CDATA[Integration of heterogeneous expression data sets extends the role of the retinol pathway in diabetes and insulin resistance.]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp559v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Type 2 diabetes is a chronic metabolic disease that involves both environmental and genetic factors.  To understand the genetics of type 2 diabetes and insulin resistance, the Diabetes Genome Anatomy Project (DGAP) was launched to profile gene expression in a variety of related animal models and human subjects. We asked whether these heterogeneous models can be integrated to provide consistent and robust biological insights into the biology of insulin resistance.</p>
<p>Results: We perform integrative analysis of the 16 DGAP data sets that span multiple tissues, conditions, array types, laboratories, species, genetic backgrounds, and study designs.  For each data set, we identify differentially expressed genes compared to control.  Then, for the combined data, we rank genes according to the frequency with which they were found to be statistically significant across data sets.  This analysis reveals RetSat as a widely shared component of mechanisms involved in insulin resistance and sensitivity and adds to the growing importance of the retinol pathway in diabetes, adipogenesis and insulin resistance. Top candidates obtained from our analysis have been confirmed in recent laboratory studies.</p>
]]></description>
<dc:creator><![CDATA[Park, P. J, Kong, S. W., Tebaldi, T., Lai, W. R, Kasif, S., Kohane, I. S]]></dc:creator>
<dc:date>Mon, 28 Sep 2009 04:39:16 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp559</dc:identifier>
<dc:title><![CDATA[Integration of heterogeneous expression data sets extends the role of the retinol pathway in diabetes and insulin resistance.]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-28</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp558v1?rss=1">
<title><![CDATA[Combining Tissue Transcriptomics and Urine Metabolomics for Breast Cancer Biomarker Identification]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp558v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> For the early detection of cancer, highly sensitive and specific biomarkers are needed.  Particularly, biomarkers in bio-fluids are relatively more useful because those can be used for non-biopsy tests.  Although the altered metabolic activities of cancer cells have been observed in many studies, little is known about metabolic biomarkers for cancer screening.  In this study, a systematic method is proposed for identifying metabolic biomarkers in urine samples by selecting candidate biomarkers from altered genome-wide gene expression signatures of cancer cells.  Biomarkers identified by the present study have increased coherence and robustness because the significances of biomarkers are validated in both gene expression profiles and metabolic profiles.</p>
<p><b>Results:</b> The proposed method was applied to the gene expression profiles and urine samples of 50 breast cancer patients and 50 normal persons.  Nine altered metabolic pathways were identified from the breast cancer gene expression signatures.  Among these altered metabolic pathways, four metabolic biomarkers (Homovanillate, 4-Hydroxyphenylacetate, 5-Hydroxyindoleacetate, Urea) were identified to be different in cancer and normal subjects (p-value &lt; 0.05).  In the case of the predictive performance, the identified biomarkers achieved AUC values of 0.75, 0.79, and 0.79 according to a linear discriminant analysis, a random forest classifier, and on a support vector machine, respectively.  Finally, biomarkers which showed consistent significance in pathways' gene expression as well as urine samples were identified.</p>
<p><b>Contact:</b> <inter-ref locator="dhlee@biosoft.kaist.ac.kr" locator-type="email">dhlee@biosoft.kaist.ac.kr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Nam, H., Chung, B. C., Kim, Y., Lee, K., Lee, D.]]></dc:creator>
<dc:date>Fri, 25 Sep 2009 23:19:30 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp558</dc:identifier>
<dc:title><![CDATA[Combining Tissue Transcriptomics and Urine Metabolomics for Breast Cancer Biomarker Identification]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-25</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp549v1?rss=1">
<title><![CDATA[Automatic assignment of reaction operators to enzymatic reactions]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp549v1?rss=1</link>
<description><![CDATA[
<p><b>Background:</b> Enzymes are classified in a numerical classification scheme introduced by the Nomenclature Committee of the IUBMB based on the overall reaction chemistry. Due to the manifold of enzymatic reactions the system has become highly complex. Assignment of enzymes to the enzyme classes requires a detailed knowledge of the system and manual analysis. Frequently rearrangements and deletions of enzymes and sub-subclasses are necessary. </p>
<p><b>Results:</b> We use the Dugundji-Ugi-model for coding of biochemical reactions which is based on electron shift patterns occurring during reactions. Changes of the bonds or of non-bonded valence electrons are expressed by reaction matrices. Our program calculates reaction matrices automatically on the sole basis of substrate and product chemical structures based on a new strategy for maximal common substructure determination, which allows an accurate atom mapping of the substrate and product atoms. The system has been tested for a large set of enzymatic reactions including all sub-subclasses of the EC classification system. Altogether 147 different representative reaction operators were found in the classified enzymes, 121 of which are unique with respect to an EC sub-subclass. The other 26 comprise groups of enzymes with very similar reactions, being identical with respect to the bonds formed and broken.</p>
<p><b>Conclusion:</b> The analysis and comparison of enzymatic reactions according to their electron shift patterns is defining enzyme groups characterised by unique reaction cores. Our results demonstrate the applicability of the Dugundji-Ugi-model as a reasonable pre-classification system allowing an objective and rational view on biochemical reactions.</p>
<p><b>Availability:</b> The program to generate reaction matrix descriptors is available upon request.</p>
]]></description>
<dc:creator><![CDATA[Leber, M., Egelhofer, V., Schomburg, I., Schomburg, D.]]></dc:creator>
<dc:date>Fri, 25 Sep 2009 23:19:27 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp549</dc:identifier>
<dc:title><![CDATA[Automatic assignment of reaction operators to enzymatic reactions]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-25</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp548v1?rss=1">
<title><![CDATA[Automatically Classifying Sentences in Full-Text Biomedical Articles into Introduction, Methods, Results and Discussion]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp548v1?rss=1</link>
<description><![CDATA[
<p>Biomedical texts can be typically represented by four rhetorical categories: Introduction, Methods, Results and Discussion (IMRAD). Classifying sentences into these categories can benefit many other text-mining tasks. Although many studies have applied different approaches for automatically classifying sentences in MEDLINE abstracts into the IMRAD categories, few have explored the classification of sentences that appear in full-text biomedical articles. We first evaluated whether sentences in full-text biomedical articles could be reliably annotated into the IMRAD format and then explored different approaches for automatically classifying these sentences into the IMRAD categories. Our results show an overall annotation agreement of 82.14% with a Kappa score of 0.756. The best classification system is a multinomial na&iuml;ve Bayes classifier trained on manually annotated data that achieved 91.95% accuracy and an average F-score of 91.55%, which is significantly higher than baseline systems. A web version of this system is available online at &ndash; <inter-ref locator="http://wood.ims.uwm.edu/full_text_classifier/" locator-type="url">http://wood.ims.uwm.edu/full_text_classifier/</inter-ref> .</p>
]]></description>
<dc:creator><![CDATA[Agarwal, S., Yu, H.]]></dc:creator>
<dc:date>Fri, 25 Sep 2009 23:19:27 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp548</dc:identifier>
<dc:title><![CDATA[Automatically Classifying Sentences in Full-Text Biomedical Articles into Introduction, Methods, Results and Discussion]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-25</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp557v1?rss=1">
<title><![CDATA[In response to "Can sugars be produced from fatty acids? A test case for pathway analysis tools"]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp557v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> In their article entitled "Can sugars be produced from fatty acids? A test case for pathway analysis tools" de Figueiredo and co-authors assess the performance of three pathway prediction tools (METATOOL, PathFinding and Pathway Hunter Tool) using the synthesis of glucose-6-phosphate (G6P) from acetyl-CoA in humans as a test case (de Figueiredo, et al., 2008). We think that this article is biased for three reasons: (1) The metabolic networks used as input for the respective tools were of very different sizes; (2) the "assessment" is restricted to two study cases; (3) developers are inherently more skilled to use their own tools than those developed by other people. </p>
<p>We extended the analyses led by de Figueiredo and clearly show that the apparent superior performance of their tool (METATOOL) is partly due to the differences in input network sizes. We also see a conceptual problem in the comparison of tools that serve different purposes. In our opinion, metabolic path finding and elementary mode analysis are answering different biological questions, and should be considered as complementary rather than competitive approaches.</p>
]]></description>
<dc:creator><![CDATA[Faust, K., Croes, D., van Helden, J.]]></dc:creator>
<dc:date>Wed, 23 Sep 2009 08:21:18 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp557</dc:identifier>
<dc:title><![CDATA[In response to "Can sugars be produced from fatty acids? A test case for pathway analysis tools"]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-23</prism:publicationDate>
<prism:section>LETTER TO THE EDITOR</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp553v1?rss=1">
<title><![CDATA[SimCT: a generic tool to visualize ontology based relationships for biological objects]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp553v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We present a web-based service, SimCT, which allows to graphically display the relationships between biological objects (e.g. genes or proteins) based on their annotations to an OBO ontology. The result is presented as a tree of these objects, which can be viewed and explored through a specific java applet designed to highlight relevant features. Unlike the numerous tools that search for over-represented terms, SimCT draws a simplified representation of biological terms present in the set of objects, and can be applied to any ontology for which annotation data is available. Being web-based, it does not require prior installation, and provides an intuitive, easy-to-use service. </p>
<p><b>Availability:</b> <inter-ref locator="http://tagc.univ-mrs.fr/SimCT" locator-type="url">http://tagc.univ-mrs.fr/SimCT</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Herrmann, C., Berard, S., Tichit, L.]]></dc:creator>
<dc:date>Wed, 23 Sep 2009 08:21:17 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp553</dc:identifier>
<dc:title><![CDATA[SimCT: a generic tool to visualize ontology based relationships for biological objects]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-23</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp555v1?rss=1">
<title><![CDATA[HHsvm: Fast and accurate classification of profile-profile matches identified by HHsearch]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp555v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Recently developed profile-profile methods rival structural comparisons in their ability to detect homology between distantly related proteins. Despite this tremendous progress, many genuine relationships between protein families cannot be recognized as comparisons of their profiles result in scores that are statistically insignificant.</p>
<p><b>Results:</b> Using known evolutionary relationships among protein superfamilies in SCOP database, Support Vector Machines were trained on four sets of discriminatory features derived from the output of HHsearch. Upon validation, it was shown that the automatic classification of all profile-profile matches was superior to fixed threshold-based annotation in terms of sensitivity and specificity. The effectiveness of this approach was demonstrated by annotating several domains of unknown function from the Pfam database.</p>
<p><b>Availability:</b> Programs and scripts implementing the methods described in this manuscript are freely available from <inter-ref locator="http://hhsvm.dlakiclab.org/" locator-type="url">http://hhsvm.dlakiclab.org/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="mdlakic@montana.edu" locator-type="email">mdlakic@montana.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Dlakic, M.]]></dc:creator>
<dc:date>Tue, 22 Sep 2009 05:06:48 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp555</dc:identifier>
<dc:title><![CDATA[HHsvm: Fast and accurate classification of profile-profile matches identified by HHsearch]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-22</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp554v1?rss=1">
<title><![CDATA[MOODS: fast search for position weight matrix matches in DNA sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp554v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> MOODS (MOtif Occurrence Detection Suite) is a software package for matching position weight matrices against DNA sequences. MOODS implements state-of-the-art on-line matching algorithms, achieving considerably faster scanning speed than with a simple brute-force search. MOODS is written in C++, with bindings for the popular BioPerl and Biopython toolkits. It can easily be adapted for different purposes and integrated into existing workflows. It can also be used as a C++ library.</p>
<p><b>Availability:</b> The package with documentation and examples of usage is available at <inter-ref locator="http://www.cs.helsinki.fi/group/pssmfind" locator-type="url">http://www.cs.helsinki.fi/group/pssmfind</inter-ref>. The source code is also available under the terms of a GNU General Public License (GPL).</p>
<p><b>Contact:</b> <inter-ref locator="janne.h.korhonen@helsinki.fi" locator-type="email">janne.h.korhonen@helsinki.fi</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Korhonen, J., Martinmaki, P., Pizzi, C., Rastas, P., Ukkonen, E.]]></dc:creator>
<dc:date>Tue, 22 Sep 2009 05:06:47 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp554</dc:identifier>
<dc:title><![CDATA[MOODS: fast search for position weight matrix matches in DNA sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-22</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp547v1?rss=1">
<title><![CDATA[Bayesian detection of non-sinusoidal periodic patterns in circadian expression data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp547v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Cyclical biological processes such as cell division and circadian regulation produce coordinated periodic expression of thousands of genes. Identification of such genes and their expression patterns is a crucial step in discovering underlying regulatory mechanisms. Existing computational methods are biased towards discovering genes that follow sine-wave patterns.</p>
<p><b>Results:</b> We present an ANOVA periodicity detector and its Bayesian extension that can be used to discover periodic transcripts of arbitrary shapes from replicated gene expression profiles. The models are applicable when the profiles are collected at comparable time points for at least two cycles. We provide an empirical Bayes procedure for estimating parameters of the prior distributions and derive closed-form expressions for the posterior probability of periodicity, enabling efficient computation. The model is applied to two data sets profiling circadian regulation in murine liver and skeletal muscle, revealing a substantial number of previously undetected non-sinusoidal periodic transcripts in each. We also apply quantitative real-time PCR to several highly ranked non-sinusoidal transcripts in liver tissue found by the model, providing independent evidence of circadian regulation of these genes.</p>
<p><b>Availability:</b> Matlab software for estimating prior distributions and performing inference is available for download from <inter-ref locator="ftp://ftp.ics.uci.edu/pub/dchudova/periodicity" locator-type="url">ftp://ftp.ics.uci.edu/pub/dchudova/periodicity</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="dchudova@gmail.com" locator-type="email">dchudova@gmail.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Chudova, D., Ihler, A., Lin, K. K, Andersen, B., Smyth, P.]]></dc:creator>
<dc:date>Tue, 22 Sep 2009 05:06:47 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp547</dc:identifier>
<dc:title><![CDATA[Bayesian detection of non-sinusoidal periodic patterns in circadian expression data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-22</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp552v1?rss=1">
<title><![CDATA[Reproducing the manual annotation of multiple sequence alignments using a SVM classifier]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp552v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Aligning protein sequences with the best possible accuracy requires sophisticated algorithms. Since the optimal alignment is not guaranteed to be the correct one, it is expected that even the best alignment will contain sites that do not respect the assumption of positional homology. Because formulating rules to identify these sites is difficult, it is common practice to manually remove them. Although considered necessary in some cases, manual editing is time consuming and not reproducible. We present here an automated editing method based on the classification of "valid" and "invalid" sites.</p>
<p><b>Results:</b> A Support Vector Machine (SVM) classifier is trained to reproduce the decisions made during manual editing with an accuracy of 95.0%. This implies that manual editing can be made reproducible and applied to large scale analyses. We further demonstrate that it is possible to retrain/extend the training of the classifier by providing examples of MSA annotation. Near optimal training can be achieved with only 1000 annotated sites, or roughly three samples of protein sequence alignments.</p>
<p><b>Availability:</b> This method is implemented in the software MANUEL, licensed under the GPL. A web-based application for single and batch job is available at <inter-ref locator="http://fester.cs.dal.ca/manuel" locator-type="url">http://fester.cs.dal.ca/manuel</inter-ref> .</p>
<p><b>Contact:</b> <inter-ref locator="cblouin@cs.dal.ca" locator-type="email">cblouin@cs.dal.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Blouin, C., Perry, S., Lavell, A., Susko, E., Roger, A. J.]]></dc:creator>
<dc:date>Mon, 21 Sep 2009 08:29:45 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp552</dc:identifier>
<dc:title><![CDATA[Reproducing the manual annotation of multiple sequence alignments using a SVM classifier]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-21</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp551v1?rss=1">
<title><![CDATA[How and when should interactome-derived clusters be used to predict functional modules and protein function?]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp551v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Clustering of protein-protein interaction networks is one of the most common approaches for predicting functional modules, protein complexes and protein functions. But, how well does clustering perform at these tasks?</p>
<p><b>Results:</b> We develop a general framework to assess how well computationally-derived clusters in physical interactomes overlap functional modules derived via the Gene Ontology (GO). Using this framework, we evaluate six diverse network clustering algorithms using S. <I>cerevisiae</I> and show that (1) the performances of these algorithms can differ substantially when run on the same network and (2) their relative performances change depending upon the topological characteristics of the network under consideration. For the specific task of function prediction in S. <I>cerevisiae</I>, we demonstrate that, surprisingly, a simple non-clustering guilt-by-association approach outperforms widely-used clustering based approaches that annotate a protein with the over-represented biological process and cellular component terms in its cluster; this is true over the range of clustering algorithms considered. Further analysis parameterizes performance based on the number of annotated proteins, and suggests when clustering approaches should be used for interactome functional analyses. Overall our results suggest a re-examination of when and how clustering approaches should be applied to physical interactomes, and establishes guidelines by which novel clustering approaches for biological networks should be justified and evaluated with respect to functional analysis.</p>
<p><b>Contact:</b> <inter-ref locator="msingh@cs.princeton.edu" locator-type="email">msingh@cs.princeton.edu</inter-ref></p>
<p><b>Supplementary information:</b> Supplementary data available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Song, J., Singh, M.]]></dc:creator>
<dc:date>Mon, 21 Sep 2009 08:29:45 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp551</dc:identifier>
<dc:title><![CDATA[How and when should interactome-derived clusters be used to predict functional modules and protein function?]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-21</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp550v1?rss=1">
<title><![CDATA[Client-side Integration of Life Science Literature Resources]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp550v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The online resources in the life sciences are characterized by a great fragmentation and one of the pressing issues of bioinformatics is making the integration of these resources a smoother and more flexible process than it is currently.</p>
<p>Here we present <I>i-cite</I>, a browser extension, which implements a client-side model of integration which improves the navigation within the rapidly increasing life science literature and links terms from it to corresponding non-textual data. </p>
<p><b>Availability:</b> <inter-ref locator="http://i-cite.org" locator-type="url">http://i-cite.org</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="nan23@cam.ac.uk" locator-type="email">nan23@cam.ac.uk</inter-ref> </p>
]]></description>
<dc:creator><![CDATA[Easty, R., Nikolov, N.]]></dc:creator>
<dc:date>Fri, 18 Sep 2009 00:15:02 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp550</dc:identifier>
<dc:title><![CDATA[Client-side Integration of Life Science Literature Resources]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-18</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp545v1?rss=1">
<title><![CDATA[PoreLogo: a new tool to analyse, visualise and compare channels in transmembrane proteins]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp545v1?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The increasing number of available atomic 3D structures of transmembrane channel proteins represents a valuable resource for better understanding their structure-function relationships and to eventually predict their selectivity. Herein we present PoreLogo, an automatic tool for analyzing, visualizing and comparing the amino acid composition of transmembrane channels and its conservation across the corresponding protein family.</p>
<p><b>Availability:</b> PoreLogo is accessible as a public web server at <inter-ref locator="http://www.ebi.ac.uk/thornton-srv/software/PoreLogo/" locator-type="url">http://www.ebi.ac.uk/thornton-srv/software/PoreLogo/</inter-ref>.</p>
<p><b>Contacts:</b> <inter-ref locator="marial@ebi.ac.uk" locator-type="email">marial@ebi.ac.uk</inter-ref>, <inter-ref locator="romina.oliva@uniparthenope.it" locator-type="email">romina.oliva@uniparthenope.it</inter-ref>.</p>
]]></description>
<dc:creator><![CDATA[Oliva, R., Thornton, J. M., Pellegrini-Calace, M.]]></dc:creator>
<dc:date>Thu, 17 Sep 2009 08:21:22 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp545</dc:identifier>
<dc:title><![CDATA[PoreLogo: a new tool to analyse, visualise and compare channels in transmembrane proteins]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-17</prism:publicationDate>
<prism:section>APPLICATIONS NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp546v1?rss=1">
<title><![CDATA[Genome analysis with inter-nucleotide distances]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp546v1?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> DNA sequences can be represented by sequences of four symbols, but it is often useful to convert the symbols into real or complex numbers for further analysis. Several mapping schemes have been used in the past, but they seem unrelated to any intrinsic characteristic of DNA. The objective of this work was to find a mapping scheme directly related to DNA characteristics and that would be useful in discriminating between different species. Mathematical models to explore DNA correlation structures may contribute to a better knowledge of the DNA and to find a concise DNA description.</p>
<p><b>Results:</b> We developed a methodology to process DNA sequences based on inter-nucleotide distances. Our main contribution is a method to obtain genomic signatures for complete genomes, based on the inter-nucleotide distances, that are able to discriminate between different species. Using these signatures and hierarchical clustering, it is possible to build phylogenetic trees.</p>
<p>Phylogenetic trees lead to genome differentiation and allow the inference of phylogenetic relations. The phylogenetic trees generated in this work display related species close to each other, suggesting that the inter-nucleotide distances are able to capture essential information about the genomes.</p>
<p>To create the genomic signature, we construct a vector which describes the inter-nucleotide distance distribution of a complete genome and compare it with the reference distance distribution, which is the distribution of a sequence where the nucleotides are placed randomly and independently. It is the residual or relative error between the data and the reference distribution that is used to compare the DNA sequences of different organisms.</p>
<p><b>Contact:</b> <inter-ref locator="vera@ua.pt" locator-type="email">vera@ua.pt</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Afreixo, V., Bastos, C. A. C., Pinho, A. J., Garcia, S. P., Ferreira, P. J. S. G.]]></dc:creator>
<dc:date>Wed, 16 Sep 2009 08:09:09 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp546</dc:identifier>
<dc:title><![CDATA[Genome analysis with inter-nucleotide distances]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-16</prism:publicationDate>
<prism:section>ORIGINAL PAPER</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btp544v1?rss=1">
<title><![CDATA[Identifiability of isoform deconvolution from junction arrays and RNA-Seq]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btp544v1?rss=1</link>
<description><![CDATA[
<p>Motivation: Splice junction microarrays and RNA-seq are two popular ways of quantifying splice variants within a cell. Unfortunately, isoform expressions cannot always be determined from the expressions of individual exons and splice junctions. While this issue has been noted before, the extent of the problem on various platforms has not yet been explored, nor have potential remedies been presented.</p>
<p>Results: We propose criteria that will guarantee identifiability of an isoform deconvolution model on exon and splice junction arrays and in RNA-Seq. We show that up to 97% of 2256 alternatively spliced human genes selected from the RefSeq database lead to identifiable gene models in RNA-seq, with similar results in mouse. However, in the Human Exon array only 26% of these genes lead to identifiable models, and even in the most comprehensive splice junction array only 69% lead to identifiable models.</p>
<p>Contact: <inter-ref locator="whwong@stanford.edu" locator-type="email">whwong@stanford.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Hiller, D., Jiang, H., Xu, W., Wong, W. H.]]></dc:creator>
<dc:date>Wed, 16 Sep 2009 23:50:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp544</dc:identifier>
<dc:title><![CDATA[Identifiability of isoform deconvolution from junction arrays and RNA-Seq]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2009-09-16</prism:publicationDate>
<prism:section>DISCOVERY NOTE</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/btm094v2?rss=1">
<title><![CDATA[This paper was published in error before receiving the final version]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/btm094v2?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[]]></dc:creator>
<dc:date>Mon, 30 Jul 2007 04:53:49 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btm094</dc:identifier>
<dc:title><![CDATA[This paper was published in error before receiving the final version]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:publicationDate>2007-07-30</prism:publicationDate>
<prism:section>ARTICLE</prism:section>
</item>

</rdf:RDF>