<?xml version="1.0" encoding="ISO-8859-1"?>

<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns="http://purl.org/rss/1.0/"
 xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
 xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
 xmlns:admin="http://webns.net/mvcb/"
>

<channel rdf:about="http://bioinformatics.oxfordjournals.org">
<title>Bioinformatics - recent issues</title>
<link>http://bioinformatics.oxfordjournals.org</link>
<description>Bioinformatics - RSS feed of recent issues (covers the latest 3 issues, including the current issue) </description>
<prism:eIssn>1460-2059</prism:eIssn>
<prism:publicationName>Bioinformatics</prism:publicationName>
<prism:issn>1367-4803</prism:issn>
<items>
 <rdf:Seq>
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/709?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/715?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/722?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/730?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/737?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/745?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/752?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/761?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/770?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/777?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/784?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/791?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/798?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/807?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/814?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/822?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/831?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/838?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/841?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/843?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/845?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/847?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/849?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/851?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/853?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/589?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/596?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/603?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/610?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/617?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/625?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/632?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/640?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/647?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/653?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/661?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/668?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/676?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/678?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/680?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/683?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/687?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/689?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/692?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/694?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/696?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/698?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/700?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/703?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/705?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/445?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/456?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/464?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/470?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/478?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/486?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/493?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/501?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/509?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/518?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/529?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/536?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/544?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/553?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/560?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/565?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/568?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/570?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/572?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/574?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/576?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/578?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/580?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/582?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/585?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/587?rss=1" />
 </rdf:Seq>
</items>
</channel>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/709?rss=1">
<title><![CDATA[Six Rossmannoid folds, including the Class I aminoacyl-tRNA synthetases, share a partial core with the anti-codon-binding domain of a Class II aminoacyl-tRNA synthetase]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/709?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Similarities in core residue packing provide evidence for divergence or convergence not reported using other methods.</p>
<p><b>Results:</b> We apply a new method for rapid structure comparison based on Simplicial Neighborhood Analysis of Protein Packing (SNAPP) to the diverse structural classification of proteins (SCOP) &alpha;/&beta;-class of protein folds. The procedure identifies inter-residue packing motifs shared by protein pairs from different folds. A threshold of 0.67 &Aring; RMSD for all atoms of corresponding residues ensures inclusion of only highly significant similarities comparable with those observed for identical catalytic residues in homologues. Many tertiary packing motifs are shared among the three classical Rossmannoid folds, as well as thousands of other motifs that occur in at least two distinct folds. Merging of neighboring packing motifs facilitated recognition of larger, recurrent substructures or cores. The anti-codon-binding domain of an archeal aminoacyl-tRNA synthetase (aaRS) was discovered to possess a packed core in which eight identical amino acid residues are within 0.55 &Aring; RMSD of the comparable structure in the FixJ receiver, a member of the Rossmannoid family that also includes the CheY signaling protein and flavodoxin-like proteins. Further investigation identified close variants of this core in five other Rossmannoid folds, including a functionally relevant core in Class Ia aminoacyl-tRNA synthetases. Although it is possible that the two essentially identical cores in the ProRS anti-codon-binding domain and the FixJ receiver converged to the same structure, the consensus core obtained from the structural and sequence alignments suggests that all the implicated protein folds descended from a simpler ancestral protein in which this core provided nucleotide binding and proto-allosteric functions.</p>
<p><b>Availability:</b> Programs are available at <inter-ref locator="http://staff.vbi.vt.edu/cammer/snapp/download/" locator-type="url">http://staff.vbi.vt.edu/cammer/snapp/download/</inter-ref></p>
<p><b>Implementation:</b> Programs were written in Perl and C and run under Linux.</p>
<p><b>Contact:</b> <inter-ref locator="cammer@vbi.vt.edu" locator-type="email">cammer@vbi.vt.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Cammer, S., Carter, C. W.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq039</dc:identifier>
<dc:title><![CDATA[Six Rossmannoid folds, including the Class I aminoacyl-tRNA synthetases, share a partial core with the anti-codon-binding domain of a Class II aminoacyl-tRNA synthetase]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>714</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>709</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/715?rss=1">
<title><![CDATA[Identifying biologically relevant differences between metagenomic communities]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/715?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Metagenomics is the study of genetic material recovered directly from environmental samples. Taxonomic and functional differences between metagenomic samples can highlight the influence of ecological factors on patterns of microbial life in a wide range of habitats. Statistical hypothesis tests can help us distinguish ecological influences from sampling artifacts, but knowledge of only the <I>P</I>-value from a statistical hypothesis test is insufficient to make inferences about biological relevance. Current reporting practices for pairwise comparative metagenomics are inadequate, and better tools are needed for comparative metagenomic analysis.</p>
<p><b>Results:</b> We have developed a new software package, STAMP, for comparative metagenomics that supports best practices in analysis and reporting. Examination of a pair of iron mine metagenomes demonstrates that deeper biological insights can be gained using statistical techniques available in our software. An analysis of the functional potential of &lsquo;<I>Candidatus</I> Accumulibacter phosphatis&rsquo; in two enhanced biological phosphorus removal metagenomes identified several subsystems that differ between the <I>A.phosphatis</I> strains in these related communities, including phosphate metabolism, secretion and metal transport.</p>
<p><b>Availability:</b> Python source code and binaries are freely available from our website at <inter-ref locator="http://kiwi.cs.dal.ca/Software/STAMP" locator-type="url">http://kiwi.cs.dal.ca/Software/STAMP</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="beiko@cs.dal.ca" locator-type="email">beiko@cs.dal.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq041/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Parks, D. H., Beiko, R. G.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq041</dc:identifier>
<dc:title><![CDATA[Identifying biologically relevant differences between metagenomic communities]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>721</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>715</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/722?rss=1">
<title><![CDATA[Microindel detection in short-read sequence data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/722?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Several recent studies have demonstrated the effectiveness of resequencing and single nucleotide variant (SNV) detection by deep short-read sequencing platforms. While several reliable algorithms are available for automated SNV detection, the automated detection of microindels in deep short-read data presents a new bioinformatics challenge.</p>
<p><b>Results:</b> We systematically analyzed how the short-read mapping tools MAQ, Bowtie, Burrows-Wheeler alignment tool (BWA), Novoalign and RazerS perform on simulated datasets that contain indels and evaluated how indels affect error rates in SNV detection. We implemented a simple algorithm to compute the equivalent indel region <I>eir</I>, which can be used to process the alignments produced by the mapping tools in order to perform indel calling. Using simulated data that contains indels, we demonstrate that indel detection works well on short-read data: the detection rate for microindels (&lt;4 bp) is &gt;90%. Our study provides insights into systematic errors in SNV detection that is based on ungapped short sequence read alignments. Gapped alignments of short sequence reads can be used to reduce this error and to detect microindels in simulated short-read data. A comparison with microindels automatically identified on the ABI Sanger and Roche 454 platform indicates that microindel detection from short sequence reads identifies both overlapping and distinct indels.</p>
<p><b>Contact:</b> <inter-ref locator="peter.krawitz@googlemail.com" locator-type="email">peter.krawitz@googlemail.com</inter-ref>; <inter-ref locator="peter.robinson@charite.de" locator-type="email">peter.robinson@charite.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq027/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Krawitz, P., Rodelsperger, C., Jager, M., Jostins, L., Bauer, S., Robinson, P. N.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq027</dc:identifier>
<dc:title><![CDATA[Microindel detection in short-read sequence data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>729</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>722</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/730?rss=1">
<title><![CDATA[SNVMix: predicting single nucleotide variants from next-generation sequencing of tumors]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/730?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Next-generation sequencing (NGS) has enabled whole genome and transcriptome single nucleotide variant (SNV) discovery in cancer. NGS produces millions of short sequence reads that, once aligned to a reference genome sequence, can be interpreted for the presence of SNVs. Although tools exist for SNV discovery from NGS data, none are specifically suited to work with data from tumors, where altered ploidy and tumor cellularity impact the statistical expectations of SNV discovery.</p>
<p><b>Results:</b> We developed three implementations of a probabilistic Binomial mixture model, called SNVMix, designed to infer SNVs from NGS data from tumors to address this problem. The first models allelic counts as observations and infers SNVs and model parameters using an expectation maximization (EM) algorithm and is therefore capable of adjusting to deviation of allelic frequencies inherent in genomically unstable tumor genomes. The second models nucleotide and mapping qualities of the reads by probabilistically weighting the contribution of a read/nucleotide to the inference of a SNV based on the confidence we have in the base call and the read alignment. The third combines filtering out low-quality data in addition to probabilistic weighting of the qualities. We quantitatively evaluated these approaches on 16 ovarian cancer RNASeq datasets with matched genotyping arrays and a human breast cancer genome sequenced to &gt;40<FONT FACE="arial,helvetica">x</FONT> (haploid) coverage with ground truth data and show systematically that the SNVMix models outperform competing approaches.</p>
<p><b>Availability:</b> Software and data are available at <inter-ref locator="http://compbio.bccrc.ca" locator-type="url">http://compbio.bccrc.ca</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="sshah@bccrc.ca" locator-type="email">sshah@bccrc.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq040/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Goya, R., Sun, M. G.F., Morin, R. D., Leung, G., Ha, G., Wiegand, K. C., Senz, J., Crisan, A., Marra, M. A., Hirst, M., Huntsman, D., Murphy, K. P., Aparicio, S., Shah, S. P.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq040</dc:identifier>
<dc:title><![CDATA[SNVMix: predicting single nucleotide variants from next-generation sequencing of tumors]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>736</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>730</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/737?rss=1">
<title><![CDATA[A visual framework for sequence analysis using n-grams and spectral rearrangement]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/737?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Protein sequences are often composed of regions that have distinct evolutionary histories as a consequence of domain shuffling, recombination or gene conversion. New approaches are required to discover, visualize and analyze these sequence regions and thus enable a better understanding of protein evolution.</p>
<p><b>Results:</b> Here, we have developed an alignment-free and visual approach to analyze sequence relationships. We use the number of shared <I>n</I>-grams between sequences as a measure of sequence similarity and rearrange the resulting affinity matrix applying a spectral technique. Heat maps of the affinity matrix are employed to identify and visualize clusters of related sequences or outliers, while <I>n</I>-gram-based dot plots and conservation profiles allow detailed analysis of similarities among selected sequences. Using this approach, we have identified signatures of domain shuffling in an otherwise poorly characterized family, and homology clusters in another. We conclude that this approach may be generally useful as a framework to analyze related, but highly divergent protein sequences. It is particularly useful as a fast method to study sequence relationships prior to much more time-consuming multiple sequence alignment and phylogenetic analysis.</p>
<p><b>Availability:</b> A software implementation (MOSAIC) of the framework described here can be downloaded from <inter-ref locator="http://bioinformatics.org.au/mosaic/" locator-type="url">http://bioinformatics.org.au/mosaic/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="m.ragan@uq.edu.au" locator-type="email">m.ragan@uq.edu.au</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq042/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Maetschke, S. R., Kassahn, K. S., Dunn, J. A., Han, S.-P., Curley, E. Z., Stacey, K. J., Ragan, M. A.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq042</dc:identifier>
<dc:title><![CDATA[A visual framework for sequence analysis using n-grams and spectral rearrangement]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>744</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>737</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/745?rss=1">
<title><![CDATA[A fast and automated solution for accurately resolving protein domain architectures]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/745?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Accurate prediction of the domain content and arrangement in multi-domain proteins (which make up &gt;65% of the large-scale protein databases) provides a valuable tool for function prediction, comparative genomics and studies of molecular evolution. However, scanning a multi-domain protein against a database of domain sequence profiles can often produce conflicting and overlapping matches. We have developed a novel method that employs heaviest weighted clique-finding (HCF), which we show significantly outperforms standard published approaches based on successively assigning the best non-overlapping match (Best Match Cascade, BMC).</p>
<p><b>Results:</b> We created a benchmark data set of structural domain assignments in the CATH database and a corresponding set of Hidden Markov Model-based domain predictions. Using these, we demonstrate that by considering all possible combinations of matches using the HCF approach, we achieve much higher prediction accuracy than the standard BMC method. We also show that it is essential to allow overlapping domain matches to a query in order to identify correct domain assignments. Furthermore, we introduce a straightforward and effective protocol for resolving any overlapping assignments, and producing a single set of non-overlapping predicted domains.</p>
<p><b>Availability and implementation:</b> The new approach will be used to determine MDAs for UniProt and Ensembl, and made available via the Gene3D website: <inter-ref locator="http://gene3d.biochem.ucl.ac.uk/Gene3D/" locator-type="url">http://gene3d.biochem.ucl.ac.uk/Gene3D/</inter-ref>. The software has been implemented in C++ and compiled for Linux: source code and binaries can be found at: <inter-ref locator="ftp://ftp.biochem.ucl.ac.uk/pub/gene3d_data/DomainFinder3/" locator-type="url">ftp://ftp.biochem.ucl.ac.uk/pub/gene3d_data/DomainFinder3/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="yeats@biochem.ucl.ac.uk" locator-type="email">yeats@biochem.ucl.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq034/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Yeats, C., Redfern, O. C., Orengo, C.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq034</dc:identifier>
<dc:title><![CDATA[A fast and automated solution for accurately resolving protein domain architectures]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>751</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>745</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/752?rss=1">
<title><![CDATA[Cascleave: towards more accurate prediction of caspase substrate cleavage sites]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/752?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The caspase family of cysteine proteases play essential roles in key biological processes such as programmed cell death, differentiation, proliferation, necrosis and inflammation. The complete repertoire of caspase substrates remains to be fully characterized. Accordingly, systematic computational screening studies of caspase substrate cleavage sites may provide insight into the substrate specificity of caspases and further facilitate the discovery of putative novel substrates.</p>
<p><b>Results:</b> In this article we develop an approach (termed Cascleave) to predict both classical (i.e. following a P<SUB>1</SUB> Asp) and non-typical caspase cleavage sites. When using local sequence-derived profiles, Cascleave successfully predicted 82.2% of the known substrate cleavage sites, with a Matthews correlation coefficient (MCC) of 0.667. We found that prediction performance could be further improved by incorporating information such as predicted solvent accessibility and whether a cleavage sequence lies in a region that is most likely natively unstructured. Novel bi-profile Bayesian signatures were found to significantly improve the prediction performance and yielded the best performance with an overall accuracy of 87.6% and a MCC of 0.747, which is higher accuracy than published methods that essentially rely on amino acid sequence alone. It is anticipated that Cascleave will be a powerful tool for predicting novel substrate cleavage sites of caspases and shedding new insights on the unknown caspase-substrate interactivity relationship.</p>
<p><b>Availability:</b> <inter-ref locator="http://sunflower.kuicr.kyoto-u.ac.jp/~sjn/Cascleave/" locator-type="url">http://sunflower.kuicr.kyoto-u.ac.jp/~sjn/Cascleave/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="jiangning.song@med.monash.edu.au" locator-type="email">jiangning.song@med.monash.edu.au</inter-ref>; <inter-ref locator="takutsu@kuicr.kyoto-u.ac.jp" locator-type="email">takutsu@kuicr.kyoto-u.ac.jp</inter-ref>; <inter-ref locator="james.whisstock@med.monash.edu.au" locator-type="email">james.whisstock@med.monash.edu.au</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq043/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Song, J., Tan, H., Shen, H., Mahmood, K., Boyd, S. E., Webb, G. I., Akutsu, T., Whisstock, J. C.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:07 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq043</dc:identifier>
<dc:title><![CDATA[Cascleave: towards more accurate prediction of caspase substrate cleavage sites]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>760</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>752</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/761?rss=1">
<title><![CDATA[Extraction and comparison of gene expression patterns from 2D RNA in situ hybridization images]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/761?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Recent advancements in high-throughput imaging have created new large datasets with tens of thousands of gene expression images. Methods for capturing these spatial and/or temporal expression patterns include <I>in situ</I> hybridization or fluorescent reporter constructs or tags, and results are still frequently assessed by subjective qualitative comparisons. In order to deal with available large datasets, fully automated analysis methods must be developed to properly normalize and model spatial expression patterns.</p>
<p><b>Results:</b> We have developed image segmentation and registration methods to identify and extract spatial gene expression patterns from RNA <I>in situ</I> hybridization experiments of <I>Drosophila</I> embryos. These methods allow us to normalize and extract expression information for 78 621 images from 3724 genes across six time stages. The similarity between gene expression patterns is computed using four scoring metrics: mean squared error, Haar wavelet distance, mutual information and spatial mutual information (SMI). We additionally propose a strategy to calculate the significance of the similarity between two expression images, by generating surrogate datasets with similar spatial expression patterns using a Monte Carlo swap sampler. On data from an early development time stage, we show that SMI provides the most biologically relevant metric of comparison, and that our significance testing generalizes metrics to achieve similar performance. We exemplify the application of spatial metrics on the well-known <I>Drosophila</I> segmentation network.</p>
<p><b>Availability:</b> A Java webstart application to register and compare patterns, as well as all source code, are available from: <inter-ref locator="http://tools.genome.duke.edu/generegulation/image_analysis/insitu" locator-type="url">http://tools.genome.duke.edu/generegulation/image_analysis/insitu</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="uwe.ohler@duke.edu" locator-type="email">uwe.ohler@duke.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp658/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Mace, D. L., Varnado, N., Zhang, W., Frise, E., Ohler, U.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp658</dc:identifier>
<dc:title><![CDATA[Extraction and comparison of gene expression patterns from 2D RNA in situ hybridization images]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>769</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>761</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/770?rss=1">
<title><![CDATA[Estimating replicate time shifts using Gaussian process regression]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/770?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Time-course gene expression datasets provide important insights into dynamic aspects of biological processes, such as circadian rhythms, cell cycle and organ development. In a typical microarray time-course experiment, measurements are obtained at each time point from multiple replicate samples. Accurately recovering the gene expression patterns from experimental observations is made challenging by both measurement noise and variation among replicates' rates of development. Prior work on this topic has focused on inference of expression patterns assuming that the replicate times are synchronized. We develop a statistical approach that simultaneously infers both (i) the underlying (hidden) expression profile for each gene, as well as (ii) the biological time for each individual replicate. Our approach is based on Gaussian process regression (GPR) combined with a probabilistic model that accounts for uncertainty about the biological development time of each replicate.</p>
<p><b>Results:</b> We apply GPR with uncertain measurement times to a microarray dataset of mRNA expression for the hair-growth cycle in mouse back skin, predicting both profile shapes and biological times for each replicate. The predicted time shifts show high consistency with independently obtained morphological estimates of relative development. We also show that the method systematically reduces prediction error on out-of-sample data, significantly reducing the mean squared error in a cross-validation study.</p>
<p><b>Availability:</b> Matlab code for GPR with uncertain time shifts is available at <inter-ref locator="http://sli.ics.uci.edu/Code/GPRTimeshift/" locator-type="url">http://sli.ics.uci.edu/Code/GPRTimeshift/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="ihler@ics.uci.edu" locator-type="email">ihler@ics.uci.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Liu, Q., Lin, K. K., Andersen, B., Smyth, P., Ihler, A.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq022</dc:identifier>
<dc:title><![CDATA[Estimating replicate time shifts using Gaussian process regression]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>776</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>770</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/777?rss=1">
<title><![CDATA[A hidden Ising model for ChIP-chip data analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/777?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Chromatin immunoprecipitation (ChIP) coupled with tiling microarray (chip) experiments have been used in a wide range of biological studies such as identification of transcription factor binding sites and investigation of DNA methylation and histone modification. Hidden Markov models are widely used to model the spatial dependency of ChIP-chip data. However, parameter estimation for these models is typically either heuristic or suboptimal, leading to inconsistencies in their applications. To overcome this limitation and to develop an efficient software, we propose a hidden ferromagnetic Ising model for ChIP-chip data analysis.</p>
<p><b>Results:</b> We have developed a simple, but powerful Bayesian hierarchical model for ChIP-chip data via a hidden Ising model. Metropolis within Gibbs sampling algorithm is used to simulate from the posterior distribution of the model parameters. The proposed model naturally incorporates the spatial dependency of the data, and can be used to analyze data with various genomic resolutions and sample sizes. We illustrate the method using three publicly available datasets and various simulated datasets, and compare it with three closely related methods, namely TileMap HMM, tileHMM and BAC. We find that our method performs as well as TileMap HMM and BAC for the high-resolution data from Affymetrix platform, but significantly outperforms the other three methods for the low-resolution data from Agilent platform. Compared with the BAC method which also involves MCMC simulations, our method is computationally much more efficient.</p>
<p><b>Availability:</b> A software called iChip is freely available at <inter-ref locator="http://www.bioconductor.org/" locator-type="url">http://www.bioconductor.org/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="moq@mskcc.org" locator-type="email">moq@mskcc.org</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Mo, Q., Liang, F.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq032</dc:identifier>
<dc:title><![CDATA[A hidden Ising model for ChIP-chip data analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>783</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>777</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/784?rss=1">
<title><![CDATA[Gene selection in microarray survival studies under possibly non-proportional hazards]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/784?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Univariate Cox regression (COX) is often used to select genes possibly linked to survival. With non-proportional hazards (NPH), COX could lead to under- or over-estimation of effects.</p>
<p>The effect size measure <I>c</I>=<I>P</I>(<I>T</I><SUB>1</SUB>&lt;<I>T</I><SUB>0</SUB>), i.e. the probability that a person randomly chosen from group <I>G</I><SUB>1</SUB> dies earlier than a person from <I>G</I><SUB>0</SUB>, is independent of the proportional hazards (PH) assumption. Here we consider its generalization to continuous data <I>c</I>' and investigate the suitability of <I>c</I>' for gene selection.</p>
<p><b>Results:</b> Under PH, <I>c</I>' is most efficiently estimated by COX. Under NPH, <I>c</I>' can be obtained by weighted Cox regression (WHE) or a novel method, concordance regression (CON). The least biased and most stable estimates were obtained by CON. We propose to use <I>c</I>' as summary measure of effect size to rank genes irrespective of different types of NPH and censoring patterns.</p>
<p><b>Availability:</b> WHE and CON are available as R packages.</p>
<p><b>Contact:</b> <inter-ref locator="georg.heinze@meduniwien.ac.at" locator-type="email">georg.heinze@meduniwien.ac.at</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq035/DC1" locator-type="url">Supplementary Data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dunkler, D., Schemper, M., Heinze, G.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq035</dc:identifier>
<dc:title><![CDATA[Gene selection in microarray survival studies under possibly non-proportional hazards]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>790</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>784</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/791?rss=1">
<title><![CDATA[Non-linear classification for on-the-fly fractional mass filtering and targeted precursor fragmentation in mass spectrometry experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/791?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Mass spectrometry (MS) has become the method of choice for protein/peptide sequence and modification analysis. The technology employs a two-step approach: ionized peptide precursor masses are detected, selected for fragmentation, and the fragment mass spectra are collected for computational analysis. Current precursor selection schemes are based on data- or information-dependent acquisition (DDA/IDA), where fragmentation mass candidates are selected by intensity and are subsequently included in a dynamic exclusion list to avoid constant refragmentation of highly abundant species. DDA/IDA methods do not exploit valuable information that is contained in the fractional mass of high-accuracy precursor mass measurements delivered by current instrumentation.</p>
<p><b>Results:</b> We extend previous contributions that suggest that fractional mass information allows targeted fragmentation of analytes of interest. We introduce a non-linear Random Forest classification and a discrete mapping approach, which can be trained to discriminate among arbitrary fractional mass patterns for an arbitrary number of classes of analytes. These methods can be used to increase fragmentation efficiency for specific subsets of analytes or to select suitable fragmentation technologies on-the-fly. We show that theoretical generalization error estimates transfer into practical application, and that their quality depends on the accuracy of prior distribution estimate of the analyte classes. The methods are applied to two real-world proteomics datasets.</p>
<p><b>Availability:</b> All software used in this study is available from <inter-ref locator="http://software.steenlab.org/fmf" locator-type="url">http://software.steenlab.org/fmf</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="hanno.steen@childrens.harvard.edu" locator-type="email">hanno.steen@childrens.harvard.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq036/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Kirchner, M., Timm, W., Fong, P., Wangemann, P., Steen, H.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq036</dc:identifier>
<dc:title><![CDATA[Non-linear classification for on-the-fly fractional mass filtering and targeted precursor fragmentation in mass spectrometry experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>797</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>791</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/798?rss=1">
<title><![CDATA[Correcting population stratification in genetic association studies using a phylogenetic approach]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/798?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The rapid development of genotyping technology and extensive cataloguing of single nucleotide polymorphisms (SNPs) across the human genome have made genetic association studies the mainstream for gene mapping of complex human diseases. For many diseases, the most practical approach is the population-based design with unrelated individuals. Although having the advantages of easier sample collection and greater power than family-based designs, unrecognized population stratification in the study samples can lead to both false-positive and false-negative findings and might obscure the true association signals if not appropriately corrected.</p>
<p><b>Methods:</b> We report PHYLOSTRAT, a new method that corrects for population stratification by combining phylogeny constructed from SNP genotypes and principal coordinates from multi-dimensional scaling (MDS) analysis. This hybrid approach efficiently captures both discrete and admixed population structures.</p>
<p><b>Results:</b> By extensive simulations, the analysis of a synthetic genome-wide association dataset created using data from the Human Genome Diversity Project, and the analysis of a lactase-height dataset, we show that our method can correct for population stratification more efficiently than several existing population stratification correction methods, including EIGENSTRAT, a hybrid approach based on MDS and clustering, and STRATSCORE, in terms of requiring fewer random SNPs for inference of population structure. By combining the flexibility and hierarchical nature of phylogenetic trees with the advantage of representing admixture using MDS, our hybrid approach can capture the complex population structures in human populations effectively.</p>
<p><b>Software Availability:</b> Codes can be downloaded from <inter-ref locator="http://people.pcbi.upenn.edu/~lswang/phylostrat/" locator-type="url">http://people.pcbi.upenn.edu/~lswang/phylostrat/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="mingyao@upenn.edu" locator-type="email">mingyao@upenn.edu</inter-ref>; <inter-ref locator="lswang@upenn.edu" locator-type="email">lswang@upenn.edu</inter-ref>.</p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq025/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Li, M., Reilly, M. P., Rader, D. J., Wang, L.-S.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq025</dc:identifier>
<dc:title><![CDATA[Correcting population stratification in genetic association studies using a phylogenetic approach]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>806</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>798</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/807?rss=1">
<title><![CDATA[Prediction of human functional genetic networks from heterogeneous data using RVM-based ensemble learning]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/807?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Three major problems confront the construction of a human genetic network from heterogeneous genomics data using kernel-based approaches: definition of a robust gold-standard negative set, large-scale learning and massive missing data values.</p>
<p><b>Results:</b> The proposed graph-based approach generates a robust GSN for the training process of genetic network construction. The RVM-based ensemble model that combines AdaBoost and reduced-feature yields improved performance on large-scale learning problems with massive missing values in comparison to Na&iuml;ve Bayes.</p>
<p><b>Contact:</b> <inter-ref locator="dargenio@bmsr.usc.edu" locator-type="email">dargenio@bmsr.usc.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq044/DC1" locator-type="url">Supplementary material</inter-ref> is available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wu, C.-C., Asgharzadeh, S., Triche, T. J., D'Argenio, D. Z.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq044</dc:identifier>
<dc:title><![CDATA[Prediction of human functional genetic networks from heterogeneous data using RVM-based ensemble learning]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>813</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>807</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/814?rss=1">
<title><![CDATA[Predicting biodegradation products and pathways: a hybrid knowledge- and machine learning-based approach]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/814?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Current methods for the prediction of biodegradation products and pathways of organic environmental pollutants either do not take into account domain knowledge or do not provide probability estimates. In this article, we propose a hybrid knowledge- and machine learning-based approach to overcome these limitations in the context of the University of Minnesota Pathway Prediction System (UM-PPS). The proposed solution performs relative reasoning in a machine learning framework, and obtains one probability estimate for each biotransformation rule of the system. As the application of a rule then depends on a threshold for the probability estimate, the trade-off between recall (sensitivity) and precision (selectivity) can be addressed and leveraged in practice.</p>
<p><b>Results:</b> Results from leave-one-out cross-validation show that a recall and precision of ~0.8 can be achieved for a subset of 13 transformation rules. Therefore, it is possible to optimize precision without compromising recall. We are currently integrating the results into an experimental version of the UM-PPS server.</p>
<p><b>Availability:</b> The program is freely available on the web at <inter-ref locator="http://wwwkramer.in.tum.de/research/applications/biodegradation/data" locator-type="url">http://wwwkramer.in.tum.de/research/applications/biodegradation/data</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="kramer@in.tum.de" locator-type="email">kramer@in.tum.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Wicker, J., Fenner, K., Ellis, L., Wackett, L., Kramer, S.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq024</dc:identifier>
<dc:title><![CDATA[Predicting biodegradation products and pathways: a hybrid knowledge- and machine learning-based approach]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>821</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>814</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/822?rss=1">
<title><![CDATA[Small-sample precision of ROC-related estimates]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/822?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The receiver operator characteristic (ROC) curves are commonly used in biomedical applications to judge the performance of a discriminant across varying decision thresholds. The estimated ROC curve depends on the true positive rate (TPR) and false positive rate (FPR), with the key metric being the area under the curve (AUC). With small samples these rates need to be estimated from the training data, so a natural question arises: How well do the estimates of the AUC, TPR and FPR compare with the true metrics?</p>
<p><b>Results:</b> Through a simulation study using data models and analysis of real microarray data, we show that (i) for small samples the root mean square differences of the estimated and true metrics are considerable; (ii) even for large samples, there is only weak correlation between the true and estimated metrics; and (iii) generally, there is weak regression of the true metric on the estimated metric. For classification rules, we consider linear discriminant analysis, linear support vector machine (SVM) and radial basis function SVM. For error estimation, we consider resubstitution, three kinds of cross-validation and bootstrap. Using resampling, we show the unreliability of some published ROC results.</p>
<p><b>Availability:</b> Companion web site at <inter-ref locator="http://compbio.tgen.org/paper_supp/ROC/roc.html" locator-type="url">http://compbio.tgen.org/paper_supp/ROC/roc.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="edward@mail.ece.tamu.edu" locator-type="email">edward@mail.ece.tamu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Hanczar, B., Hua, J., Sima, C., Weinstein, J., Bittner, M., Dougherty, E. R.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq037</dc:identifier>
<dc:title><![CDATA[Small-sample precision of ROC-related estimates]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>830</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>822</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/831?rss=1">
<title><![CDATA[Maximal conditional chi-square importance in random forests]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/831?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> High-dimensional data are frequently generated in genome-wide association studies (GWAS) and other studies. It is important to identify features such as single nucleotide polymorphisms (SNPs) in GWAS that are associated with a disease. Random forests represent a very useful approach for this purpose, using a variable importance score. This importance score has several shortcomings. We propose an alternative importance measure to overcome those shortcomings.</p>
<p><b>Results:</b> We characterized the effect of multiple SNPs under various models using our proposed importance measure in random forests, which uses maximal conditional chi-square (MCC) as a measure of association between a SNP and the trait conditional on other SNPs. Based on this importance measure, we employed a permutation test to estimate empirical <I>P</I>-values of SNPs. Our method was compared to a univariate test and the permutation test using the Gini and permutation importance. In simulation, the proposed method performed consistently superior to the other methods in identifying of risk SNPs. In a GWAS of age-related macular degeneration, the proposed method confirmed two significant SNPs (at the genome-wide adjusted level of 0.05). Further analysis showed that these two SNPs conformed with a heterogeneity model. Compared with the existing importance measures, the MCC importance measure is more sensitive to complex effects of risk SNPs by utilizing conditional information on different SNPs. The permutation test with the MCC importance measure provides an efficient way to identify candidate SNPs in GWAS and facilitates the understanding of the etiology between genetic variants and complex diseases.</p>
<p><b>Contact:</b> <inter-ref locator="heping.zhang@yale.edu" locator-type="email">heping.zhang@yale.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq038/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wang, M., Chen, X., Zhang, H.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq038</dc:identifier>
<dc:title><![CDATA[Maximal conditional chi-square importance in random forests]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>837</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>831</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/838?rss=1">
<title><![CDATA[invertFREGENE: software for simulating inversions in population genetic data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/838?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Inversions are a common form of structural variation, which may have a marked effect on the genome and methods to infer quantities of interest such as those relating to population structure and natural selection. However, due to the challenge in detecting inversions, little is presently known about their impact. Software to simulate inversions could be used to provide a better understanding of how to detect and account for them; but while there are several software packages for simulating population genetic data, none incorporate inversion polymorphisms. Here, we describe a software package, modified from the forward-in-time simulator FREGENE, which simulates the evolution of an inversion polymorphism, of specified length, location, frequency and age, in a population of sequences. We describe previously unreported signatures of inversions in SNP data observed in invertFREGENE results and a known inversion in humans.</p>
<p><b>Availability:</b> C++ source code and user manual are available for download from <inter-ref locator="http://www.ebi.ac.uk/projects/BARGEN/" locator-type="url">http://www.ebi.ac.uk/projects/BARGEN/</inter-ref> under the GPL licence.</p>
<p><b>Contact:</b> <inter-ref locator="l.coin@ic.ac.uk" locator-type="email">l.coin@ic.ac.uk</inter-ref>; <inter-ref locator="c.hoggart@ic.ac.uk" locator-type="email">c.hoggart@ic.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq029/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[O'Reilly, P. F., Coin, L. J. M., Hoggart, C. J.]]></dc:creator>
<dc:date>2010-03-05T04:22:07-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq029</dc:identifier>
<dc:title><![CDATA[invertFREGENE: software for simulating inversions in population genetic data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>840</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>838</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/841?rss=1">
<title><![CDATA[BEDTools: a flexible suite of utilities for comparing genomic features]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/841?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Testing for correlations between different sets of genomic features is a fundamental task in genomics research. However, searching for overlaps between features with existing web-based methods is complicated by the massive datasets that are routinely produced with current sequencing technologies. Fast and flexible tools are therefore required to ask complex questions of these data in an efficient manner.</p>
<p><b>Results:</b> This article introduces a new software suite for the comparison, manipulation and annotation of genomic features in Browser Extensible Data (BED) and General Feature Format (GFF) format. BEDTools also supports the comparison of sequence alignments in BAM format to both BED and GFF features. The tools are extremely efficient and allow the user to compare large datasets (e.g. next-generation sequencing data) with both public and custom genome annotation tracks. BEDTools can be combined with one another as well as with standard UNIX commands, thus facilitating routine genomics tasks as well as pipelines that can quickly answer intricate questions of large genomic datasets.</p>
<p><b>Availability and implementation:</b> BEDTools was written in C++. Source code and a comprehensive user manual are freely available at <inter-ref locator="http://code.google.com/p/bedtools" locator-type="url">http://code.google.com/p/bedtools</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="aaronquinlan@gmail.com" locator-type="email">aaronquinlan@gmail.com</inter-ref>; <inter-ref locator="imh4y@virginia.edu" locator-type="email">imh4y@virginia.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq033/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Quinlan, A. R., Hall, I. M.]]></dc:creator>
<dc:date>2010-03-05T04:22:08-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq033</dc:identifier>
<dc:title><![CDATA[BEDTools: a flexible suite of utilities for comparing genomic features]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>842</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>841</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/843?rss=1">
<title><![CDATA[iMotifs: an integrated sequence motif visualization and analysis environment]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/843?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Short sequence motifs are an important class of models in molecular biology, used most commonly for describing transcription factor binding site specificity patterns. High-throughput methods have been recently developed for detecting regulatory factor binding sites <I>in vivo</I> and <I>in vitro</I> and consequently high-quality binding site motif data are becoming available for increasing number of organisms and regulatory factors. Development of intuitive tools for the study of sequence motifs is therefore important.</p>
<p>iMotifs is a graphical motif analysis environment that allows visualization of annotated sequence motifs and scored motif hits in sequences. It also offers motif inference with the sensitive NestedMICA algorithm, as well as overrepresentation and pairwise motif matching capabilities. All of the analysis functionality is provided without the need to convert between file formats or learn different command line interfaces.</p>
<p>The application includes a bundled and graphically integrated version of the NestedMICA motif inference suite that has no outside dependencies. Problems associated with local deployment of software are therefore avoided.</p>
<p><b>Availability:</b> iMotifs is licensed with the GNU Lesser General Public License v2.0 (LGPL 2.0). The software and its source is available at <inter-ref locator="http://wiki.github.com/mz2/imotifs" locator-type="url">http://wiki.github.com/mz2/imotifs</inter-ref> and can be run on Mac OS X Leopard (Intel/PowerPC). We also provide a cross-platform (Linux, OS X, Windows) LGPL 2.0 licensed library <ty>libxms</ty> for the Perl, Ruby, R and Objective-C programming languages for input and output of XMS formatted annotated sequence motif set files.</p>
<p><b>Contact:</b> <inter-ref locator="matias.piipari@gmail.com" locator-type="email">matias.piipari@gmail.com</inter-ref>; <inter-ref locator="imotifs@googlegroups.com" locator-type="email">imotifs@googlegroups.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Piipari, M., Down, T. A., Saini, H., Enright, A., Hubbard, T. J.P.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:08 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq026</dc:identifier>
<dc:title><![CDATA[iMotifs: an integrated sequence motif visualization and analysis environment]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>844</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>843</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/845?rss=1">
<title><![CDATA[FineStr: a web server for single-base-resolution nucleosome positioning]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/845?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The DNA in eukaryotic cells is packed into the chromatin that is composed of nucleosomes. Positioning of the nucleosome core particles on the sequence is a problem of great interest because of the role nucleosomes play in different cellular processes including gene regulation.</p>
<p>Using the sequence structure of 10.4 base DNA repeat presented in our previous works and nucleosome core DNA sequences database, we have derived the complete nucleosome DNA bendability matrix of <I>Caenorhabditis elegans</I>.</p>
<p>We have developed a web server named FineStr that allows users to upload genomic sequences in FASTA format and to perform a single-base-resolution nucleosome mapping on them.</p>
<p><b>Availability:</b> FineStr server is freely available for use on the web at <inter-ref locator="http://www.cs.bgu.ac.il/~nucleom" locator-type="url">http://www.cs.bgu.ac.il/~nucleom</inter-ref>. The site contains a help file with explanation regarding the exact usage.</p>
<p><b>Contact:</b> <inter-ref locator="gabdank@cs.bgu.ac.il" locator-type="email">gabdank@cs.bgu.ac.il</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Gabdank, I., Barash, D., Trifonov, E. N.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:08 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq030</dc:identifier>
<dc:title><![CDATA[FineStr: a web server for single-base-resolution nucleosome positioning]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>846</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>845</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/847?rss=1">
<title><![CDATA[XDIA: improving on the label-free data-independent analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/847?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> XDIA is a computational strategy for analyzing multiplexed spectra acquired using electron transfer dissociation and collision-activated dissociation; it significantly increases identified spectra (~250%) and unique peptides (~30%) when compared with the data-dependent ETCaD analysis on middle-down, single-phase shotgun proteomic analysis. Increasing identified spectra and peptides improves quantitation statistics confidence and protein coverage, respectively.</p>
<p><b>Availability:</b> The software and data produced in this work are freely available for academic use at <inter-ref locator="http://fields.scripps.edu/XDIA" locator-type="url">http://fields.scripps.edu/XDIA</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="paulo@pcarvalho.com" locator-type="email">paulo@pcarvalho.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq031/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Carvalho, P. C., Han, X., Xu, T., Cociorva, D., Carvalho, M. d. G., Barbosa, V. C., Yates, J. R.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:08 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq031</dc:identifier>
<dc:title><![CDATA[XDIA: improving on the label-free data-independent analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>848</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>847</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/849?rss=1">
<title><![CDATA[Filtering error from SOLiD Output]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/849?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Here, we report the development of a filtering framework designed for efficient identification of both polyclonal and independent errors within SOLiD sequence data. The filtering utilizes the quality values reported by SOLiD's primary analysis for the identification of the two different types of errors. The filtering framework facilitates the passage of high-quality data into a variety of functional genomics applications, including <I>de novo</I> assemblers and sequence matching programs for SNP calling, improving the output quality and reducing resources necessary for analysis.</p>
<p><b>Availability:</b> This error analysis framework is written in Perl and runs on Mac OS and Linux/Unix systems. The filter, documentation and sample Excel files for quality analysis are available at <inter-ref locator="http://hts.rutgers.edu/filter" locator-type="url">http://hts.rutgers.edu/filter</inter-ref> and are distributed as Open Source software under the GPLv3.0.</p>
<p><b>Contact:</b> <inter-ref locator="tmichael@waksman.rutgers.edu" locator-type="email">tmichael@waksman.rutgers.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq045/DC1" locator-type="url">Supplementary data</inter-ref> is available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Sasson, A., Michael, T. P.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:08 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq045</dc:identifier>
<dc:title><![CDATA[Filtering error from SOLiD Output]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>850</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>849</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/851?rss=1">
<title><![CDATA[Easy retrieval of single amino-acid polymorphisms and phenotype information using SwissVar]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/851?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The SwissVar portal provides access to a comprehensive collection of single amino acid polymorphisms and diseases in the UniProtKB/Swiss-Prot database via a unique search engine. In particular, it gives direct access to the newly improved Swiss-Prot variant pages. The key strength of this portal is that it provides a possibility to query for similar diseases, as well as the underlying protein products and the molecular details of each variant. In the context of the recently proposed molecular view on diseases, the SwissVar portal should be in a unique position to provide valuable information for researchers and to advance research in this area.</p>
<p><b>Availability:</b> The SwissVar portal is available at <inter-ref locator="http://www.expasy.org/swissvar" locator-type="url">www.expasy.org/swissvar</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="anais.mottaz@isb-sib.ch" locator-type="email">anais.mottaz@isb-sib.ch</inter-ref>; <inter-ref locator="lina.yip@isb-sib.ch" locator-type="email">lina.yip@isb-sib.ch</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq028/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Mottaz, A., David, F. P.A., Veuthey, A.-L., Yip, Y. L.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:08 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq028</dc:identifier>
<dc:title><![CDATA[Easy retrieval of single amino-acid polymorphisms and phenotype information using SwissVar]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>852</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>851</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/853?rss=1">
<title><![CDATA[Biological network comparison using graphlet degree distribution]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/6/853?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Przulj, N.]]></dc:creator>
<dc:date>Fri, 05 Mar 2010 04:22:08 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq091</dc:identifier>
<dc:title><![CDATA[Biological network comparison using graphlet degree distribution]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>6</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>854</prism:endingPage>
<prism:publicationDate>2010-03-15</prism:publicationDate>
<prism:startingPage>853</prism:startingPage>
<prism:section>ERRATUM</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/589?rss=1">
<title><![CDATA[Fast and accurate long-read alignment with Burrows-Wheeler transform]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/589?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many programs for aligning short sequencing reads to a reference genome have been developed in the last 2 years. Most of them are very efficient for short reads but inefficient or not applicable for reads &gt;200 bp because the algorithms are heavily and specifically tuned for short queries with low sequencing error rate. However, some sequencing platforms already produce longer reads and others are expected to become available soon. For longer reads, hashing-based software such as BLAT and SSAHA2 remain the only choices. Nonetheless, these methods are substantially slower than short-read aligners in terms of aligned bases per unit time.</p>
<p><b>Results:</b> We designed and implemented a new algorithm, Burrows-Wheeler Aligner's Smith-Waterman Alignment (BWA-SW), to align long sequences up to 1 Mb against a large sequence database (e.g. the human genome) with a few gigabytes of memory. The algorithm is as accurate as SSAHA2, more accurate than BLAT, and is several to tens of times faster than both.</p>
<p><b>Availability:</b> <inter-ref locator="http://bio-bwa.sourceforge.net" locator-type="url">http://bio-bwa.sourceforge.net</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="rd@sanger.ac.uk" locator-type="email">rd@sanger.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Li, H., Durbin, R.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp698</dc:identifier>
<dc:title><![CDATA[Fast and accurate long-read alignment with Burrows-Wheeler transform]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>595</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>589</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/596?rss=1">
<title><![CDATA[Improving protein secondary structure prediction using a simple k-mer model]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/596?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Some first order methods for protein sequence analysis inherently treat each position as independent. We develop a general framework for introducing longer range interactions. We then demonstrate the power of our approach by applying it to secondary structure prediction; under the independence assumption, sequences produced by existing methods can produce features that are not protein like, an extreme example being a helix of length 1. Our goal was to make the predictions from state of the art methods more realistic, without loss of performance by other measures.</p>
<p><b>Results:</b> Our framework for longer range interactions is described as a <I>k</I>-mer order model. We succeeded in applying our model to the specific problem of secondary structure prediction, to be used as an additional layer on top of existing methods. We achieved our goal of making the predictions more realistic and protein like, and remarkably this also improved the overall performance. We improve the Segment OVerlap (SOV) score by 1.8%, but more importantly we radically improve the probability of the real sequence given a prediction from an average of 0.271 per residue to 0.385. Crucially, this improvement is obtained using no additional information.</p>
<p><b>Availability:</b> <inter-ref locator="http://supfam.cs.bris.ac.uk/kmer" locator-type="url">http://supfam.cs.bris.ac.uk/kmer</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="gough@cs.bris.ac.uk" locator-type="email">gough@cs.bris.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Madera, M., Calmus, R., Thiltgen, G., Karplus, K., Gough, J.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq020</dc:identifier>
<dc:title><![CDATA[Improving protein secondary structure prediction using a simple k-mer model]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>602</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>596</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/603?rss=1">
<title><![CDATA[Exploring classification strategies with the CoEPrA 2006 contest]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/603?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> <I>In silico</I> methods to classify compounds as potential drugs that bind to a specific target become increasingly important for drug design. To build classification devices training sets of drugs with known activities are needed. For many such classification problems, not only qualitative but also quantitative information of a specific property (e.g. binding affinity) is available. The latter can be used to build a regression scheme to predict this property for new compounds. Predicting a compound property explicitly is generally more difficult than classifying that the property lies below or above a given threshold value. Hence, an indirect classification that is based on regression may lead to poorer results than a direct classification scheme. In fact, initially researchers are only interested to classify compounds as potential drugs. The activities of these compounds are subsequently measured in wet lab.</p>
<p><b>Results:</b> We propose a novel approach that uses available quantitative information directly for classification rather than first using a regression scheme. It uses a new type of loss function called weighted biased regression. Application of this method to four widely studied datasets of the CoEPrA contest (Comparative Evaluation of Prediction Algorithms, <inter-ref locator="http://coepra.org" locator-type="url">http://coepra.org</inter-ref>) shows that it can outperform simple classification methods that do not make use of this additional quantitative information.</p>
<p><b>Availability:</b> A stand alone application is available at the webpage <inter-ref locator="http://agknapp.chemie.fu-berlin.de/agknapp/index.php?menu=software&amp;page=PeptideClassifier" locator-type="url">http://agknapp.chemie.fu-berlin.de/agknapp/index.php?menu=software&amp;page=PeptideClassifier</inter-ref> that can be used to build a model for a peptide training set to be submitted.</p>
<p><b>Contact:</b> <inter-ref locator="odemir@chemie.fu-berlin.de" locator-type="email">odemir@chemie.fu-berlin.de</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq021/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Demir-Kavuk, O., Riedesel, H., Knapp, E.-W.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq021</dc:identifier>
<dc:title><![CDATA[Exploring classification strategies with the CoEPrA 2006 contest]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>609</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>603</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/610?rss=1">
<title><![CDATA[RNAsnoop: efficient target prediction for H/ACA snoRNAs]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/610?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Small nucleolar RNAs are an abundant class of non-coding RNAs that guide chemical modifications of rRNAs, snRNAs and some mRNAs. In the case of many &lsquo;orphan&rsquo; snoRNAs, the targeted nucleotides remain unknown, however. The box H/ACA subclass determines uridine residues that are to be converted into pseudouridines via specific complementary binding in a well-defined secondary structure configuration that is outside the scope of common RNA (co-)folding algorithms.</p>
<p><b>Results:</b> <ty>RNAsnoop</ty> implements a dynamic programming algorithm that computes thermodynamically optimal H/ACA-RNA interactions in an efficient scanning variant. Complemented by a support vector machine (SVM)-based machine learning approach to distinguish true binding sites from spurious solutions and a system to evaluate comparative information, it presents an efficient and reliable tool for the prediction of H/ACA snoRNA target sites. We apply <ty>RNAsnoop</ty> to identify the snoRNAs that are responsible for several of the remaining &lsquo;orphan&rsquo; pseudouridine modifications in human rRNAs, and we assign a target to one of the five orphan H/ACA snoRNAs in <I>Drosophila</I>.</p>
<p><b>Availability:</b> The C source code of <ty>RNAsnoop</ty> is freely available at <inter-ref locator="http://www.tbi.univie.ac.at/~htafer/RNAsnoop" locator-type="url">http://www.tbi.univie.ac.at/~htafer/RNAsnoop</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="htafer@tbi.univie.ac.at" locator-type="email">htafer@tbi.univie.ac.at</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp680/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tafer, H., Kehr, S., Hertel, J., Hofacker, I. L., Stadler, P. F.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp680</dc:identifier>
<dc:title><![CDATA[RNAsnoop: efficient target prediction for H/ACA snoRNAs]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>616</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>610</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/617?rss=1">
<title><![CDATA[Active site prediction using evolutionary and structural information]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/617?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The identification of catalytic residues is a key step in understanding the function of enzymes. While a variety of computational methods have been developed for this task, accuracies have remained fairly low. The best existing method exploits information from sequence and structure to achieve a precision (the fraction of predicted catalytic residues that are catalytic) of 18.5% at a corresponding recall (the fraction of catalytic residues identified) of 57% on a standard benchmark. Here we present a new method, D<scp>iscern</scp>, which provides a significant improvement over the state-of-the-art through the use of statistical techniques to derive a model with a small set of features that are jointly predictive of enzyme active sites.</p>
<p><b>Results:</b> In cross-validation experiments on two benchmark datasets from the Catalytic Site Atlas and CATRES resources containing a total of 437 manually curated enzymes spanning 487 SCOP families, D<scp>iscern</scp> increases catalytic site recall between 12% and 20% over methods that combine information from both sequence and structure, and by &ge;50% over methods that make use of sequence conservation signal only. Controlled experiments show that D<scp>iscern</scp>'s improvement in catalytic residue prediction is derived from the combination of three ingredients: the use of the INTREPID phylogenomic method to extract conservation information; the use of 3D structure data, including features computed for residues that are proximal in the structure; and a statistical regularization procedure to prevent overfitting.</p>
<p><b>Contact:</b> <inter-ref locator="kimmen@berkeley.edu" locator-type="email">kimmen@berkeley.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq008/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Sankararaman, S., Sha, F., Kirsch, J. F., Jordan, M. I., Sjolander, K.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq008</dc:identifier>
<dc:title><![CDATA[Active site prediction using evolutionary and structural information]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>624</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>617</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/625?rss=1">
<title><![CDATA[Protein secondary structure appears to be robust under in silico evolution while protein disorder appears not to be]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/625?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The mutation of amino acids often impacts protein function and structure. Mutations without negative effect sustain evolutionary pressure. We study a particular aspect of structural robustness with respect to mutations: regular protein secondary structure and natively unstructured (intrinsically disordered) regions. Is the formation of regular secondary structure an intrinsic feature of amino acid sequences, or is it a feature that is lost upon mutation and is maintained by evolution against the odds? Similarly, is disorder an intrinsic sequence feature or is it difficult to maintain? To tackle these questions, we <I>in silico</I> mutated native protein sequences into random sequence-like ensembles and monitored the change in predicted secondary structure and disorder.</p>
<p><b>Results:</b> We established that by our coarse-grained measures for change, predictions and observations were similar, suggesting that our results were not biased by prediction mistakes. Changes in secondary structure and disorder predictions were linearly proportional to the change in sequence. Surprisingly, neither the content nor the length distribution for the predicted secondary structure changed substantially. Regions with long disorder behaved differently in that significantly fewer such regions were predicted after a few mutation steps. Our findings suggest that the formation of regular secondary structure is an intrinsic feature of random amino acid sequences, while the formation of long-disordered regions is not an intrinsic feature of proteins with disordered regions. Put differently, helices and strands appear to be maintained easily by evolution, whereas maintaining disordered regions appears difficult. Neutral mutations with respect to disorder are therefore very unlikely.</p>
<p><b>Contact:</b> <inter-ref locator="schaefer@rostlab.org" locator-type="email">schaefer@rostlab.org</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq012/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Schaefer, C., Schlessinger, A., Rost, B.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq012</dc:identifier>
<dc:title><![CDATA[Protein secondary structure appears to be robust under in silico evolution while protein disorder appears not to be]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>631</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>625</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/632?rss=1">
<title><![CDATA[Faster computation of exact RNA shape probabilities]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/632?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Abstract shape analysis allows efficient computation of a representative sample of low-energy foldings of an RNA molecule. More comprehensive information is obtained by computing shape probabilities, accumulating the Boltzmann probabilities of all structures within each abstract shape. Such information is superior to free energies because it is independent of sequence length and base composition. However, up to this point, computation of shape probabilities evaluates all shapes simultaneously and comes with a computation cost which is exponential in the length of the sequence.</p>
<p><b>Results:</b> We devise an approach called <I>RapidShapes</I> that computes the shapes above a specified probability threshold <I>T</I> by generating a list of promising shapes and constructing specialized folding programs for each shape to compute its share of Boltzmann probability. This aims at a heuristic improvement of runtime, while still computing exact probability values.</p>
<p><b>Conclusion:</b> Evaluating this approach and several substrategies, we find that only a small proportion of shapes have to be actually computed. For an RNA sequence of length 400, this leads, depending on the threshold, to a 10&ndash;138 fold speed-up compared with the previous complete method. Thus, probabilistic shape analysis has become feasible in medium-scale applications, such as the screening of RNA transcripts in a bacterial genome.</p>
<p><b>Availability:</b> <I>RapidShapes</I> is available via <inter-ref locator="http://bibiserv.cebitec.uni-bielefeld.de/rnashapes" locator-type="url">http://bibiserv.cebitec.uni-bielefeld.de/rnashapes</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="robert@techfak.uni-bielefeld.de" locator-type="email">robert@techfak.uni-bielefeld.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq014/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Janssen, S., Giegerich, R.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq014</dc:identifier>
<dc:title><![CDATA[Faster computation of exact RNA shape probabilities]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>639</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>632</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/640?rss=1">
<title><![CDATA[A censored beta mixture model for the estimation of the proportion of non-differentially expressed genes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/640?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The proportion of non-differentially expressed genes (&pi;<SUB>0</SUB>) is an important quantity in microarray data analysis. Although many statistical methods have been proposed for its estimation, it is still necessary to develop more efficient methods.</p>
<p><b>Methods:</b> Our approach for improving &pi;<SUB>0</SUB> estimation is to modify an existing simple method by introducing artificial censoring to <I>P</I>-values. In a comprehensive simulation study and the applications to experimental datasets, we compare our method with eight existing estimation methods.</p>
<p><b>Results:</b> The simulation study confirms that our method can clearly improve the estimation performance. Compared with the existing methods, our method can generally provide a relatively accurate estimate with relatively small variance. Using experimental microarray datasets, we also demonstrate that our method can generally provide satisfactory estimates in practice.</p>
<p><b>Availability:</b> The R code is freely available at <inter-ref locator="http://home.gwu.edu/~ylai/research/CBpi0/" locator-type="url">http://home.gwu.edu/~ylai/research/CBpi0/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="ylai@gwu.edu" locator-type="email">ylai@gwu.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq001/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Markitsis, A., Lai, Y.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq001</dc:identifier>
<dc:title><![CDATA[A censored beta mixture model for the estimation of the proportion of non-differentially expressed genes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>646</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>640</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/647?rss=1">
<title><![CDATA[Random distance dependent attachment as a model for neural network generation in the Caenorhabditis elegans]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/647?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The topology of the network induced by the neurons connectivity's in the <I>Caenorhabditis elegans</I> differs from most common random networks. The neurons positions of the <I>C.elegans</I> have been previously explained as being optimal to induce the required network wiring. We here propose a complementary explanation that the network wiring is the direct result of a local stochastic synapse formation process.</p>
<p><b>Results:</b> We show that a model based on the physical distance between neurons can explain the <I>C.elegans</I> neural network structure, specifically, we demonstrate that a simple model based on a geometrical synapse formation probability and the inhibition of short coherent cycles can explain the properties of the <I>C.elegans'</I> neural network. We suggest this model as an initial framework to discuss neural network generation and as a first step toward the development of models for more advanced creatures. In order to measure the circle frequency in the network, a novel graph-theory circle length measurement algorithm is proposed.</p>
<p><b>Contact:</b> <inter-ref locator="royi.its@gmail.com" locator-type="email">royi.its@gmail.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq015/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Itzhack, R., Louzoun, Y.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq015</dc:identifier>
<dc:title><![CDATA[Random distance dependent attachment as a model for neural network generation in the Caenorhabditis elegans]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>652</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>647</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/653?rss=1">
<title><![CDATA[Computational quantification of metabolic fluxes from a single isotope snapshot: application to an animal biopsy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/653?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Quantitative determination of metabolic fluxes in single tissue biopsies is difficult. We report a novel analysis approach and software package for <I>in vivo</I> flux quantification using stable isotope labeling.</p>
<p><b>Results:</b> We developed a protocol based on brief, timed infusion of <sup>13</sup>C isotope-enriched substrates for the tricarboxylic acid (TCA) cycle followed by quick freezing of tissue biopsies. NMR measurements of tissue extracts were used for flux estimation based on a computational model of carbon transitions between TCA cycle metabolites and related amino acids. To this end, we developed a computational framework in which metabolic systems can be flexibly assembled, simulated and analyzed. Flux parameters were quantified from NMR multiplets by a partial grid search followed by repeated Nelder&ndash;Mead optimizations implemented on a computer grid. We implemented a model of the TCA cycle and showed by extensive simulations that the timed infusion protocol reliably quantitates multiple fluxes. Experimental validation of the method was done <I>in vivo</I> on hearts of anesthetized pigs under two different conditions: basal state (<I>n</I> = 7) and cardiac stress caused by infusion of dobutamine (<I>n</I> = 7). About nine tissue samples (40&ndash;200 mg dry-weight) were taken per heart. TCA cycle flux was 6.11 &plusmn; 0.28 (SEM) &micro;mol/min &middot; gdw at baseline versus 9.29 &plusmn; 1.03 &micro;mol/min &middot; gdw for dobutamine stress. Oxygen consumption calculated from the TCA cycle flux and from &lsquo;gold standard&rsquo; blood gas-based measurements were close, correlating with <I>r</I>=0.88 (<I>P</I> &lt; 10<sup>&ndash;4</sup>). Spatial heterogeneity in metabolic fluxes is detectable amongst the small samples. We propose that our novel isotope snapshot methodology is suitable for flux measurements in biopsies <I>in vivo</I>.</p>
<p><b>Availability:</b> Non-profit organizations will, upon request, be granted a non-exclusive license to use the software for internal research and teaching purposes at no charge. A web interface for using the software on our computer grid is available under <inter-ref locator="http://www.ibi.vu.nl/programs/" locator-type="url">http://www.ibi.vu.nl/programs/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="hans.van.beek@falw.vu.nl" locator-type="email">hans.van.beek@falw.vu.nl</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq018/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Binsl, T. W., Alders, D. J.C., Heringa, J., Groeneveld, A.B. J., van Beek, J. H.G.M.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq018</dc:identifier>
<dc:title><![CDATA[Computational quantification of metabolic fluxes from a single isotope snapshot: application to an animal biopsy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>660</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>653</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/661?rss=1">
<title><![CDATA[Disambiguating the species of biomedical named entities using natural language parsers]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/661?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Text mining technologies have been shown to reduce the laborious work involved in organizing the vast amount of information hidden in the literature. One challenge in text mining is linking ambiguous word forms to unambiguous biological concepts. This article reports on a comprehensive study on resolving the ambiguity in mentions of biomedical named entities with respect to model organisms and presents an array of approaches, with focus on methods utilizing natural language parsers.</p>
<p><b>Results:</b> We build a corpus for organism disambiguation where every occurrence of protein/gene entity is manually tagged with a species ID, and evaluate a number of methods on it. Promising results are obtained by training a machine learning model on syntactic parse trees, which is then used to decide whether an entity belongs to the model organism denoted by a neighbouring species-indicating word (e.g. <I>yeast</I>). The parser-based approaches are also compared with a supervised classification method and results indicate that the former are a more favorable choice when domain portability is of concern. The best overall performance is obtained by combining the strengths of syntactic features and supervised classification.</p>
<p><b>Availability:</b> The corpus and demo are available at <inter-ref locator="http://www.nactem.ac.uk/deca_details/start.cgi" locator-type="url">http://www.nactem.ac.uk/deca_details/start.cgi</inter-ref>, and the software is freely available as U-Compare components (Kano <I>et al.</I>, <cross-ref type="bib" refid="B15">2009</cross-ref>): NaCTeM Species Word Detector and NaCTeM Species Disambiguator. U-Compare is available at <inter-ref locator="http://u-compare.org/" locator-type="url">http://u-compare.org/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="xinglong.wang@manchester.ac.uk" locator-type="email">xinglong.wang@manchester.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Wang, X., Tsujii, J., Ananiadou, S.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq002</dc:identifier>
<dc:title><![CDATA[Disambiguating the species of biomedical named entities using natural language parsers]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>667</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>661</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/668?rss=1">
<title><![CDATA[Bayesian rule learning for biomedical data mining]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/668?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Disease state prediction from biomarker profiling studies is an important problem because more accurate classification models will potentially lead to the discovery of better, more discriminative markers. Data mining methods are routinely applied to such analyses of biomedical datasets generated from high-throughput &lsquo;omic&rsquo; technologies applied to clinical samples from tissues or bodily fluids. Past work has demonstrated that rule models can be successfully applied to this problem, since they can produce understandable models that facilitate review of discriminative biomarkers by biomedical scientists. While many rule-based methods produce rules that make predictions under uncertainty, they typically do not quantify the uncertainty in the validity of the rule itself. This article describes an approach that uses a Bayesian score to evaluate rule models.</p>
<p><b>Results:</b> We have combined the expressiveness of rules with the mathematical rigor of Bayesian networks (BNs) to develop and evaluate a Bayesian rule learning (BRL) system. This system utilizes a novel variant of the K2 algorithm for building BNs from the training data to provide probabilistic scores for IF-antecedent-THEN-consequent rules using heuristic best-first search. We then apply rule-based inference to evaluate the learned models during 10-fold cross-validation performed two times. The BRL system is evaluated on 24 published &lsquo;omic&rsquo; datasets, and on average it performs on par or better than other readily available rule learning methods. Moreover, BRL produces models that contain on average 70% fewer variables, which means that the biomarker panels for disease prediction contain fewer markers for further verification and validation by bench scientists.</p>
<p><b>Contact:</b> <inter-ref locator="vanathi@pitt.edu" locator-type="email">vanathi@pitt.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq005/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Gopalakrishnan, V., Lustgarten, J. L., Visweswaran, S., Cooper, G. F.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq005</dc:identifier>
<dc:title><![CDATA[Bayesian rule learning for biomedical data mining]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>675</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>668</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/676?rss=1">
<title><![CDATA[BamView: viewing mapped read alignment data in the context of the reference sequence]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/676?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> BamView is an interactive Java application for visualizing the large amounts of data stored for sequence reads which are aligned against a reference genome sequence. It supports the BAM (Binary Alignment/Map) format. It can be used in a number of contexts including SNP calling and structural annotation. BamView has also been integrated into Artemis so that the reads can be viewed in the context of the nucleotide sequence and genomic features.</p>
<p><b>Availability:</b> BamView and Artemis are freely available (under a GPL licence) for download (for MacOSX, UNIX and Windows) at: <inter-ref locator="http://bamview.sourceforge.net/" locator-type="url">http://bamview.sourceforge.net/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="artemis@sanger.ac.uk" locator-type="email">artemis@sanger.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Carver, T., Bohme, U., Otto, T. D., Parkhill, J., Berriman, M.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq010</dc:identifier>
<dc:title><![CDATA[BamView: viewing mapped read alignment data in the context of the reference sequence]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>677</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>676</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/678?rss=1">
<title><![CDATA[rMAT - an R/Bioconductor package for analyzing ChIP-chip experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/678?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Chromatin immunoprecipitation combined with DNA microarrays (ChIP-chip) has evolved as a popular technique to study DNA&ndash;protein binding or post-translational chromatin/histone modifications at the genomic level. However, the raw microarray intensities generate a massive amount of data, creating a need for efficient analysis algorithms and statistical methods to identify enriched regions.</p>
<p><b>Results:</b> We present a fast, free and powerful, open source R package, <ty>rMAT</ty>, that allows the identification of regions enriched for transcription factor binding sites in ChIP-chip experiments on Affymetrix tiling arrays.</p>
<p><b>Availability:</b> The R-package <ty>rMAT</ty> is available from the Bioconductor web site at <inter-ref locator="http://bioconductor.org" locator-type="url">http://bioconductor.org</inter-ref> and runs on Linux, MAC OS and MS-Windows. <ty>rMAT</ty> is distributed under the terms of the Artistic Licence 2.0.</p>
<p><b>Contact:</b> <inter-ref locator="arnaud.droit@ircm.qc.ca" locator-type="email">arnaud.droit@ircm.qc.ca</inter-ref>; <inter-ref locator="raphael.gottardo@ircm.qc.ca" locator-type="email">raphael.gottardo@ircm.qc.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq023/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Droit, A., Cheung, C., Gottardo, R.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq023</dc:identifier>
<dc:title><![CDATA[rMAT - an R/Bioconductor package for analyzing ChIP-chip experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>679</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>678</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/680?rss=1">
<title><![CDATA[CD-HIT Suite: a web server for clustering and comparing biological sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/680?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> CD-HIT is a widely used program for clustering and comparing large biological sequence datasets. In order to further assist the CD-HIT users, we significantly improved this program with more functions and better accuracy, scalability and flexibility. Most importantly, we developed a new web server, CD-HIT Suite, for clustering a user-uploaded sequence dataset or comparing it to another dataset at different identity levels. Users can now interactively explore the clusters within web browsers. We also provide downloadable clusters for several public databases (NCBI NR, Swissprot and PDB) at different identity levels.</p>
<p><b>Availability:</b> Free access at <inter-ref locator="http://cd-hit.org" locator-type="url">http://cd-hit.org</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="liwz@sdsc.edu" locator-type="email">liwz@sdsc.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq003/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Huang, Y., Niu, B., Gao, Y., Fu, L., Li, W.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq003</dc:identifier>
<dc:title><![CDATA[CD-HIT Suite: a web server for clustering and comparing biological sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>682</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>680</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/683?rss=1">
<title><![CDATA[Next-generation bioinformatics: using many-core processor architecture to develop a web service for sequence alignment]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/683?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Bioinformatics algorithms and computing power are the main bottlenecks for analyzing huge amount of data generated by the current technologies, such as the &lsquo;next-generation&rsquo; sequencing methodologies. At the same time, most powerful microprocessors are based on many-core chips, yet most applications cannot exploit such power, requiring parallelized algorithms. As an example of next-generation bioinformatics, we have developed from scratch a new parallelization of the Needleman&ndash;Wunsch (NW) sequence alignment algorithm for the 64-core Tile64 microprocessor. The unprecedented performance it offers for a standalone personal computer (PC) is discussed, optimally aligning sequences up to 20 times faster than the non-parallelized version, thus saving valuable time.</p>
<p><b>Availability:</b> This algorithm is available as a free web service for the scientific community at <inter-ref locator="http://www.sicuma.uma.es/multicore" locator-type="url">http://www.sicuma.uma.es/multicore</inter-ref>. The open source code is also available on such site.</p>
<p><b>Contact:</b> <inter-ref locator="galvez@uma.es" locator-type="email">galvez@uma.es</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq017/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Galvez, S., Diaz, D., Hernandez, P., Esteban, F. J., Caballero, J. A., Dorado, G.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq017</dc:identifier>
<dc:title><![CDATA[Next-generation bioinformatics: using many-core processor architecture to develop a web service for sequence alignment]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>686</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>683</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/687?rss=1">
<title><![CDATA[PSiFR: an integrated resource for prediction of protein structure and function]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/687?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> In the post-genomic era, the annotation of protein function facilitates the understanding of various biological processes. To extend the range of function annotation methods to the twilight zone of sequence identity, we have developed approaches that exploit both protein tertiary structure and/or protein sequence evolutionary relationships. To serve the scientific community, we have integrated the structure prediction tools, TASSER, TASSER-Lite and METATASSER, and the functional inference tools, FINDSITE, a structure-based algorithm for binding site prediction, Gene Ontology molecular function inference and ligand screening, EFICAz<sup>2</sup>, a sequence-based approach to enzyme function inference and DBD-hunter, an algorithm for predicting DNA-binding proteins and associated DNA-binding residues, into a unified web resource, Protein Structure and Function prediction Resource (PSiFR).</p>
<p><b>Availability and implementation:</b> PSiFR is freely available for use on the web at <inter-ref locator="http://psifr.cssb.biology.gatech.edu/" locator-type="url">http://psifr.cssb.biology.gatech.edu/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="skolnick@gatech.edu" locator-type="email">skolnick@gatech.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Pandit, S. B., Brylinski, M., Zhou, H., Gao, M., Arakaki, A. K., Skolnick, J.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq006</dc:identifier>
<dc:title><![CDATA[PSiFR: an integrated resource for prediction of protein structure and function]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>688</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>687</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/689?rss=1">
<title><![CDATA[PyRosetta: a script-based interface for implementing molecular modeling algorithms using Rosetta]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/689?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> PyRosetta is a stand-alone Python-based implementation of the Rosetta molecular modeling package that allows users to write custom structure prediction and design algorithms using the major Rosetta sampling and scoring functions. PyRosetta contains Python bindings to libraries that define Rosetta functions including those for accessing and manipulating protein structure, calculating energies and running Monte Carlo-based simulations. PyRosetta can be used in two ways: (i) interactively, using iPython and (ii) script-based, using Python scripting. Interactive mode contains a number of help features and is ideal for beginners while script-mode is best suited for algorithm development. PyRosetta has similar computational performance to Rosetta, can be easily scaled up for cluster applications and has been implemented for algorithms demonstrating protein docking, protein folding, loop modeling and design.</p>
<p><b>Availability:</b> PyRosetta is a stand-alone package available at <inter-ref locator="http://www.pyrosetta.org" locator-type="url">http://www.pyrosetta.org</inter-ref> under the Rosetta license which is free for academic and non-profit users. A tutorial, user's manual and sample scripts demonstrating usage are also available on the web site.</p>
<p><b>Contact:</b> <inter-ref locator="pyrosetta@graylab.jhu.edu" locator-type="email">pyrosetta@graylab.jhu.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Chaudhury, S., Lyskov, S., Gray, J. J.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq007</dc:identifier>
<dc:title><![CDATA[PyRosetta: a script-based interface for implementing molecular modeling algorithms using Rosetta]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>691</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>689</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/692?rss=1">
<title><![CDATA[iDBPs: a web server for the identification of DNA binding proteins]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/692?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The iDBPs server uses the three-dimensional (3D) structure of a query protein to predict whether it binds DNA. First, the algorithm predicts the functional region of the protein based on its evolutionary profile; the assumption is that large clusters of conserved residues are good markers of functional regions. Next, various characteristics of the predicted functional region as well as global features of the protein are calculated, such as the average surface electrostatic potential, the dipole moment and cluster-based amino acid conservation patterns. Finally, a random forests classifier is used to predict whether the query protein is likely to bind DNA and to estimate the prediction confidence. We have trained and tested the classifier on various datasets and shown that it outperformed related methods. On a dataset that reflects the fraction of DNA binding proteins (DBPs) in a proteome, the area under the ROC curve was 0.90. The application of the server to an updated version of the N-Func database, which contains proteins of unknown function with solved 3D-structure, suggested new putative DBPs for experimental studies.</p>
<p><b>Availability:</b> <inter-ref locator="http://idbps.tau.ac.il/" locator-type="url">http://idbps.tau.ac.il/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="NirB@tauex.tau.ac.il" locator-type="email">NirB@tauex.tau.ac.il</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq019/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Nimrod, G., Schushan, M., Szilagyi, A., Leslie, C., Ben-Tal, N.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq019</dc:identifier>
<dc:title><![CDATA[iDBPs: a web server for the identification of DNA binding proteins]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>693</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>692</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/694?rss=1">
<title><![CDATA[Multifactor dimensionality reduction for graphics processing units enables genome-wide testing of epistasis in sporadic ALS]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/694?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Epistasis, the presence of gene&ndash;gene interactions, has been hypothesized to be at the root of many common human diseases, but current genome-wide association studies largely ignore its role. Multifactor dimensionality reduction (MDR) is a powerful model-free method for detecting epistatic relationships between genes, but computational costs have made its application to genome-wide data difficult. Graphics processing units (GPUs), the hardware responsible for rendering computer games, are powerful parallel processors. Using GPUs to run MDR on a genome-wide dataset allows for statistically rigorous testing of epistasis.</p>
<p><b>Results:</b> The implementation of MDR for GPUs (MDRGPU) includes core features of the widely used Java software package, MDR. This GPU implementation allows for large-scale analysis of epistasis at a dramatically lower cost than the standard CPU-based implementations. As a proof-of-concept, we applied this software to a genome-wide study of sporadic amyotrophic lateral sclerosis (ALS). We discovered a statistically significant two-SNP classifier and subsequently replicated the significance of these two SNPs in an independent study of ALS. MDRGPU makes the large-scale analysis of epistasis tractable and opens the door to statistically rigorous testing of interactions in genome-wide datasets.</p>
<p><b>Availability:</b> MDRGPU is open source and available free of charge from <inter-ref locator="http://www.sourceforge.net/projects/mdr" locator-type="url">http://www.sourceforge.net/projects/mdr</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="jason.h.moore@dartmouth.edu" locator-type="email">jason.h.moore@dartmouth.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq009/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Greene, C. S., Sinnott-Armstrong, N. A., Himmelstein, D. S., Park, P. J., Moore, J. H., Harris, B. T.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq009</dc:identifier>
<dc:title><![CDATA[Multifactor dimensionality reduction for graphics processing units enables genome-wide testing of epistasis in sporadic ALS]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>695</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>694</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/696?rss=1">
<title><![CDATA[NEMO: a tool for analyzing gene and chromosome territory distributions from 3D-FISH experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/696?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Three-dimensional fluorescence <I>in situ</I> hybridization (3D-FISH) is used to study the organization and the positioning of chromosomes or specific sequences such as genes or RNA in cell nuclei. Many different programs (commercial or free) allow image analysis for 3D-FISH experiments. One of the more efficient open-source programs for automatically processing 3D-FISH microscopy images is Smart 3D-FISH, an ImageJ plug-in designed to automatically analyze distances between genes. One of the drawbacks of Smart 3D-FISH is that it has a rather basic user interface and produces its results in various text and image files thus making the data post-processing step time consuming. We developed a new Smart 3D-FISH graphical user interface, NEMO, which provides all information in the same place so that results can be checked and validated efficiently. NEMO gives users the ability to drive their experiments analysis in either automatic, semi-automatic or manual detection mode. We also tuned Smart 3D-FISH to better analyze chromosome territories.</p>
<p><b>Availability:</b> NEMO is a stand-alone Java application available for Windows and Linux platforms. The program is distributed under the creative commons licence and can be freely downloaded from <inter-ref locator="https://www-lgc.toulouse.inra.fr/nemo" locator-type="url">https://www-lgc.toulouse.inra.fr/nemo</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="eddie.iannuccelli@toulouse.inra.fr" locator-type="email">eddie.iannuccelli@toulouse.inra.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Iannuccelli, E., Mompart, F., Gellin, J., Lahbib-Mansais, Y., Yerle, M., Boudier, T.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq013</dc:identifier>
<dc:title><![CDATA[NEMO: a tool for analyzing gene and chromosome territory distributions from 3D-FISH experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>697</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>696</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/698?rss=1">
<title><![CDATA[MSMSpdbb: providing protein databases of closely related organisms to improve proteomic characterization of prokaryotic microbes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/698?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The Microbial Proteomic Resource (MPR) is a repository service that contains non-redundant protein databases of related bacterial strains, which were generated through an in-house developed software called Multi-Strain Mass Spectrometry Prokaryotic DataBase Builder (MSMSpdbb). MSMSpdbb merges and clusters protein sequences inferred from genomic sequences, and provides a protein list in FASTA format that covers for divergence in gene annotation, translational start site choice and presence of single nucleotide polymorphisms and other mutations.</p>
<p><b>Availability:</b> MSMSpdbb was developed in C++ using the Qt libraries (Nokia) and licensed under the GNU General Public License version 2. MSMSpdbb is freely available, and its installation files, instructions for use and additional documentation can be found at the MPR web site <inter-ref locator="http://org.uib.no/prokaryotedb/" locator-type="url">http://org.uib.no/prokaryotedb/</inter-ref> and can also be found at Proteomecommons.org (see <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq004/DC1" locator-type="url">Supplementary Methods</inter-ref> for Hash number).</p>
<p><b>Contact:</b> <inter-ref locator="Gustavo.Souza@biomed.uib.no" locator-type="email">Gustavo.Souza@biomed.uib.no</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btq004/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[de Souza, G. A., Arntzen, M. O., Wiker, H. G.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq004</dc:identifier>
<dc:title><![CDATA[MSMSpdbb: providing protein databases of closely related organisms to improve proteomic characterization of prokaryotic microbes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>699</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>698</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/700?rss=1">
<title><![CDATA[dbTEU: a protein database of trace element utilization]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/700?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Biological trace elements are required for numerous biological processes and by all organisms. We describe a database, dbTEU (DataBase of Trace Element Utilization), that features known transporters and user proteins for five trace elements (copper, molybdenum, nickel, cobalt and selenium) and represents sequenced organisms from the three domains of life. The manually curated dbTEU currently includes ~16 500 proteins from &gt;700 organisms, and offers interactive trace element, protein, organism and sequence search and browse tools.</p>
<p><b>Availability and Implementation:</b> dbTEU is freely available at <inter-ref locator="http://gladyshevlab.bwh.harvard.edu/trace_element/" locator-type="url">http://gladyshevlab.bwh.harvard.edu/trace_element/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="vgladyshev@rics.bwh.harvard.edu" locator-type="email">vgladyshev@rics.bwh.harvard.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Zhang, Y., Gladyshev, V. N.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:51 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp705</dc:identifier>
<dc:title><![CDATA[dbTEU: a protein database of trace element utilization]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>702</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>700</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/703?rss=1">
<title><![CDATA[Xper2: introducing e-taxonomy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/703?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Computer Aided Identification systems provide users with the resources to relate morpho-anatomic observations with taxa names and to subsequently access other knowledge about the organisms. They have the ability to manage descriptive data and make identifications through interactive keys. They are essential for both authors and users of biodiversity information. Xper<sup>2</sup> version 2.0 is one of the most user-friendly tools in its category and provides a complete environment dedicated to taxonomic management.</p>
<p><b>Availability:</b> Xper<sup>2</sup> software can be freely downloaded at <inter-ref locator="http://lis-upmc.snv.jussieu.fr/lis/?q=en/resources/softwares/xper2" locator-type="url">http://lis-upmc.snv.jussieu.fr/lis/?q=en/resources/softwares/xper2</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="visotheary.riviere-ung@snv.jussieu.fr" locator-type="email">visotheary.riviere-ung@snv.jussieu.fr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ung, V., Dubus, G., Zaragueta-Bagils, R., Vignes-Lebbe, R.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:52 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp715</dc:identifier>
<dc:title><![CDATA[Xper2: introducing e-taxonomy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>704</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>703</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/705?rss=1">
<title><![CDATA[ProteinWorldDB: querying radical pairwise alignments among protein sets from complete genomes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/5/705?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many analyses in modern biological research are based on comparisons between biological sequences, resulting in functional, evolutionary and structural inferences. When large numbers of sequences are compared, heuristics are often used resulting in a certain lack of accuracy. In order to improve and validate results of such comparisons, we have performed radical all-against-all comparisons of 4 million protein sequences belonging to the RefSeq database, using an implementation of the Smith&ndash;Waterman algorithm. This extremely intensive computational approach was made possible with the help of World Community Grid<SUP><SMALL><SMALL>TM</SMALL></SMALL></SUP>, through the Genome Comparison Project. The resulting database, ProteinWorldDB, which contains coordinates of pairwise protein alignments and their respective scores, is now made available. Users can download, compare and analyze the results, filtered by genomes, protein functions or clusters. ProteinWorldDB is integrated with annotations derived from Swiss-Prot, Pfam, KEGG, NCBI Taxonomy database and gene ontology. The database is a unique and valuable asset, representing a major effort to create a reliable and consistent dataset of cross-comparisons of the whole protein content encoded in hundreds of completely sequenced genomes using a rigorous dynamic programming approach.</p>
<p><b>Availability:</b> The database can be accessed through <inter-ref locator="http://proteinworlddb.org" locator-type="url">http://proteinworlddb.org</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="otto@fiocruz.br" locator-type="email">otto@fiocruz.br</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Otto, T. D., Catanho, M., Tristao, C., Bezerra, M., Fernandes, R. M., Elias, G. S., Scaglia, A. C., Bovermann, B., Berstis, V., Lifschitz, S., de Miranda, A. B., Degrave, W.]]></dc:creator>
<dc:date>Wed, 24 Feb 2010 04:48:52 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btq011</dc:identifier>
<dc:title><![CDATA[ProteinWorldDB: querying radical pairwise alignments among protein sets from complete genomes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>5</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>707</prism:endingPage>
<prism:publicationDate>2010-03-01</prism:publicationDate>
<prism:startingPage>705</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/445?rss=1">
<title><![CDATA[Bioinformatics challenges for genome-wide association studies]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/445?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The sequencing of the human genome has made it possible to identify an informative set of &gt;1 million single nucleotide polymorphisms (SNPs) across the genome that can be used to carry out genome-wide association studies (GWASs). The availability of massive amounts of GWAS data has necessitated the development of new biostatistical methods for quality control, imputation and analysis issues including multiple testing. This work has been successful and has enabled the discovery of new associations that have been replicated in multiple studies. However, it is now recognized that most SNPs discovered via GWAS have small effects on disease susceptibility and thus may not be suitable for improving health care through genetic testing. One likely explanation for the mixed results of GWAS is that the current biostatistical analysis paradigm is by design agnostic or unbiased in that it ignores all prior knowledge about disease pathobiology. Further, the linear modeling framework that is employed in GWAS often considers only one SNP at a time thus ignoring their genomic and environmental context. There is now a shift away from the biostatistical approach toward a more holistic approach that recognizes the complexity of the genotype&ndash;phenotype relationship that is characterized by significant heterogeneity and gene&ndash;gene and gene&ndash;environment interaction. We argue here that bioinformatics has an important role to play in addressing the complexity of the underlying genetic basis of common human diseases. The goal of this review is to identify and discuss those GWAS challenges that will require computational methods.</p>
<p><b>Contact:</b> <inter-ref locator="jason.h.moore@dartmouth.edu" locator-type="email">jason.h.moore@dartmouth.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Moore, J. H., Asselbergs, F. W., Williams, S. M.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp713</dc:identifier>
<dc:title><![CDATA[Bioinformatics challenges for genome-wide association studies]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>455</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>445</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/456?rss=1">
<title><![CDATA[ConceptGen: a gene set enrichment and gene set relation mapping tool]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/456?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The elucidation of biological concepts enriched with differentially expressed genes has become an integral part of the analysis and interpretation of genomic data. Of additional importance is the ability to explore <I>networks</I> of relationships among previously defined biological concepts from diverse information sources, and to explore results visually from multiple perspectives. Accomplishing these tasks requires a unified framework for agglomeration of data from various genomic resources, novel visualizations, and user functionality.</p>
<p><b>Results:</b> We have developed ConceptGen, a web-based gene set enrichment and gene set relation mapping tool that is streamlined and simple to use. ConceptGen offers over 20 000 concepts comprising 14 different types of biological knowledge, including data not currently available in any other gene set enrichment or gene set relation mapping tool. We demonstrate the functionalities of ConceptGen using gene expression data modeling TGF-beta-induced epithelial-mesenchymal transition and metabolomics data comparing metastatic versus localized prostate cancers.</p>
<p><b>Availability:</b> ConceptGen is part of the NIH's National Center for Integrative Biomedical Informatics (NCIBI) and is freely available at <inter-ref locator="http://conceptgen.ncibi.org" locator-type="url">http://conceptgen.ncibi.org</inter-ref>. For terms of use, visit <inter-ref locator="http://portal.ncibi.org/gateway/pdf/Terms%20of%20use-web.pdf" locator-type="url">http://portal.ncibi.org/gateway/pdf/Terms%20of%20use-web.pdf</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="conceptgen@umich.edu" locator-type="email">conceptgen@umich.edu</inter-ref>; <inter-ref locator="sartorma@umich.edu" locator-type="email">sartorma@umich.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp683/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Sartor, M. A., Mahavisno, V., Keshamouni, V. G., Cavalcoli, J., Wright, Z., Karnovsky, A., Kuick, R., Jagadish, H.V., Mirel, B., Weymouth, T., Athey, B., Omenn, G. S.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp683</dc:identifier>
<dc:title><![CDATA[ConceptGen: a gene set enrichment and gene set relation mapping tool]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>463</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>456</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/464?rss=1">
<title><![CDATA[CMDS: a population-based method for identifying recurrent DNA copy number aberrations in cancer from high-resolution data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/464?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> DNA copy number aberration (CNA) is a hallmark of genomic abnormality in tumor cells. Recurrent CNA (RCNA) occurs in multiple cancer samples across the same chromosomal region and has greater implication in tumorigenesis. Current commonly used methods for RCNA identification require CNA calling for individual samples before cross-sample analysis. This two-step strategy may result in a heavy computational burden, as well as a loss of the overall statistical power due to segmentation and discretization of individual sample's data. We propose a population-based approach for RCNA detection with no need of single-sample analysis, which is statistically powerful, computationally efficient and particularly suitable for high-resolution and large-population studies.</p>
<p><b>Results:</b> Our approach, correlation matrix diagonal segmentation (CMDS), identifies RCNAs based on a between-chromosomal-site correlation analysis. Directly using the raw intensity ratio data from all samples and adopting a diagonal transformation strategy, CMDS substantially reduces computational burden and can obtain results very quickly from large datasets. Our simulation indicates that the statistical power of CMDS is higher than that of single-sample CNA calling based two-step approaches. We applied CMDS to two real datasets of lung cancer and brain cancer from Affymetrix and Illumina array platforms, respectively, and successfully identified known regions of CNA associated with <I>EGFR</I>, <I>KRAS</I> and other important oncogenes. CMDS provides a fast, powerful and easily implemented tool for the RCNA analysis of large-scale data from cancer genomes.</p>
<p><b>Availability:</b> The R and C programs implementing our method are available at <inter-ref locator="https://dsgweb.wustl.edu/qunyuan/software/cmds" locator-type="url">https://dsgweb.wustl.edu/qunyuan/software/cmds</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="qunyuan@wustl.edu" locator-type="email">qunyuan@wustl.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp708/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Zhang, Q., Ding, L., Larson, D. E., Koboldt, D. C., McLellan, M. D., Chen, K., Shi, X., Kraja, A., Mardis, E. R., Wilson, R. K., Borecki, I. B., Province, M. A.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp708</dc:identifier>
<dc:title><![CDATA[CMDS: a population-based method for identifying recurrent DNA copy number aberrations in cancer from high-resolution data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>469</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>464</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/470?rss=1">
<title><![CDATA[A novel method for accurate one-dimensional protein structure prediction based on fragment matching]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/470?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The precise prediction of one-dimensional (1D) protein structure as represented by the protein secondary structure and 1D string of discrete state of dihedral angles (i.e. Shape Strings) is a prerequisite for the successful prediction of three-dimensional (3D) structure as well as protein&ndash;protein interaction. We have developed a novel 1D structure prediction method, called Frag1D, based on a straightforward fragment matching algorithm and demonstrated its success in the prediction of three sets of 1D structural alphabets, i.e. the classical three-state secondary structure, three- and eight-state Shape Strings.</p>
<p><b>Results:</b> By exploiting the vast protein sequence and protein structure data available, we have brought secondary-structure prediction closer to the expected theoretical limit. When tested by a leave-one-out cross validation on a non-redundant set of PDB cutting at 30% sequence identity containing 5860 protein chains, the overall per-residue accuracy for secondary-structure prediction, i.e. Q3 is 82.9%. The overall per-residue accuracy for three- and eight-state Shape Strings are 85.1 and 71.5%, respectively. We have also benchmarked our program with the latest version of PSIPRED for secondary structure prediction and our program predicted 0.3% better in Q3 when tested on 2241 chains with the same training set. For Shape Strings, we compared our method with a recently published method with the same dataset and definition as used by that method. Our program predicted at 2.2% better in accuracy for three-state Shape Strings. By quantitatively investigating the effect of data base size on 1D structure prediction we show that the accuracy increases by ~1% with every doubling of the database size.</p>
<p><b>Availability:</b> The program is available for download at <inter-ref locator="http://www.fos.su.se/~nanjiang/Frag1D/download" locator-type="url">http://www.fos.su.se/~nanjiang/Frag1D/download</inter-ref>. <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp679/DC1" locator-type="url">Supplementary data</inter-ref> are available at <inter-ref locator="http://www.fos.su.se/~nanjiang/Frag1D/supplement/suppl.html" locator-type="url">http://www.fos.su.se/~nanjiang/Frag1D/supplement/suppl.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="svenh@struc.su.se" locator-type="email">svenh@struc.su.se</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp679/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Zhou, T., Shu, N., Hovmoller, S.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp679</dc:identifier>
<dc:title><![CDATA[A novel method for accurate one-dimensional protein structure prediction based on fragment matching]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>477</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>470</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/478?rss=1">
<title><![CDATA[Limited contribution of stem-loop potential to symmetry of single-stranded genomic DNA]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/478?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The phenomenon of strand symmetry, which may provide clues to genome evolution, exists in all prokaryotic and eukaryotic genomes studied. Several possible mechanisms for its origins have been proposed, including: no strand biases for mutation and selection, strand inversion and selection of stem-loop structures. However, the relative contributions of these mechanisms to strand symmetry are not clear. In this article, we studied specifically the role of stem-loop potential of single-stranded DNA in strand symmetry.</p>
<p><b>Results:</b> We analyzed the complete genomes of 90 prokaryotes. We found that most oligonucleotides (pentanucleotides and higher) do not have a reverse complement in close proximity in the genomic sequences. Combined with further analysis, we conclude that the contribution of the widespread stem-loop potential of single-stranded genomic DNA to the formation and maintenance of strand symmetry would be very limited, at least for higher-order oligonucleotides. Therefore, other possible causes for strand symmetry must be taken into account to a deeper degree.</p>
<p><b>Contacts:</b> <inter-ref locator="lsszsh@mail.sysu.edu.cn" locator-type="email">lsszsh@mail.sysu.edu.cn</inter-ref>; <inter-ref locator="molevol@mail.sysu.edu.cn" locator-type="email">molevol@mail.sysu.edu.cn</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp703/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Zhang, S.-H., Huang, Y.-Z.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp703</dc:identifier>
<dc:title><![CDATA[Limited contribution of stem-loop potential to symmetry of single-stranded genomic DNA]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>485</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>478</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/486?rss=1">
<title><![CDATA[Mixture-model based estimation of gene expression variance from public database improves identification of differentially expressed genes in small sized microarray data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/486?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The small number of samples in many microarray experiments is a challenge for the correct identification of differentially expressed genes (DEGs) by conventional statistical means. Information from public microarray databases can help more efficient identification of DEGs. To model various experimental conditions of a public microarray database, we applied Gaussian mixture model and extracted bi- or tri-modal distributions of gene expression. Prior variance of Baldi's Bayesian framework was estimated for the analysis of the small sample-sized datasets.</p>
<p><b>Results:</b> First, we estimated the prior variance of a gene expression by pooling variances obtained from mixture modeling of large samples in the public microarray database. Then, using the prior variance, we identified DEGs in small sample-sized test datasets using the Baldi's framework. For benchmark study, we generated test datasets having several samples from relatively large datasets. Our proposed method outperformed other benchmark methods in terms of detecting gold-standard DEGs from the test datasets. The results may be a challenging evidence for usage of public microarray databases in microarray data analysis.</p>
<p><b>Availability:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp685/DC1" locator-type="url">Supplementary data</inter-ref> are available at <inter-ref locator="http://www.snubi.org/publication/MixBayes" locator-type="url">http://www.snubi.org/publication/MixBayes</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="juhan@snu.ac.kr" locator-type="email">juhan@snu.ac.kr</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kim, M., Cho, S. B., Kim, J. H.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp685</dc:identifier>
<dc:title><![CDATA[Mixture-model based estimation of gene expression variance from public database improves identification of differentially expressed genes in small sized microarray data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>492</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>486</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/493?rss=1">
<title><![CDATA[RNA-Seq gene expression estimation with read mapping uncertainty]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/493?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> RNA-Seq is a promising new technology for accurately measuring gene expression levels. Expression estimation with RNA-Seq requires the mapping of relatively short sequencing reads to a reference genome or transcript set. Because reads are generally shorter than transcripts from which they are derived, a single read may map to multiple genes and isoforms, complicating expression analyses. Previous computational methods either discard reads that map to multiple locations or allocate them to genes heuristically.</p>
<p><b>Results:</b> We present a generative statistical model and associated inference methods that handle read mapping uncertainty in a principled manner. Through simulations parameterized by real RNA-Seq data, we show that our method is more accurate than previous methods. Our improved accuracy is the result of handling read mapping uncertainty with a statistical model and the estimation of gene expression levels as the sum of isoform expression levels. Unlike previous methods, our method is capable of modeling non-uniform read distributions. Simulations with our method indicate that a read length of 20&ndash;25 bases is optimal for gene-level expression estimation from mouse and maize RNA-Seq data when sequencing throughput is fixed.</p>
<p><b>Availability:</b> An initial C++ implementation of our method that was used for the results presented in this article is available at <inter-ref locator="http://deweylab.biostat.wisc.edu/rsem" locator-type="url">http://deweylab.biostat.wisc.edu/rsem</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="cdewey@biostat.wisc.edu" locator-type="email">cdewey@biostat.wisc.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp692/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Li, B., Ruotti, V., Stewart, R. M., Thomson, J. A., Dewey, C. N.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp692</dc:identifier>
<dc:title><![CDATA[RNA-Seq gene expression estimation with read mapping uncertainty]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>500</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>493</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/501?rss=1">
<title><![CDATA[Penalized mixtures of factor analyzers with application to clustering high-dimensional microarray data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/501?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Model-based clustering has been widely used, e.g. in microarray data analysis. Since for high-dimensional data variable selection is necessary, several penalized model-based clustering methods have been proposed to realize simultaneous variable selection and clustering. However, the existing methods all assume that the variables are independent with the use of diagonal covariance matrices.</p>
<p><b>Results:</b> To model non-independence of variables (e.g. correlated gene expressions) while alleviating the problem with the large number of unknown parameters associated with a general non-diagonal covariance matrix, we generalize the mixture of factor analyzers to that with penalization, which, among others, can effectively realize variable selection. We use simulated data and real microarray data to illustrate the utility and advantages of the proposed method over several existing ones.</p>
<p><b>Contact:</b> <inter-ref locator="weip@biostat.umn.edu" locator-type="email">weip@biostat.umn.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp707/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Xie, B., Pan, W., Shen, X.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp707</dc:identifier>
<dc:title><![CDATA[Penalized mixtures of factor analyzers with application to clustering high-dimensional microarray data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>508</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>501</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/509?rss=1">
<title><![CDATA[Functional embedding for the classification of gene expression profiles]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/509?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Low sample size <I>n</I> high-dimensional large <I>p</I> data with <I>n</I>&lt;&lt;<I>p</I> are commonly encountered in genomics and statistical genetics. Ill-conditioning of the variance-covariance matrix for such data renders the traditional multivariate data analytical approaches unattractive. On the other side, functional data analysis (FDA) approaches are designed for infinite-dimensional data and therefore may have potential for the analysis of large <I>p</I> data. We herein propose a functional embedding (FEM) technique, which exploits the interface between multivariate and functional data, aiming at borrowing strength across the sample through FDA techniques in order to resolve the difficulties caused by the high dimension <I>p</I>.</p>
<p><b>Results:</b> Using pairwise dissimilarities among predictor variables, one obtains a univariate configuration of these covariates. This is interpreted as variable ordination that defines the domain of a suitable function space, thus leading to the FEM of the high-dimensional data. The embedding may then be followed by functional logistic regression for the classification of high-dimensional multivariate data as an example for downstream analysis. The resulting functional classification is evaluated on several published gene expression array datasets and a mass spectrometric dataset, and is shown to compare favorably with various methods that have been employed previously for the classification of these high-dimensional gene expression profiles.</p>
<p><b>Availability:</b> The implementation of FEM and Classification via Functional Embedding (CFEM) as described in this article was done with the PACE package written in Matlab. The latest version of PACE is publicly accessible at <inter-ref locator="http://anson.ucdavis.edu/~mueller/data/programs.html" locator-type="url">http://anson.ucdavis.edu/~mueller/data/programs.html</inter-ref>. An example MATLAB script for FEM is available at <inter-ref locator="http://www.lehigh.edu/~psw205/psw205.html" locator-type="url">http://www.lehigh.edu/~psw205/psw205.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="psw205@lehigh.edu" locator-type="email">psw205@lehigh.edu</inter-ref>; <inter-ref locator="mueller@wald.ucdavis.edu" locator-type="email">mueller@wald.ucdavis.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Wu, P.-S., Muller, H.-G.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp711</dc:identifier>
<dc:title><![CDATA[Functional embedding for the classification of gene expression profiles]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>517</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>509</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/518?rss=1">
<title><![CDATA[Power to detect selective allelic amplification in genome-wide scans of tumor data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/518?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Somatic amplification of particular genomic regions and selection of cellular lineages with such amplifications drives tumor development. However, pinpointing genes under such selection has been difficult due to the large span of these regions. Our recently-developed method, the amplification distortion test (ADT), identifies specific nucleotide alleles and haplotypes that confer better survival for tumor cells when somatically amplified. In this work, we focus on evaluating ADT's power to detect such causal variants across a variety of tumor dataset scenarios.</p>
<p><b>Results:</b> Towards this end, we generated multiple parameter-based, synthetic datasets&mdash;derived from real data&mdash;that contain somatic copy number aberrations (CNAs) of various lengths and frequencies over germline single nucleotide polymorphisms (SNPs) genome-wide. Gold-standard causal sub-regions were assigned within these CNAs, followed by an assessment of ADT's ability to detect these sub-regions. Results indicate that ADT possesses high sensitivity and specificity in large sample sizes across most parameter cases, including those that more closely reflect existing SNP and CNA cancer data.</p>
<p><b>Availability:</b> ADT is implemented in the Java software HADiT and can be downloaded through the SVN repository (via Develop-&gt; Code-&gt;SVN Browse) at: <inter-ref locator="http://sourceforge.net/projects/hadit/" locator-type="url">http://sourceforge.net/projects/hadit/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="ninad.dewal@dbmi.columbia.edu" locator-type="email">ninad.dewal@dbmi.columbia.edu</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp694/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dewal, N., Freedman, M. L., LaFramboise, T., Pe'er, I.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:58 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp694</dc:identifier>
<dc:title><![CDATA[Power to detect selective allelic amplification in genome-wide scans of tumor data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>528</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>518</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/529?rss=1">
<title><![CDATA[Pandora, a PAthway and Network DiscOveRy Approach based on common biological evidence]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/529?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many biological phenomena involve extensive interactions between many of the biological pathways present in cells. However, extraction of all the inherent biological pathways remains a major challenge in systems biology. With the advent of high-throughput functional genomic techniques, it is now possible to infer biological pathways and pathway organization in a systematic way by integrating disparate biological information.</p>
<p><b>Results:</b> Here, we propose a novel integrated approach that uses network topology to predict biological pathways. We integrated four types of biological evidence (protein&ndash;protein interaction, genetic interaction, domain&ndash;domain interaction and semantic similarity of Gene Ontology terms) to generate a functionally associated network. This network was then used to develop a new pathway finding algorithm to predict biological pathways in yeast. Our approach discovered 195 biological pathways and 31 functionally redundant pathway pairs in yeast. By comparing our identified pathways to three public pathway databases (KEGG, BioCyc and Reactome), we observed that our approach achieves a maximum positive predictive value of 12.8% and improves on other predictive approaches. This study allows us to reconstruct biological pathways and delineates cellular machinery in a systematic view.</p>
<p><b>Availability:</b> The method has been implemented in Perl and is available for downloading from <inter-ref locator="http://www.oicr.on.ca/research/ouellette/pandora" locator-type="url">http://www.oicr.on.ca/research/ouellette/pandora</inter-ref>. It is distributed under the terms of GPL (<inter-ref locator="http://opensource.org/licenses/gpl-2.0.php" locator-type="url">http://opensource.org/licenses/gpl-2.0.php</inter-ref>)</p>
<p><b>Contact:</b> <inter-ref locator="francis@oicr.on.ca" locator-type="email">francis@oicr.on.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp701/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Zhang, K. X., Ouellette, B. F. F.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp701</dc:identifier>
<dc:title><![CDATA[Pandora, a PAthway and Network DiscOveRy Approach based on common biological evidence]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>535</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>529</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/536?rss=1">
<title><![CDATA[Predicting metabolic engineering knockout strategies for chemical production: accounting for competing pathways]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/536?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Computational modeling in metabolic engineering involves the prediction of genetic manipulations that would lead to optimized microbial strains, maximizing the production rate of chemicals of interest. Various computational methods are based on constraint-based modeling, which makes it possible to anticipate the effect of genetic manipulations on cellular metabolism considering a genome-scale metabolic network. However, current methods do not account for the presence of competing pathways in a metabolic network that may divert metabolic flux away from producing a required chemical, resulting in lower (or even zero) chemical production rates in reality&mdash;making these methods somewhat over-optimistic.</p>
<p><b>Results:</b> In this article, we describe a novel constraint-based method called RobustKnock that predicts gene deletion strategies that lead to the over-production of chemicals of interest, by accounting for the presence of competing pathways in the network. We describe results of applying RobustKnock to <I>Escherichia coli's</I> metabolic network towards the production of various chemicals, demonstrating its ability to provide more robust predictions than those obtained via current state-of-the-art methods.</p>
<p><b>Availability:</b> An implementation of RobustKnock is available via <inter-ref locator="http://www.cs.technion.ac.il/~tomersh/tools/" locator-type="url">http://www.cs.technion.ac.il/~tomersh/tools/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="naamat@cs.technion.ac.il" locator-type="email">naamat@cs.technion.ac.il</inter-ref>; <inter-ref locator="tomersh@cs.technion.ac.il" locator-type="email">tomersh@cs.technion.ac.il</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Tepper, N., Shlomi, T.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp704</dc:identifier>
<dc:title><![CDATA[Predicting metabolic engineering knockout strategies for chemical production: accounting for competing pathways]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>543</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>536</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/544?rss=1">
<title><![CDATA[Bisque: a platform for bioimage analysis and management]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/544?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Advances in the field of microscopy have brought about the need for better image management and analysis solutions. Novel imaging techniques have created vast stores of images and metadata that are difficult to organize, search, process and analyze. These tasks are further complicated by conflicting and proprietary image and metadata formats, that impede analyzing and sharing of images and any associated data. These obstacles have resulted in research resources being locked away in digital media and file cabinets. Current image management systems do not address the pressing needs of researchers who must quantify image data on a regular basis.</p>
<p><b>Results:</b> We present Bisque, a web-based platform specifically designed to provide researchers with organizational and quantitative analysis tools for 5D image data. Users can extend Bisque with both data model and analysis extensions in order to adapt the system to local needs. Bisque's extensibility stems from two core concepts: flexible metadata facility and an open web-based architecture. Together these empower researchers to create, develop and <I>share</I> novel bioimage analyses. Several case studies using Bisque with specific applications are presented as an indication of how users can expect to extend Bisque for their own purposes.</p>
<p><b>Availability:</b> Bisque is web based, cross-platform and open source. The system is also available as software-as-a-service through the Center of Bioimage Informatics at UCSB.</p>
<p><b>Contact:</b> <inter-ref locator="kris@cs.ucsb.edu" locator-type="email">kris@cs.ucsb.edu</inter-ref>; <inter-ref locator="fedorov@ece.ucsb.edu" locator-type="email">fedorov@ece.ucsb.edu</inter-ref></p>
<p><b>Supplementary information:</b> The <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp699/DC1" locator-type="url">supplementary material</inter-ref> is available at <I>Bioinformatics</I> online, including screen shots, metadata XML descriptions and implementation details.</p>
]]></description>
<dc:creator><![CDATA[Kvilekval, K., Fedorov, D., Obara, B., Singh, A., Manjunath, B. S.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp699</dc:identifier>
<dc:title><![CDATA[Bisque: a platform for bioimage analysis and management]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>552</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>544</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/553?rss=1">
<title><![CDATA[jORCA: easily integrating bioinformatics Web Services]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/553?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Web services technology is becoming the option of choice to deploy bioinformatics tools that are universally available. One of the major strengths of this approach is that it supports machine-to-machine interoperability over a network. However, a weakness of this approach is that various Web Services differ in their definition and invocation protocols, as well as their communication and data formats&mdash;and this presents a barrier to service interoperability.</p>
<p><b>Results:</b> jORCA is a desktop client aimed at facilitating seamless integration of Web Services. It does so by making a uniform representation of the different web resources, supporting scalable service discovery, and automatic composition of workflows. Usability is at the top of the jORCA agenda; thus it is a highly customizable and extensible application that accommodates a broad range of user skills featuring double-click invocation of services in conjunction with advanced execution-control, on the fly data standardization, extensibility of viewer plug-ins, drag-and-drop editing capabilities, plus a file-based browsing style and organization of favourite tools. The integration of bioinformatics Web Services is made easier to support a wider range of users.</p>
<p><b>Availability and Implementation:</b> jORCA binaries and extended documentation are freely available at <inter-ref locator="http://www.bitlab-es.com/jorca" locator-type="url">http://www.bitlab-es.com/jorca</inter-ref> under the Creative Commons Attribution-No Derivative Works 2.5 Spain License and jORCA source code (implemented in Java) is available upon request (GPL v3 license). jORCA has been tested under UNIX (Fedora 11, open SUSE 11 and Ubuntu 8.1), MS-Windows and Mac OS 10.5 operating systems. Java VM version 1.6.0 or later is required.</p>
<p><b>Contact:</b> <inter-ref locator="ots@uma.es" locator-type="email">ots@uma.es</inter-ref> or <inter-ref locator="vickymr@uma.es" locator-type="email">vickymr@uma.es</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp709/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Martin-Requena, V., Rios, J., Garcia, M., Ramirez, S., Trelles, O.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp709</dc:identifier>
<dc:title><![CDATA[jORCA: easily integrating bioinformatics Web Services]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>559</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>553</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/560?rss=1">
<title><![CDATA[GWAS Analyzer: integrating genotype, phenotype and public annotation data for genome-wide association study analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/560?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Genome-wide association studies are beginning to elucidate how our genetic differences contribute to susceptibility and severity of disease. While computational tools have previously been developed to support various aspects of genome-wide association studies, there is currently a need for informatics solutions that facilitate the integration of data from multiple sources.</p>
<p><b>Results:</b> Here we present GWAS Analyzer, a database driven web-based tool that integrates genotype and phenotype data, association analysis results and genomic annotations from multiple public resources. GWAS Analyzer contains features for browsing these interrelated data, exploring phenotypic values by family or genotype, and filtering association results based on multiple criteria. The utility of the tool has been demonstrated by a genome-wide association study of human <I>in vitro</I> susceptibility to bacterial infection. GWAS Analyzer facilitated management of large sets of phenotype and genotype data, analysis of phenotypic variation and heritability, and most importantly, generation of a refined set of candidate single nucleotide polymorphisms (SNPs). The tool revealed a SNP that was experimentally validated to be associated with increased cell death among <I>Salmonella</I> infected HapMap cell lines.</p>
<p><b>Availability:</b> <inter-ref locator="http://www.nwrce.org/gwas-analyzer" locator-type="url">http://www.nwrce.org/gwas-analyzer</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="mbrittna@u.washington.edu" locator-type="email">mbrittna@u.washington.edu</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp714/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Fong, C., Ko, D. C., Wasnick, M., Radey, M., Miller, S. I., Brittnacher, M.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp714</dc:identifier>
<dc:title><![CDATA[GWAS Analyzer: integrating genotype, phenotype and public annotation data for genome-wide association study analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>564</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>560</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/565?rss=1">
<title><![CDATA[Copy number variant detection in inbred strains from short read sequence data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/565?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We have developed an algorithm to detect copy number variants (CNVs) in homozygous organisms, such as inbred laboratory strains of mice, from short read sequence data. Our novel approach exploits the fact that inbred mice are homozygous at virtually every position in the genome to detect CNVs using a hidden Markov model (HMM). This HMM uses both the density of sequence reads mapped to the genome, and the rate of apparent heterozygous single nucleotide polymorphisms, to determine genomic copy number. We tested our algorithm on short read sequence data generated from re-sequencing chromosome 17 of the mouse strains A/J and CAST/EiJ with the Illumina platform. In total, we identified 118 copy number variants (43 for A/J and 75 for CAST/EiJ). We investigated the performance of our algorithm through comparison to CNVs previously identified by array-comparative genomic hybridization (array CGH). We performed quantitative-PCR validation on a subset of the calls that differed from the array CGH data sets.</p>
<p><b>Availability:</b> The software described in this manuscript, named cnD for copy number detector, is free and released under the GPL. The program is implemented in the D programming language using the Tango library. Source code and pre-compiled binaries are available at <inter-ref locator="http://www.sanger.ac.uk/resources/software/cnd.html" locator-type="url">http://www.sanger.ac.uk/resources/software/cnd.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="rd@sanger.ac.uk" locator-type="email">rd@sanger.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp693/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Simpson, J. T., McIntyre, R. E., Adams, D. J., Durbin, R.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp693</dc:identifier>
<dc:title><![CDATA[Copy number variant detection in inbred strains from short read sequence data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>567</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>565</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/568?rss=1">
<title><![CDATA[MARTA: a suite of Java-based tools for assigning taxonomic status to DNA sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/568?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> We have created a suite of Java-based software to better provide taxonomic assignments to DNA sequences. We anticipate that the program will be useful for protistologists, virologists, mycologists and other microbial ecologists. The program relies on NCBI utilities including the BLAST software and Taxonomy database and is easily manipulated at the command-line to specify a BLAST candidate's query-coverage or percent identity requirements; other options include the ability to set minimal consensus requirements (%) for each of the eight major taxonomic ranks (Domain, Kingdom, Phylum, ...) and whether to consider lower scoring candidates when the top-hit lacks taxonomic classification.</p>
<p><b>Availability:</b> <inter-ref locator="http://bergelson.uchicago.edu/software/marta" locator-type="url">http://bergelson.uchicago.edu/software/marta</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="mhorton@uchicago.edu" locator-type="email">mhorton@uchicago.edu</inter-ref>; <inter-ref locator="jbergels@midway.uchicago.edu" locator-type="email">jbergels@midway.uchicago.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp682/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Horton, M., Bodenhausen, N., Bergelson, J.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp682</dc:identifier>
<dc:title><![CDATA[MARTA: a suite of Java-based tools for assigning taxonomic status to DNA sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>569</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>568</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/570?rss=1">
<title><![CDATA[r2cat: synteny plots and comparative assembly]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/570?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Recent parallel pyrosequencing methods and the increasing number of finished genomes encourage the sequencing and investigation of closely related strains. Although the sequencing itself becomes easier and cheaper with each machine generation, the finishing of the genomes remains difficult. Instead of the desired whole genomic sequence, a set of contigs is the result of the assembly. In this applications note, we present the tool <I>r2cat</I> (related reference contig arrangement tool) that helps in the task of comparative assembly and also provides an interactive visualization for synteny inspection.</p>
<p><b>Availability:</b> <inter-ref locator="http://bibiserv.techfak.uni-bielefeld.de/r2cat" locator-type="url">http://bibiserv.techfak.uni-bielefeld.de/r2cat</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="peter.husemann@cebitec.uni-bielefeld.de" locator-type="email">peter.husemann@cebitec.uni-bielefeld.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Husemann, P., Stoye, J.]]></dc:creator>
<dc:date>Thu, 11 Feb 2010 21:42:59 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp690</dc:identifier>
<dc:title><![CDATA[r2cat: synteny plots and comparative assembly]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>571</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>570</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/572?rss=1">
<title><![CDATA[BRAT: bisulfite-treated reads analysis tool]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/572?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We present a new, accurate and efficient tool for mapping short reads obtained from the Illumina Genome Analyzer following sodium bisulfite conversion. Our tool, BRAT, supports single and paired-end reads and handles input files containing reads and mates of different lengths. BRAT is faster, maps more unique paired-end reads and has higher accuracy than existing programs. The software package includes tools to end-trim low-quality bases of the reads and to report nucleotide counts for mapped reads on the reference genome.</p>
<p><b>Availability:</b> The source code is freely available for download at <inter-ref locator="http://compbio.cs.ucr.edu/brat/" locator-type="url">http://compbio.cs.ucr.edu/brat/</inter-ref> and is distributed as Open Source software under the GPLv3.0.</p>
<p><b>Contact:</b> <inter-ref locator="elenah@cs.ucr.edu" locator-type="email">elenah@cs.ucr.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Harris, E. Y., Ponts, N., Levchuk, A., Roch, K. L., Lonardi, S.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp706</dc:identifier>
<dc:title><![CDATA[BRAT: bisulfite-treated reads analysis tool]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>573</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>572</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/574?rss=1">
<title><![CDATA[COPS Benchmark: interactive analysis of database search methods]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/574?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The performance of sequence database search methods is usually judged by receiver operating characteristic (ROC) analysis. The proper interpretation of the results obtained and a fair comparison across different methods critically depends on the properties of the data set used for such an analysis; in particular, each query must have the same number of true positives and true negatives. Here, we present a novel web service based on a dataset specifically designed for ROC analysis and the investigation of alignment quality. The data set is derived from a quantitative classification of protein structures (COPS), while analysis and results are presented through an intuitive web interface. The analysis provides details such as false positives per query, and visualization of the structural similarity between query and targets. Most importantly, results obtained for a specific alignment method are immediately related to those obtained for several popular standard sequence alignment methods.</p>
<p><b>Availability:</b> The COPS-Benchmark service is available at <inter-ref locator="http://benchmark.services.came.sbg.ac.at" locator-type="url">http://benchmark.services.came.sbg.ac.at</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="publications@came.sbg.ac.at" locator-type="email">publications@came.sbg.ac.at</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Frank, K., Gruber, M., Sippl, M. J.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp712</dc:identifier>
<dc:title><![CDATA[COPS Benchmark: interactive analysis of database search methods]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>575</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>574</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/576?rss=1">
<title><![CDATA[emPAI Calc--for the estimation of protein abundance from large-scale identification data by liquid chromatography-tandem mass spectrometry]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/576?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> emPAI Calc is an open-source web application for the estimation of protein abundance. It uses the correlation between the number of identified peptides and protein abundance in mass spectrometry-based proteomic experiments. The program is the first implementation of our previously reported emPAI algorithm; it calculates the emPAI from the protein identification results obtained by database search engines such as Mascot.<SUP><SMALL><SMALL>TM</SMALL></SMALL></SUP></p>
<p><b>Availability:</b> <inter-ref locator="http://empai.iab.keio.ac.jp/" locator-type="url">http://empai.iab.keio.ac.jp/</inter-ref>; <inter-ref locator="http://empai.iab.keio.ac.jp/supplement.php" locator-type="url">http://empai.iab.keio.ac.jp/supplement.php</inter-ref> Source codes are available under Mozilla Public License.</p>
<p><b>Contact:</b> <inter-ref locator="y-ishi@ttck.keio.ac.jp" locator-type="email">y-ishi@ttck.keio.ac.jp</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp700/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Shinoda, K., Tomita, M., Ishihama, Y.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp700</dc:identifier>
<dc:title><![CDATA[emPAI Calc--for the estimation of protein abundance from large-scale identification data by liquid chromatography-tandem mass spectrometry]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>577</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>576</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/578?rss=1">
<title><![CDATA[Visualizing SNP statistics in the context of linkage disequilibrium using LD-Plus]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/578?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Often in human genetic analysis, multiple tables of single nucleotide polymorphism (SNP) statistics are shown alongside a Haploview style correlation plot. Readers are then asked to make inferences that incorporate knowledge across these multiple sets of results. To better facilitate a collective understanding of all available data, we developed a Ruby-based web application, LD-Plus, to generate figures that simultaneously display physical location of SNPs, binary SNP attributes (such as coding/non-coding or presence on genotyping platforms), common haplotypes and their frequencies and continuously scaled values (such as <I>F</I><SUB><I>st</I></SUB>, minor allele frequency, genotyping efficiency or <I>P</I>-values), all in the context of the D' and <I>r</I><sup>2</sup> linkage disequilibrium structures. Combining these results into one comprehensive figure reduces dereferencing between figures and tables, and can provide unique insights into genetic features that are not clearly seen when results are partitioned across multiple figures and tables.</p>
<p><b>Availability:</b> LD-Plus is freely available for non-commercial research institutions. For full details see <inter-ref locator="http://chgr.mc.vanderbilt.edu/ritchielab/ldplus" locator-type="url">http://chgr.mc.vanderbilt.edu/ritchielab/ldplus</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="ritchie@chgr.mc.vanderbilt.edu" locator-type="email">ritchie@chgr.mc.vanderbilt.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bush, W. S., Dudek, S. M., Ritchie, M. D.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp678</dc:identifier>
<dc:title><![CDATA[Visualizing SNP statistics in the context of linkage disequilibrium using LD-Plus]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>579</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>578</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/580?rss=1">
<title><![CDATA[GWAF: an R package for genome-wide association analyses with family data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/580?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> GWAF, Genome-Wide Association analyses with Family, is an R package designed for GWAF. It implements association tests between a batch of genotyped or imputed single nucleotide polymorphisms (SNPs) and a binary or continuous trait with user specified genetic model, and generates informative results from the analyses. In addition, GWAF provides functions to visualize results. We evaluated GWAF using a simulated continuous trait and a binary trait dichotomized from the simulated continuous trait with real genotype data from the Framingham Heart Study's SNP Health Association Resource project.</p>
<p><b>Availability:</b> <inter-ref locator="http://cran.r-project.org/web/packages/GWAF/" locator-type="url">http://cran.r-project.org/web/packages/GWAF/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="qyang@bu.edu" locator-type="email">qyang@bu.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp710/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chen, M.-H., Yang, Q.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp710</dc:identifier>
<dc:title><![CDATA[GWAF: an R package for genome-wide association analyses with family data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>581</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>580</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/582?rss=1">
<title><![CDATA[WebPARE: web-computing for inferring genetic or transcriptional interactions]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/582?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Inferring genetic or transcriptional interactions, when done successfully, may provide insights into biological processes or biochemical pathways of interest. Unfortunately, most computational algorithms require a certain level of programming expertise. To provide a simple web interface for users to infer interactions from time course gene expression data, we present WebPARE, which is based on the <unl>pa</unl>ttern <unl>re</unl>cognition algorithm (PARE). For expression data, in which each type of interaction (e.g. activator target) and the corresponding paired gene expression pattern are significantly associated, PARE uses a non-linear score to classify gene pairs of interest into a few subclasses of various time lags. In each subclass, PARE learns the parameters in the decision score using known interactions from biological experiments or published literature. Subsequently, the trained algorithm predicts interactions of a similar nature. Previously, PARE was shown to infer two sets of interactions in yeast successfully. Moreover, several predicted genetic interactions coincided with existing pathways; this indicates the potential of PARE in predicting partial pathway components. Given a list of gene pairs or genes of interest and expression data, WebPARE invokes PARE and outputs predicted interactions and their networks in directed graphs.</p>
<p><b>Availability:</b> A web-computing service WebPARE is publicly available at: <inter-ref locator="http://www.stat.sinica.edu.tw/WebPARE" locator-type="url">http://www.stat.sinica.edu.tw/WebPARE</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="gshieh@stat.sinica.edu.tw" locator-type="email">gshieh@stat.sinica.edu.tw</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp684/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chuang, C.-L., Wu, J.-H., Cheng, C.-S., Shieh, G. S.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp684</dc:identifier>
<dc:title><![CDATA[WebPARE: web-computing for inferring genetic or transcriptional interactions]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>584</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>582</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/585?rss=1">
<title><![CDATA[GonadSAGE: a comprehensive SAGE database for transcript discovery on male embryonic gonad development]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/585?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Serial analysis of gene expression (SAGE) provides an alternative, with additional advantages, to microarray gene expression studies. GonadSAGE is the first publicly available web-based SAGE database on male gonad development that covers six male mouse embryonic gonad stages, including E10.5, E11.5, E12.5, E13.5, E15.5 and E17.5. The sequence coverage of each SAGE library is beyond 150K, &lsquo;which is the most extensive sequence-based male gonadal transcriptome to date&rsquo;. An interactive web interface with customizable parameters is provided for analyzing male gonad transcriptome information. Furthermore, the data can be visualized and analyzed with the other genomic features in the UCSC genome browser. It represents an integrated platform that leads to a better understanding of male gonad development, and allows discovery of related novel targets and regulatory pathways.</p>
<p><b>Availability:</b> GonadSAGE is at <inter-ref locator="http://gonadsage.nichd.nih.gov" locator-type="url">http://gonadsage.nichd.nih.gov</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="leetl@mail.nih.gov" locator-type="email">leetl@mail.nih.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Lee, T.-L., Li, Y., Cheung, H.-H., Claus, J., Singh, S., Sastry, C., Rennert, O. M., Lau, Y.-F. C., Chan, W.-Y.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp695</dc:identifier>
<dc:title><![CDATA[GonadSAGE: a comprehensive SAGE database for transcript discovery on male embryonic gonad development]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>586</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>585</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/587?rss=1">
<title><![CDATA[DCDB: Drug combination database]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/26/4/587?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Rapid advances in pharmaceutical sciences have brought ever-increasing interests in combined therapies for better clinical efficacy and safety, especially in cases of complicated and refractory diseases. Innovative experimental technologies and theoretical frameworks are being actively developed for multicomponent drug research. In this work, we present the Drug Combination Database, with aims to facilitate analyses of known drug combinations, to summarize patterns of beneficial drug interactions, and to provide a basis for theoretical modeling and simulation of such drug interactions. Its current version (1.0) collected 499 approved or investigational drug combinations, including 40 unsuccessful drug combinations, involving 485 individual drugs, from &gt;6000 references.</p>
<p><b>Availability:</b> <inter-ref locator="http://www.cls.zju.edu.cn/dcdb/" locator-type="url">http://www.cls.zju.edu.cn/dcdb/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="xinchen@zju.edu.cn" locator-type="email">xinchen@zju.edu.cn</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp697/DC1" locator-type="url">Supplementary data</inter-ref> are available at the database website and <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Liu, Y., Hu, B., Fu, C., Chen, X.]]></dc:creator>
<dc:date>2010-02-11T21:42:59-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp697</dc:identifier>
<dc:title><![CDATA[DCDB: Drug combination database]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>4</prism:number>
<prism:volume>26</prism:volume>
<prism:endingPage>588</prism:endingPage>
<prism:publicationDate>2010-02-15</prism:publicationDate>
<prism:startingPage>587</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

</rdf:RDF>