<?xml version="1.0" encoding="ISO-8859-1"?>

<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns="http://purl.org/rss/1.0/"
 xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
 xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
 xmlns:admin="http://webns.net/mvcb/"
>

<channel rdf:about="http://bioinformatics.oxfordjournals.org">
<title>Bioinformatics - recent issues</title>
<link>http://bioinformatics.oxfordjournals.org</link>
<description>Bioinformatics - RSS feed of recent issues (covers the latest 3 issues, including the current issue) </description>
<prism:eIssn>1460-2059</prism:eIssn>
<prism:publicationName>Bioinformatics</prism:publicationName>
<prism:issn>1367-4803</prism:issn>
<items>
 <rdf:Seq>
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3049?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3056?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3060?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3064?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3071?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3077?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3084?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3093?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3099?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3108?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3114?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3121?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3128?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3135?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3143?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3151?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3158?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3166?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3174?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3181?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3183?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3185?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3187?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3189?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3191?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3194?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3197?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3199?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3202?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2891?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2897?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2906?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2913?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2921?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2929?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2937?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2945?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2955?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2962?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2969?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2975?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2983?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2992?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3001?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3005?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3012?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3020?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3026?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3028?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3031?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3033?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3035?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3038?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3040?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3043?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3045?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3047?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2735?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2744?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2751?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2757?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2764?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2772?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2780?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2787?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2795?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2802?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2809?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2816?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2824?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2831?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2839?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2841?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2843?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2845?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2848?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2850?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2853?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2855?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2857?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2860?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2863?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2865?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2872?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2878?rss=1" />
  <rdf:li rdf:resource="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2882?rss=1" />
 </rdf:Seq>
</items>
</channel>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3049?rss=1">
<title><![CDATA[Lost in translation: an assessment and perspective for computational microRNA target identification]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3049?rss=1</link>
<description><![CDATA[
<p>MicroRNAs (miRNAs) are a class of short endogenously expressed RNA molecules that regulate gene expression by binding directly to the messenger RNA of protein coding genes. They have been found to confer a novel layer of genetic regulation in a wide range of biological processes. Computational miRNA target prediction remains one of the key means used to decipher the role of miRNAs in development and disease. Here we introduce the basic idea behind the experimental identification of miRNA targets and present some of the most widely used computational miRNA target identification programs. The review includes an assessment of the prediction quality of these programs and their combinations.</p>
<p><b>Contact:</b> <inter-ref locator="p.alexiou@fleming.gr" locator-type="email">p.alexiou@fleming.gr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp565/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Alexiou, P., Maragkakis, M., Papadopoulos, G. L., Reczko, M., Hatzigeorgiou, A. G.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp565</dc:identifier>
<dc:title><![CDATA[Lost in translation: an assessment and perspective for computational microRNA target identification]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3055</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3049</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3056?rss=1">
<title><![CDATA[Identifiability of isoform deconvolution from junction arrays and RNA-Seq]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3056?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Splice junction microarrays and RNA-seq are two popular ways of quantifying splice variants within a cell. Unfortunately, isoform expressions cannot always be determined from the expressions of individual exons and splice junctions. While this issue has been noted before, the extent of the problem on various platforms has not yet been explored, nor have potential remedies been presented.</p>
<p><b>Results:</b> We propose criteria that will guarantee identifiability of an isoform deconvolution model on exon and splice junction arrays and in RNA-Seq. We show that up to 97% of 2256 alternatively spliced human genes selected from the RefSeq database lead to identifiable gene models in RNA-seq, with similar results in mouse. However, in the Human Exon array only 26% of these genes lead to identifiable models, and even in the most comprehensive splice junction array only 69% lead to identifiable models.</p>
<p><b>Contact:</b> <inter-ref locator="whwong@stanford.edu" locator-type="email">whwong@stanford.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp544/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Hiller, D., Jiang, H., Xu, W., Wong, W. H.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp544</dc:identifier>
<dc:title><![CDATA[Identifiability of isoform deconvolution from junction arrays and RNA-Seq]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3059</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3056</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3060?rss=1">
<title><![CDATA[Quantitative measurement of aging using image texture entropy]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3060?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> A key element in understanding the aging of <I>Caenorhabditis elegans</I> is objective quantification of the morphological differences between younger and older animals. Here we propose to use the image texture entropy as an objective measurement that reflects the structural deterioration of the <I>C.elegans</I> muscle tissues during aging.</p>
<p><b>Results:</b> The texture entropy and directionality of the muscle microscopy images were measured using 50 animals on Days 0, 2, 4, 6, 8, 10 and 12 of adulthood. Results show that the entropy of the <I>C.elegans</I> pharynx tissues increases as the animal ages, but a sharper increase was measured between Days 2 and 4, and between Days 8 and 10. These results are in agreement with gene expression findings, and support the contention that the process of <I>C.elegans</I> aging has several distinct stages. This can indicate that <I>C.elegans</I> aging is driven by developmental pathways, rather than stochastic accumulation of damage.</p>
<p><b>Availability:</b> The image data are freely available on the Internet at <inter-ref locator="http://ome.grc.nia.nih.gov/iicbu2008/celegans" locator-type="url">http://ome.grc.nia.nih.gov/iicbu2008/celegans</inter-ref>, and the Haralick and Tamura texture analysis source code can be downloaded at <inter-ref locator="http://ome.grc.nia.nih.gov/wnd-charm" locator-type="url">http://ome.grc.nia.nih.gov/wnd-charm</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="shamirl@mail.nih.gov" locator-type="email">shamirl@mail.nih.gov</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Shamir, L., Wolkow, C. A., Goldberg, I. G.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp571</dc:identifier>
<dc:title><![CDATA[Quantitative measurement of aging using image texture entropy]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3063</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3060</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3064?rss=1">
<title><![CDATA[Genome analysis with inter-nucleotide distances]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3064?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> DNA sequences can be represented by sequences of four symbols, but it is often useful to convert the symbols into real or complex numbers for further analysis. Several mapping schemes have been used in the past, but they seem unrelated to any intrinsic characteristic of DNA. The objective of this work was to find a mapping scheme directly related to DNA characteristics and that would be useful in discriminating between different species. Mathematical models to explore DNA correlation structures may contribute to a better knowledge of the DNA and to find a concise DNA description.</p>
<p><b>Results:</b> We developed a methodology to process DNA sequences based on inter-nucleotide distances. Our main contribution is a method to obtain genomic signatures for complete genomes, based on the inter-nucleotide distances, that are able to discriminate between different species. Using these signatures and hierarchical clustering, it is possible to build phylogenetic trees. Phylogenetic trees lead to genome differentiation and allow the inference of phylogenetic relations. The phylogenetic trees generated in this work display related species close to each other, suggesting that the inter-nucleotide distances are able to capture essential information about the genomes. To create the genomic signature, we construct a vector which describes the inter-nucleotide distance distribution of a complete genome and compare it with the reference distance distribution, which is the distribution of a sequence where the nucleotides are placed randomly and independently. It is the residual or relative error between the data and the reference distribution that is used to compare the DNA sequences of different organisms.</p>
<p><b>Contact:</b> <inter-ref locator="vera@ua.pt" locator-type="email">vera@ua.pt</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Afreixo, V., Bastos, C. A. C., Pinho, A. J., Garcia, S. P., Ferreira, P. J. S. G.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp546</dc:identifier>
<dc:title><![CDATA[Genome analysis with inter-nucleotide distances]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3070</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3064</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3071?rss=1">
<title><![CDATA[HHsvm: fast and accurate classification of profile-profile matches identified by HHsearch]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3071?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Recently developed profile&ndash;profile methods rival structural comparisons in their ability to detect homology between distantly related proteins. Despite this tremendous progress, many genuine relationships between protein families cannot be recognized as comparisons of their profiles result in scores that are statistically insignificant.</p>
<p><b>Results:</b> Using known evolutionary relationships among protein superfamilies in SCOP database, support vector machines were trained on four sets of discriminatory features derived from the output of HHsearch. Upon validation, it was shown that the automatic classification of all profile&ndash;profile matches was superior to fixed threshold-based annotation in terms of sensitivity and specificity. The effectiveness of this approach was demonstrated by annotating several domains of unknown function from the Pfam database.</p>
<p><b>Availability:</b> Programs and scripts implementing the methods described in this manuscript are freely available from <inter-ref locator="http://hhsvm.dlakiclab.org/" locator-type="url">http://hhsvm.dlakiclab.org/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="mdlakic@montana.edu" locator-type="email">mdlakic@montana.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp555/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dlakic, M.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp555</dc:identifier>
<dc:title><![CDATA[HHsvm: fast and accurate classification of profile-profile matches identified by HHsearch]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3076</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3071</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3077?rss=1">
<title><![CDATA[Detection of new protein domains using co-occurrence: application to Plasmodium falciparum]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3077?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Hidden Markov models (HMMs) have proved to be a powerful tool for protein domain identification in newly sequenced organisms. However, numerous domains may be missed in highly divergent proteins. This is the case for <I>Plasmodium falciparum</I> proteins, the main causal agent of human malaria.</p>
<p><b>Results:</b> We propose a method to improve the sensitivity of HMM domain detection by exploiting the tendency of the domains to appear preferentially with a few other favorite domains in a protein. When sequence information alone is not sufficient to warrant the presence of a particular domain, our method enables its detection on the basis of the presence of other Pfam or InterPro domains. Moreover, a shuffling procedure allows us to estimate the false discovery rate associated with the results. Applied to <I>P.falciparum</I>, our method identifies 585 new Pfam domains (versus the 3683 already known domains in the Pfam database) with an estimated error rate &lt;20%. These new domains provide 387 new Gene Ontology (GO) annotations to the <I>P.falciparum</I> proteome. Analogous and congruent results are obtained when applying the method to related <I>Plasmodium</I> species (<I>P.vivax</I> and <I>P.yoelii</I>).</p>
<p><b>Availability:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp560/DC1" locator-type="url">Supplementary Material</inter-ref> and a database of the new domains and GO predictions achieved on <I>Plasmodium</I> proteins are available at <inter-ref locator="http://www.lirmm.fr/~terrapon/codd/" locator-type="url">http://www.lirmm.fr/~terrapon/codd/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="brehelin@lirmm.fr" locator-type="email">brehelin@lirmm.fr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp560/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Terrapon, N., Gascuel, O., Marechal, E., Brehelin, L.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp560</dc:identifier>
<dc:title><![CDATA[Detection of new protein domains using co-occurrence: application to Plasmodium falciparum]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3083</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3077</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3084?rss=1">
<title><![CDATA[Adaptive multi-agent architecture for functional sequence motifs recognition]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3084?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Accurate genome annotation or protein function prediction requires precise recognition of functional sequence motifs. Many computational motif prediction models have been proposed. Due to the complexity of the biological data, it may be desirable to apply an integrated approach that uses multiple models for analysis.</p>
<p><b>Results:</b> In this article, we propose a novel multi-agent architecture for the general purpose of functional sequence motif recognition. The approach takes advantage of the synergy provided by multiple agents through the employment of different agents equipped with distinctive problem solving skills and promotes the collaborations among them through decision maker (DM) agents that work as classifier ensembles. A genetic algorithm-based fusion strategy is applied which offers evolutionary property to the DM agents. The consistency and robustness of the system are maintained by an evolvable agent that mediates the team of the ensemble agents. The combined effort of a recommendation system (Seer) and the self-learning mediator agent yields a successful identification of the most efficient agent deployment scheme at an early stage of the experimentation process, which has the potential of greatly reducing the computational cost of the system. Two concrete systems are constructed that aim at predicting two important sequence motifs&mdash;the translational initiation sites (TISs) and the core promoters. With the incorporation of three distinctive problem solver agents, the TIS predictor consistently outperforms most of the state-of-the-art approaches under investigation. Integrating three existing promoter predictors, our system is able to yield consistently good performance.</p>
<p><b>Availability:</b> The program (MotifMAS) and the datasets are available upon request.</p>
<p><b>Contact:</b> <inter-ref locator="jzeng@ucalgary.ca" locator-type="email">jzeng@ucalgary.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Zeng, J., Alhajj, R., Demetrick, D.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp567</dc:identifier>
<dc:title><![CDATA[Adaptive multi-agent architecture for functional sequence motifs recognition]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3092</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3084</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3093?rss=1">
<title><![CDATA[Reproducing the manual annotation of multiple sequence alignments using a SVM classifier]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3093?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Aligning protein sequences with the best possible accuracy requires sophisticated algorithms. Since the optimal alignment is not guaranteed to be the correct one, it is expected that even the best alignment will contain sites that do not respect the assumption of positional homology. Because formulating rules to identify these sites is difficult, it is common practice to manually remove them. Although considered necessary in some cases, manual editing is time consuming and not reproducible. We present here an automated editing method based on the classification of &lsquo;valid&rsquo; and &lsquo;invalid&rsquo; sites.</p>
<p><b>Results:</b> A support vector machine (SVM) classifier is trained to reproduce the decisions made during manual editing with an accuracy of 95.0%. This implies that manual editing can be made reproducible and applied to large-scale analyses. We further demonstrate that it is possible to retrain/extend the training of the classifier by providing examples of multiple sequence alignment (MSA) annotation. Near optimal training can be achieved with only 1000 annotated sites, or roughly three samples of protein sequence alignments.</p>
<p><b>Availability:</b> This method is implemented in the software MANUEL, licensed under the GPL. A web-based application for single and batch job is available at <inter-ref locator="http://fester.cs.dal.ca/manuel" locator-type="url">http://fester.cs.dal.ca/manuel</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="cblouin@cs.dal.ca" locator-type="email">cblouin@cs.dal.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp552/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Blouin, C., Perry, S., Lavell, A., Susko, E., Roger, A. J.]]></dc:creator>
<dc:date>2009-11-17T07:51:19-08:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp552</dc:identifier>
<dc:title><![CDATA[Reproducing the manual annotation of multiple sequence alignments using a SVM classifier]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3098</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3093</prism:startingPage>
<prism:section>PHYLOGENETICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3099?rss=1">
<title><![CDATA[LIBRUS: combined machine learning and homology information for sequence-based ligand-binding residue prediction]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3099?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Identifying residues that interact with ligands is useful as a first step to understanding protein function and as an aid to designing small molecules that target the protein for interaction. Several studies have shown that sequence features are very informative for this type of prediction, while structure features have also been useful when structure is available. We develop a sequence-based method, called LIBRUS, that combines homology-based transfer and direct prediction using machine learning and compare it to previous sequence-based work and current structure-based methods.</p>
<p><b>Results:</b> Our analysis shows that homology-based transfer is slightly more discriminating than a support vector machine learner using profiles and predicted secondary structure. We combine these two approaches in a method called LIBRUS. On a benchmark of 885 sequence-independent proteins, it achieves an area under the ROC curve (<I>ROC</I>) of 0.83 with 45% precision at 50% recall, a significant improvement over previous sequence-based efforts. On an independent benchmark set, a current method, FINDSITE, based on structure features achieves an <I>ROC</I> of 0.81 with 54% precision at 50% recall, while LIBRUS achieves an <I>ROC</I> of 0.82 with 39% precision at 50% recall at a smaller computational cost. When LIBRUS and FINDSITE predictions are combined, performance is increased beyond either reaching an <I>ROC</I> of 0.86 and 59% precision at 50% recall.</p>
<p><b>Availability:</b> Software developed for this study is available at <inter-ref locator="http://bioinfo.cs.umn.edu/supplements/binf2009" locator-type="url">http://bioinfo.cs.umn.edu/supplements/binf2009</inter-ref> along with Supplementary data on the study.</p>
<p><b>Contact:</b> <inter-ref locator="kauffman@cs.umn.edu" locator-type="email">kauffman@cs.umn.edu</inter-ref>; <inter-ref locator="karypis@cs.umn.edu" locator-type="email">karypis@cs.umn.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Kauffman, C., Karypis, G.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp561</dc:identifier>
<dc:title><![CDATA[LIBRUS: combined machine learning and homology information for sequence-based ligand-binding residue prediction]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3107</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3099</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3108?rss=1">
<title><![CDATA[The interwinding nature of protein-protein interfaces and its implication for protein complex formation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3108?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Structural features at protein&ndash;protein interfaces can be studied to understand protein&ndash;protein interactions. It was noticed that in a dataset of 45 multimeric proteins the interface could either be described as flat against flat or protruding/interwound. In the latter, residues within one chain were surrounded by those in other chains, whereas in the former they were not.</p>
<p><b>Results:</b> A simple method was developed that could distinguish between these two types with results that matched those made by a human annotator. Applying this automatic method to a large dataset of 888 structures, chains at interfaces were categorized as non-surrounded or surrounded. It was found that the surrounded set had a significantly lower folding tendency, using a sequence-based measure, than the non-surrounded set. This suggests that before complexation, surrounded chains are relatively unstable and may be involved in &lsquo;fly-casting&rsquo;. This is supported by the finding that terminal regions are overrepresented in the surrounded set.</p>
<p><b>Availability:</b> <inter-ref locator="http://cib.cf.ocha.ac.jp/DACSIS/" locator-type="url">http://cib.cf.ocha.ac.jp/DACSIS/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="yura.kei@ocha.ac.jp" locator-type="email">yura.kei@ocha.ac.jp</inter-ref>; <inter-ref locator="sjh@cmp.uea.ac.uk" locator-type="email">sjh@cmp.uea.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp563/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Yura, K., Hayward, S.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp563</dc:identifier>
<dc:title><![CDATA[The interwinding nature of protein-protein interfaces and its implication for protein complex formation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3113</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3108</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3114?rss=1">
<title><![CDATA[Bayesian detection of non-sinusoidal periodic patterns in circadian expression data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3114?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Cyclical biological processes such as cell division and circadian regulation produce coordinated periodic expression of thousands of genes. Identification of such genes and their expression patterns is a crucial step in discovering underlying regulatory mechanisms. Existing computational methods are biased toward discovering genes that follow sine-wave patterns.</p>
<p><b>Results:</b> We present an analysis of variance (ANOVA) periodicity detector and its Bayesian extension that can be used to discover periodic transcripts of arbitrary shapes from replicated gene expression profiles. The models are applicable when the profiles are collected at comparable time points for at least two cycles. We provide an empirical Bayes procedure for estimating parameters of the prior distributions and derive closed-form expressions for the posterior probability of periodicity, enabling efficient computation. The model is applied to two datasets profiling circadian regulation in murine liver and skeletal muscle, revealing a substantial number of previously undetected non-sinusoidal periodic transcripts in each. We also apply quantitative real-time PCR to several highly ranked non-sinusoidal transcripts in liver tissue found by the model, providing independent evidence of circadian regulation of these genes.</p>
<p><b>Availability:</b> M<scp>atlab</scp> software for estimating prior distributions and performing inference is available for download from <inter-ref locator="http://www.datalab.uci.edu/resources/periodicity/" locator-type="url">http://www.datalab.uci.edu/resources/periodicity/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="dchudova@gmail.com" locator-type="email">dchudova@gmail.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp547/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Chudova, D., Ihler, A., Lin, K. K., Andersen, B., Smyth, P.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp547</dc:identifier>
<dc:title><![CDATA[Bayesian detection of non-sinusoidal periodic patterns in circadian expression data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3120</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3114</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3121?rss=1">
<title><![CDATA[Integration of heterogeneous expression data sets extends the role of the retinol pathway in diabetes and insulin resistance]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3121?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Type 2 diabetes is a chronic metabolic disease that involves both environmental and genetic factors. To understand the genetics of type 2 diabetes and insulin resistance, the DIabetes Genome Anatomy Project (DGAP) was launched to profile gene expression in a variety of related animal models and human subjects. We asked whether these heterogeneous models can be integrated to provide consistent and robust biological insights into the biology of insulin resistance.</p>
<p><b>Results:</b> We perform integrative analysis of the 16 DGAP data sets that span multiple tissues, conditions, array types, laboratories, species, genetic backgrounds and study designs. For each data set, we identify differentially expressed genes compared with control. Then, for the combined data, we rank genes according to the frequency with which they were found to be statistically significant across data sets. This analysis reveals RetSat as a widely shared component of mechanisms involved in insulin resistance and sensitivity and adds to the growing importance of the retinol pathway in diabetes, adipogenesis and insulin resistance. Top candidates obtained from our analysis have been confirmed in recent laboratory studies.</p>
<p><b>Contact:</b> <inter-ref locator="Isaac_kohane@harvard.edu" locator-type="email">Isaac_kohane@harvard.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Park, P. J., Kong, S. W., Tebaldi, T., Lai, W. R., Kasif, S., Kohane, I. S.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp559</dc:identifier>
<dc:title><![CDATA[Integration of heterogeneous expression data sets extends the role of the retinol pathway in diabetes and insulin resistance]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3127</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3121</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3128?rss=1">
<title><![CDATA[Qupe--a Rich Internet Application to take a step forward in the analysis of mass spectrometry-based quantitative proteomics experiments]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3128?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The goal of present -omics sciences is to understand biological systems as a whole in terms of interactions of the individual cellular components. One of the main building blocks in this field of study is proteomics where tandem mass spectrometry (LC-MS/MS) in combination with isotopic labelling techniques provides a common way to obtain a direct insight into regulation at the protein level. Methods to identify and quantify the peptides contained in a sample are well established, and their output usually results in lists of identified proteins and calculated relative abundance values. The next step is to move ahead from these abstract lists and apply statistical inference methods to compare measurements, to identify genes that are significantly up- or down-regulated, or to detect clusters of proteins with similar expression profiles.</p>
<p><b>Results:</b> We introduce the Rich Internet Application (RIA) Qupe providing comprehensive data management and analysis functions for LC-MS/MS experiments. Starting with the import of mass spectra data the system guides the experimenter through the process of protein identification by database search, the calculation of protein abundance ratios, and in particular, the statistical evaluation of the quantification results including multivariate analysis methods such as analysis of variance or hierarchical cluster analysis. While a data model to store these results has been developed, a well-defined programming interface facilitates the integration of novel approaches. A compute cluster is utilized to distribute computationally intensive calculations, and a web service allows information to be interchanged with other -omics software applications. To demonstrate that Qupe represents a step forward in quantitative proteomics analysis, an application study on <I>Corynebacterium glutamicum</I> has been carried out.</p>
<p><b>Availability and Implementation:</b> Qupe is implemented in Java utilizing Hibernate, Echo2, R and the Spring framework. We encourage the usage of the RIA in the sense of the &lsquo;software as a service&rsquo; concept, maintained on our servers and accessible at the following location: <inter-ref locator="http://qupe.cebitec.uni-bielefeld.de" locator-type="url">http://qupe.cebitec.uni-bielefeld.de</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="stefan.albaum@cebitec.uni-bielefeld.de" locator-type="email">stefan.albaum@cebitec.uni-bielefeld.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp568/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Albaum, S. P., Neuweger, H., Franzel, B., Lange, S., Mertens, D., Trotschel, C., Wolters, D., Kalinowski, J., Nattkemper, T. W., Goesmann, A.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp568</dc:identifier>
<dc:title><![CDATA[Qupe--a Rich Internet Application to take a step forward in the analysis of mass spectrometry-based quantitative proteomics experiments]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3134</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3128</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3135?rss=1">
<title><![CDATA[Automatic assignment of reaction operators to enzymatic reactions]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3135?rss=1</link>
<description><![CDATA[
<p><b>Background:</b> Enzymes are classified in a numerical classification scheme introduced by the Nomenclature Committee of the IUBMB based on the overall reaction chemistry. Due to the manifold of enzymatic reactions the system has become highly complex. Assignment of enzymes to the enzyme classes requires a detailed knowledge of the system and manual analysis. Frequently rearrangements and deletions of enzymes and sub-subclasses are necessary.</p>
<p><b>Results:</b> We use the Dugundji&ndash;Ugi model for coding of biochemical reactions which is based on electron shift patterns occurring during reactions. Changes of the bonds or of non-bonded valence electrons are expressed by reaction matrices. Our program calculates reaction matrices automatically on the sole basis of substrate and product chemical structures based on a new strategy for maximal common substructure determination, which allows an accurate atom mapping of the substrate and product atoms. The system has been tested for a large set of enzymatic reactions including all sub-subclasses of the EC classification system. Altogether 147 different representative reaction operators were found in the classified enzymes, 121 of which are unique with respect to an EC sub-subclass. The other 26 comprise groups of enzymes with very similar reactions, being identical with respect to the bonds formed and broken.</p>
<p><b>Conclusion:</b> The analysis and comparison of enzymatic reactions according to their electron shift patterns defines enzyme groups characterised by unique reaction cores. Our results demonstrate the applicability of the Dugundji&ndash;Ugi model as a reasonable pre-classification system allowing an objective and rational view of biochemical reactions.</p>
<p><b>Availability:</b> The program to generate reaction matrix descriptors is available upon request.</p>
<p><b>Contact:</b> <inter-ref locator="d.schomburg@tu-bs.de" locator-type="email">d.schomburg@tu-bs.de</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Leber, M., Egelhofer, V., Schomburg, I., Schomburg, D.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp549</dc:identifier>
<dc:title><![CDATA[Automatic assignment of reaction operators to enzymatic reactions]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3142</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3135</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3143?rss=1">
<title><![CDATA[How and when should interactome-derived clusters be used to predict functional modules and protein function?]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3143?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Clustering of protein&ndash;protein interaction networks is one of the most common approaches for predicting functional modules, protein complexes and protein functions. But, how well does clustering perform at these tasks?</p>
<p><b>Results:</b> We develop a general framework to assess how well computationally derived clusters in physical interactomes overlap functional modules derived via the Gene Ontology (GO). Using this framework, we evaluate six diverse network clustering algorithms using <I>Saccharomyces cerevisiae</I> and show that (i) the performances of these algorithms can differ substantially when run on the same network and (ii) their relative performances change depending upon the topological characteristics of the network under consideration. For the specific task of function prediction in <I>S. cerevisiae</I>, we demonstrate that, surprisingly, a simple non-clustering guilt-by-association approach outperforms widely used clustering-based approaches that annotate a protein with the overrepresented biological process and cellular component terms in its cluster; this is true over the range of clustering algorithms considered. Further analysis parameterizes performance based on the number of annotated proteins, and suggests when clustering approaches should be used for interactome functional analyses. Overall, our results suggest a re-examination of when and how clustering approaches should be applied to physical interactomes, and establish guidelines by which novel clustering approaches for biological networks should be justified and evaluated with respect to functional analysis.</p>
<p><b>Contact:</b> <inter-ref locator="msingh@cs.princeton.edu" locator-type="email">msingh@cs.princeton.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp551/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Song, J., Singh, M.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp551</dc:identifier>
<dc:title><![CDATA[How and when should interactome-derived clusters be used to predict functional modules and protein function?]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3150</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3143</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3151?rss=1">
<title><![CDATA[Combining tissue transcriptomics and urine metabolomics for breast cancer biomarker identification]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3151?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> For the early detection of cancer, highly sensitive and specific biomarkers are needed. Particularly, biomarkers in bio-fluids are relatively more useful because they can be used for non-biopsy tests. Although the altered metabolic activities of cancer cells have been observed in many studies, little is known about metabolic biomarkers for cancer screening. In this study, a systematic method is proposed for identifying metabolic biomarkers in urine samples by selecting candidate biomarkers from altered genome-wide gene expression signatures of cancer cells. Biomarkers identified by the present study have increased coherence and robustness because the significance of the biomarkers is validated in both gene expression profiles and metabolic profiles.</p>
<p><b>Results:</b> The proposed method was applied to the gene expression profiles and urine samples of 50 breast cancer patients and 50 normal persons. Nine altered metabolic pathways were identified from the breast cancer gene expression signatures. Among these altered metabolic pathways, four metabolic biomarkers (Homovanillate, 4-hydroxyphenylacetate, 5-hydroxyindoleacetate and urea) were identified to be different in cancer and normal subjects (<I>p</I> &lt;0.05). In the case of the predictive performance, the identified biomarkers achieved area under the ROC curve values of 0.75, 0.79 and 0.79, according to a linear discriminant analysis, a random forest classifier and a support vector machine, respectively. Finally, biomarkers which showed consistent significance in pathways' gene expression as well as urine samples were identified.</p>
<p><b>Contact:</b> <inter-ref locator="dhlee@biosoft.kaist.ac.kr" locator-type="email">dhlee@biosoft.kaist.ac.kr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp558/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Nam, H., Chung, B. C., Kim, Y., Lee, K., Lee, D.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp558</dc:identifier>
<dc:title><![CDATA[Combining tissue transcriptomics and urine metabolomics for breast cancer biomarker identification]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3157</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3151</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3158?rss=1">
<title><![CDATA[Computing the shortest elementary flux modes in genome-scale metabolic networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3158?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Elementary flux modes (EFMs) represent a key concept to analyze metabolic networks from a pathway-oriented perspective. In spite of considerable work in this field, the computation of the full set of elementary flux modes in large-scale metabolic networks still constitutes a challenging issue due to its underlying combinatorial complexity.</p>
<p><b>Results:</b> In this article, we illustrate that the full set of EFMs can be enumerated in increasing order of number of reactions via integer linear programming. In this light, we present a novel procedure to efficiently determine the <I>K</I>-shortest EFMs in large-scale metabolic networks. Our method was applied to find the <I>K</I>-shortest EFMs that produce lysine in the genome-scale metabolic networks of <I>Escherichia coli</I> and <I>Corynebacterium glutamicum</I>. A detailed analysis of the biological significance of the <I>K</I>-shortest EFMs was conducted, finding that glucose catabolism, ammonium assimilation, lysine anabolism and cofactor balancing were correctly predicted. The work presented here represents an important step forward in the analysis and computation of EFMs for large-scale metabolic networks, where traditional methods fail for networks of even moderate size.</p>
<p><b>Contact:</b> <inter-ref locator="fplanes@tecnun.es" locator-type="email">fplanes@tecnun.es</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp564/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[de Figueiredo, L. F., Podhorski, A., Rubio, A., Kaleta, C., Beasley, J. E., Schuster, S., Planes, F. J.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp564</dc:identifier>
<dc:title><![CDATA[Computing the shortest elementary flux modes in genome-scale metabolic networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3165</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3158</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3166?rss=1">
<title><![CDATA[Functionally guided alignment of protein interaction networks for module detection]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3166?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Functional module detection within protein interaction networks is a challenging problem due to the sparsity of data and presence of errors. Computational techniques for this task range from purely graph theoretical approaches involving single networks to alignment of multiple networks from several species. Current network alignment methods all rely on protein sequence similarity to map proteins across species.</p>
<p><b>Results:</b> Here we carry out network alignment using a protein functional similarity measure. We show that using functional similarity to map proteins across species improves network alignment in terms of functional coherence and overlap with experimentally verified protein complexes. Moreover, the results from functional similarity-based network alignment display little overlap (&lt;15%) with sequence similarity-based alignment. Our combined approach integrating sequence and function-based network alignment alongside graph clustering properties offers a 200% increase in coverage of experimental datasets and comparable accuracy to current network alignment methods.</p>
<p><b>Availability:</b> Program binaries and source code are freely available at <inter-ref locator="http://www.stats.ox.ac.uk/research/bioinfo/resources" locator-type="url">http://www.stats.ox.ac.uk/research/bioinfo/resources</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="ali@stats.ox.ac.uk" locator-type="email">ali@stats.ox.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp569/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Ali, W., Deane, C. M.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp569</dc:identifier>
<dc:title><![CDATA[Functionally guided alignment of protein interaction networks for module detection]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3173</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3166</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3174?rss=1">
<title><![CDATA[Automatically classifying sentences in full-text biomedical articles into Introduction, Methods, Results and Discussion]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3174?rss=1</link>
<description><![CDATA[
<p>Biomedical texts can typically be represented by four rhetorical categories: Introduction, Methods, Results and Discussion (IMRAD). Classifying sentences into these categories can benefit many other text-mining tasks. Although many studies have applied different approaches for automatically classifying sentences in MEDLINE abstracts into the IMRAD categories, few have explored the classification of sentences that appear in full-text biomedical articles. We first evaluated whether sentences in full-text biomedical articles could be reliably annotated into the IMRAD format and then explored different approaches for automatically classifying these sentences into the IMRAD categories. Our results show an overall annotation agreement of 82.14% with a Kappa score of 0.756. The best classification system is a multinomial na&iuml;ve Bayes classifier trained on manually annotated data that achieved 91.95% accuracy and an average <I>F</I>-score of 91.55%, which is significantly higher than baseline systems. A web version of this system is available online at <inter-ref locator="http://wood.ims.uwm.edu/full_text_classifier/" locator-type="url">http://wood.ims.uwm.edu/full_text_classifier/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="hongyu@uwm.edu" locator-type="email">hongyu@uwm.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Agarwal, S., Yu, H.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp548</dc:identifier>
<dc:title><![CDATA[Automatically classifying sentences in full-text biomedical articles into Introduction, Methods, Results and Discussion]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3180</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3174</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3181?rss=1">
<title><![CDATA[MOODS: fast search for position weight matrix matches in DNA sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3181?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> MOODS (MOtif Occurrence Detection Suite) is a software package for matching position weight matrices against DNA sequences. MOODS implements state-of-the-art online matching algorithms, achieving considerably faster scanning speed than with a simple brute-force search. MOODS is written in C++, with bindings for the popular BioPerl and Biopython toolkits. It can easily be adapted for different purposes and integrated into existing workflows. It can also be used as a C++ library.</p>
<p><b>Availability:</b> The package with documentation and examples of usage is available at <inter-ref locator="http://www.cs.helsinki.fi/group/pssmfind" locator-type="url">http://www.cs.helsinki.fi/group/pssmfind</inter-ref>. The source code is also available under the terms of a GNU General Public License (GPL).</p>
<p><b>Contact:</b> <inter-ref locator="janne.h.korhonen@helsinki.fi" locator-type="email">janne.h.korhonen@helsinki.fi</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Korhonen, J., Martinmaki, P., Pizzi, C., Rastas, P., Ukkonen, E.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp554</dc:identifier>
<dc:title><![CDATA[MOODS: fast search for position weight matrix matches in DNA sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3182</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3181</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3183?rss=1">
<title><![CDATA[PoreLogo: a new tool to analyse, visualize and compare channels in transmembrane proteins]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3183?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The increasing number of available atomic 3D structures of transmembrane channel proteins represents a valuable resource for better understanding their structure&ndash;function relationships and eventually predicting their selectivity. Herein, we present PoreLogo, an automatic tool for analysing, visualizing and comparing the amino acid composition of transmembrane channels and its conservation across the corresponding protein family.</p>
<p><b>Availability:</b> PoreLogo is accessible as a public web server at <inter-ref locator="http://www.ebi.ac.uk/thornton-srv/software/PoreLogo/" locator-type="url">http://www.ebi.ac.uk/thornton-srv/software/PoreLogo/</inter-ref>.</p>
<p><b>Contacts:</b> <inter-ref locator="marial@ebi.ac.uk" locator-type="email">marial@ebi.ac.uk</inter-ref>; <inter-ref locator="romina.oliva@uniparthenope.it" locator-type="email">romina.oliva@uniparthenope.it</inter-ref>.</p>
]]></description>
<dc:creator><![CDATA[Oliva, R., Thornton, J. M., Pellegrini-Calace, M.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp545</dc:identifier>
<dc:title><![CDATA[PoreLogo: a new tool to analyse, visualize and compare channels in transmembrane proteins]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3184</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3183</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3185?rss=1">
<title><![CDATA[EASYMIFS and SITEHOUND: a toolkit for the identification of ligand-binding sites in protein structures]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3185?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> S<scp>ite</scp>H<scp>ound</scp> uses Molecular Interaction Fields (MIFs) produced by E<scp>asy</scp>MIF<scp>s</scp> to identify protein structure regions that show a high propensity for interaction with ligands. The type of binding site identified depends on the probe atom used in the MIF calculation. The input to E<scp>asy</scp>MIF<scp>s</scp> is a PDB file of a protein structure; the output MIF serves as input to S<scp>ite</scp>H<scp>ound</scp>, which in turn produces a list of putative binding sites. Extensive testing of S<scp>ite</scp>H<scp>ound</scp> for the detection of binding sites for drug-like molecules and phosphorylated ligands has been carried out.</p>
<p><b>Availability:</b> E<scp>asy</scp>MIF<scp>s</scp> and S<scp>ite</scp>H<scp>ound</scp> executables for Linux, Mac OS X, and MS Windows operating systems are freely available for download from <inter-ref locator="http://sitehound.sanchezlab.org/download.html" locator-type="url">http://sitehound.sanchezlab.org/download.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="roberto@sanchezlab.org" locator-type="email">roberto@sanchezlab.org</inter-ref> or <inter-ref locator="roberto.sanchez@mssm.edu" locator-type="email">roberto.sanchez@mssm.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp562/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Ghersi, D., Sanchez, R.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp562</dc:identifier>
<dc:title><![CDATA[EASYMIFS and SITEHOUND: a toolkit for the identification of ligand-binding sites in protein structures]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3186</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3185</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3187?rss=1">
<title><![CDATA[VDNA: The virtual DNA plug-in for VMD]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3187?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The DNA inter base pair step parameters (Tilt, Roll, Twist, Shift, Slide, Rise) are a standard internal coordinate representation of DNA. In the absence of bend and shear, it is relatively easy to mentally visualize how Twist and Rise generate the familiar double helix. More complex structures do not readily yield to such intuition. For this reason, we developed a plug-in for VMD that accepts a set of mathematical expressions as input and generates a coarse-grained model of DNA as output. This feature of VDNA appears to provide a unique approach to DNA modeling. Predefined expressions include: linear, sheared, bent and circular DNA, and models of the nucleosome superhelix, chromatin, thermal motion and nucleosome unwrapping.</p>
<p><b>Availability:</b> VDNA is pre-installed in VMD, <inter-ref locator="http://www.ks.uiuc.edu/Research/vmd" locator-type="url">http://www.ks.uiuc.edu/Research/vmd</inter-ref>. Updates are at <inter-ref locator="http://dna.ccs.tulane.edu" locator-type="url">http://dna.ccs.tulane.edu</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="bishop@tulane.edu" locator-type="email">bishop@tulane.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bishop, T. C.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp566</dc:identifier>
<dc:title><![CDATA[VDNA: The virtual DNA plug-in for VMD]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3188</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3187</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3189?rss=1">
<title><![CDATA[Processing and population genetic analysis of multigenic datasets with ProSeq3 software]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3189?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The current tendency in molecular population genetics is to use increasing numbers of genes in the analysis. Here I describe a program for handling and population genetic analysis of DNA polymorphism data collected from multiple genes. The program includes a sequence/alignment editor and an internal relational database that simplify the preparation and manipulation of multigenic DNA polymorphism datasets. The most commonly used DNA polymorphism analyses are implemented in ProSeq3, facilitating population genetic analysis of large multigenic datasets. Extensive input/output options make ProSeq3 a convenient hub for sequence data processing and analysis.</p>
<p><b>Availability:</b> The program is available free of charge from <inter-ref locator="http://dps.plants.ox.ac.uk/sequencing/proseq.htm" locator-type="url">http://dps.plants.ox.ac.uk/sequencing/proseq.htm</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="dmitry.filatov@plants.ox.ac.uk" locator-type="email">dmitry.filatov@plants.ox.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Filatov, D. A.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp572</dc:identifier>
<dc:title><![CDATA[Processing and population genetic analysis of multigenic datasets with ProSeq3 software]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3190</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3189</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3191?rss=1">
<title><![CDATA[W-ChIPMotifs: a web application tool for de novo motif discovery from ChIP-based high-throughput data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3191?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> W-ChIPMotifs is a web application tool that provides a user friendly interface for <I>de novo</I> motif discovery. The web tool is based on our previous ChIPMotifs program which is a <I>de novo</I> motif finding tool developed for ChIP-based high-throughput data and incorporated various <I>ab initio</I> motif discovery tools such as MEME, MaMF, Weeder and optimized the significance of the detected motifs by using a bootstrap resampling statistic method and a Fisher test. Use of a randomized statistical model like bootstrap resampling can significantly increase the accuracy of the detected motifs. In our web tool, we have modified the program in two aspects: (i) we have refined the <I>P</I>-value with a Bonferroni correction; (ii) we have incorporated the STAMP tool to infer phylogenetic information and to determine the detected motifs if they are novel and known using the TRANSFAC and JASPAR databases. A comprehensive result file is mailed to users.</p>
<p><b>Availability:</b> <inter-ref locator="http://motif.bmi.ohio-state.edu/ChIPMotifs" locator-type="url">http://motif.bmi.ohio-state.edu/ChIPMotifs</inter-ref>. Data used in the article may be downloaded from <inter-ref locator="http://motif.bmi.ohio-state.edu/ChIPMotifs/examples.shtml" locator-type="url">http://motif.bmi.ohio-state.edu/ChIPMotifs/examples.shtml</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="victor.jin@osumc.edu" locator-type="email">victor.jin@osumc.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Jin, V. X., Apostolos, J., Nagisetty, N. S. V. R., Farnham, P. J.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp570</dc:identifier>
<dc:title><![CDATA[W-ChIPMotifs: a web application tool for de novo motif discovery from ChIP-based high-throughput data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3193</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3191</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3194?rss=1">
<title><![CDATA[Client-side integration of life science literature resources]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3194?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The online resources in the life sciences are characterized by a great fragmentation and one of the pressing issues of bioinformatics is making the integration of these resources a smoother and more flexible process than it is currently. Here we present <I>i-cite</I>, a browser extension, which implements a client-side model of integration which improves the navigation within the rapidly increasing life science literature and links terms from it to corresponding non-textual data.</p>
<p><b>Availability:</b> <inter-ref locator="http://i-cite.org" locator-type="url">http://i-cite.org</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="nan23@cam.ac.uk" locator-type="email">nan23@cam.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Easty, R., Nikolov, N.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp550</dc:identifier>
<dc:title><![CDATA[Client-side integration of life science literature resources]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3196</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3194</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3197?rss=1">
<title><![CDATA[SimCT: a generic tool to visualize ontology-based relationships for biological objects]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3197?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We present a web-based service, SimCT, which allows to graphically display the relationships between biological objects (e.g. genes or proteins) based on their annotations to a biomedical ontology. The result is presented as a tree of these objects, which can be viewed and explored through a specific java applet designed to highlight relevant features. Unlike the numerous tools that search for overrepresented terms, SimCT draws a simplified representation of biological terms present in the set of objects, and can be applied to any ontology for which annotation data is available. Being web-based, it does not require prior installation, and provides an intuitive, easy-to-use service.</p>
<p><b>Availability:</b> <inter-ref locator="http://tagc.univ-mrs.fr/SimCT" locator-type="url">http://tagc.univ-mrs.fr/SimCT</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="carl.herrmann@univmed.fr" locator-type="email">carl.herrmann@univmed.fr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp553/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Herrmann, C., Berard, S., Tichit, L.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp553</dc:identifier>
<dc:title><![CDATA[SimCT: a generic tool to visualize ontology-based relationships for biological objects]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3198</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3197</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3199?rss=1">
<title><![CDATA[ncRNAppi--a tool for identifying disease-related miRNA and siRNA targeting pathways]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3199?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Currently, there are a number of databases which store microRNA (miRNA) information, and tools available which provide miRNA target prediction. In this article, we describe a novel web-based tool that integrate the miRNA-targeted mRNA data, protein&ndash;protein interactions (PPI) records, tissues, biochemical pathways, human disease and gene function information to establish a disease-related miRNA target pathway database. This database is unique in the sense that it links miRNA target genes with their PPI partners according to being tissue- and diseases-specific or both. The same approach is also applied to siRNA data. This database provides two types of searches: (i) tissue- and (ii) disease-specific miRNA (or siRNA) targeting pathways. The search allows one to identify tissue- or disease-specific miRNA (or siRNA) target gene's PPI partners two levels beyond.</p>
<p><b>Availability:</b> The release version 1.0 is a freely accessible database available at <inter-ref locator="http://ncrnappi.cs.nthu.edu.tw" locator-type="url">http://ncrnappi.cs.nthu.edu.tw</inter-ref> and <inter-ref locator="http://ncRNAppi.bioinfo.asia.edu.tw/" locator-type="url">http://ncRNAppi.bioinfo.asia.edu.tw/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="ppiddi@gmail.com" locator-type="email">ppiddi@gmail.com</inter-ref>; <inter-ref locator="o2snow@gmail.com" locator-type="email">o2snow@gmail.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ng, K.-L., Liu, H.-C., Lee, S.-C.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp574</dc:identifier>
<dc:title><![CDATA[ncRNAppi--a tool for identifying disease-related miRNA and siRNA targeting pathways]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3201</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3199</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3202?rss=1">
<title><![CDATA[In response to 'Can sugars be produced from fatty acids? A test case for pathway analysis tools']]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/23/3202?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> In their article entitled &lsquo;Can sugars be produced from fatty acids? A test case for pathway analysis tools&rsquo; de Figueiredo and co-authors assess the performance of three pathway prediction tools (METATOOL, PathFinding and Pathway Hunter Tool) using the synthesis of glucose-6-phosphate (G6P) from acetyl-CoA in humans as a test case. We think that this article is biased for three reasons: (i) the metabolic networks used as input for the respective tools were of very different sizes; (ii) the &lsquo;assessment&rsquo; is restricted to two study cases; (iii) developers are inherently more skilled to use their own tools than those developed by other people. We extended the analyses led by de Figueiredo and clearly show that the apparent superior performance of their tool (METATOOL) is partly due to the differences in input network sizes. We also see a conceptual problem in the comparison of tools that serve different purposes. In our opinion, metabolic path finding and elementary mode analysis are answering different biological questions, and should be considered as complementary rather than competitive approaches.</p>
<p><b>Contact:</b> <inter-ref locator="kfaust@ulb.ac.be" locator-type="email">kfaust@ulb.ac.be</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp557/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Faust, K., Croes, D., van Helden, J.]]></dc:creator>
<dc:date>Tue, 17 Nov 2009 07:51:19 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp557</dc:identifier>
<dc:title><![CDATA[In response to 'Can sugars be produced from fatty acids? A test case for pathway analysis tools']]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>23</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3205</prism:endingPage>
<prism:publicationDate>2009-12-01</prism:publicationDate>
<prism:startingPage>3202</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2891?rss=1">
<title><![CDATA[Phenotypic categorization of genetic skin diseases reveals new relations between phenotypes, genes and pathways]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2891?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Systematic analysis of connection between proteins, their cellular function and phenotypic manifestations in disease is a central problem of biological and clinical research. The solution to this problem requires the development of new approaches to link the rapidly growing dataset of gene&ndash;disease associations with the many complex and overlapping phenotypes of human disease.</p>
<p><b>Results:</b> We analyze genetic skin disorders and suggest a manually designed set of elementary phenotypes whose combinations define diseases as points in a multidimensional space, providing a basis for phenotypic disease clustering. Placing the known gene&ndash;disease associations in the context of this space reveals new patterns that suggest previously unknown functional links between proteins, signaling pathways and disease phenotypes. For example, analysis of telangiectasias (spider vein diseases) reveals a previously unrecognized interplay between the TGF-&beta; signaling pathway and pentose phosphate pathway. This interaction may mediate glucose-dependent regulation of TGF-&beta; signaling, providing a clue to the known association between angiopathies and diabetes and implying new gene candidates for mutational analysis and drug targeting.</p>
<p><b>Contact:</b> <inter-ref locator="grishin@chop.swmed.edu" locator-type="email">grishin@chop.swmed.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp538/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Sadreyev, R. I., Feramisco, J. D., Tsao, H., Grishin, N. V.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp538</dc:identifier>
<dc:title><![CDATA[Phenotypic categorization of genetic skin diseases reveals new relations between phenotypes, genes and pathways]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2896</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2891</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2897?rss=1">
<title><![CDATA[De novo computational prediction of non-coding RNA genes in prokaryotic genomes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2897?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The computational identification of non-coding RNA (ncRNA) genes represents one of the most important and challenging problems in computational biology. Existing methods for ncRNA gene prediction rely mostly on homology information, thus limiting their applications to ncRNA genes with known homologues.</p>
<p><b>Results:</b> We present a novel <I>de novo</I> prediction algorithm for ncRNA genes using features derived from the sequences and structures of known ncRNA genes in comparison to decoys. Using these features, we have trained a neural network-based classifier and have applied it to <I>Escherichia coli</I> and <I>Sulfolobus solfataricus</I> for genome-wide prediction of ncRNAs. Our method has an average prediction sensitivity and specificity of 68% and 70%, respectively, for identifying windows with potential for ncRNA genes in <I>E.coli</I>. By combining windows of different sizes and using positional filtering strategies, we predicted 601 candidate ncRNAs and recovered 41% of known ncRNAs in <I>E.coli</I>. We experimentally investigated six novel candidates using Northern blot analysis and found expression of three candidates: one represents a potential new ncRNA, one is associated with stable mRNA decay intermediates and one is a case of either a potential riboswitch or transcription attenuator involved in the regulation of cell division. In general, our approach enables the identification of both <I>cis</I>- and <I>trans</I>-acting ncRNAs in partially or completely sequenced microbial genomes without requiring homology or structural conservation.</p>
<p><b>Availability:</b> The source code and results are available at <inter-ref locator="http://csbl.bmb.uga.edu/publications/materials/tran/" locator-type="url">http://csbl.bmb.uga.edu/publications/materials/tran/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="xyn@bmb.uga.edu" locator-type="email">xyn@bmb.uga.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp537/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tran, T. T., Zhou, F., Marshburn, S., Stead, M., Kushner, S. R., Xu, Y.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp537</dc:identifier>
<dc:title><![CDATA[De novo computational prediction of non-coding RNA genes in prokaryotic genomes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2905</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2897</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2906?rss=1">
<title><![CDATA[Integrative clustering of multiple genomic data types using a joint latent variable model with application to breast and lung cancer subtype analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2906?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The molecular complexity of a tumor manifests itself at the genomic, epigenomic, transcriptomic and proteomic levels. Genomic profiling at these multiple levels should allow an integrated characterization of tumor etiology. However, there is a shortage of effective statistical and bioinformatic tools for truly integrative data analysis. The standard approach to integrative clustering is separate clustering followed by manual integration. A more statistically powerful approach would incorporate all data types simultaneously and generate a single integrated cluster assignment.</p>
<p><b>Methods:</b> We developed a joint latent variable model for integrative clustering. We call the resulting methodology iCluster. iCluster incorporates flexible modeling of the associations between different data types and the variance&ndash;covariance structure within data types in a single framework, while simultaneously reducing the dimensionality of the datasets. Likelihood-based inference is obtained through the Expectation&ndash;Maximization algorithm.</p>
<p><b>Results:</b> We demonstrate the iCluster algorithm using two examples of joint analysis of copy number and gene expression data, one from breast cancer and one from lung cancer. In both cases, we identified subtypes characterized by concordant DNA copy number changes and gene expression as well as unique profiles specific to one or the other in a completely automated fashion. In addition, the algorithm discovers potentially novel subtypes by combining weak yet consistent alteration patterns across data types.</p>
<p><b>Availability:</b> R code to implement iCluster can be downloaded at <inter-ref locator="http://www.mskcc.org/mskcc/html/85130.cfm" locator-type="url">http://www.mskcc.org/mskcc/html/85130.cfm</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="shenr@mskcc.org" locator-type="email">shenr@mskcc.org</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp543/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Shen, R., Olshen, A. B., Ladanyi, M.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp543</dc:identifier>
<dc:title><![CDATA[Integrative clustering of multiple genomic data types using a joint latent variable model with application to breast and lung cancer subtype analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2912</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2906</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2913?rss=1">
<title><![CDATA[Predicting homologous signaling pathways using machine learning]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2913?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> In general, each cell signaling pathway involves many proteins, each with one or more specific roles. As they are essential components of cell activity, it is important to understand how these proteins work&mdash;and in particular, to determine which of the species' proteins participate in each role. Experimentally determining this mapping of proteins to roles is difficult and time consuming. Fortunately, many pathways are similar across species, so we may be able to use known pathway information of one species to understand the corresponding pathway of another.</p>
<p><b>Results:</b> We present an automatic approach, Predict Signaling Pathway (PSP), which uses the signaling pathways in well-studied species to predict the roles of proteins in less-studied species. We use a machine learning approach to create a predictor that achieves a generalization <I>F</I>-measure of 78.2% when applied to 11 different pathways across 14 different species. We also show our approach is very effective in predicting the pathways that have not yet been experimentally studied completely.</p>
<p><b>Availability:</b> The list of predicted proteins for all pathways over all considered species is available at <inter-ref locator="http://www.cs.ualberta.ca/~bioinfo/signaling" locator-type="url">http://www.cs.ualberta.ca/~bioinfo/signaling</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="bioinfo@cs.ualberta.ca" locator-type="email">bioinfo@cs.ualberta.ca</inter-ref>; <inter-ref locator="duane@cs.ualberta.ca" locator-type="email">duane@cs.ualberta.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bostan, B., Greiner, R., Szafron, D., Lu, P.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp532</dc:identifier>
<dc:title><![CDATA[Predicting homologous signaling pathways using machine learning]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2920</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2913</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2921?rss=1">
<title><![CDATA[Understanding hydrogen-bond patterns in proteins using network motifs]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2921?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Protein structures can be viewed as networks of contacts (edges) between amino-acid residues (nodes). Here we dissect proteins into sub-graphs consisting of six nodes and their corresponding edges, with an edge being either a backbone hydrogen bond (H-bond) or a covalent interaction. Six thousand three hundred and twenty-two such sub-graphs were found in a large non-redundant dataset of high-resolution structures, from which 35 occur much more frequently than in a random model. Many of these significant sub-graphs (also called network motifs) correspond to sub-structures of &alpha; helices and &beta;-sheets, as expected. However, others correspond to more exotic sub-structures such as 3<SUB>10</SUB> helix, Schellman motif and motifs that were not defined previously. This topological characterization of patterns is very useful for producing a detailed differences map to compare protein structures. Here we analyzed in details the differences between NMR, molecular dynamics (MD) simulations and X-ray structures for Lysozyme, SH3 and the lambda repressor. In these cases, the same structures solved by NMR and simulated by MD showed small but consistent differences in their motif composition from the crystal structures, despite a very small root mean square deviation (RMSD) between them. This may be due to differences in the pair-wise energy functions used and the dynamic nature of these proteins.</p>
<p><b>Availability:</b> A web-based tool to calculate network motifs is available at <inter-ref locator="http://bioinfo.weizmann.ac.il/protmot/" locator-type="url">http://bioinfo.weizmann.ac.il/protmot/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="gideon.schreiber@weizmann.ac.il" locator-type="email">gideon.schreiber@weizmann.ac.il</inter-ref>; <inter-ref locator="koby.levy@weizmann.ac.il" locator-type="email">koby.levy@weizmann.ac.il</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/541/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Rahat, O., Alon, U., Levy, Y., Schreiber, G.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp541</dc:identifier>
<dc:title><![CDATA[Understanding hydrogen-bond patterns in proteins using network motifs]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2928</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2921</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2929?rss=1">
<title><![CDATA[A boosting approach to structure learning of graphs with and without prior knowledge]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2929?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Identifying the network structure through which genes and their products interact can help to elucidate normal cell physiology as well as the genetic architecture of pathological phenotypes. Recently, a number of gene network inference tools have appeared based on Gaussian graphical model representations. Following this, we introduce a novel Boosting approach to learn the structure of a high-dimensional Gaussian graphical model motivated by the applications in genomics. A particular emphasis is paid to the inclusion of partial prior knowledge on the structure of the graph. With the increasing availability of pathway information and large-scale gene expression datasets, we believe that conditioning on prior knowledge will be an important aspect in raising the statistical power of structural learning algorithms to infer true conditional dependencies.</p>
<p><b>Results:</b> Our Boosting approach, termed BoostiGraph, is conceptually and algorithmically simple. It complements recent work on the network inference problem based on Lasso-type approaches. BoostiGraph is computationally cheap and is applicable to very high-dimensional graphs. For example, on graphs of order 5000 nodes, it is able to map out paths for the conditional independence structure in few minutes. Using computer simulations, we investigate the ability of our method with and without prior information to infer Gaussian graphical models from artificial as well as actual microarray datasets. The experimental results demonstrate that, using our method, it is possible to recover the true network topology with relatively high accuracy.</p>
<p><b>Availability:</b> This method and all other associated files are freely available from <inter-ref locator="http://www.stats.ox.ac.uk/~anjum/" locator-type="url">http://www.stats.ox.ac.uk/~anjum/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="s.anjum@har.mrc.ac.uk" locator-type="email">s.anjum@har.mrc.ac.uk</inter-ref>; <inter-ref locator="cholmes@stats.ox.ac.uk" locator-type="email">cholmes@stats.ox.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/485/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Anjum, S., Doucet, A., Holmes, C. C.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp485</dc:identifier>
<dc:title><![CDATA[A boosting approach to structure learning of graphs with and without prior knowledge]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2936</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2929</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2937?rss=1">
<title><![CDATA[Learning gene regulatory networks from gene expression measurements using non-parametric molecular kinetics]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2937?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Regulation of gene expression is fundamental to the operation of a cell. Revealing the structure and dynamics of a gene regulatory network (GRN) is of great interest and represents a considerably challenging computational problem. The GRN estimation problem is complicated by the fact that the number of gene expression measurements is typically extremely small when compared with the dimension of the biological system. Further, because the gene regulation process is intrinsically complex, commonly used parametric models can provide too simple a description of the underlying phenomena and, thus, can be unreliable. In this article, we propose a novel methodology for the inference of GRNs from time-series and steady-state gene expression measurements. The presented framework is based on the use of Bayesian analysis with ordinary differential equations (ODEs) and non-parametric Gaussian process modeling for the transcriptional-level regulation.</p>
<p><b>Results:</b> The performance of the proposed structure inference method is evaluated using a recently published <I>in vivo</I> dataset. By comparing the obtained results with those of existing ODE- and Bayesian-based inference methods we demonstrate that the proposed method provides more accurate network structure learning. The predictive capabilities of the method are examined by splitting the dataset into a training set and a test set and by predicting the test set based on the training set.</p>
<p><b>Availability:</b> A MATLAB implementation of the method will be available from <inter-ref locator="http://www.cs.tut.fi/~aijo2/gp" locator-type="url">http://www.cs.tut.fi/~aijo2/gp</inter-ref> upon publication.</p>
<p><b>Contact:</b> <inter-ref locator="harri.lahdesmaki@tut.fi" locator-type="email">harri.lahdesmaki@tut.fi</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/511/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Aijo, T., Lahdesmaki, H.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp511</dc:identifier>
<dc:title><![CDATA[Learning gene regulatory networks from gene expression measurements using non-parametric molecular kinetics]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2944</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2937</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2945?rss=1">
<title><![CDATA[Identification of genes involved in the same pathways using a Hidden Markov Model-based approach]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2945?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The sequencing of whole genomes from various species has provided us with a wealth of genetic information. To make use of the vast amounts of data available today it is necessary to devise computer-based analysis techniques.</p>
<p><b>Results:</b> We propose a Hidden Markov Model (HMM) based algorithm to detect groups of genes functionally similar to a set of input genes from microarray expression data. A subset of experiments from a microarray is selected based on a set of related input genes. HMMs are trained from the input genes and a group of random gene input sets to provide significance estimates. Every gene in the microarray is scored using all HMMs and significant matches with the input genes are retained. We ran this algorithm on the life cycle of Drosophila microarray data set with KEGG pathways for cell cycle and translation factors as input data sets. Results show high functional similarity in resulting gene sets, increasing our biological insight into gene pathways and KEGG annotations. The algorithm performed very well compared to the Signature Algorithm and a purely correlation-based approach.</p>
<p><b>Availability:</b> Java source codes and data sets are available at <inter-ref locator="http://www.ittc.ku.edu/~xwchen/software.htm" locator-type="url">http://www.ittc.ku.edu/~xwchen/software.htm</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="xwchen@ittc.ku.edu" locator-type="email">xwchen@ittc.ku.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/521/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Senf, A., Chen, X.-w.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp521</dc:identifier>
<dc:title><![CDATA[Identification of genes involved in the same pathways using a Hidden Markov Model-based approach]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2954</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2945</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2955?rss=1">
<title><![CDATA[Mining gene functional networks to improve mass-spectrometry-based protein identification]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2955?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> High-throughput protein identification experiments based on tandem mass spectrometry (MS/MS) often suffer from low sensitivity and low-confidence protein identifications. In a typical shotgun proteomics experiment, it is assumed that all proteins are equally likely to be present. However, there is often other evidence to suggest that a protein is present and confidence in individual protein identification can be updated accordingly.</p>
<p><b>Results:</b> We develop a method that analyzes MS/MS experiments in the larger context of the biological processes active in a cell. Our method, MSNet, improves protein identification in shotgun proteomics experiments by considering information on functional associations from a gene functional network. MSNet substantially increases the number of proteins identified in the sample at a given error rate. We identify 8&ndash;29% more proteins than the original MS experiment when applied to yeast grown in different experimental conditions analyzed on different MS/MS instruments, and 37% more proteins in a human sample. We validate up to 94% of our identifications in yeast by presence in ground-truth reference sets.</p>
<p><b>Availability and Implementation:</b> Software and datasets are available at <inter-ref locator="http://aug.csres.utexas.edu/msnet" locator-type="url">http://aug.csres.utexas.edu/msnet</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="miranker@cs.utexas.edu" locator-type="email">miranker@cs.utexas.edu</inter-ref>, <inter-ref locator="marcotte@icmb.utexas.edu" locator-type="email">marcotte@icmb.utexas.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/461/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Ramakrishnan, S. R., Vogel, C., Kwon, T., Penalva, L. O., Marcotte, E. M., Miranker, D. P.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp461</dc:identifier>
<dc:title><![CDATA[Mining gene functional networks to improve mass-spectrometry-based protein identification]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2961</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2955</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2962?rss=1">
<title><![CDATA[Simultaneous inference of biological networks of multiple species from genome-wide data and evolutionary information: a semi-supervised approach]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2962?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The existing supervised methods for biological network inference work on each of the networks individually based only on intra-species information such as gene expression data. We believe that it will be more effective to use genomic data and cross-species evolutionary information from different species simultaneously, rather than to use the genomic data alone.</p>
<p><b>Results:</b> We created a new semi-supervised learning method called <I>Link Propagation</I> for inferring biological networks of multiple species based on genome-wide data and evolutionary information. The new method was applied to simultaneous reconstruction of three metabolic networks of <I>Caenorhabditis elegans</I>, <I>Helicobacter pylori</I> and <I>Saccharomyces cerevisiae</I>, based on gene expression similarities and amino acid sequence similarities. The experimental results proved that the new simultaneous network inference method consistently improves the predictive performance over the individual network inferences, and it also outperforms in accuracy and speed other established methods such as the pairwise support vector machine.</p>
<p><b>Availability:</b> The software and data are available at <inter-ref locator="http://cbio.ensmp.fr/~yyamanishi/LinkPropagation/" locator-type="url">http://cbio.ensmp.fr/~yyamanishi/LinkPropagation/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="kashima@mist.i.u-tokyo.ac.jp" locator-type="email">kashima@mist.i.u-tokyo.ac.jp</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/494/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Kashima, H., Yamanishi, Y., Kato, T., Sugiyama, M., Tsuda, K.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp494</dc:identifier>
<dc:title><![CDATA[Simultaneous inference of biological networks of multiple species from genome-wide data and evolutionary information: a semi-supervised approach]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2968</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2962</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2969?rss=1">
<title><![CDATA[Improving peptide identification with single-stage mass spectrum peaks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2969?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Database searching is the major peptide identification method in shotgun proteomics. It searches tandem mass spectrometry (MS/MS) spectra against a protein database to identify target peptides. The success of such a database searching method relies on a scoring algorithm that can evaluate the quality of peptide-spectrum matches (PSMs) accurately. However, current scoring algorithms frequently generate inaccurate assignments due to variations and noise in the MS/MS spectra. To address this issue, we would like to improve peptide identification by using additional information from other data sources.</p>
<p><b>Results:</b> Single-stage MS data is complementary to MS/MS data in the sense that it provides broader mass coverage but less sequence information. In this article, we show that single-stage MS data can be used to re-rank PSMs. The proposed method explores a linear combination of scores between MS and MS/MS data to perform re-ranking. Experimental results on real data show that such a re-ranking strategy improves the identification performance significantly.</p>
<p><b>Availability:</b> <inter-ref locator="http://bioinformatics.ust.hk/ReRankPSMwMS1.rar" locator-type="url">http://bioinformatics.ust.hk/ReRankPSMwMS1.rar</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="eezyhe@ust.hk" locator-type="email">eezyhe@ust.hk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/501/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[He, Z., Yu, W.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp501</dc:identifier>
<dc:title><![CDATA[Improving peptide identification with single-stage mass spectrum peaks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2974</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2969</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2975?rss=1">
<title><![CDATA[Metabolite and reaction inference based on enzyme specificities]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2975?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Many enzymes are not absolutely specific, or even promiscuous: they can catalyze transformations of more compounds than the traditional ones as listed in, e.g. KEGG. This information is currently only available in databases, such as the BRENDA enzyme activity database. In this article, we propose to model enzyme aspecificity by predicting whether an input compound is likely to be transformed by a certain enzyme. Such a predictor has many applications, for example, to complete reconstructed metabolic networks, to aid in metabolic engineering or to help identify unknown peaks in mass spectra.</p>
<p><b>Results:</b> We have developed a system for metabolite and reaction inference based on enzyme specificities (<I>MaRIboES</I>). It employs structural and stereochemistry similarity measures and molecular fingerprints to generalize enzymatic reactions based on data available in BRENDA. Leave-one-out cross-validation shows that 80% of known reactions are predicted well. Application to the yeast glycolytic and pentose phosphate pathways predicts a large number of known and new reactions, often leading to the formation of novel compounds, as well as a number of interesting bypasses and cross-links.</p>
<p><b>Availability:</b> M<scp>atlab</scp> and C++ code is freely available at <inter-ref locator="https://gforge.nbic.nl/projects/mariboes/" locator-type="url">https://gforge.nbic.nl/projects/mariboes/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="d.deridder@tudelft.nl" locator-type="email">d.deridder@tudelft.nl</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/507/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[de Groot, M. J. L., van Berlo, R. J. P., van Winden, W. A., Verheijen, P. J. T., Reinders, M. J. T., de Ridder, D.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp507</dc:identifier>
<dc:title><![CDATA[Metabolite and reaction inference based on enzyme specificities]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2982</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2975</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2983?rss=1">
<title><![CDATA[A dictionary to identify small molecules and drugs in free text]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2983?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> From the scientific community, a lot of effort has been spent on the correct identification of gene and protein names in text, while less effort has been spent on the correct identification of chemical names. Dictionary-based term identification has the power to recognize the diverse representation of chemical information in the literature and map the chemicals to their database identifiers.</p>
<p><b>Results:</b> We developed a dictionary for the identification of small molecules and drugs in text, combining information from UMLS, MeSH, ChEBI, DrugBank, KEGG, HMDB and ChemIDplus. Rule-based term filtering, manual check of highly frequent terms and disambiguation rules were applied. We tested the combined dictionary and the dictionaries derived from the individual resources on an annotated corpus, and conclude the following: (i) each of the different processing steps increases precision with a minor loss of recall; (ii) the overall performance of the combined dictionary is acceptable (precision 0.67, recall 0.40 (0.80 for trivial names)); (iii) the combined dictionary performed better than the dictionary in the chemical recognizer OSCAR3; (iv) the performance of a dictionary based on ChemIDplus alone is comparable to the performance of the combined dictionary.</p>
<p><b>Availability:</b> The combined dictionary is freely available as an XML file in Simple Knowledge Organization System format on the web site <inter-ref locator="http://www.biosemantics.org/chemlist" locator-type="url">http://www.biosemantics.org/chemlist</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="k.hettne@erasmusmc.nl" locator-type="email">k.hettne@erasmusmc.nl</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/535/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Hettne, K. M., Stierum, R. H., Schuemie, M. J., Hendriksen, P. J. M., Schijvenaars, B. J. A., Mulligen, E. M. v., Kleinjans, J., Kors, J. A.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp535</dc:identifier>
<dc:title><![CDATA[A dictionary to identify small molecules and drugs in free text]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2991</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2983</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2992?rss=1">
<title><![CDATA[Characterization of 1H NMR spectroscopic data and the generation of synthetic validation sets]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/2992?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Common contemporary practice within the nuclear magnetic resonance (NMR) metabolomics community is to evaluate and validate novel algorithms on empirical data or simplified simulated data. Empirical data captures the complex characteristics of experimental data, but the optimal or most correct analysis is unknown <I>a priori</I>; therefore, researchers are forced to rely on indirect performance metrics, which are of limited value. In order to achieve fair and complete analysis of competing techniques more exacting metrics are required. Thus, metabolomics researchers often evaluate their algorithms on simplified simulated data with a known answer. Unfortunately, the conclusions obtained on simulated data are only of value if the data sets are complex enough for results to generalize to true experimental data. Ideally, synthetic data should be indistinguishable from empirical data, yet retain a known best analysis.</p>
<p><b>Results:</b> We have developed a technique for creating realistic synthetic metabolomics validation sets based on NMR spectroscopic data. The validation sets are developed by characterizing the salient distributions in sets of empirical spectroscopic data. Using this technique, several validation sets are constructed with a variety of characteristics present in &lsquo;real&rsquo; data. A case study is then presented to compare the relative accuracy of several alignment algorithms using the increased precision afforded by these synthetic data sets.</p>
<p><b>Availability:</b> These data sets are available for download at <inter-ref locator="http://birg.cs.wright.edu/nmr_synthetic_data_sets" locator-type="url">http://birg.cs.wright.edu/nmr_synthetic_data_sets</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="travis.doom@wright.edu" locator-type="email">travis.doom@wright.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/540/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Anderson, P. E., Raymer, M. L., Kelly, B. J., Reo, N. V., DelRaso, N. J., Doom, T. E.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp540</dc:identifier>
<dc:title><![CDATA[Characterization of 1H NMR spectroscopic data and the generation of synthetic validation sets]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3000</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>2992</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3001?rss=1">
<title><![CDATA[Flynet: a genomic resource for Drosophila melanogaster transcriptional regulatory networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3001?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The highly coordinated expression of thousands of genes in an organism is regulated by the concerted action of transcription factors, chromatin proteins and epigenetic mechanisms. High-throughput experimental data for genome wide <I>in vivo</I> protein&ndash;DNA interactions and epigenetic marks are becoming available from large projects, such as the model organism ENCyclopedia Of DNA Elements (modENCODE) and from individual labs. Dissemination and visualization of these datasets in an explorable form is an important challenge.</p>
<p><b>Results:</b> To support research on <I>Drosophila melanogaster</I> transcription regulation and make the genome wide <I>in vivo</I> protein&ndash;DNA interactions data available to the scientific community as a whole, we have developed a system called Flynet. Currently, Flynet contains 101 datasets for 38 transcription factors and chromatin regulator proteins in different experimental conditions. These factors exhibit different types of binding profiles ranging from sharp localized peaks to broad binding regions. The protein&ndash;DNA interaction data in Flynet was obtained from the analysis of chromatin immunoprecipitation experiments on one color and two color genomic tiling arrays as well as chromatin immunoprecipitation followed by massively parallel sequencing. A web-based interface, integrated with an AJAX based genome browser, has been built for queries and presenting analysis results. Flynet also makes available the <I>cis</I>-regulatory modules reported in literature, known and <I>de novo</I> identified sequence motifs across the genome, and other resources to study gene regulation.</p>
<p><b>Contact:</b> <inter-ref locator="grossman@uic.edu" locator-type="email">grossman@uic.edu</inter-ref></p>
<p><b>Availability:</b> Flynet is available at <inter-ref locator="https://www.cistrack.org/flynet/" locator-type="url">https://www.cistrack.org/flynet/</inter-ref>.</p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/469/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tian, F., Shah, P. K., Liu, X., Negre, N., Chen, J., Karpenko, O., White, K. P., Grossman, R. L.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp469</dc:identifier>
<dc:title><![CDATA[Flynet: a genomic resource for Drosophila melanogaster transcriptional regulatory networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3004</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3001</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3005?rss=1">
<title><![CDATA[Mobyle: a new full web bioinformatics framework]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3005?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> For the biologist, running bioinformatics analyses involves a time-consuming management of data and tools. Users need support to organize their work, retrieve parameters and reproduce their analyses. They also need to be able to combine their analytic tools using a safe data flow software mechanism. Finally, given that scientific tools can be difficult to install, it is particularly helpful for biologists to be able to use these tools through a web user interface. However, providing a web interface for a set of tools raises the problem that a single web portal cannot offer all the existing and possible services: it is the user, again, who has to cope with data copy among a number of different services. A framework enabling portal administrators to build a network of cooperating services would therefore clearly be beneficial.</p>
<p><b>Results:</b> We have designed a system, Mobyle, to provide a flexible and usable Web environment for defining and running bioinformatics analyses. It embeds simple yet powerful data management features that allow the user to reproduce analyses and to combine tools using a hierarchical typing system. Mobyle offers invocation of services distributed over remote Mobyle servers, thus enabling a federated network of curated bioinformatics portals without the user having to learn complex concepts or to install sophisticated software. While being focused on the end user, the Mobyle system also addresses the need, for the bioinformatician, to automate remote services execution: PlayMOBY is a companion tool that automates the publication of BioMOBY web services, using Mobyle program definitions.</p>
<p><b>Availability:</b> The Mobyle system is distributed under the terms of the GNU GPLv2 on the project web site (<inter-ref locator="http://bioweb2.pasteur.fr/projects/mobyle/" locator-type="url">http://bioweb2.pasteur.fr/projects/mobyle/</inter-ref>). It is already deployed on three servers: <inter-ref locator="http://mobyle.pasteur.fr" locator-type="url">http://mobyle.pasteur.fr</inter-ref>, <inter-ref locator="http://mobyle.rpbs.univ-paris-diderot.fr" locator-type="url">http://mobyle.rpbs.univ-paris-diderot.fr</inter-ref> and <inter-ref locator="http://lipm-bioinfo.toulouse.inra.fr/Mobyle" locator-type="url">http://lipm-bioinfo.toulouse.inra.fr/Mobyle</inter-ref>. The PlayMOBY companion is distributed under the terms of the CeCILL license, and is available at <inter-ref locator="http://lipm-bioinfo.toulouse.inra.fr/biomoby/PlayMOBY/" locator-type="url">http://lipm-bioinfo.toulouse.inra.fr/biomoby/PlayMOBY/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="mobyle-support@pasteur.fr" locator-type="email">mobyle-support@pasteur.fr</inter-ref>; <inter-ref locator="mobyle-support@rpbs.univ-paris-diderot.fr" locator-type="email">mobyle-support@rpbs.univ-paris-diderot.fr</inter-ref>; <inter-ref locator="letondal@pasteur.fr" locator-type="email">letondal@pasteur.fr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/493/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Neron, B., Menager, H., Maufrais, C., Joly, N., Maupetit, J., Letort, S., Carrere, S., Tuffery, P., Letondal, C.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp493</dc:identifier>
<dc:title><![CDATA[Mobyle: a new full web bioinformatics framework]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3011</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3005</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3012?rss=1">
<title><![CDATA[A method for visualizing CellML models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3012?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The Physiome Project was established in 1997 to develop tools to facilitate international collaboration in the physiological sciences and the sharing of biological models and experimental data. The CellML language was developed to represent and exchange mathematical models of biological processes. CellML models can be very complicated, making it difficult to interpret the underlying physical and biological concepts and relationships captured/described in the mathematical model.</p>
<p><b>Results:</b> To address this issue a set of ontologies was developed to explicitly annotate the biophysical concepts represented in the CellML models. This article presents a framework that combines a visual language, together with CellML ontologies, to support the visualization of the underlying physical and biological concepts described by the mathematical model and also their relationships with the CellML model. Automated CellML model visualization assists in the interpretation of model concepts and facilitates model communication and exchange between different communities.</p>
<p><b>Contact:</b> <inter-ref locator="sarala.dissanayake@auckland.ac.nz" locator-type="email">sarala.dissanayake@auckland.ac.nz</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/495/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wimalaratne, S. M., Halstead, M. D. B., Lloyd, C. M., Cooling, M. T., Crampin, E. J., Nielsen, P. F.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp495</dc:identifier>
<dc:title><![CDATA[A method for visualizing CellML models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3019</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3012</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3020?rss=1">
<title><![CDATA[Comparative analysis and unification of domain-domain interaction networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3020?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Certain protein domains are known to preferentially interact with other domains. Several approaches have been proposed to predict domain&ndash;domain interactions, and over nine datasets are available. Our aim is to analyse the coverage and quality of the existing resources, as well as the extent of their overlap. With this knowledge, we have the opportunity to merge individual domain interaction networks to construct a comprehensive and reliable database.</p>
<p><b>Results:</b> In this article we introduce a new approach towards comparing domain&ndash;domain interaction networks. This approach is used to compare nine predicted domain and protein interaction networks. The networks were used to generate a database of unified domain interactions, UniDomInt. Each interaction in the dataset is scored according to the benchmarked reliability of the sources. The performance of UniDomInt is an improvement compared to the underlying source networks and to another composite resource, <I>Domine</I>.</p>
<p><b>Availability:</b> <inter-ref locator="http://sonnhammer.sbc.su.se/download/UniDomInt/" locator-type="url">http://sonnhammer.sbc.su.se/download/UniDomInt/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="Erik.Sonnhammer@sbc.su.se" locator-type="email">Erik.Sonnhammer@sbc.su.se</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bjorkholm, P., Sonnhammer, E. L. L.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp522</dc:identifier>
<dc:title><![CDATA[Comparative analysis and unification of domain-domain interaction networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3025</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3020</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3026?rss=1">
<title><![CDATA[Saint: a lightweight integration environment for model annotation]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3026?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> Saint is a web application which provides a lightweight annotation integration environment for quantitative biological models. The system enables modellers to rapidly mark up models with biological information derived from a range of data sources.</p>
<p><b>Availability and Implementation:</b> Saint is freely available for use on the web at <inter-ref locator="http://www.cisban.ac.uk/saint" locator-type="url">http://www.cisban.ac.uk/saint</inter-ref>. The web application is implemented in Google Web Toolkit and Tomcat, with all major browsers supported. The Java source code is freely available for download at <inter-ref locator="http://saint-annotate.sourceforge.net" locator-type="url">http://saint-annotate.sourceforge.net</inter-ref>. The Saint web server requires an installation of libSBML and has been tested on Linux (32-bit Ubuntu 8.10 and 9.04).</p>
<p><b>Contact:</b> <inter-ref locator="helpdesk@cisban.ac.uk" locator-type="email">helpdesk@cisban.ac.uk</inter-ref>; <inter-ref locator="a.l.lister@ncl.ac.uk" locator-type="email">a.l.lister@ncl.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/523/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Lister, A. L., Pocock, M., Taschuk, M., Wipat, A.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp523</dc:identifier>
<dc:title><![CDATA[Saint: a lightweight integration environment for model annotation]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3027</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3026</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3028?rss=1">
<title><![CDATA[CellClassifier: supervised learning of cellular phenotypes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3028?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> CellClassifier is a tool for classifying single-cell phenotypes in microscope images. It includes several unique and user-friendly features for classification using multiclass support vector machines.</p>
<p><b>Availability:</b> Source code, user manual and SaveObjectSegmentation CellProfiler module available for download at <inter-ref locator="www.cellclassifier.ethz.ch" locator-type="url">www.cellclassifier.ethz.ch</inter-ref> under the GPL license (implemented in Matlab).</p>
<p><b>Contact:</b> <inter-ref locator="pelkmans@imsb.biol.ethz.ch" locator-type="email">pelkmans@imsb.biol.ethz.ch</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/524/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Ramo, P., Sacher, R., Snijder, B., Begemann, B., Pelkmans, L.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp524</dc:identifier>
<dc:title><![CDATA[CellClassifier: supervised learning of cellular phenotypes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3030</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3028</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3031?rss=1">
<title><![CDATA[PubMed-EX: a web browser extension to enhance PubMed search with text mining features]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3031?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> PubMed-EX is a browser extension that marks up PubMed search results with additional text-mining information. PubMed-EX's page mark-up, which includes section categorization and gene/disease and relation mark-up, can help researchers to quickly focus on key terms and provide additional information on them. All text processing is performed server-side, freeing up user resources.</p>
<p><b>Availability:</b> PubMed-EX is freely available at <inter-ref locator="http://bws.iis.sinica.edu.tw/PubMed-EX" locator-type="url">http://bws.iis.sinica.edu.tw/PubMed-EX</inter-ref> and <inter-ref locator="http://iisr.cse.yzu.edu.tw:8000/PubMed-EX/" locator-type="url">http://iisr.cse.yzu.edu.tw:8000/PubMed-EX/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="thtsai@saturn.yzu.edu.tw" locator-type="email">thtsai@saturn.yzu.edu.tw</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/475/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tsai, R. T.-H., Dai, H.-J., Lai, P.-T., Huang, C.-H.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp475</dc:identifier>
<dc:title><![CDATA[PubMed-EX: a web browser extension to enhance PubMed search with text mining features]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3032</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3031</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3033?rss=1">
<title><![CDATA[digeR: a graphical user interface R package for analyzing 2D-DIGE data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3033?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> 2D Difference In-Gel Electrophoresis (2D-DIGE) or 2D gel technology is being used as a routine proteomics technique for biomarker discovery. Analyzing such high-dimensional data requires multivariate analysis techniques to be applied. In addition, protein post-translational modification (PTM) information from the 2D gel data is usually overlooked. We report on an R package, digeR, with an easy to use graphical user interface for analyzing 2D-DIGE (2D gel) data. It provides a tool for visually looking for potential PTM changes from different biological states and supports biomarker discovery through multivariate analysis techniques.</p>
<p><b>Availability:</b> digeR package is freely available from the CRAN: <inter-ref locator="http://cran.r-project.org/web/packages/digeR/index.html" locator-type="url">http://cran.r-project.org/web/packages/digeR/index.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="yue.fan@ucd.ie" locator-type="email">yue.fan@ucd.ie</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/514/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Fan, Y., Murphy, T. B., Watson, R. W. G.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp514</dc:identifier>
<dc:title><![CDATA[digeR: a graphical user interface R package for analyzing 2D-DIGE data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3034</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3033</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3035?rss=1">
<title><![CDATA[UniMaP: finding unique mass and peptide signatures in the human proteome]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3035?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The uniqueness of a measured molecular mass or peptide sequence plays a very important role in the fields of protein identification and peptide/protein-biomarker investigation. We present a publicly available web application that offers information concerning the uniqueness of one or more molecular masses and one or more peptide sequences in the human proteome. When a sequence is found to be unique in humans, the application is able to search across all species querying whether this sequence is unique, not only in humans but also in other species found in the Swiss-Prot Database. The application is also able to search for unique protein fragments derived computationally from enzymatic digestion driven by certain enzymes. Furthermore, the application can list all the unique masses and peptides of a given protein. Through this application, researchers are able to find unique tags, either on a molecular mass level or on a sequence level. These unique tags are remarkably important in research related to protein identification or biomarker discovery and measurements.</p>
<p><b>Availability:</b> UniMaP web-application is available at <inter-ref locator="http://bioserver-1.bioacademy.gr/Bioserver/UniMaP/" locator-type="url">http://bioserver-1.bioacademy.gr/Bioserver/UniMaP/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="gspyrou@bioacademy.gr" locator-type="email">gspyrou@bioacademy.gr</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/516/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Alexandridou, A., Tsangaris, G. Th., Vougas, K., Nikita, K., Spyrou, G.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp516</dc:identifier>
<dc:title><![CDATA[UniMaP: finding unique mass and peptide signatures in the human proteome]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3037</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3035</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3038?rss=1">
<title><![CDATA[Identifying related journals through log analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3038?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> With the explosion of biomedical literature and the evolution of online and open access, scientists are reading more articles from a wider variety of journals. Thus, the list of core journals relevant to their research may be less obvious and may often change over time. To help researchers quickly identify appropriate journals to read and publish in, we developed a web application for finding related journals based on the analysis of PubMed log data.</p>
<p><b>Availability:</b> <inter-ref locator="http://www.ncbi.nlm.nih.gov/IRET/Journals" locator-type="url">http://www.ncbi.nlm.nih.gov/IRET/Journals</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="luzh@ncbi.nlm.nih.gov" locator-type="email">luzh@ncbi.nlm.nih.gov</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/529/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Lu, Z., Xie, N., Wilbur, W. J.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp529</dc:identifier>
<dc:title><![CDATA[Identifying related journals through log analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3039</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3038</prism:startingPage>
<prism:section>DATA AND TEXT MINING</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3040?rss=1">
<title><![CDATA[CMap 1.01: a comparative mapping application for the Internet]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3040?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> CMap is a web-based tool for displaying and comparing maps of any type and from any species. A user can compare an unlimited number of maps, view pair-wise comparisons of known correspondences, and search for maps or for features by name, species, type and accession. CMap is freely available, can run on a variety of database engines and uses only free and open software components.</p>
<p><b>Availability:</b> <inter-ref locator="http://www.gmod.org/cmap" locator-type="url">http://www.gmod.org/cmap</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="kclark@cshl.edu" locator-type="email">kclark@cshl.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Youens-Clark, K., Faga, B., Yap, I. V., Stein, L., Ware, D.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:49 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp458</dc:identifier>
<dc:title><![CDATA[CMap 1.01: a comparative mapping application for the Internet]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3042</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3040</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3043?rss=1">
<title><![CDATA[Next generation software for functional trend analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3043?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> FuncAssociate is a web application that discovers properties enriched in lists of genes or proteins that emerge from large-scale experimentation. Here we describe an updated application with a new interface and several new features. For example, enrichment analysis can now be performed within multiple gene- and protein-naming systems. This feature avoids potentially serious translation artifacts to which other enrichment analysis strategies are subject.</p>
<p><b>Availability:</b> The FuncAssociate web application is freely available to all users at <inter-ref locator="http://llama.med.harvard.edu/funcassociate" locator-type="url">http://llama.med.harvard.edu/funcassociate</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="fritz_roth@hms.harvard.edu" locator-type="email">fritz_roth@hms.harvard.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Berriz, G. F., Beaver, J. E., Cenik, C., Tasan, M., Roth, F. P.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp498</dc:identifier>
<dc:title><![CDATA[Next generation software for functional trend analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3044</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3043</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3045?rss=1">
<title><![CDATA[QuickGO: a web-based tool for Gene Ontology searching]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3045?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> QuickGO is a web-based tool that allows easy browsing of the Gene Ontology (GO) and all associated electronic and manual GO annotations provided by the GO Consortium annotation groups. QuickGO has been a popular GO browser for many years, but after a recent redevelopment it is now able to offer a greater range of facilities including bulk downloads of GO annotation data which can be extensively filtered by a range of different parameters and GO slim set generation.</p>
<p><b>Availability and Implementation:</b> QuickGO has been implemented in JavaScript, Ajax and HTML, with all major browsers supported. It can be queried online at <inter-ref locator="http://www.ebi.ac.uk/QuickGO" locator-type="url">http://www.ebi.ac.uk/QuickGO</inter-ref>. The software for QuickGO is freely available under the Apache 2 licence and can be downloaded from <inter-ref locator="http://www.ebi.ac.uk/QuickGO/installation.html" locator-type="url">http://www.ebi.ac.uk/QuickGO/installation.html</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="goa@ebi.ac.uk" locator-type="email">goa@ebi.ac.uk</inter-ref>; <inter-ref locator="dbinns@ebi.ac.uk" locator-type="email">dbinns@ebi.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Binns, D., Dimmer, E., Huntley, R., Barrell, D., O'Donovan, C., Apweiler, R.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp536</dc:identifier>
<dc:title><![CDATA[QuickGO: a web-based tool for Gene Ontology searching]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3046</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3045</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3047?rss=1">
<title><![CDATA[Rapid detection, classification and accurate alignment of up to a million or more related protein sequences]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/22/3047?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Neuwald, A. F.]]></dc:creator>
<dc:date>Wed, 04 Nov 2009 05:45:50 PST</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp573</dc:identifier>
<dc:title><![CDATA[Rapid detection, classification and accurate alignment of up to a million or more related protein sequences]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>22</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>3047</prism:endingPage>
<prism:publicationDate>2009-11-15</prism:publicationDate>
<prism:startingPage>3047</prism:startingPage>
<prism:section>CORRIGENDUM</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2735?rss=1">
<title><![CDATA[Efficiently finding genome-wide three-way gene interactions from transcript- and genotype-data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2735?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> We address the issue of finding a three-way gene interaction, i.e. two interacting genes in expression under the genotypes of another gene, given a dataset in which expressions and genotypes are measured at once for each individual. This issue can be a general, switching mechanism in expression of two genes, being controlled by categories of another gene, and finding this type of interaction can be a key to elucidating complex biological systems. The most suitable method for this issue is likelihood ratio test using logistic regressions, which we call <I>interaction test</I>, but a serious problem of this test is computational intractability at a genome-wide level.</p>
<p><b>Results:</b> We developed a fast method for this issue which improves the speed of interaction test by around 10 times for any size of datasets, keeping highly interacting genes with an accuracy of ~85%. We applied our method to ~3 <FONT FACE="arial,helvetica">x</FONT> 10<sup>8</sup> three-way combinations generated from a dataset on human brain samples and detected three-way gene interactions with small <I>P</I>-values. To check the reliability of our results, we first conducted permutations by which we can show that the obtained <I>P</I>-values are significantly smaller than those obtained from permuted null examples. We then used GEO (Gene Expression Omnibus) to generate gene expression datasets with binary classes to confirm the detected three-way interactions by using these datasets and interaction tests. The result showed us some datasets with significantly small <I>P</I>-values, strongly supporting the reliability of the detected three-way interactions.</p>
<p><b>Availability:</b> Software is available from <inter-ref locator="http://www.bic.kyoto-u.ac.jp/pathway/kayano/bioinfo_three-way.html" locator-type="url">http://www.bic.kyoto-u.ac.jp/pathway/kayano/bioinfo_three-way.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="kayano@kuicr.kyoto-u.ac.jp" locator-type="email">kayano@kuicr.kyoto-u.ac.jp</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp531/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Kayano, M., Takigawa, I., Shiga, M., Tsuda, K., Mamitsuka, H.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:19 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp531</dc:identifier>
<dc:title><![CDATA[Efficiently finding genome-wide three-way gene interactions from transcript- and genotype-data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2743</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2735</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2744?rss=1">
<title><![CDATA[Automated inference of molecular mechanisms of disease from amino acid substitutions]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2744?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Advances in high-throughput genotyping and next generation sequencing have generated a vast amount of human genetic variation data. Single nucleotide substitutions within protein coding regions are of particular importance owing to their potential to give rise to amino acid substitutions that affect protein structure and function which may ultimately lead to a disease state. Over the last decade, a number of computational methods have been developed to predict whether such amino acid substitutions result in an altered phenotype. Although these methods are useful in practice, and accurate for their intended purpose, they are not well suited for providing probabilistic estimates of the underlying disease mechanism.</p>
<p><b>Results:</b> We have developed a new computational model, MutPred, that is based upon protein sequence, and which models changes of structural features and functional sites between wild-type and mutant sequences. These changes, expressed as probabilities of gain or loss of structure and function, can provide insight into the specific molecular mechanism responsible for the disease state. MutPred also builds on the established SIFT method but offers improved classification accuracy with respect to human disease mutations. Given conservative thresholds on the predicted disruption of molecular function, we propose that MutPred can generate accurate and reliable hypotheses on the molecular basis of disease for ~11% of known inherited disease-causing mutations. We also note that the proportion of changes of functionally relevant residues in the sets of cancer-associated somatic mutations is higher than for the inherited lesions in the Human Gene Mutation Database which are instead predicted to be characterized by disruptions of protein structure.</p>
<p><b>Availability:</b> <inter-ref locator="http://mutdb.org/mutpred" locator-type="url">http://mutdb.org/mutpred</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="predrag@indiana.edu" locator-type="email">predrag@indiana.edu</inter-ref>; <inter-ref locator="smooney@buckinstitute.org" locator-type="email">smooney@buckinstitute.org</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Li, B., Krishnan, V. G., Mort, M. E., Xin, F., Kamati, K. K., Cooper, D. N., Mooney, S. D., Radivojac, P.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp528</dc:identifier>
<dc:title><![CDATA[Automated inference of molecular mechanisms of disease from amino acid substitutions]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2750</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2744</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2751?rss=1">
<title><![CDATA[Algorithms for optimal protein structure alignment]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2751?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Structural alignment is an important tool for understanding the evolutionary relationships between proteins. However, finding the best pairwise structural alignment is difficult, due to the infinite number of possible superpositions of two structures. Unlike the sequence alignment problem, which has a polynomial time solution, the structural alignment problem has not been even classified as solvable.</p>
<p><b>Results:</b> We study one of the most widely used measures of protein structural similarity, defined as the number of pairs of residues in two proteins that can be superimposed under a predefined distance cutoff. We prove that, for any two proteins, this measure can be optimized for all but finitely many distance cutoffs. Our method leads to a series of algorithms for optimizing other structure similarity measures, including the measures commonly used in protein structure prediction experiments. We also present a polynomial time algorithm for finding a near-optimal superposition of two proteins. Aside from having a relatively low cost, the algorithm for near-optimal solution returns a superposition of provable quality. In other words, the difference between the score of the returned superposition and the score of an optimal superposition can be explicitly computed and used to determine whether the returned superposition is, in fact, the best superposition.</p>
<p><b>Contact:</b> <inter-ref locator="poleksic@cs.uni.edu" locator-type="email">poleksic@cs.uni.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp530/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Poleksic, A.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp530</dc:identifier>
<dc:title><![CDATA[Algorithms for optimal protein structure alignment]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2756</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2751</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2757?rss=1">
<title><![CDATA[CCHMM_PROF: a HMM-based coiled-coil predictor with evolutionary information]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2757?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The widespread coiled-coil structural motif in proteins is known to mediate a variety of biological interactions. Recognizing a coiled-coil containing sequence and locating its coiled-coil domains are key steps towards the determination of the protein structure and function. Different tools are available for predicting coiled-coil domains in protein sequences, including those based on position-specific score matrices and machine learning methods.</p>
<p><b>Results:</b> In this article, we introduce a hidden Markov model (CCHMM_PROF) that exploits the information contained in multiple sequence alignments (profiles) to predict coiled-coil regions. The new method discriminates coiled-coil sequences with an accuracy of 97% and achieves a true positive rate of 79% with only 1% of false positives. Furthermore, when predicting the location of coiled-coil segments in protein sequences, the method reaches an accuracy of 80% at the residue level and a best per-segment and per-protein efficiency of 81% and 80%, respectively. The results indicate that CCHMM_PROF outperforms all the existing tools and can be adopted for large-scale genome annotation.</p>
<p><b>Availability:</b> The dataset is available at <inter-ref locator="http://www.biocomp.unibo.it/~lisa/coiled-coils" locator-type="url">http://www.biocomp.unibo.it/~lisa/coiled-coils</inter-ref>. The predictor is freely available at <inter-ref locator="http://gpcr.biocomp.unibo.it/cgi/predictors/cchmmprof/pred_cchmmprof.cgi" locator-type="url">http://gpcr.biocomp.unibo.it/cgi/predictors/cchmmprof/pred_cchmmprof.cgi</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="piero@biocomp.unibo.it" locator-type="email">piero@biocomp.unibo.it</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Bartoli, L., Fariselli, P., Krogh, A., Casadio, R.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp539</dc:identifier>
<dc:title><![CDATA[CCHMM_PROF: a HMM-based coiled-coil predictor with evolutionary information]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2763</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2757</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2764?rss=1">
<title><![CDATA[Correlating multiple SNPs and multiple disease phenotypes: penalized non-linear canonical correlation analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2764?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Canonical correlation analysis (CCA) can be used to capture the underlying genetic background of a complex disease, by associating two datasets containing information about a patient's phenotypical and genetic details. Often the genetic information is measured on a qualitative scale, consequently ordinary CCA cannot be applied to such data. Moreover, the size of the data in genetic studies can be enormous, thereby making the results difficult to interpret.</p>
<p><b>Results:</b> We developed a penalized non-linear CCA approach that can deal with qualitative data by transforming each qualitative variable into a continuous variable through optimal scaling. Additionally, sparse results were obtained by adapting soft-thresholding to this non-linear version of the CCA. By means of simulation studies, we show that our method is capable of extracting relevant variables out of high-dimensional sets. We applied our method to a genetic dataset containing 144 patients with glial cancer.</p>
<p><b>Contact:</b> <inter-ref locator="s.waaijenborg@amc.uva.nl" locator-type="email">s.waaijenborg@amc.uva.nl</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Waaijenborg, S., Zwinderman, A. H.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp491</dc:identifier>
<dc:title><![CDATA[Correlating multiple SNPs and multiple disease phenotypes: penalized non-linear canonical correlation analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2771</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2764</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2772?rss=1">
<title><![CDATA[The effects of probe binding affinity differences on gene expression measurements and how to deal with them]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2772?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> When comparing gene expression levels between species or strains using microarrays, sequence differences between the groups can cause false identification of expression differences. Our simulated dataset shows that a sequence divergence of only 1% between species can lead to falsely reported expression differences for &gt;50% of the transcripts&mdash;similar levels of effect have been reported previously in comparisons of human and chimpanzee expression. We propose a method for identifying probes that cause such false readings, using only the microarray data, so that problematic probes can be excluded from analysis. We then test the power of the method to detect sequence differences and to correct for falsely reported expression differences. Our method can detect 70% of the probes with sequence differences using human and chimpanzee data, while removing only 18% of probes with no sequence differences. Although only 70% of the probes with sequence differences are detected, the effect of removing probes on falsely reported expression differences is more dramatic: the method can remove 98% of the falsely reported expression differences from a simulated dataset. We argue that the method should be used even when sequence data are available.</p>
<p><b>Contact:</b> <inter-ref locator="lachmann@eva.mpg.de" locator-type="email">lachmann@eva.mpg.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp492/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dannemann, M., Lorenc, A., Hellmann, I., Khaitovich, P., Lachmann, M.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp492</dc:identifier>
<dc:title><![CDATA[The effects of probe binding affinity differences on gene expression measurements and how to deal with them]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2779</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2772</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2780?rss=1">
<title><![CDATA[Statistical methods for gene set co-expression analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2780?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The power of a microarray experiment derives from the identification of genes differentially regulated across biological conditions. To date, differential regulation is most often taken to mean differential expression, and a number of useful methods for identifying differentially expressed (DE) genes or gene sets are available. However, such methods are not able to identify many relevant classes of differentially regulated genes. One important example concerns differentially co-expressed (DC) genes.</p>
<p><b>Results:</b> We propose an approach, gene set co-expression analysis (GSCA), to identify DC gene sets. The GSCA approach provides a false discovery rate controlled list of interesting gene sets, does not require that genes be highly correlated in at least one biological condition and is readily applied to data from individual or multiple experiments, as we demonstrate using data from studies of lung cancer and diabetes.</p>
<p><b>Availability:</b> The GSCA approach is implemented in R and available at <inter-ref locator="www.biostat.wisc.edu/~kendzior/GSCA/" locator-type="url">www.biostat.wisc.edu/~kendzior/GSCA/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="kendzior@biostat.wisc.edu" locator-type="email">kendzior@biostat.wisc.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp502/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Choi, Y., Kendziorski, C.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp502</dc:identifier>
<dc:title><![CDATA[Statistical methods for gene set co-expression analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2786</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2780</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2787?rss=1">
<title><![CDATA[A novel algorithm for detecting differentially regulated paths based on gene set enrichment analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2787?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Deregulated signaling cascades are known to play a crucial role in many pathogenic processes, among them are tumor initiation and progression. In the recent past, modern experimental techniques that allow for measuring the amount of mRNA transcripts of almost all known human genes in a tissue or even in a single cell have opened new avenues for studying the activity of the signaling cascades and for understanding the information flow in the networks.</p>
<p><b>Results:</b> We present a novel dynamic programming algorithm for detecting deregulated signaling cascades. The so-called FiDePa (Finding Deregulated Paths) algorithm interprets differences in the expression profiles of tumor and normal tissues. It relies on the well-known gene set enrichment analysis (GSEA) and efficiently detects all paths in a given regulatory or signaling network that are significantly enriched with differentially expressed genes or proteins. Since our algorithm allows for comparing a single tumor expression profile with the control group, it facilitates the detection of specific regulatory features of a tumor that may help to optimize tumor therapy. To demonstrate the capabilities of our algorithm, we analyzed a glioma expression dataset with respect to a directed graph that combined the regulatory networks of the KEGG and TRANSPATH database. The resulting glioma consensus network that encompasses all detected deregulated paths contained many genes and pathways that are known to be key players in glioma or cancer-related pathogenic processes. Moreover, we were able to correlate clinically relevant features like necrosis or metastasis with the detected paths.</p>
<p><b>Availability:</b> C++ source code is freely available, BiNA can be downloaded from <inter-ref locator="http://www.bnplusplus.org/" locator-type="url">http://www.bnplusplus.org/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="ack@bioinf.uni-sb.de" locator-type="email">ack@bioinf.uni-sb.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp510/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Keller, A., Backes, C., Gerasch, A., Kaufmann, M., Kohlbacher, O., Meese, E., Lenhof, H.-P.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp510</dc:identifier>
<dc:title><![CDATA[A novel algorithm for detecting differentially regulated paths based on gene set enrichment analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2794</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2787</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2795?rss=1">
<title><![CDATA[Bi-correlation clustering algorithm for determining a set of co-regulated genes]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2795?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Biclustering has emerged as a powerful tool for identification of a group of co-expressed genes under a subset of experimental conditions (measurements) present in a gene expression dataset. Several biclustering algorithms have been proposed till date. In this article, we address some of the important shortcomings of these existing biclustering algorithms and propose a new correlation-based biclustering algorithm called bi-correlation clustering algorithm (BCCA).</p>
<p><b>Results:</b> BCCA has been able to produce a diverse set of biclusters of co-regulated genes over a subset of samples where all the genes in a bicluster have a similar change of expression pattern over the subset of samples. Moreover, the genes in a bicluster have common transcription factor binding sites in the corresponding promoter sequences. The presence of common transcription factors binding sites, in the corresponding promoter sequences, is an evidence that a group of genes in a bicluster are co-regulated. Biclusters determined by BCCA also show highly enriched functional categories. Using different gene expression datasets, we demonstrate strength and superiority of BCCA over some existing biclustering algorithms.</p>
<p><b>Availability:</b> The software for BCCA has been developed using C and Visual Basic languages, and can be executed on the Microsoft Windows platforms. The software may be downloaded as a zip file from <inter-ref locator="http://www.isical.ac.in/~rajat" locator-type="url">http://www.isical.ac.in/~rajat</inter-ref>. Then it needs to be installed. Two word files (included in the zip file) need to be consulted before installation and execution of the software.</p>
<p><b>Contact:</b> <inter-ref locator="rajat@isical.ac.in" locator-type="email">rajat@isical.ac.in</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp526/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Bhattacharya, A., De, R. K.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp526</dc:identifier>
<dc:title><![CDATA[Bi-correlation clustering algorithm for determining a set of co-regulated genes]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2801</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2795</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2802?rss=1">
<title><![CDATA[Multiple testing in genome-wide association studies via hidden Markov models]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2802?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Genome-wide association studies (GWAS) interrogate common genetic variation across the entire human genome in an unbiased manner and hold promise in identifying genetic variants with moderate or weak effect sizes. However, conventional testing procedures, which are mostly <I>P</I>-value based, ignore the dependency and therefore suffer from loss of efficiency. The goal of this article is to exploit the dependency information among adjacent single nucleotide polymorphisms (SNPs) to improve the screening efficiency in GWAS.</p>
<p><b>Results:</b> We propose to model the linear block dependency in the SNP data using hidden Markov models (HMMs). A compound decision&ndash;theoretic framework for testing HMM-dependent hypotheses is developed. We propose a powerful data-driven procedure [pooled local index of significance (PLIS)] that controls the false discovery rate (FDR) at the nominal level. PLIS is shown to be optimal in the sense that it has the smallest false negative rate (FNR) among all valid FDR procedures. By re-ranking significance for all SNPs with dependency considered, PLIS gains higher power than conventional <I>P</I>-value based methods. Simulation results demonstrate that PLIS dominates conventional FDR procedures in detecting disease-associated SNPs. Our method is applied to analysis of the SNP data from a GWAS of type 1 diabetes. Compared with the Benjamini&ndash;Hochberg (BH) procedure, PLIS yields more accurate results and has better reproducibility of findings.</p>
<p><b>Conclusion:</b> The genomic rankings based on our procedure are substantially different from the rankings based on the <I>P</I>-values. By integrating information from adjacent locations, the PLIS rankings benefit from the increased signal-to-noise ratio, hence our procedure often has higher statistical power and better reproducibility. It provides a promising direction in large-scale GWAS.</p>
<p><b>Availability:</b> An R package PLIS has been developed to implement the PLIS procedure. Source codes are available upon request and will be available on CRAN (<inter-ref locator="http://cran.r-project.org/" locator-type="url">http://cran.r-project.org/</inter-ref>).</p>
<p><b>Contact:</b> <inter-ref locator="zhiwei@njit.edu" locator-type="email">zhiwei@njit.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp476/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Wei, Z., Sun, W., Wang, K., Hakonarson, H.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp476</dc:identifier>
<dc:title><![CDATA[Multiple testing in genome-wide association studies via hidden Markov models]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2808</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2802</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2809?rss=1">
<title><![CDATA[Quantifying cancer progression with conjunctive Bayesian networks]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2809?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Cancer is an evolutionary process characterized by accumulating mutations. However, the precise timing and the order of genetic alterations that drive tumor progression remain enigmatic.</p>
<p><b>Results:</b> We present a specific probabilistic graphical model for the accumulation of mutations and their interdependencies. The Bayesian network models cancer progression by an explicit unobservable accumulation process in time that is separated from the observable but error-prone detection of mutations. Model parameters are estimated by an Expectation-Maximization algorithm and the underlying interaction graph is obtained by a simulated annealing procedure. Applying this method to cytogenetic data for different cancer types, we find multiple complex oncogenetic pathways deviating substantially from simplified models, such as linear pathways or trees. We further demonstrate how the inferred progression dynamics can be used to improve genetics-based survival predictions which could support diagnostics and prognosis.</p>
<p><b>Availability:</b> The software package ct-cbn is available under a GPL license on the web site <inter-ref locator="cbg.ethz.ch/software/ct-cbn" locator-type="url">cbg.ethz.ch/software/ct-cbn</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="moritz.gerstung@bsse.ethz.ch" locator-type="email">moritz.gerstung@bsse.ethz.ch</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Gerstung, M., Baudis, M., Moch, H., Beerenwinkel, N.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp505</dc:identifier>
<dc:title><![CDATA[Quantifying cancer progression with conjunctive Bayesian networks]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2815</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2809</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2816?rss=1">
<title><![CDATA[Accessible methods for the dynamic time-scale decomposition of biochemical systems]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2816?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> The growing complexity of biochemical models asks for means to rationally dissect the networks into meaningful and rather independent subnetworks. Such foregoing should ensure an understanding of the system without any heuristics employed. Important for the success of such an approach is its accessibility and the clarity of the presentation of the results.</p>
<p><b>Results:</b> In order to achieve this goal, we developed a method which is a modification of the classical approach of time-scale separation. This modified method as well as the more classical approach have been implemented for time-dependent application within the widely used software COPASI. The implementation includes different possibilities for the representation of the results including 3D-visualization.</p>
<p><b>Availability:</b> The methods are included in COPASI which is free for academic use and available at <inter-ref locator="www.copasi.org" locator-type="url">www.copasi.org</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="irina.surovtsova@bioquant.uni-heidelberg.de" locator-type="email">irina.surovtsova@bioquant.uni-heidelberg.de</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp451/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Surovtsova, I., Simus, N., Lorenz, T., Konig, A., Sahle, S., Kummer, U.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp451</dc:identifier>
<dc:title><![CDATA[Accessible methods for the dynamic time-scale decomposition of biochemical systems]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2823</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2816</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2824?rss=1">
<title><![CDATA[Modified variational Bayes EM estimation of hidden Markov tree model of cell lineages]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2824?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Human pluripotent stem cell lines persist in culture as a heterogeneous population of SSEA3 positive and SSEA3 negative cells. Tracking individual stem cells in real time can elucidate the kinetics of cells switching between the SSEA3 positive and negative substates. However, identifying a cell's substate at all time points within a cell lineage tree is technically difficult.</p>
<p><b>Results:</b> A variational Bayesian Expectation Maximization (EM) with smoothed probabilities (VBEMS) algorithm for hidden Markov trees (HMT) is proposed for incomplete tree structured data. The full posterior of the HMT parameters is determined and the underflow problems associated with previous algorithms are eliminated. Example results for the prediction of the types of cells in synthetic and real stem cell lineage trees are presented.</p>
<p><b>Availability:</b> The Matlab code for the VBEMS algorithm is freely available at <inter-ref locator="http://www.acse.dept.shef.ac.uk/repository/vbems_lineage_tree/VBEMS.ZIP" locator-type="url">http://www.acse.dept.shef.ac.uk/repository/vbems_lineage_tree/VBEMS.ZIP</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="visakan@sheffield.ac.uk" locator-type="email">visakan@sheffield.ac.uk</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp456/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Olariu, V., Coca, D., Billings, S. A., Tonge, P., Gokhale, P., Andrews, P. W., Kadirkamanathan, V.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp456</dc:identifier>
<dc:title><![CDATA[Modified variational Bayes EM estimation of hidden Markov tree model of cell lineages]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2830</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2824</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2831?rss=1">
<title><![CDATA[A hypergraph-based learning algorithm for classifying gene expression and arrayCGH data with prior knowledge]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2831?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Incorporating biological prior knowledge into predictive models is a challenging data integration problem in analyzing high-dimensional genomic data. We introduce a hypergraph-based semi-supervised learning algorithm called <I>HyperPrior</I> to classify gene expression and array-based comparative genomic hybridization (arrayCGH) data using biological knowledge as constraints on graph-based learning. <I>HyperPrior</I> is a robust two-step iterative method that alternatively finds the optimal labeling of the samples and the optimal weighting of the features, guided by constraints encoding prior knowledge. The prior knowledge for analyzing gene expression data is that cancer-related genes tend to interact with each other in a protein&ndash;protein interaction network. Similarly, the prior knowledge for analyzing arrayCGH data is that probes that are spatially nearby in their layout along the chromosomes tend to be involved in the same amplification or deletion event. Based on the prior knowledge, <I>HyperPrior</I> imposes a consistent weighting of the correlated genomic features in graph-based learning.</p>
<p><b>Results:</b> We applied <I>HyperPrior</I> to test two arrayCGH datasets and two gene expression datasets for both cancer classification and biomarker identification. On all the datasets, <I>HyperPrior</I> achieved competitive classification performance, compared with SVMs and the other baselines utilizing the same prior knowledge. <I>HyperPrior</I> also identified several discriminative regions on chromosomes and discriminative subnetworks in the PPI, both of which contain cancer-related genomic elements. Our results suggest that <I>HyperPrior</I> is promising in utilizing biological prior knowledge to achieve better classification performance and more biologically interpretable findings in gene expression and arrayCGH data.</p>
<p><b>Availability:</b> <inter-ref locator="http://compbio.cs.umn.edu/HyperPrior" locator-type="url">http://compbio.cs.umn.edu/HyperPrior</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="kuang@cs.umn.edu" locator-type="email">kuang@cs.umn.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp467/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Tian, Z., Hwang, T., Kuang, R.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp467</dc:identifier>
<dc:title><![CDATA[A hypergraph-based learning algorithm for classifying gene expression and arrayCGH data with prior knowledge]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2838</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2831</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2839?rss=1">
<title><![CDATA[TagDust--a program to eliminate artifacts from next generation sequencing data]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2839?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Next-generation parallel sequencing technologies produce large quantities of short sequence reads. Due to experimental procedures various types of artifacts are commonly sequenced alongside the targeted RNA or DNA sequences. Identification of such artifacts is important during the development of novel sequencing assays and for the downstream analysis of the sequenced libraries.</p>
<p><b>Results:</b> Here we present TagDust, a program identifying artifactual sequences in large sequencing runs. Given a user-defined cutoff for the false discovery rate, TagDust identifies all reads explainable by combinations and partial matches to known sequences used during library preparation. We demonstrate the quality of our method on sequencing runs performed on Illumina's Genome Analyzer platform.</p>
<p><b>Availability:</b> Executables and documentation are available from <inter-ref locator="http://genome.gsc.riken.jp/osc/english/software/" locator-type="url">http://genome.gsc.riken.jp/osc/english/software/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="timolassmann@gmail.com" locator-type="email">timolassmann@gmail.com</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Lassmann, T., Hayashizaki, Y., Daub, C. O.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp527</dc:identifier>
<dc:title><![CDATA[TagDust--a program to eliminate artifacts from next generation sequencing data]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2840</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2839</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2841?rss=1">
<title><![CDATA[Updates to the RMAP short-read mapping software]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2841?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We report on a major new version of the RMAP software for mapping reads from short-read sequencing technology. General improvements to accuracy and space requirements are included, along with novel functionality. Included in the RMAP software package are tools for mapping paired-end reads, mapping using more sophisticated use of quality scores, collecting ambiguous mapping locations and mapping bisulfite-treated reads.</p>
<p><b>Availability:</b> The applications described in this note are available for download at <inter-ref locator="http://www.cmb.usc.edu/people/andrewds/rmap" locator-type="url">http://www.cmb.usc.edu/people/andrewds/rmap</inter-ref> and are distributed as Open Source software under the GPLv3.0. The software has been tested on Linux and OS X platforms.</p>
<p><b>Contact:</b> <inter-ref locator="andrewds@usc.edu" locator-type="email">andrewds@usc.edu</inter-ref>; <inter-ref locator="mzhang@cshl.edu" locator-type="email">mzhang@cshl.edu</inter-ref></p>
<p>The RMAP algorithm was introduced by (Smith <I>et al.</I>, <cross-ref type="bib" refid="B8">2008</cross-ref>) as one of the earliest available programs for mapping reads from the Illumina second-generation sequencing technology. One important contribution of RMAP was to incorporate the use of quality scores directly into the mapping process: read positions with too low a quality score were not considered while mapping, and that quality score cutoff could be adjusted by the user. Subsequently, numerous mapping algorithms have appeared (Langmead <I>et al.</I>, <cross-ref type="bib" refid="B2">2009</cross-ref>; Li,H. <I>et al.</I>, <cross-ref type="bib" refid="B3">2008</cross-ref>; Li,R. <I>et al.</I>, <cross-ref type="bib" refid="B4">2008</cross-ref>; Lin <I>et al.</I>, <cross-ref type="bib" refid="B5">2008</cross-ref>; Schatz, <cross-ref type="bib" refid="B7">2009</cross-ref>; Yanovsky <I>et al.</I>, <cross-ref type="bib" refid="B9">2008</cross-ref>), with improvements in both efficiency and breadth of functionality (e.g. ability to map paired-end reads; integrated SNP calling). Investigators requiring solutions to mapping problems now have many options. As new applications of short-read sequencing emerge, many variations on the analysis task of read mapping emerge. Diversity in performance characteristics of existing mapping tools becomes potentially valuable.</p>
<p>We report the first major update to RMAP. The basic algorithmic framework in RMAP is still to preprocess reads and scan the genome, but several modifications have been made and much additional functionality has been included. Importantly, RMAP has a memory footprint that depends on the number of reads being mapped. This feature allows RMAP to be used effectively in cluster environments with commodity nodes, because partitioning the reads allows natural parallelizations with linear reduction in memory requirements per processor core used.</p>
<p>Included in this release of the RMAP software package is functionality for mapping paired-end reads, making more sophisticated use of quality scores, collecting mapping locations for ambiguously mapping reads and mapping bisulfite-treated reads.</p>
]]></description>
<dc:creator><![CDATA[Smith, A. D., Chung, W.-Y., Hodges, E., Kendall, J., Hannon, G., Hicks, J., Xuan, Z., Zhang, M. Q.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp533</dc:identifier>
<dc:title><![CDATA[Updates to the RMAP short-read mapping software]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2842</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2841</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2843?rss=1">
<title><![CDATA[3D-SURFER: software for high-throughput protein surface comparison and analysis]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2843?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We present 3D-SURFER, a web-based tool designed to facilitate high-throughput comparison and characterization of proteins based on their surface shape. As each protein is effectively represented by a vector of 3D Zernike descriptors, comparison times for a query protein against the entire PDB take, on an average, only a couple of seconds. The web interface has been designed to be as interactive as possible with displays showing animated protein rotations, CATH codes and structural alignments using the CE program. In addition, geometrically interesting local features of the protein surface, such as pockets that often correspond to ligand binding sites as well as protrusions and flat regions can also be identified and visualized.</p>
<p><b>Availability:</b> 3D-SURFER is a web application that can be freely accessed from: <inter-ref locator="http://dragon.bio.purdue.edu/3d-surfer" locator-type="url">http://dragon.bio.purdue.edu/3d-surfer</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="dkihara@purdue.edu" locator-type="email">dkihara@purdue.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp542/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[La, D., Esquivel-Rodriguez, J., Venkatraman, V., Li, B., Sael, L., Ueng, S., Ahrendt, S., Kihara, D.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp542</dc:identifier>
<dc:title><![CDATA[3D-SURFER: software for high-throughput protein surface comparison and analysis]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2844</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2843</prism:startingPage>
<prism:section>STRUCTURAL BIOINFORMATICS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2845?rss=1">
<title><![CDATA[COMPASS: a program for generating serial samples under an infinite sites model]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2845?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The program <I>COMPASS</I> can generate samples that have been collected at various points in time from a population that is evolving according to a Wright&ndash;Fisher model. The samples are generated using coalescence simulations permitting various demographic scenarios and the program uses an infinite sites model to generate polymorphism data for the samples. By generating serially sampled population-genetic data, <I>COMPASS</I> allows investigating properties of polymorphism data that has been collected at different time points, and aids in making inference from ancient polymorphism data.</p>
<p><b>Availability:</b> The program and the manual are available at: <inter-ref locator="http://www.egs.uu.se/evbiol/Research/JakobssonLab/compass.html" locator-type="url">http://www.egs.uu.se/evbiol/Research/JakobssonLab/compass.html</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="mattias.jakobsson@ebc.uu.se" locator-type="email">mattias.jakobsson@ebc.uu.se</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Jakobsson, M.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp534</dc:identifier>
<dc:title><![CDATA[COMPASS: a program for generating serial samples under an infinite sites model]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2847</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2845</prism:startingPage>
<prism:section>GENETICS AND POPULATION ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2848?rss=1">
<title><![CDATA[iBioSim: a tool for the analysis and design of genetic circuits]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2848?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> <ty>iBioSim</ty> is a tool that supports learning of genetic circuit models, efficient abstraction-based analysis of these models and the design of synthetic genetic circuits. <ty>iBioSim</ty> includes project management features and a graphical user interface that facilitate the development and maintenance of genetic circuit models as well as both experimental and simulation data records.</p>
<p><b>Availability:</b> <ty>iBioSim</ty> is available for download for Windows, Linux, and MacOS at <inter-ref locator="http://www.async.ece.utah.edu/iBioSim/" locator-type="url">http://www.async.ece.utah.edu/iBioSim/</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="myers@ece.utah.edu" locator-type="email">myers@ece.utah.edu</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Myers, C. J., Barker, N., Jones, K., Kuwahara, H., Madsen, C., Nguyen, N.-P. D.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp457</dc:identifier>
<dc:title><![CDATA[iBioSim: a tool for the analysis and design of genetic circuits]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2849</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2848</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2850?rss=1">
<title><![CDATA[WEbcoli: an interactive and asynchronous web application for in silico design and analysis of genome-scale E.coli model]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2850?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> WEbcoli is a <I>WEb</I> application for <I>in silico</I> designing, analyzing and engineering <I>Escherichia coli</I> metabolism. It is devised and implemented using advanced web technologies, thereby leading to enhanced usability and dynamic web accessibility. As a main feature, the WEbcoli system provides a user-friendly rich web interface, allowing users to virtually design and synthesize mutant strains derived from the genome-scale wild-type <I>E.coli</I> model and to customize pathways of interest through a graph editor. In addition, constraints-based flux analysis can be conducted for quantifying metabolic fluxes and characterizing the physiological and metabolic states under various genetic and/or environmental conditions.</p>
<p><b>Availability:</b> WEbcoli is freely accessible at <inter-ref locator="http://webcoli.org" locator-type="url">http://webcoli.org</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="cheld@nus.edu.sg" locator-type="email">cheld@nus.edu.sg</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Jung, T.-S., Yeo, H. C., Reddy, S. G., Cho, W.-S., Lee, D.-Y.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp496</dc:identifier>
<dc:title><![CDATA[WEbcoli: an interactive and asynchronous web application for in silico design and analysis of genome-scale E.coli model]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2852</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2850</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2853?rss=1">
<title><![CDATA[ERNEST: a toolbox for chemical reaction network theory]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2853?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> ERNEST Reaction Network Equilibria Study Toolbox is a MATLAB package which, by checking various different criteria on the structure of a chemical reaction network, can exclude the multistationarity of the corresponding reaction system. The results obtained are independent of the rate constants of the reactions, and can be used for model discrimination.</p>
<p><b>Availability and Implementation:</b> The software, implemented in MATLAB, is available under the GNU GPL free software license from <inter-ref locator="http://people.sissa.it/~altafini/papers/SoAl09/" locator-type="url">http://people.sissa.it/~altafini/papers/SoAl09/</inter-ref>. It requires the MATLAB Optimization Toolbox.</p>
<p><b>Contact:</b> <inter-ref locator="altafini@sissa.it" locator-type="email">altafini@sissa.it</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Soranzo, N., Altafini, C.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp513</dc:identifier>
<dc:title><![CDATA[ERNEST: a toolbox for chemical reaction network theory]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2854</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2853</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2855?rss=1">
<title><![CDATA[integrOmics: an R package to unravel relationships between two omics datasets]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2855?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> With the availability of many &lsquo;omics&rsquo; data, such as transcriptomics, proteomics or metabolomics, the integrative or joint analysis of multiple datasets from different technology platforms is becoming crucial to unravel the relationships between different biological functional levels. However, the development of such an analysis is a major computational and technical challenge as most approaches suffer from high data dimensionality. New methodologies need to be developed and validated.</p>
<p><b>Results:</b> <ty>integrOmics</ty> efficiently performs integrative analyses of two types of &lsquo;omics&rsquo; variables that are measured on the same samples. It includes a regularized version of canonical correlation analysis to enlighten correlations between two datasets, and a sparse version of partial least squares (PLS) regression that includes simultaneous variable selection in both datasets. The usefulness of both approaches has been demonstrated previously and successfully applied in various integrative studies.</p>
<p><b>Availability:</b> <ty>integrOmics</ty> is freely available from <inter-ref locator="http://CRAN.R-project.org/" locator-type="url">http://CRAN.R-project.org/</inter-ref> or from the web site companion (<inter-ref locator="http://math.univ-toulouse.fr/biostat" locator-type="url">http://math.univ-toulouse.fr/biostat</inter-ref>) that provides full documentation and tutorials.</p>
<p><b>Contact:</b> <inter-ref locator="k.lecao@uq.edu.au" locator-type="email">k.lecao@uq.edu.au</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp515/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Le Cao, K.-A., Gonzalez, I., Dejean, S.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:20 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp515</dc:identifier>
<dc:title><![CDATA[integrOmics: an R package to unravel relationships between two omics datasets]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2856</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2855</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2857?rss=1">
<title><![CDATA[Analyzing biological network parameters with CentiScaPe]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2857?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> The increasing availability of large network datasets along with the progress in experimental high-throughput technologies have prompted the need for tools allowing easy integration of experimental data with data derived from network computational analysis. In order to enrich experimental data with network topological parameters, we have developed the Cytoscape plug-in CentiScaPe. The plug-in computes several network centrality parameters and allows the user to analyze existing relationships between experimental data provided by the users and node centrality values computed by the plug-in. CentiScaPe allows identifying network nodes that are relevant from both experimental and topological viewpoints. CentiScaPe also provides a Boolean logic-based tool that allows easy characterization of nodes whose topological relevance depends on more than one centrality. Finally, different graphic outputs and the included description of biological significance for each computed centrality facilitate the analysis by the end users not expert in graph theory, thus allowing easy node categorization and experimental prioritization.</p>
<p><b>Availability:</b> CentiScaPe can be downloaded via the Cytoscape web site: <inter-ref locator="http://chianti.ucsd.edu/cyto_web/plugins/index.php" locator-type="url">http://chianti.ucsd.edu/cyto_web/plugins/index.php</inter-ref>. Tutorial, centrality descriptions and example data are available at: <inter-ref locator="http://profs.sci.univr.it/~scardoni/centiscape/centiscapepage.php" locator-type="url">http://profs.sci.univr.it/~scardoni/centiscape/centiscapepage.php</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="giovanni.scardoni@gmail.com" locator-type="email">giovanni.scardoni@gmail.com</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp517/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Scardoni, G., Petterlini, M., Laudanna, C.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp517</dc:identifier>
<dc:title><![CDATA[Analyzing biological network parameters with CentiScaPe]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2859</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2857</prism:startingPage>
<prism:section>SYSTEMS BIOLOGY</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2860?rss=1">
<title><![CDATA[PathBuilder--open source software for annotating and developing pathway resources]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2860?rss=1</link>
<description><![CDATA[
<p><b>Summary:</b> We have developed PathBuilder, an open-source web application to annotate biological information pertaining to signaling pathways and to create web-based pathway resources. PathBuilder enables annotation of molecular events including protein&ndash;protein interactions, enzyme&ndash;substrate relationships and protein translocation events either manually or through automated importing of data from other databases. Salient features of PathBuilder include automatic validation of data formats, built-in modules for visualization of pathways, automated import of data from other pathway resources, export of data in several standard data exchange formats and an application programming interface for retrieving existing pathway datasets.</p>
<p><b>Availability:</b> PathBuilder is freely available for download at <inter-ref locator="http://pathbuilder.sourceforge.net/" locator-type="url">http://pathbuilder.sourceforge.net/</inter-ref> under the terms of GNU lesser general public license (LGPL: <inter-ref locator="http://www.gnu.org/copyleft/lesser.html" locator-type="url">http://www.gnu.org/copyleft/lesser.html</inter-ref>). The software is platform independent and has been tested on Windows and Linux platforms.</p>
<p><b>Contact:</b> <inter-ref locator="pandey@jhmi.edu" locator-type="email">pandey@jhmi.edu</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp453/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Kandasamy, K., Keerthikumar, S., Raju, R., Keshava Prasad, T. S., Ramachandra, Y. L., Mohan, S., Pandey, A.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp453</dc:identifier>
<dc:title><![CDATA[PathBuilder--open source software for annotating and developing pathway resources]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2862</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2860</prism:startingPage>
<prism:section>DATABASES AND ONTOLOGIES</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2863?rss=1">
<title><![CDATA[A report on the 2009 SIG on short read sequencing and algorithms (Short-SIG)]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2863?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Brudno, M., Medvedev, P., Stoye, J., De La Vega, F. M.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp525</dc:identifier>
<dc:title><![CDATA[A report on the 2009 SIG on short read sequencing and algorithms (Short-SIG)]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2864</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2863</prism:startingPage>
<prism:section>REPORT</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2865?rss=1">
<title><![CDATA[Pindel: a pattern growth approach to detect break points of large deletions and medium sized insertions from paired-end short reads]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2865?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> There is a strong demand in the genomic community to develop effective algorithms to reliably identify genomic variants. Indel detection using next-gen data is difficult and identification of long structural variations is extremely challenging.</p>
<p><b>Results:</b> We present Pindel, a pattern growth approach, to detect breakpoints of large deletions and medium-sized insertions from paired-end short reads. We use both simulated reads and real data to demonstrate the efficiency of the computer program and accuracy of the results.</p>
<p><b>Availability:</b> The binary code and a short user manual can be freely downloaded from <inter-ref locator="http://www.ebi.ac.uk/~kye/pindel/" locator-type="url">http://www.ebi.ac.uk/~kye/pindel/</inter-ref>.</p>
<p><b>Contact:</b> <inter-ref locator="k.ye@lumc.nl" locator-type="email">k.ye@lumc.nl</inter-ref>; <inter-ref locator="zn1@sanger.ac.uk" locator-type="email">zn1@sanger.ac.uk</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Ye, K., Schulz, M. H., Long, Q., Apweiler, R., Ning, Z.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp394</dc:identifier>
<dc:title><![CDATA[Pindel: a pattern growth approach to detect break points of large deletions and medium sized insertions from paired-end short reads]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2871</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2865</prism:startingPage>
<prism:section>GENOME ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2872?rss=1">
<title><![CDATA[De novo transcriptome assembly with ABySS]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2872?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Whole transcriptome shotgun sequencing data from non-normalized samples offer unique opportunities to study the metabolic states of organisms. One can deduce gene expression levels using sequence coverage as a surrogate, identify coding changes or discover novel isoforms or transcripts. Especially for discovery of novel events, <I>de novo</I> assembly of transcriptomes is desirable.</p>
<p><b>Results:</b> Transcriptome from tumor tissue of a patient with follicular lymphoma was sequenced with 36 base pair (bp) single- and paired-end reads on the Illumina Genome Analyzer II platform. We assembled ~194 million reads using ABySS into 66 921 contigs 100 bp or longer, with a maximum contig length of 10 951 bp, representing over 30 million base pairs of unique transcriptome sequence, or roughly 1% of the genome.</p>
<p><b>Availability and Implementation:</b> Source code and binaries of ABySS are freely available for download at <inter-ref locator="http://www.bcgsc.ca/platform/bioinfo/software/abyss" locator-type="url">http://www.bcgsc.ca/platform/bioinfo/software/abyss</inter-ref>. Assembler tool is implemented in C++. The parallel version uses Open MPI. ABySS-Explorer tool is implemented in Java using the Java universal network/graph framework.</p>
<p><b>Contact:</b> <inter-ref locator="ibirol@bcgsc.ca" locator-type="email">ibirol@bcgsc.ca</inter-ref></p>
]]></description>
<dc:creator><![CDATA[Birol, I., Jackman, S. D., Nielsen, C. B., Qian, J. Q., Varhol, R., Stazyk, G., Morin, R. D., Zhao, Y., Hirst, M., Schein, J. E., Horsman, D. E., Connors, J. M., Gascoyne, R. D., Marra, M. A., Jones, S. J. M.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp367</dc:identifier>
<dc:title><![CDATA[De novo transcriptome assembly with ABySS]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2877</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2872</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2878?rss=1">
<title><![CDATA[Increasing the coverage of a metapopulation consensus genome by iterative read mapping and assembly]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2878?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> Most microbial species cannot be cultured in the laboratory. Metagenomic sequencing may still yield a complete genome if the sequenced community is enriched and the sequencing coverage is high. However, the complexity in a natural population may cause the enrichment culture to contain multiple related strains. This diversity can confound existing strict assembly programs and lead to a fragmented assembly, which is unnecessary if we have a related reference genome available that can function as a scaffold.</p>
<p><b>Results:</b> Here, we map short metagenomic sequencing reads from a population of strains to a related reference genome, and compose a genome that captures the consensus of the population's sequences. We show that by iteration of the mapping and assembly procedure, the coverage increases while the similarity with the reference genome decreases. This indicates that the assembly becomes less dependent on the reference genome and approaches the consensus genome of the multi-strain population.</p>
<p><b>Contact:</b> <inter-ref locator="dutilh@cmbi.ru.nl" locator-type="email">dutilh@cmbi.ru.nl</inter-ref></p>
<p><b>Supplementary Information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp377/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Dutilh, B. E., Huynen, M. A., Strous, M.]]></dc:creator>
<dc:date>Fri, 23 Oct 2009 06:34:21 PDT</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp377</dc:identifier>
<dc:title><![CDATA[Increasing the coverage of a metapopulation consensus genome by iterative read mapping and assembly]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2881</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2878</prism:startingPage>
<prism:section>SEQUENCE ANALYSIS</prism:section>
</item>

<item rdf:about="http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2882?rss=1">
<!-- Article entry: Bioinformatics volume 25, number 21, pages 2882 to 2889 (DOI 10.1093/bioinformatics/btp378). -->
<title><![CDATA[ISOLATE: a computational strategy for identifying the primary origin of cancers using high-throughput sequencing]]></title>
<link>http://bioinformatics.oxfordjournals.org/cgi/content/short/25/21/2882?rss=1</link>
<description><![CDATA[
<p><b>Motivation:</b> One of the most deadly cancer diagnoses is the carcinoma of unknown primary origin. Without the knowledge of the site of origin, treatment regimens are limited in their specificity and result in high mortality rates. Though supervised classification methods have been developed to predict the site of origin based on gene expression data, they require large numbers of previously classified tumors for training, in part because they do not account for sample heterogeneity, which limits their application to well-studied cancers.</p>
<p><b>Results:</b> We present ISOLATE, a new statistical method that simultaneously predicts the primary site of origin of cancers and addresses sample heterogeneity, while taking advantage of new high-throughput sequencing technology that promises to bring higher accuracy and reproducibility to gene expression profiling experiments. ISOLATE makes predictions <I>de novo</I>, without having seen any training expression profiles of cancers with identified origin. Compared with previous methods, ISOLATE is able to predict the primary site of origin, de-convolve and remove the effect of sample heterogeneity and identify differentially expressed genes with higher accuracy, across both synthetic and clinical datasets. Methods such as ISOLATE are invaluable tools for clinicians faced with carcinomas of unknown primary origin.</p>
<p><b>Availability:</b> ISOLATE is available for download at: <inter-ref locator="http://morrislab.med.utoronto.ca/software" locator-type="url">http://morrislab.med.utoronto.ca/software</inter-ref></p>
<p><b>Contact:</b> <inter-ref locator="gerald.quon@utoronto.ca" locator-type="email">gerald.quon@utoronto.ca</inter-ref>; <inter-ref locator="quaid.morris@utoronto.ca" locator-type="email">quaid.morris@utoronto.ca</inter-ref></p>
<p><b>Supplementary information:</b> <inter-ref locator="http://bioinformatics.oxfordjournals.org/cgi/content/full/btp378/DC1" locator-type="url">Supplementary data</inter-ref> are available at <I>Bioinformatics</I> online.</p>
]]></description>
<dc:creator><![CDATA[Quon, G., Morris, Q.]]></dc:creator>
<!-- dc:date in W3CDTF (ISO 8601) with an explicit UTC offset; the feed previously emitted "Fri, 23 Oct 2009 06:34:21 PDT" (PDT = UTC-07:00). -->
<dc:date>2009-10-23T06:34:21-07:00</dc:date>
<dc:identifier>info:doi/10.1093/bioinformatics/btp378</dc:identifier>
<dc:title><![CDATA[ISOLATE: a computational strategy for identifying the primary origin of cancers using high-throughput sequencing]]></dc:title>
<dc:publisher>Oxford University Press</dc:publisher>
<prism:number>21</prism:number>
<prism:volume>25</prism:volume>
<prism:endingPage>2889</prism:endingPage>
<prism:publicationDate>2009-11-01</prism:publicationDate>
<prism:startingPage>2882</prism:startingPage>
<prism:section>GENE EXPRESSION</prism:section>
</item>

</rdf:RDF>