Blaustein, R.; Meyer, J.; Conesa, A.; Lorca, G.; Teplitski, M.
Impacts of abundance of Candidatus Liberibacter on the citrus phyto-microbiome Journal Article
In: PHYTOPATHOLOGY, vol. 106, no. 12, S, pp. 25-26, 2016, ISSN: 0031-949X, (Annual Meeting of the American-Phytopathological-Society (APS), Tampa, FL, JUL 30-AUG 03, 2016).
@article{ISI:000390471900128,
title = {Impacts of abundance of {Candidatus Liberibacter} on the citrus phyto-microbiome},
author = {R. Blaustein and J. Meyer and A. Conesa and G. Lorca and M. Teplitski},
issn = {0031-949X},
year = {2016},
date = {2016-12-01},
journal = {Phytopathology},
volume = {106},
number = {12, S},
pages = {25--26},
organization = {Amer Phytopathol Soc},
note = {Annual Meeting of the American-Phytopathological-Society (APS), Tampa, FL, JUL 30-AUG 03, 2016},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Furio-Tari, P.; Conesa, A.; Tarazona, S.
RGmatch: matching genomic regions to proximal genes in omics data integration Journal Article
In: BMC BIOINFORMATICS, vol. 17, no. 15, 2016, ISSN: 1471-2105.
@article{ISI:000392470500001,
title = {{RGmatch}: matching genomic regions to proximal genes in omics data integration},
author = {P. Furio-Tari and A. Conesa and S. Tarazona},
url = {http://dx.doi.org/10.1186/s12859-016-1293-1},
doi = {10.1186/s12859-016-1293-1},
issn = {1471-2105},
year = {2016},
date = {2016-11-01},
journal = {BMC Bioinformatics},
volume = {17},
number = {15},
abstract = {Background: The integrative analysis of multiple genomics data often
requires that genome coordinates-based signals have to be associated
with proximal genes. The relative location of a genomic region with
respect to the gene (gene area) is important for functional data
interpretation; hence algorithms that match regions to genes should be
able to deliver insight into this information.
Results: In this work we review the tools that are publicly available
for making region-to-gene associations. We also present a novel method, RGmatch, a flexible and easy-to-use Python tool that computes
associations either at the gene, transcript, or exon level, applying a
set of rules to annotate each region-gene association with the region
location within the gene. RGmatch can be applied to any organism as long
as genome annotation is available. Furthermore, we qualitatively and
quantitatively compare RGmatch to other tools.
Conclusions: RGmatch simplifies the association of a genomic region with
its closest gene. At the same time, it is a powerful tool because the
rules used to annotate these associations are very easy to modify
according to the researcher's specific interests. Some important
differences between RGmatch and other similar tools already in existence
are RGmatch's flexibility, its wide range of user options, compatibility
with any annotatable organism, and its comprehensive and user-friendly
output.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
requires that genome coordinates-based signals have to be associated
with proximal genes. The relative location of a genomic region with
respect to the gene (gene area) is important for functional data
interpretation; hence algorithms that match regions to genes should be
able to deliver insight into this information.
Results: In this work we review the tools that are publicly available
for making region-to-gene associations. We also present a novel method, RGmatch, a flexible and easy-to-use Python tool that computes
associations either at the gene, transcript, or exon level, applying a
set of rules to annotate each region-gene association with the region
location within the gene. RGmatch can be applied to any organism as long
as genome annotation is available. Furthermore, we qualitatively and
quantitatively compare RGmatch to other tools.
Conclusions: RGmatch simplifies the association of a genomic region with
its closest gene. At the same time, it is a powerful tool because the
rules used to annotate these associations are very easy to modify
according to the researcher's specific interests. Some important
differences between RGmatch and other similar tools already in existence
are RGmatch's flexibility, its wide range of user options, compatibility
with any annotatable organism, and its comprehensive and user-friendly
output.
De Panis, D. N.; Padro, J.; Furio-Tari, P.; Tarazona, S.; Milla Carmona, P. S.; Soto, I. M.; Dopazo, H.; Conesa, A.; Hasson, E.
Transcriptome modulation during host shift is driven by secondary metabolites in desert Drosophila Journal Article
In: MOLECULAR ECOLOGY, vol. 25, no. 18, pp. 4534-4550, 2016, ISSN: 0962-1083.
@article{ISI:000383344400010,
title = {Transcriptome modulation during host shift is driven by secondary metabolites in desert {Drosophila}},
author = {De Panis, D. N. and J. Padro and P. Furio-Tari and S. Tarazona and Milla Carmona, P. S. and I. M. Soto and H. Dopazo and A. Conesa and E. Hasson},
url = {http://dx.doi.org/10.1111/mec.13785},
doi = {10.1111/mec.13785},
issn = {0962-1083},
year = {2016},
date = {2016-09-01},
journal = {Molecular Ecology},
volume = {25},
number = {18},
pages = {4534--4550},
abstract = {High-throughput transcriptome studies are breaking new ground to
investigate the responses that organisms deploy in alternative
environments. Nevertheless, much remains to be understood about the
genetic basis of host plant adaptation. Here, we investigate genome-wide
expression in the fly Drosophila buzzatii raised in different
conditions. This species uses decaying tissues of cactus of the genus
Opuntia as primary rearing substrate and secondarily, the necrotic
tissues of the columnar cactus Trichocereus terscheckii. The latter
constitutes a harmful host, rich in mescaline and other related
phenylethylamine alkaloids. We assessed the transcriptomic responses of
larvae reared in Opuntia sulphurea and T. terscheckii, with and without
the addition of alkaloids extracted from the latter. Whole-genome
expression profiles were massively modulated by the rearing environment, mainly by the presence of T. terscheckii alkaloids. Differentially
expressed genes were mainly related to detoxification, oxidation-reduction and stress response; however, we also found genes
involved in development and neurobiological processes. In conclusion, our study contributes new data onto the role of transcriptional
plasticity in response to alternative rearing environments.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
investigate the responses that organisms deploy in alternative
environments. Nevertheless, much remains to be understood about the
genetic basis of host plant adaptation. Here, we investigate genome-wide
expression in the fly Drosophila buzzatii raised in different
conditions. This species uses decaying tissues of cactus of the genus
Opuntia as primary rearing substrate and secondarily, the necrotic
tissues of the columnar cactus Trichocereus terscheckii. The latter
constitutes a harmful host, rich in mescaline and other related
phenylethylamine alkaloids. We assessed the transcriptomic responses of
larvae reared in Opuntia sulphurea and T. terscheckii, with and without
the addition of alkaloids extracted from the latter. Whole-genome
expression profiles were massively modulated by the rearing environment, mainly by the presence of T. terscheckii alkaloids. Differentially
expressed genes were mainly related to detoxification, oxidation-reduction and stress response; however, we also found genes
involved in development and neurobiological processes. In conclusion, our study contributes new data onto the role of transcriptional
plasticity in response to alternative rearing environments.
Furio-Tari, P.; Tarazona, S.; Gabaldon, T.; Enright, A. J.; Conesa, A.
spongeScan: A web for detecting microRNA binding elements in lncRNA sequences Journal Article
In: NUCLEIC ACIDS RESEARCH, vol. 44, no. W1, pp. W176-W180, 2016, ISSN: 0305-1048.
@article{ISI:000379786800029,
title = {{spongeScan}: A web for detecting {microRNA} binding elements in {lncRNA} sequences},
author = {P. Furio-Tari and S. Tarazona and T. Gabaldon and A. J. Enright and A. Conesa},
url = {http://dx.doi.org/10.1093/nar/gkw443},
doi = {10.1093/nar/gkw443},
issn = {0305-1048},
year = {2016},
date = {2016-07-01},
journal = {Nucleic Acids Research},
volume = {44},
number = {W1},
pages = {W176--W180},
abstract = {Non-coding RNA transcripts such as microRNAs (miRNAs) and long
non-coding RNAs (lncRNAs) are important genetic regulators. However, the
functions of many of these transcripts are still not clearly understood.
Recently, it has become apparent that there is significant crosstalk
between miRNAs and lncRNAs and that this creates competition for binding
between the miRNA, a lncRNA and other regulatory targets. Indeed, various competitive endogenous RNAs (ceRNAs) have already been
identified where a lncRNA acts by sequestering miRNAs. This implies the
down-regulation in the interaction of the miRNAs with their mRNA
targets, what has been called a sponge effect. Multiple approaches exist
for the prediction of miRNA targets in mRNAs. However, few methods exist
for the prediction of miRNA response elements (MREs) in lncRNAs acting
as ceRNAs (sponges). Here, we present spongeScan
(http://spongescan.rc.ufl.edu), a graphical web tool to compute and
visualize putative MREs in lncRNAs, along with different measures to
assess their likely behavior as ceRNAs.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
non-coding RNAs (lncRNAs) are important genetic regulators. However, the
functions of many of these transcripts are still not clearly understood.
Recently, it has become apparent that there is significant crosstalk
between miRNAs and lncRNAs and that this creates competition for binding
between the miRNA, a lncRNA and other regulatory targets. Indeed, various competitive endogenous RNAs (ceRNAs) have already been
identified where a lncRNA acts by sequestering miRNAs. This implies the
down-regulation in the interaction of the miRNAs with their mRNA
targets, what has been called a sponge effect. Multiple approaches exist
for the prediction of miRNA targets in mRNAs. However, few methods exist
for the prediction of miRNA response elements (MREs) in lncRNAs acting
as ceRNAs (sponges). Here, we present spongeScan
(http://spongescan.rc.ufl.edu), a graphical web tool to compute and
visualize putative MREs in lncRNAs, along with different measures to
assess their likely behavior as ceRNAs.
Auffray, C.; Balling, R.; Barroso, I.; Bencze, L.; Benson, M.; Bergeron, J.; Bernal-Delgado, E.; Blomberg, N.; Bock, C.; Conesa, A.; Del Signore, S.; Delogne, C.; Devilee, P.; Di Meglio, A.; Eijkemans, M.; Flicek, P.; Graf, N.; Grimm, V.; Guchelaar, H.; Guo, Y.; Gut, I. G.; Hanbury, A.; Hanif, S.; Hilgers, R.; Honrado, A.; Hose, D. R.; Houwing-Duistermaat, J.; Hubbard, T.; Janacek, S. H.; Karanikas, H.; Kievits, T.; Kohler, M.; Kremer, A.; Lanfear, J.; Lengauer, T.; Maes, E.; Meert, T.; Mueller, W.; Nickel, D.; Oledzki, P.; Pedersen, B.; Petkovic, M.; Pliakos, K.; Rattray, M.; Redon i Mas, J.; Schneider, R.; Sengstag, T.; Serra-Picamal, X.; Spek, W.; Vaas, L. A. I.; van Batenburg, O.; Vandelaer, M.; Varnai, P.; Villoslada, P.; Vizcaino, J. A.; Wubbe, J. P. M.; Zanetti, G.
Making sense of big data in health research: Towards an EU action plan Journal Article
In: GENOME MEDICINE, vol. 8, 2016, ISSN: 1756-994X.
@article{ISI:000378592900001,
title = {Making sense of big data in health research: Towards an {EU} action plan},
author = {C. Auffray and R. Balling and I. Barroso and L. Bencze and M. Benson and J. Bergeron and E. Bernal-Delgado and N. Blomberg and C. Bock and A. Conesa and Del Signore, S. and C. Delogne and P. Devilee and Di Meglio, A. and M. Eijkemans and P. Flicek and N. Graf and V. Grimm and H. Guchelaar and Y. Guo and I. G. Gut and A. Hanbury and S. Hanif and R. Hilgers and A. Honrado and D. R. Hose and J. Houwing-Duistermaat and T. Hubbard and S. H. Janacek and H. Karanikas and T. Kievits and M. Kohler and A. Kremer and J. Lanfear and T. Lengauer and E. Maes and T. Meert and W. Mueller and D. Nickel and P. Oledzki and B. Pedersen and M. Petkovic and K. Pliakos and M. Rattray and {Redon i Mas}, J. and R. Schneider and T. Sengstag and X. Serra-Picamal and W. Spek and L. A. I. Vaas and O. van Batenburg and M. Vandelaer and P. Varnai and P. Villoslada and J. A. Vizcaino and J. P. M. Wubbe and G. Zanetti},
url = {http://dx.doi.org/10.1186/s13073-016-0323-y},
doi = {10.1186/s13073-016-0323-y},
issn = {1756-994X},
year = {2016},
date = {2016-06-01},
journal = {Genome Medicine},
volume = {8},
abstract = {Medicine and healthcare are undergoing profound changes. Whole-genome
sequencing and high-resolution imaging technologies are key drivers of
this rapid and crucial transformation. Technological innovation combined
with automation and miniaturization has triggered an explosion in data
production that will soon reach exabyte proportions. How are we going to
deal with this exponential increase in data production? The potential of
``big data'' for improving health is enormous but, at the same time, we face a wide range of challenges to overcome urgently. Europe is very
proud of its cultural diversity; however, exploitation of the data made
available through advances in genomic medicine, imaging, and a wide
range of mobile health applications or connected devices is hampered by
numerous historical, technical, legal, and political barriers. European
health systems and databases are diverse and fragmented. There is a lack
of harmonization of data formats, processing, analysis, and data
transfer, which leads to incompatibilities and lost opportunities. Legal
frameworks for data sharing are evolving. Clinicians, researchers, and
citizens need improved methods, tools, and training to generate, analyze, and query data effectively. Addressing these barriers will
contribute to creating the European Single Market for health, which will
improve health and healthcare for all Europeans.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
sequencing and high-resolution imaging technologies are key drivers of
this rapid and crucial transformation. Technological innovation combined
with automation and miniaturization has triggered an explosion in data
production that will soon reach exabyte proportions. How are we going to
deal with this exponential increase in data production? The potential of
``big data'' for improving health is enormous but, at the same time, we face a wide range of challenges to overcome urgently. Europe is very
proud of its cultural diversity; however, exploitation of the data made
available through advances in genomic medicine, imaging, and a wide
range of mobile health applications or connected devices is hampered by
numerous historical, technical, legal, and political barriers. European
health systems and databases are diverse and fragmented. There is a lack
of harmonization of data formats, processing, analysis, and data
transfer, which leads to incompatibilities and lost opportunities. Legal
frameworks for data sharing are evolving. Clinicians, researchers, and
citizens need improved methods, tools, and training to generate, analyze, and query data effectively. Addressing these barriers will
contribute to creating the European Single Market for health, which will
improve health and healthcare for all Europeans.
van der Kloet, F. M.; Sebastian-Leon, P.; Conesa, A.; Smilde, A. K.; Westerhuis, J. A.
Separating common from distinctive variation Journal Article
In: BMC BIOINFORMATICS, vol. 17, no. 5, 2016, ISSN: 1471-2105, (Conference on Statistical Methods for Omics Data Integration and Analysis, Heraklion, GREECE, NOV 10-12, 2014).
@article{ISI:000381318400009,
title = {Separating common from distinctive variation},
author = {F. M. van der Kloet and P. Sebastian-Leon and A. Conesa and A. K. Smilde and J. A. Westerhuis},
url = {http://dx.doi.org/10.1186/s12859-016-1037-2},
doi = {10.1186/s12859-016-1037-2},
issn = {1471-2105},
year = {2016},
date = {2016-06-01},
journal = {BMC Bioinformatics},
volume = {17},
number = {5},
abstract = {Background: Joint and individual variation explained (JIVE), distinct
and common simultaneous component analysis (DISCO) and O2-PLS, a
two-block (X-Y) latent variable regression method with an integral OSC
filter can all be used for the integrated analysis of multiple data sets
and decompose them in three terms: a low(er)-rank approximation
capturing common variation across data sets, low(er)-rank approximations
for structured variation distinctive for each data set, and residual
noise. In this paper these three methods are compared with respect to
their mathematical properties and their respective ways of defining
common and distinctive variation.
Results: The methods are all applied on simulated data and mRNA and
miRNA data-sets from GlioBlastoma Multiform (GBM) brain tumors to
examine their overlap and differences. When the common variation is
abundant, all methods are able to find the correct solution. With real
data however, complexities in the data are treated differently by the
three methods.
Conclusions: All three methods have their own approach to estimate
common and distinctive variation with their specific strength and
weaknesses. Due to their orthogonality properties and their used
algorithms their view on the data is slightly different. By assuming
orthogonality between common and distinctive, true natural or biological
phenomena that may not be orthogonal at all might be misinterpreted.},
note = {Conference on Statistical Methods for Omics Data Integration and
Analysis, Heraklion, GREECE, NOV 10-12, 2014},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
and common simultaneous component analysis (DISCO) and O2-PLS, a
two-block (X-Y) latent variable regression method with an integral OSC
filter can all be used for the integrated analysis of multiple data sets
and decompose them in three terms: a low(er)-rank approximation
capturing common variation across data sets, low(er)-rank approximations
for structured variation distinctive for each data set, and residual
noise. In this paper these three methods are compared with respect to
their mathematical properties and their respective ways of defining
common and distinctive variation.
Results: The methods are all applied on simulated data and mRNA and
miRNA data-sets from GlioBlastoma Multiform (GBM) brain tumors to
examine their overlap and differences. When the common variation is
abundant, all methods are able to find the correct solution. With real
data however, complexities in the data are treated differently by the
three methods.
Conclusions: All three methods have their own approach to estimate
common and distinctive variation with their specific strength and
weaknesses. Due to their orthogonality properties and their used
algorithms their view on the data is slightly different. By assuming
orthogonality between common and distinctive, true natural or biological
phenomena that may not be orthogonal at all might be misinterpreted.
Okonechnikov, K.; Conesa, A.; Garcia-Alcalde, F.
Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data Journal Article
In: BIOINFORMATICS, vol. 32, no. 2, pp. 292-294, 2016, ISSN: 1367-4803.
@article{ISI:000368360100020,
title = {{Qualimap} 2: advanced multi-sample quality control for high-throughput sequencing data},
author = {K. Okonechnikov and A. Conesa and F. Garcia-Alcalde},
url = {http://dx.doi.org/10.1093/bioinformatics/btv566},
doi = {10.1093/bioinformatics/btv566},
issn = {1367-4803},
year = {2016},
date = {2016-01-01},
journal = {Bioinformatics},
volume = {32},
number = {2},
pages = {292--294},
abstract = {Motivation: Detection of random errors and systematic biases is a
crucial step of a robust pipeline for processing high-throughput
sequencing (HTS) data. Bioinformatics software tools capable of
performing this task are available, either for general analysis of HTS
data or targeted to a specific sequencing technology. However, most of
the existing QC instruments only allow processing of one sample at a
time.
Results: Qualimap 2 represents a next step in the QC analysis of HTS
data. Along with comprehensive single-sample analysis of alignment data, it includes new modes that allow simultaneous processing and comparison
of multiple samples. As with the first version, the new features are
available via both graphical and command line interface. Additionally, it includes a large number of improvements proposed by the user
community.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
crucial step of a robust pipeline for processing high-throughput
sequencing (HTS) data. Bioinformatics software tools capable of
performing this task are available, either for general analysis of HTS
data or targeted to a specific sequencing technology. However, most of
the existing QC instruments only allow processing of one sample at a
time.
Results: Qualimap 2 represents a next step in the QC analysis of HTS
data. Along with comprehensive single-sample analysis of alignment data, it includes new modes that allow simultaneous processing and comparison
of multiple samples. As with the first version, the new features are
available via both graphical and command line interface. Additionally, it includes a large number of improvements proposed by the user
community.
Conesa, A.; Madrigal, P.; Tarazona, S.; Gomez-Cabrero, D.; Cervera, A.; McPherson, A.; Szczesniak, M. W.; Gaffney, D. J.; Elo, L. L.; Zhang, X.; Mortazavi, A.
A survey of best practices for RNA-seq data analysis Journal Article
In: GENOME BIOLOGY, vol. 17, 2016, ISSN: 1474-760X.
@article{ISI:000368903900004,
title = {A survey of best practices for {RNA-seq} data analysis},
author = {A. Conesa and P. Madrigal and S. Tarazona and D. Gomez-Cabrero and A. Cervera and A. McPherson and M. W. Szczesniak and D. J. Gaffney and L. L. Elo and X. Zhang and A. Mortazavi},
url = {http://dx.doi.org/10.1186/s13059-016-0881-8},
doi = {10.1186/s13059-016-0881-8},
issn = {1474-760X},
year = {2016},
date = {2016-01-01},
journal = {Genome Biology},
volume = {17},
pages = {13},
abstract = {RNA-sequencing (RNA-seq) has a wide variety of applications, but no
single analysis pipeline can be used in all cases. We review all of the
major steps in RNA-seq data analysis, including experimental design, quality control, read alignment, quantification of gene and transcript
levels, visualization, differential gene expression, alternative
splicing, functional analysis, gene fusion detection and eQTL mapping.
We highlight the challenges associated with each step. We discuss the
analysis of small RNAs and the integration of RNA-seq with other
functional genomics techniques. Finally, we discuss the outlook for
novel technologies that are changing the state of the art in
transcriptomics.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
single analysis pipeline can be used in all cases. We review all of the
major steps in RNA-seq data analysis, including experimental design, quality control, read alignment, quantification of gene and transcript
levels, visualization, differential gene expression, alternative
splicing, functional analysis, gene fusion detection and eQTL mapping.
We highlight the challenges associated with each step. We discuss the
analysis of small RNAs and the integration of RNA-seq with other
functional genomics techniques. Finally, we discuss the outlook for
novel technologies that are changing the state of the art in
transcriptomics.
Conesa, Ana; Madrigal, Pedro; Tarazona, Sonia; Gomez-Cabrero, David; Cervera, Alejandra; McPherson, Andrew; Szcześniak, Michał Wojciech; Gaffney, Daniel J; Elo, Laura L; Zhang, Xuegong; Mortazavi, Ali
A survey of best practices for RNA-seq data analysis Journal Article
In: Genome Biol, vol. 17, pp. 13, 2016, ISSN: 1474-760X.
@article{pmid26813401,
title = {A survey of best practices for {RNA-seq} data analysis},
author = {Ana Conesa and Pedro Madrigal and Sonia Tarazona and David Gomez-Cabrero and Alejandra Cervera and Andrew McPherson and Michał Wojciech Szcześniak and Daniel J Gaffney and Laura L Elo and Xuegong Zhang and Ali Mortazavi},
doi = {10.1186/s13059-016-0881-8},
issn = {1474-760X},
year = {2016},
date = {2016-01-01},
journal = {Genome Biology},
volume = {17},
pages = {13},
abstract = {RNA-sequencing (RNA-seq) has a wide variety of applications, but no single analysis pipeline can be used in all cases. We review all of the major steps in RNA-seq data analysis, including experimental design, quality control, read alignment, quantification of gene and transcript levels, visualization, differential gene expression, alternative splicing, functional analysis, gene fusion detection and eQTL mapping. We highlight the challenges associated with each step. We discuss the analysis of small RNAs and the integration of RNA-seq with other functional genomics techniques. Finally, we discuss the outlook for novel technologies that are changing the state of the art in transcriptomics.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tarazona, S.; Furio-Tari, P.; Turra, D.; Di Pietro, A.; Nueda, M. Jose; Ferrer, A.; Conesa, A.
Data quality aware analysis of differential expression in RNA-seq with NOISeq R/Bioc package Journal Article
In: NUCLEIC ACIDS RESEARCH, vol. 43, no. 21, 2015, ISSN: 0305-1048.
@article{ISI:000366410900003,
title = {Data quality aware analysis of differential expression in {RNA-seq} with {NOISeq} {R/Bioc} package},
author = {S. Tarazona and P. Furio-Tari and D. Turra and Di Pietro, A. and M. Jose Nueda and A. Ferrer and A. Conesa},
url = {http://dx.doi.org/10.1093/nar/gkv711},
doi = {10.1093/nar/gkv711},
issn = {0305-1048},
year = {2015},
date = {2015-12-01},
journal = {Nucleic Acids Research},
volume = {43},
number = {21},
abstract = {As the use of RNA-seq has popularized, there is an increasing
consciousness of the importance of experimental design, bias removal, accurate quantification and control of false positives for proper data
analysis. We introduce the NOISeq R-package for quality control and
analysis of count data. We show how the available diagnostic tools can
be used to monitor quality issues, make pre-processing decisions and
improve analysis. We demonstrate that the nonparametric NOISeqBIO
efficiently controls false discoveries in experiments with biological
replication and outperforms state-of-the-art methods. NOISeq is a
comprehensive resource that meets current needs for robust data-aware
analysis of RNA-seq differential expression.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
consciousness of the importance of experimental design, bias removal, accurate quantification and control of false positives for proper data
analysis. We introduce the NOISeq R-package for quality control and
analysis of count data. We show how the available diagnostic tools can
be used to monitor quality issues, make pre-processing decisions and
improve analysis. We demonstrate that the nonparametric NOISeqBIO
efficiently controls false discoveries in experiments with biological
replication and outperforms state-of-the-art methods. NOISeq is a
comprehensive resource that meets current needs for robust data-aware
analysis of RNA-seq differential expression.
Conesa-Zamora, P.; Garcia-Solano, J.; Turpin, M. del Carmen; Sebastian-Leon, P.; Torres-Moreno, D.; Estrada, E.; Tuomisto, A.; Wilce, J.; Maekinen, M. J.; Perez-Guillermo, M.; Conesa, A.
Methylome profiling reveals functions and genes which are differentially methylated in serrated compared to conventional colorectal carcinoma Journal Article
In: CLINICAL EPIGENETICS, vol. 7, 2015, ISSN: 1868-7083.
@article{ISI:000361363000001,
title = {Methylome profiling reveals functions and genes which are differentially
methylated in serrated compared to conventional colorectal carcinoma},
author = {P. Conesa-Zamora and J. Garcia-Solano and Turpin, M. del Carmen and P. Sebastian-Leon and D. Torres-Moreno and E. Estrada and A. Tuomisto and J. Wilce and M. J. Maekinen and M. Perez-Guillermo and A. Conesa},
url = {http://dx.doi.org/10.1186/s13148-015-0128-7},
doi = {10.1186/s13148-015-0128-7},
issn = {1868-7083},
year = {2015},
date = {2015-09-01},
journal = {Clinical Epigenetics},
volume = {7},
abstract = {Background: Serrated adenocarcinoma (SAC) is a recently recognized
colorectal cancer (CRC) subtype accounting for 7.5-8.7 % of CRCs. It
has been shown that SAC has a worse prognosis and different histological
and molecular features compared to conventional carcinoma (CC) but, to
date, there is no study analysing its methylome profile.
Results: The methylation status of 450,000 CpG sites using the Infinium
Human Methylation 450 BeadChip array was investigated in 103 colorectal
specimens, including 39 SACs and 34 matched CCs, from Spanish and
Finnish patients. Microarray data showed a higher representation of
morphogenesis-, neurogenesis-, cytoskeleton- and vesicle
transport-related functions and also significant differential
methylation of 15 genes, including the iodothyronine deiodinase DIO3 and
the forkhead family transcription factor FOXD2 genes which were
validated at the CpG, mRNA and protein level using pyrosequencing, methylation-specific PCR, quantitative polymerase chain reaction (qPCR)
and immunohistochemistry. A quantification study of the methylation
status of CpG sequences in FOXD2 demonstrated a novel region controlling
gene expression. Moreover, differences in these markers were also
evident when comparing SAC with CRC showing molecular and histological
features of high-level microsatellite instability.
Conclusions: This methylome study demonstrates distinct epigenetic
regulation patterns in SAC which are consistent to previous expression
profile studies and that DIO3 and FOXD2 might be molecular targets for a
specific histology-oriented treatment of CRC.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
colorectal cancer (CRC) subtype accounting for 7.5-8.7 % of CRCs. It
has been shown that SAC has a worse prognosis and different histological
and molecular features compared to conventional carcinoma (CC) but, to
date, there is no study analysing its methylome profile.
Results: The methylation status of 450,000 CpG sites using the Infinium
Human Methylation 450 BeadChip array was investigated in 103 colorectal
specimens, including 39 SACs and 34 matched CCs, from Spanish and
Finnish patients. Microarray data showed a higher representation of
morphogenesis-, neurogenesis-, cytoskeleton- and vesicle
transport-related functions and also significant differential
methylation of 15 genes, including the iodothyronine deiodinase DIO3 and
the forkhead family transcription factor FOXD2 genes which were
validated at the CpG, mRNA and protein level using pyrosequencing, methylation-specific PCR, quantitative polymerase chain reaction (qPCR)
and immunohistochemistry. A quantification study of the methylation
status of CpG sequences in FOXD2 demonstrated a novel region controlling
gene expression. Moreover, differences in these markers were also
evident when comparing SAC with CRC showing molecular and histological
features of high-level microsatellite instability.
Conclusions: This methylome study demonstrates distinct epigenetic
regulation patterns in SAC which are consistent to previous expression
profile studies and that DIO3 and FOXD2 might be molecular targets for a
specific histology-oriented treatment of CRC.
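Morin-Adeline, V.; Mueller, K.; Conesa, A.; Slapeta, J.
Comparative RNA-seq analysis of the Tritrichomonas foetus PIG30/1 isolate from pigs reveals close association with Tritrichomonas foetus BP-4 isolate `bovine genotype' Journal Article
In: VETERINARY PARASITOLOGY, vol. 212, no. 3-4, pp. 111-117, 2015, ISSN: 0304-4017.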
@article{ISI:000363355400007,
title = {Comparative {RNA-seq} analysis of the {Tritrichomonas} foetus {PIG30/1}
isolate from pigs reveals close association with {Tritrichomonas} foetus
{BP-4} isolate `bovine genotype'},
author = { V. Morin-Adeline and K. Mueller and A. Conesa and J. Slapeta},
url = {http://dx.doi.org/10.1016/j.vetpar.2015.08.012},
doi = {10.1016/j.vetpar.2015.08.012},
issn = {0304-4017},
year = {2015},
date = {2015-09-01},
journal = {VETERINARY PARASITOLOGY},
volume = {212},
number = {3-4},
pages = {111--117},
abstract = {Tritrichomonas foetus was described as a commensal of the stomach, caecum and nasal cavity of pigs before it was recognised as the cause of
reproductive tract disease of cattle. T. foetus also causes chronic
large bowel diarrhoea in domestic cats. Multi-locus genotyping and
comparative transcriptome analysis has previously revealed that T.
foetus isolated from cat and cattle hosts are genetically distinct, referred to as the `feline genotype' and `bovine genotype', respectively. Conversely, multi-locus genotyping has grouped porcine T.
foetus with the `bovine genotype'. To compare the extent of the
similarity between porcine T. foetus and cattle `bovine genotype'
isolates, RNA-sequencing (RNA-seq) was used to produce the first
cell-wide transcriptome library of porcine T. foetus PIG30/1.
Comparative transcriptome analysis of the PIG30/1 with the published
bovine (BP-4) and feline (G10/1) transcriptomes revealed that the
porcine T. foetus shares a 4.7 fold greater number of orthologous genes
with the bovine T. foetus than with the feline T. foetus. Comparing
transcription of the virulence factors, cysteine proteases (CP) between
the three isolates, the porcine T. foetus was found to preferentially
transcribe CP8 like the `bovine genotype' T. foetus, compared to the high
transcription of CP7 seen for `feline genotype' T. foetus. At the
cell-wide transcriptome level, the porcine T. foetus isolate (PIG30/1)
groups closer with the `bovine genotype' T. foetus rather than the
`feline genotype' T. foetus. (C) 2015 Elsevier B.V. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
reproductive tract disease of cattle. T. foetus also causes chronic
large bowel diarrhoea in domestic cats. Multi-locus genotyping and
comparative transcriptome analysis has previously revealed that T.
foetus isolated from cat and cattle hosts are genetically distinct, referred to as the `feline genotype' and `bovine genotype', respectively. Conversely, multi-locus genotyping has grouped porcine T.
foetus with the `bovine genotype'. To compare the extent of the
similarity between porcine T. foetus and cattle `bovine genotype'
isolates, RNA-sequencing (RNA-seq) was used to produce the first
cell-wide transcriptome library of porcine T. foetus PIG30/1.
Comparative transcriptome analysis of the PIG30/1 with the published
bovine (BP-4) and feline (G10/1) transcriptomes revealed that the
porcine T. foetus shares a 4.7 fold greater number of orthologous genes
with the bovine T. foetus than with the feline T. foetus. Comparing
transcription of the virulence factors, cysteine proteases (CP) between
the three isolates, the porcine T. foetus was found to preferentially
transcribe CP8 like the `bovine genotype' T. foetus, compared to the high
transcription of CP7 seen for `feline genotype' T. foetus. At the
cell-wide transcriptome level, the porcine T. foetus isolate (PIG30/1)
groups closer with the `bovine genotype' T. foetus rather than the
`feline genotype' T. foetus. (C) 2015 Elsevier B.V. All rights reserved.
Irmer, H.; Tarazona, S.; Sasse, C.; Olbermann, P.; Loeffler, J.; Krappmann, S.; Conesa, A.; Braus, G. H.
RNAseq analysis of Aspergillus fumigatus in blood reveals a just wait and see resting stage behavior Journal Article
In: BMC GENOMICS, vol. 16, 2015, ISSN: 1471-2164.
@article{ISI:000360039300003,
title = {{RNAseq} analysis of {Aspergillus} fumigatus in blood reveals a just wait
and see resting stage behavior},
author = { H. Irmer and S. Tarazona and C. Sasse and P. Olbermann and J. Loeffler and S. Krappmann and A. Conesa and G. H. Braus},
url = {http://dx.doi.org/10.1186/s12864-015-1853-1},
doi = {10.1186/s12864-015-1853-1},
issn = {1471-2164},
year = {2015},
date = {2015-08-01},
journal = {BMC GENOMICS},
volume = {16},
abstract = {Background: Invasive aspergillosis is started after germination of
Aspergillus fumigatus conidia that are inhaled by susceptible
individuals. Fungal hyphae can grow in the lung through the epithelial
tissue and disseminate hematogenously to invade into other organs. Low
fungaemia indicates that fungal elements do not reside in the
bloodstream for long.
Results: We analyzed whether blood represents a hostile environment to
which the physiology of A. fumigatus has to adapt. An in vitro model of
A. fumigatus infection was established by incubating mycelium in blood.
Our model allowed to discern the changes of the gene expression profile
of A. fumigatus at various stages of the infection. The majority of
described virulence factors that are connected to pulmonary infections
appeared not to be activated during the blood phase. Three active
processes were identified that presumably help the fungus to survive the
blood environment in an advanced phase of the infection: iron
homeostasis, secondary metabolism, and the formation of detoxifying
enzymes.
Conclusions: We propose that A. fumigatus is hardly able to propagate in
blood. After an early stage of sensing the environment, virtually all
uptake mechanisms and energy-consuming metabolic pathways are shut-down.
The fungus appears to adapt by trans-differentiation into a resting
mycelial stage. This might reflect the harsh conditions in blood where
A. fumigatus cannot take up sufficient nutrients to establish
self-defense mechanisms combined with significant growth.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Aspergillus fumigatus conidia that are inhaled by susceptible
individuals. Fungal hyphae can grow in the lung through the epithelial
tissue and disseminate hematogenously to invade into other organs. Low
fungaemia indicates that fungal elements do not reside in the
bloodstream for long.
Results: We analyzed whether blood represents a hostile environment to
which the physiology of A. fumigatus has to adapt. An in vitro model of
A. fumigatus infection was established by incubating mycelium in blood.
Our model allowed to discern the changes of the gene expression profile
of A. fumigatus at various stages of the infection. The majority of
described virulence factors that are connected to pulmonary infections
appeared not to be activated during the blood phase. Three active
processes were identified that presumably help the fungus to survive the
blood environment in an advanced phase of the infection: iron
homeostasis, secondary metabolism, and the formation of detoxifying
enzymes.
Conclusions: We propose that A. fumigatus is hardly able to propagate in
blood. After an early stage of sensing the environment, virtually all
uptake mechanisms and energy-consuming metabolic pathways are shut-down.
The fungus appears to adapt by trans-differentiation into a resting
mycelial stage. This might reflect the harsh conditions in blood where
A. fumigatus cannot take up sufficient nutrients to establish
self-defense mechanisms combined with significant growth.
de la Fuente, L.; Conesa, A.; Lloret, A.; Badenes, M. Luisa; Rios, G.
Genome-wide changes in histone H3 lysine 27 trimethylation associated with bud dormancy release in peach Journal Article
In: TREE GENETICS & GENOMES, vol. 11, no. 3, 2015, ISSN: 1614-2942.
@article{ISI:000355704700013,
title = {Genome-wide changes in histone {H3} lysine 27 trimethylation associated
with bud dormancy release in peach},
author = { L. de la Fuente and A. Conesa and A. Lloret and M. Luisa Badenes and G. Rios},
url = {http://dx.doi.org/10.1007/s11295-015-0869-7},
doi = {10.1007/s11295-015-0869-7},
issn = {1614-2942},
year = {2015},
date = {2015-06-01},
journal = {TREE GENETICS \& GENOMES},
volume = {11},
number = {3},
abstract = {Bud dormancy is an evolutionary adaptation of perennial plants to the
seasonal fluctuation of temperatures in temperate climates, affected by
intrinsic and environmental signals. Recent investigations point to a
relevant role of epigenetic mechanisms in the regulation of bud
dormancy. We have performed a chromatin immunoprecipitation sequencing
(ChIP-seq) analysis of histone H3 lysine-27 trimethylation (H3K27me3), a
chromatin mark associated with stable gene silencing, in dormant (D) and
dormancy-released (ND) buds of peach (Prunus persica). H3K27me3 regions
were more abundant in gene-rich euchromatic zones of chromosomes and
associated with gene bodies. The dormancy regulators DORMANCY-ASSOCIATED
MADS-box (DAM) 1, DAM4, DAM5 and DAM6 were found significantly enriched
in H3K27me3 in ND samples, in close agreement with their
dormancy-specific expression. The DAM locus was modified at specific
short regions, allowing the uneven regulation of distinct DAM genes.
Additional regulatory factors related to meristem activity and flowering
genes from Arabidopsis thaliana were differentially H3K27 trimethylated, which suggests that meristem reactivation and flower development could
be also epigenetically regulated in reproductive buds of peach. A (GA)n
motif and CACTA-type transposon-related sequences were found
over-represented in H3K27me3 regions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
seasonal fluctuation of temperatures in temperate climates, affected by
intrinsic and environmental signals. Recent investigations point to a
relevant role of epigenetic mechanisms in the regulation of bud
dormancy. We have performed a chromatin immunoprecipitation sequencing
(ChIP-seq) analysis of histone H3 lysine-27 trimethylation (H3K27me3), a
chromatin mark associated with stable gene silencing, in dormant (D) and
dormancy-released (ND) buds of peach (Prunus persica). H3K27me3 regions
were more abundant in gene-rich euchromatic zones of chromosomes and
associated with gene bodies. The dormancy regulators DORMANCY-ASSOCIATED
MADS-box (DAM) 1, DAM4, DAM5 and DAM6 were found significantly enriched
in H3K27me3 in ND samples, in close agreement with their
dormancy-specific expression. The DAM locus was modified at specific
short regions, allowing the uneven regulation of distinct DAM genes.
Additional regulatory factors related to meristem activity and flowering
genes from Arabidopsis thaliana were differentially H3K27 trimethylated, which suggests that meristem reactivation and flower development could
be also epigenetically regulated in reproductive buds of peach. A (GA)n
motif and CACTA-type transposon-related sequences were found
over-represented in H3K27me3 regions.
Yanez, Y.; Grau, E.; Rodriguez-Cortez, V. C.; Hervas, D.; Vidal, E.; Noguera, R.; Hernandez, M.; Segura, V.; Canete, A.; Conesa, A.; Font de Mora, J.; Castel, V.
Two independent epigenetic biomarkers predict survival in neuroblastoma Journal Article
In: CLINICAL EPIGENETICS, vol. 7, 2015, ISSN: 1868-7083.
@article{ISI:000350585600002,
title = {Two independent epigenetic biomarkers predict survival in neuroblastoma},
author = { Y. Yanez and E. Grau and V. C. Rodriguez-Cortez and D. Hervas and E. Vidal and R. Noguera and M. Hernandez and V. Segura and A. Canete and A. Conesa and {Font de Mora}, J. and V. Castel},
url = {http://dx.doi.org/10.1186/s13148-015-0054-8},
doi = {10.1186/s13148-015-0054-8},
issn = {1868-7083},
year = {2015},
date = {2015-02-01},
journal = {CLINICAL EPIGENETICS},
volume = {7},
abstract = {Background: Neuroblastoma (NB) is the most common extracranial pediatric
solid tumor with a highly variable clinical course, ranging from
spontaneous regression to life-threatening disease. Survival rates for
high-risk NB patients remain disappointingly low despite multimodal
treatment. Thus, there is an urgent clinical need for additional
biomarkers to improve risk stratification, treatment management, and
survival rates in children with aggressive NB.
Results: Using gene promoter methylation analysis in 48 neuroblastoma
tumors with microarray technology, we found a strong association between survival and gene promoter hypermethylation (P = 0.036).
Hypermethylation of 70 genes significantly differentiated high-risk
survivor patients from those who died during follow-up time. Sixteen
genes with relevant roles in cancer biology were further validated in an
additional cohort of 83 neuroblastoma tumors by bisulfite
pyrosequencing. High promoter methylation rates of these genes were
found in patients with metastatic tumors (either stage metastatic (M) or
metastatic special (MS)), 18 months or older at first diagnosis, MYCN
amplification, relapsed, and dead. Notably, the degree of methylation of
retinoblastoma 1 (RB1) and teratocarcinoma-derived growth factor 1
(TDGF1) predicts event-free and overall survival independently of the
established risk factors. In addition, low RB1 mRNA expression levels
associate with poor prognosis suggesting that promoter methylation could
contribute to the transcriptional silencing of this gene in NB.
Conclusions: We found a new epigenetic signature predictive for NB
patients' outcome: the methylation status of RB1 and TDGF1 associate
with poorer survival. This information is useful to assess prognosis and
improve treatment selection.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
solid tumor with a highly variable clinical course, ranging from
spontaneous regression to life-threatening disease. Survival rates for
high-risk NB patients remain disappointingly low despite multimodal
treatment. Thus, there is an urgent clinical need for additional
biomarkers to improve risk stratification, treatment management, and
survival rates in children with aggressive NB.
Results: Using gene promoter methylation analysis in 48 neuroblastoma
tumors with microarray technology, we found a strong association between survival and gene promoter hypermethylation (P = 0.036).
Hypermethylation of 70 genes significantly differentiated high-risk
survivor patients from those who died during follow-up time. Sixteen
genes with relevant roles in cancer biology were further validated in an
additional cohort of 83 neuroblastoma tumors by bisulfite
pyrosequencing. High promoter methylation rates of these genes were
found in patients with metastatic tumors (either stage metastatic (M) or
metastatic special (MS)), 18 months or older at first diagnosis, MYCN
amplification, relapsed, and dead. Notably, the degree of methylation of
retinoblastoma 1 (RB1) and teratocarcinoma-derived growth factor 1
(TDGF1) predicts event-free and overall survival independently of the
established risk factors. In addition, low RB1 mRNA expression levels
associate with poor prognosis suggesting that promoter methylation could
contribute to the transcriptional silencing of this gene in NB.
Conclusions: We found a new epigenetic signature predictive for NB
patients' outcome: the methylation status of RB1 and TDGF1 associate
with poorer survival. This information is useful to assess prognosis and
improve treatment selection.
Terol, Javier; Carbonell, José; Alonso, Roberto; Tadeo, Francisco R.; Herrero-Ortega, Amparo; Ibañez, Victoria; Muñoz, Juan Vicente; López-García, Antonio; Estornell, Leandro H.; Colmenero-Flores, José M.; Conesa, Ana; Dopazo, Joaquín; Talón, Manuel
SEQUENCING OF 150 CITRUS VARIETIES: LINKING GENOTYPES TO PHENOTYPES Proceedings Article
In: ActaHortic., pp. 585-589, International Society for Horticultural Science (ISHS), Leuven, Belgium, 2015, ISSN: 2406-6168.
@inproceedings{Terol2015,
title = {Sequencing of 150 Citrus Varieties: Linking Genotypes to Phenotypes},
author = {Terol, Javier and Carbonell, Jos{\'e} and Alonso, Roberto and Tadeo, Francisco R. and Herrero-Ortega, Amparo and Iba{\~n}ez, Victoria and Mu{\~n}oz, Juan Vicente and L{\'o}pez-Garc{\'\i}a, Antonio and Estornell, Leandro H. and Colmenero-Flores, Jos{\'e} M. and Conesa, Ana and Dopazo, Joaqu{\'\i}n and Tal{\'o}n, Manuel},
url = {https://doi.org/10.17660/ActaHortic.2015.1065.73},
doi = {10.17660/ActaHortic.2015.1065.73},
issn = {2406-6168},
year = {2015},
date = {2015-01-20},
booktitle = {ActaHortic.},
number = {1065},
pages = {585--589},
publisher = {International Society for Horticultural Science (ISHS), Leuven, Belgium},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
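Morin-Adeline, V.; Lomas, R.; O'Meally, D.; Stack, C.; Conesa, A.; Slapeta, J.
Comparative transcriptomics reveals striking similarities between the bovine and feline isolates of Tritrichomonas foetus: consequences for in silico drug-target identification Journal Article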
In: BMC GENOMICS, vol. 15, 2014, ISSN: 1471-2164.
@article{ISI:000345790500001,
title = {Comparative transcriptomics reveals striking similarities between the
bovine and feline isolates of {Tritrichomonas} foetus: consequences for in
silico drug-target identification},
author = { V. Morin-Adeline and R. Lomas and D. O'Meally and C. Stack and A. Conesa and J. Slapeta},
url = {http://dx.doi.org/10.1186/1471-2164-15-955},
doi = {10.1186/1471-2164-15-955},
issn = {1471-2164},
year = {2014},
date = {2014-11-01},
journal = {BMC GENOMICS},
volume = {15},
abstract = {Background: Few, if any, protozoan parasites are reported to exhibit
extreme organ tropism like the flagellate Tritrichomonas foetus. In
cattle, T. foetus infects the reproductive system causing abortion, whereas the infection in cats results in chronic large bowel diarrhoea.
In the absence of a T. foetus genome, we utilized a de novo approach to
assemble the transcriptome of the bovine and feline genotype to identify
host-specific adaptations and virulence factors specific to each
genotype. Furthermore, a subset of orthologs was used to characterize
putative druggable targets and expose complications of in silico drug
target mining in species with indefinite host-ranges.
Results: Illumina RNA-seq reads were assembled into two representative
bovine and feline transcriptomes containing 42,363 and 36,559 contigs, respectively. Coding and non-coding regions of the genome libraries
revealed striking similarities, with 24,620 shared homolog pairs reduced
down to 7,547 coding orthologs between the two genotypes. The
transcriptomes were near identical in functional category distribution;
with no indication of selective pressure acting on orthologs despite
differences in parasite origins/host. Orthologs formed a large
proportion of highly expressed transcripts in both genotypes (bovine
genotype: 76\%, feline genotype: 56\%). Mining the libraries for
protease virulence factors revealed the cysteine proteases (CP) to be
the most common. In total, 483 and 445 bovine and feline T. foetus
transcripts were identified as putative proteases based on MEROPS
database, with 9 hits to putative protease inhibitors. In bovine T.
foetus, CP8 is the preferentially transcribed CP while in the feline
genotype, transcription of CP7 showed higher abundance. In silico
druggability analysis of the two genotypes revealed that when host
sequences are taken into account, drug targets are genotype-specific.
Conclusion: Gene discovery analysis based on RNA-seq data analysis
revealed prominent similarities between the bovine and feline T. foetus, suggesting recent adaptation to their respective host/niche. T. foetus
represents a unique case of a mammalian protozoan expanding its
parasitic grasp across distantly related host lineages. Consequences of
the host-range for in silico drug targeting are exposed here, demonstrating that targets of the parasite in one host are not
necessarily ideal for the same parasite in another host.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
extreme organ tropism like the flagellate Tritrichomonas foetus. In
cattle, T. foetus infects the reproductive system causing abortion, whereas the infection in cats results in chronic large bowel diarrhoea.
In the absence of a T. foetus genome, we utilized a de novo approach to
assemble the transcriptome of the bovine and feline genotype to identify
host-specific adaptations and virulence factors specific to each
genotype. Furthermore, a subset of orthologs was used to characterize
putative druggable targets and expose complications of in silico drug
target mining in species with indefinite host-ranges.
Results: Illumina RNA-seq reads were assembled into two representative
bovine and feline transcriptomes containing 42,363 and 36,559 contigs, respectively. Coding and non-coding regions of the genome libraries
revealed striking similarities, with 24,620 shared homolog pairs reduced
down to 7,547 coding orthologs between the two genotypes. The
transcriptomes were near identical in functional category distribution;
with no indication of selective pressure acting on orthologs despite
differences in parasite origins/host. Orthologs formed a large
proportion of highly expressed transcripts in both genotypes (bovine
genotype: 76%, feline genotype: 56%). Mining the libraries for
protease virulence factors revealed the cysteine proteases (CP) to be
the most common. In total, 483 and 445 bovine and feline T. foetus
transcripts were identified as putative proteases based on MEROPS
database, with 9 hits to putative protease inhibitors. In bovine T.
foetus, CP8 is the preferentially transcribed CP while in the feline
genotype, transcription of CP7 showed higher abundance. In silico
druggability analysis of the two genotypes revealed that when host
sequences are taken into account, drug targets are genotype-specific.
Conclusion: Gene discovery analysis based on RNA-seq data analysis
revealed prominent similarities between the bovine and feline T. foetus, suggesting recent adaptation to their respective host/niche. T. foetus
represents a unique case of a mammalian protozoan expanding its
parasitic grasp across distantly related host lineages. Consequences of
the host-range for in silico drug targeting are exposed here, demonstrating that targets of the parasite in one host are not
necessarily ideal for the same parasite in another host.
Sebastian-Leon, P.; Vidal, E.; Minguez, P.; Conesa, A.; Tarazona, S.; Amadoz, A.; Armero, C.; Salavert, F.; Vidal-Puig, A.; Montaner, D.; Dopazo, J.
Understanding disease mechanisms with models of signaling pathway activities Journal Article
In: BMC SYSTEMS BIOLOGY, vol. 8, 2014, ISSN: 1752-0509.
@article{ISI:000347559200001,
  author     = {Sebastian-Leon, P. and Vidal, E. and Minguez, P. and Conesa, A. and Tarazona, S. and Amadoz, A. and Armero, C. and Salavert, F. and Vidal-Puig, A. and Montaner, D. and Dopazo, J.},
  title      = {Understanding disease mechanisms with models of signaling pathway
activities},
  journal    = {BMC SYSTEMS BIOLOGY},
  volume     = {8},
  year       = {2014},
  date       = {2014-10-01},
  issn       = {1752-0509},
  doi        = {10.1186/s12918-014-0121-3},
  url        = {http://dx.doi.org/10.1186/s12918-014-0121-3},
  abstract   = {Background: Understanding the aspects of the cell functionality that
account for disease or drug action mechanisms is one of the main
challenges in the analysis of genomic data and is on the basis of the
future implementation of precision medicine.
Results: Here we propose a simple probabilistic model in which signaling
pathways are separated into elementary sub-pathways or signal
transmission circuits (which ultimately trigger cell functions) and then
transforms gene expression measurements into probabilities of activation
of such signal transmission circuits. Using this model, differential
activation of such circuits between biological conditions can be
estimated. Thus, circuit activation statuses can be interpreted as
biomarkers that discriminate among the compared conditions. This type of
mechanism-based biomarkers accounts for cell functional activities and
can easily be associated to disease or drug action mechanisms. The
accuracy of the proposed model is demonstrated with simulations and real
datasets.
Conclusions: The proposed model provides detailed information that
enables the interpretation disease mechanisms as a consequence of the
complex combinations of altered gene expression values. Moreover, it
offers a framework for suggesting possible ways of therapeutic
intervention in a pathologically perturbed system.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
account for disease or drug action mechanisms is one of the main
challenges in the analysis of genomic data and is on the basis of the
future implementation of precision medicine.
Results: Here we propose a simple probabilistic model in which signaling
pathways are separated into elementary sub-pathways or signal
transmission circuits (which ultimately trigger cell functions) and then
transforms gene expression measurements into probabilities of activation
of such signal transmission circuits. Using this model, differential
activation of such circuits between biological conditions can be
estimated. Thus, circuit activation statuses can be interpreted as
biomarkers that discriminate among the compared conditions. This type of
mechanism-based biomarkers accounts for cell functional activities and
can easily be associated to disease or drug action mechanisms. The
accuracy of the proposed model is demonstrated with simulations and real
datasets.
Conclusions: The proposed model provides detailed information that
enables the interpretation disease mechanisms as a consequence of the
complex combinations of altered gene expression values. Moreover, it
offers a framework for suggesting possible ways of therapeutic
intervention in a pathologically perturbed system.
Su, Z.; Labaj, P. P.; Li, S.; Thierry-Mieg, J.; Thierry-Mieg, D.; Shi, W.; Wang, C.; Schroth, G. P.; Setterquist, R. A.; Thompson, J. F.; Jones, W. D.; Xiao, W.; Xu, W.; Jensen, R. V.; Kelly, R.; Xu, J.; Conesa, A.; Furlanello, C.; Gao, H.; Hong, H.; Jafari, N.; Letovsky, S.; Liao, Y.; Lu, F.; Oakeley, E. J.; Peng, Z.; Praul, C. A.; Santoyo-Lopez, J.; Scherer, A.; Shi, T.; Smyth, G. K.; Staedtler, F.; Sykacek, P.; Tan, X.; Thompson, E. A.; Vandesompele, J.; Wang, M. D.; Wang, J.; Wolfinger, R. D.; Zavadil, J.; Auerbach, S. S.; Bao, W.; Binder, H.; Blomquist, T.; Brilliant, M. H.; Bushel, P. R.; Cain, W.; Catalano, J. G.; Chang, C.; Chen, T.; Chen, G.; Chen, R.; Chierici, M.; Chu, T.; Clevert, D.; Deng, Y.; Derti, A.; Devanarayan, V.; Dong, Z.; Dopazo, J.; Du, T.; Fang, H.; Fang, Y.; Fasold, M.; Fernandez, A.; Fischer, M.; Furio-Tari, P.; Fuscoe, J. C.; Caiment, F.; Gaj, S.; Gandara, J.; Gao, H.; Ge, W.; Gondo, Y.; Gong, B.; Gong, M.; Gong, Z.; Green, B.; Guo, C.; Guo, L.; Guo, L.; Hadfield, J.; Hellemans, J.; Hochreiter, S.; Jia, M.; Jian, M.; Johnson, C. D.; Kay, S.; Kleinjans, J.; Lababidi, S.; Levy, S.; Li, Q.; Li, L.; Li, L.; Li, P.; Li, Y.; Li, H.; Li, J.; Li, S.; Lin, S. M.; Lopez, F. J.; Lu, X.; Luo, H.; Ma, X.; Meehan, J.; Megherbi, D. B.; Mei, N.; Mu, B.; Ning, B.; Pandey, A.; Perez-Florido, J.; Perkins, R. G.; Peters, R.; Phan, J. H.; Pirooznia, M.; Qian, F.; Qing, T.; Rainbow, L.; Rocca-Serra, P.; Sambourg, L.; Sansone, S.; Schwartz, S.; Shah, R.; Shen, J.; Smith, T. M.; Stegle, O.; Stralis-Pavese, N.; Stupka, E.; Suzuki, Y.; Szkotnicki, L. T.; Tinning, M.; Tu, B.; van Deft, J.; Vela-Boza, A.; Venturini, E.; Walker, S. J.; Wan, L.; Wang, W.; Wang, J.; Wang, J.; Wieben, E. D.; Willey, J. C.; Wu, P.; Xuan, J.; Yang, Y.; Ye, Z.; Yin, Y.; Yu, Y.; Yuan, Y.; Zhang, J.; Zhang, K. K.; Zhang, W.; Zhang, W.; Zhang, Y.; Zhao, C.; Zheng, Y.; Zhou, Y.; Zumbo, P.; Tong, W.; Kreil, D. P.; Mason, C. E.; Shi, L.
A comprehensive assessment of RNA-seq accuracy, reproducibility and information content by the Sequencing Quality Control Consortium Journal Article
In: NATURE BIOTECHNOLOGY, vol. 32, no. 9, pp. 903-914, 2014, ISSN: 1087-0156.
@article{ISI:000342600300030,
title = {A comprehensive assessment of {RNA-seq} accuracy, reproducibility and
information content by the {Sequencing Quality Control Consortium}},
author = { Z. Su and P. P. Labaj and S. Li and J. Thierry-Mieg and D. Thierry-Mieg and W. Shi and C. Wang and G. P. Schroth and R. A. Setterquist and J. F. Thompson and W. D. Jones and W. Xiao and W. Xu and R. V. Jensen and R. Kelly and J. Xu and A. Conesa and C. Furlanello and H. Gao and H. Hong and N. Jafari and S. Letovsky and Y. Liao and F. Lu and E. J. Oakeley and Z. Peng and C. A. Praul and J. Santoyo-Lopez and A. Scherer and T. Shi and G. K. Smyth and F. Staedtler and P. Sykacek and X. Tan and E. A. Thompson and J. Vandesompele and M. D. Wang and J. Wang and R. D. Wolfinger and J. Zavadil and S. S. Auerbach and W. Bao and H. Binder and T. Blomquist and M. H. Brilliant and P. R. Bushel and W. Cain and J. G. Catalano and C. Chang and T. Chen and G. Chen and R. Chen and M. Chierici and T. Chu and D. Clevert and Y. Deng and A. Derti and V. Devanarayan and Z. Dong and J. Dopazo and T. Du and H. Fang and Y. Fang and M. Fasold and A. Fernandez and M. Fischer and P. Furio-Tari and J. C. Fuscoe and F. Caiment and S. Gaj and J. Gandara and H. Gao and W. Ge and Y. Gondo and B. Gong and M. Gong and Z. Gong and B. Green and C. Guo and L. Guo and L. Guo and J. Hadfield and J. Hellemans and S. Hochreiter and M. Jia and M. Jian and C. D. Johnson and S. Kay and J. Kleinjans and S. Lababidi and S. Levy and Q. Li and L. Li and L. Li and P. Li and Y. Li and H. Li and J. Li and S. Li and S. M. Lin and F. J. Lopez and X. Lu and H. Luo and X. Ma and J. Meehan and D. B. Megherbi and N. Mei and B. Mu and B. Ning and A. Pandey and J. Perez-Florido and R. G. Perkins and R. Peters and J. H. Phan and M. Pirooznia and F. Qian and T. Qing and L. Rainbow and P. Rocca-Serra and L. Sambourg and S. Sansone and S. Schwartz and R. Shah and J. Shen and T. M. Smith and O. Stegle and N. Stralis-Pavese and E. Stupka and Y. Suzuki and L. T. Szkotnicki and M. Tinning and B. Tu and J. van Deft and A. Vela-Boza and E. Venturini and S. J. Walker and L. Wan and W. Wang and J. Wang and J. Wang and E. D.
Wieben and J. C. Willey and P. Wu and J. Xuan and Y. Yang and Z. Ye and Y. Yin and Y. Yu and Y. Yuan and J. Zhang and K. K. Zhang and W. Zhang and W. Zhang and Y. Zhang and C. Zhao and Y. Zheng and Y. Zhou and P. Zumbo and W. Tong and D. P. Kreil and C. E. Mason and L. Shi},
url = {http://dx.doi.org/10.1038/nbt.2957},
doi = {10.1038/nbt.2957},
issn = {1087-0156},
year = {2014},
date = {2014-09-01},
journal = {NATURE BIOTECHNOLOGY},
volume = {32},
number = {9},
pages = {903--914},
abstract = {We present primary results from the Sequencing Quality Control (SEQC)
project, coordinated by the US Food and Drug Administration. Examining
Illumina HiSeq, Life Technologies SOLiD and Roche 454 platforms at
multiple laboratory sites using reference RNA samples with built-in
controls, we assess RNA sequencing (RNA-seq) performance for junction
discovery and differential expression profiling and compare it to
microarray and quantitative PCR (qPCR) data using complementary metrics.
At all sequencing depths, we discover unannotated exon-exon junctions, with >80\% validated by qPCR. We find that measurements of relative
expression are accurate and reproducible across sites and platforms if
specific-filters are used. In contrast, RNA-seq and microarrays do not
provide accurate absolute measurements, and gene-specific biases are
observed for all examined platforms, including qPCR. Measurement
performance depends on the platform and data analysis pipeline, and
variation is large for transcript-level profiling. The complete SEQC
data sets, comprising >100 billion reads (10Tb), provide unique
resources for evaluating RNA-seq analyses for clinical and regulatory
settings.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
project, coordinated by the US Food and Drug Administration. Examining
Illumina HiSeq, Life Technologies SOLiD and Roche 454 platforms at
multiple laboratory sites using reference RNA samples with built-in
controls, we assess RNA sequencing (RNA-seq) performance for junction
discovery and differential expression profiling and compare it to
microarray and quantitative PCR (qPCR) data using complementary metrics.
At all sequencing depths, we discover unannotated exon-exon junctions, with >80% validated by qPCR. We find that measurements of relative
expression are accurate and reproducible across sites and platforms if
specific-filters are used. In contrast, RNA-seq and microarrays do not
provide accurate absolute measurements, and gene-specific biases are
observed for all examined platforms, including qPCR. Measurement
performance depends on the platform and data analysis pipeline, and
variation is large for transcript-level profiling. The complete SEQC
data sets, comprising >100 billion reads (10Tb), provide unique
resources for evaluating RNA-seq analyses for clinical and regulatory
settings.
Nueda, M. J.; Tarazona, S.; Conesa, A.
Next maSigPro: updating maSigPro bioconductor package for RNA-seq time series Journal Article
In: BIOINFORMATICS, vol. 30, no. 18, pp. 2598-2602, 2014, ISSN: 1367-4803.
@article{ISI:000342913000008,
title = {Next {maSigPro}: updating {maSigPro} bioconductor package for {RNA-seq} time
series},
author = { M. J. Nueda and S. Tarazona and A. Conesa},
url = {http://dx.doi.org/10.1093/bioinformatics/btu333},
doi = {10.1093/bioinformatics/btu333},
issn = {1367-4803},
year = {2014},
date = {2014-09-01},
journal = {BIOINFORMATICS},
volume = {30},
number = {18},
pages = {2598--2602},
abstract = {Motivation: The widespread adoption of RNA-seq to quantitatively measure
gene expression has increased the scope of sequencing experimental
designs to include time-course experiments. maSigPro is an R package
specifically suited for the analysis of time-course gene expression
data, which was developed originally for microarrays and hence was
limited in its application to count data.
Results: We have updated maSigPro to support RNA-seq time series
analysis by introducing generalized linear models in the algorithm to
support the modeling of count data while maintaining the traditional
functionalities of the package. We show a good performance of the
maSigPro-GLM method in several simulated time-course scenarios and in a
real experimental dataset.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
gene expression has increased the scope of sequencing experimental
designs to include time-course experiments. maSigPro is an R package
specifically suited for the analysis of time-course gene expression
data, which was developed originally for microarrays and hence was
limited in its application to count data.
Results: We have updated maSigPro to support RNA-seq time series
analysis by introducing generalized linear models in the algorithm to
support the modeling of count data while maintaining the traditional
functionalities of the package. We show a good performance of the
maSigPro-GLM method in several simulated time-course scenarios and in a
real experimental dataset.