001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024import java.util.ArrayList; 025import java.util.Collection; 026import java.util.HashSet; 027import java.util.Iterator; 028import java.util.List; 029import org.biojava3.core.sequence.transcription.TranscriptionEngine; 030import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 031import uk.ac.roslin.ensembl.config.EnsemblCoordSystemType; 032import uk.ac.roslin.ensembl.dao.database.DBRegistry; 033import uk.ac.roslin.ensembl.dao.database.DBSpecies; 034import uk.ac.roslin.ensembl.datasourceaware.core.*; 035import uk.ac.roslin.ensembl.exception.NonUniqueException; 036import uk.ac.roslin.ensembl.model.Mapping; 037import uk.ac.roslin.ensembl.model.core.Exon; 038 039public class ExonsTranscriptionAndTranslation { 040 041 042 //demonstrates integration of BioJava3 transcription and translation functions 043 //and JEnsembls retrieval and stitching together of exon sequences 044 //uses BioJava transcription engines for translation 045 //the datasource is queried to use the correct codon table if specified 046 //show works with chordate, plant and bacterial genes 047 048 049 public static void main(String[] args) throws Exception { 050 051 DBRegistry eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 052 053 //note by default translation by this default engine is configured to convert 054 //non-Methionine initiations to Methionine 055 TranscriptionEngine te = eReg.getDefaultTranscriptionEngine(); 056 057 /* valid codes are 058 * 1 - UNIVERSAL 059 * 2 - VERTEBRATE_MITOCHONDRIAL 060 * 3 - YEAST_MITOCHONDRIAL 061 * 4 - MOLD_MITOCHONDRIAL 062 * 5 - INVERTEBRATE_MITOCHONDRIAL 063 * 6 - CILIATE_NUCLEAR 064 * 9 - ECHINODERM_MITOCHONDRIAL 065 * 10 - EUPLOTID_NUCLEAR 066 * 11 - BACTERIAL 067 * 12 - ALTERNATIVE_YEAST_NUCLEAR 068 * 13 - ASCIDIAN_MITOCHONDRIAL 069 * 14 - FLATWORM_MITOCHONDRIAL 070 * 15 - BLEPHARISMA_MACRONUCLEAR 071 * 16 - 2CHLOROPHYCEAN_MITOCHONDRIAL 072 * 21 - TREMATODE_MITOCHONDRIAL 073 * 23 - SCENEDESMUS_MITOCHONDRIAL 074 */ 075 TranscriptionEngine te2 = eReg.getTranscriptionEngine(2); 076 TranscriptionEngine te30 = eReg.getTranscriptionEngine(30); 077 078 079 DBSpecies cow = eReg.getSpeciesByAlias("cow"); 080 DAGene g = cow.getGeneByStableID("ENSBTAG00000021527"); 081 Mapping mapping = g.getChromosomeMapping(); 082 083 //if the genes are not annotated on the chromosome level 084 // Mapping mapping = gene.getAnnotationLevelMappings().first(); 085 086 087 System.out.println("Gene: "+ g.getStableID()); 088 System.out.println("\tversion: "+ g.getDBVersion()); 089 System.out.println("\tchr start: "+mapping.getTargetCoordinates().getStart()); 090 System.out.println("\tchr stop: "+mapping.getTargetCoordinates().getEnd()); 091 System.out.println("\tassembly: "+g.getAssembly()); 092 System.out.println("\tdescription: "+ g.getDescription()); 093 System.out.println("\tsymbol: "+g.getDisplayName()); 094 System.out.println("\tstrand: "+mapping.getTargetCoordinates().getStrand()); 095 System.out.println("\ttaxonID: "+g.getSpecies().getTaxonomyID()); 096 System.out.println("\tstatus: "+g.getStatus()); 097 System.out.println("\ttype: "+ g.getBiotype()); 098 099 100 //look at all the transcripts 101 102 System.out.println("\tTranscript Count: "+ g.getTranscripts().size()); 103 for (DATranscript t : g.getTranscripts()) { 104 System.out.println("\t\tTranscript: "+ t.getStableID()); 105 System.out.println("\t\t "+ t.getDisplayName()); 106 System.out.println("\t\t "+ t.getStatus()); 107 System.out.println("\t\t "+ t.getBiotype()); 108 System.out.println("\t\t "+ t.getDescription()); 109 System.out.println("\t\t "+ t.getGene().getStableID()); 110 System.out.println("\t\tCanonical ? "+ t.isCanonical()); 111 System.out.println("\t\tXREF: "+ t.getDisplayXRef().getDBDisplayName()); 112 System.out.println("\t\tXREF: "+ t.getDisplayXRef().getDisplayID()); 113 System.out.println("\t\tXREF: "+ t.getDisplayXRef().getInfoType()); 114 System.out.println("\t\tXREF: "+ t.getDisplayXRef().getInfo()); 115 for (Mapping m:t.getLoadedMappings(EnsemblCoordSystemType.chromosome)) { 116 System.out.println("\t\tMapping: "+m.getTargetHashID()); 117 System.out.println("\t\t\tCoords: "+m.getTargetCoordinates().toString()); 118 } 119 120 // look at all the exons of the transcript 121 122 System.out.println("EXONS"); 123 for (DAExon e : t.getExons()) { 124 System.out.println("\t\tRank: "+ e.getRank()); 125 System.out.println("\t\tStableID: "+ e.getStableID()); 126 System.out.println("\t\tID: "+ e.getId()); 127 System.out.println("\t\tstart phase: "+ e.getPhase()); 128 System.out.println("\t\tend phase: "+ e.getEndPhase()); 129 System.out.println("\t\tcurrent: "+ e.isCurrent()); 130 System.out.println("\t\tconstitutive: "+ e.isConstitutive()); 131 132 //get locations of exon 133 for (Mapping m:e.getLoadedMappings(EnsemblCoordSystemType.chromosome)) { 134 System.out.println("\t\tMapping: "+m.getTargetHashID()); 135 System.out.println("\t\t\tCoords: "+m.getTargetCoordinates().toString()); 136 } 137 } 138 139 140 141 142 } 143 144// 145 List<DAGene> genes = new ArrayList<DAGene>(); 146// 147 DBSpecies sp = eReg.getSpeciesByAlias("human"); 148 149 //chr1 150 //DAGene ge = sp.getGeneByStableID("ENSG00000197049","68");//this id retired in 69 151 DAGene ge = sp.getGeneByStableID("ENSG00000185495"); 152 System.out.println("GENE ENSG00000185495 \tversion: "+ ge.getDBVersion()); 153 154 155 if (ge.getCanonicalTranscript().isTranslated()) { 156 System.out.println(ge.getCanonicalTranslation().getTranslatedSequenceAsString()); 157 System.out.println(""); 158 //checks whether the codon table is specified by the datasource 159 System.out.println(ge.getCanonicalTranslation().getProteinSequence().getSequenceAsString()); 160 System.out.println(""); 161 } 162 163 DATranscript tt = (DATranscript) sp.getTranscriptByStableID("ENST00000400701", "76"); 164 165 System.out.println("A Transcript on chr X with out of phase start sequence: ENST00000400701" 166 +"\nNB: only in release 76!!"); 167 168 Collection<DAExon> exons = (Collection<DAExon>) tt.getExons(); 169 170 System.out.println("EXONS:..."); 171 172 for (DAExon ex : exons) { 173 System.out.print(ex.getRank()); 174 System.out.print("\t" + ex.getStableID()); 175 System.out.print("\t" + ex.getId()); 176 System.out.print("\t" + ex.getPhase()); 177 System.out.print("\t" + ex.getEndPhase()); 178 179 180 //not necessarily on a chromosome :) 181 //for (Mapping m : ex.getLoadedMappings(EnsemblCoordSystemType.chromosome)) { 182 for (Mapping m : ex.getLoadedMappings()) { 183 System.out.println("\t" +((DADNASequence) m.getTarget()).getName() +": "+ m.getTargetCoordinates().toString()); 184 } 185 } 186 187 188 if (tt.isTranslated()) { 189 190 DATranslation trl = (DATranslation) tt.getCanonicalTranslation(); 191 192 Exon e = trl.getFirstExon(); 193 Exon e2 = trl.getLastExon(); 194 Integer i = trl.getFirstExonStart(); 195 Integer ii = trl.getLastExonEnd(); 196 197 System.out.println("\nTranslateable Sequence:"); 198 System.out.println(trl.getTranslatedSequenceAsString()); 199 System.out.println(""); 200 201 System.out.println("PROTEIN: "); 202 System.out.println(trl.getProteinSequence().getSequenceAsString()); 203 204 } else { 205 System.out.println("\n"+tt.getStableID() + ": NOT TRANSLATED"); 206 } 207 208 //MT 209 DAGene g8 = sp.getGeneByStableID("ENSG00000198899"); 210 211 //all chr1 212 213 DAGene g5 = sp.getGeneByStableID("ENSG00000197049","56"); 214 DAGene g1 = sp.getGeneByStableID("ENSG00000237330"); 215 DAGene g6 = sp.getGeneByStableID("ENSG00000237330","56"); 216 217 //retired after 67 218 DAGene g2 = sp.getGeneByStableID("ENSG00000238916", "67"); 219 220 DBSpecies sp2 = eReg.getSpeciesByAlias("cow"); 221 DAGene g3 = sp2.getGeneByStableID("ENSBTAG00000021527", "68"); 222 DAGene g4 = sp2.getGeneByStableID("ENSBTAG00000021531","68"); 223 224 DBSpecies sp3 = eReg.getSpeciesByAlias("chicken"); 225 DAGene g7 = sp3.getGeneByStableID("ENSGALG00000021950","68"); 226 227 228 229 genes.add(g8); 230 genes.add(g7); 231 genes.add(g); 232 genes.add(g5); 233 genes.add(g1); 234 genes.add(g6); 235 genes.add(g2); 236 genes.add(g3); 237 genes.add(g4); 238 239 DBRegistry eRegG = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLGENOMES); 240 DBRegistry bRegG = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA); 241 //DBRegistry bRegG = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA, 19); 242 243 DBSpecies spG = eRegG.getSpeciesByAlias("Arabidopsis lyrata"); 244 DBSpecies spG2 = eRegG.getSpeciesByAlias("Arabidopsis thaliana"); 245 246 //there are two rcords with this sp/strain name - but with different gca ids 247 248 DBSpecies spG3 = null; 249 DBSpecies spG4 = null; 250 251 //we need to try to add a 'begins with' function to look up aliases here 252 //- the aliases keep on changing and becoming more specific 253 254 try { 255 //spG3 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv"); 256 spG3 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv (ASM27773v2)"); 257// spG4 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv (Myco_tube_H37Rv_V2)"); 258 spG4 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv (ASM19595v2)"); 259 } catch (NonUniqueException nonUniqueException) { 260 System.out.print("Mycobacterium tuberculosis H37Rv is not unique...."); 261 HashSet<DBSpecies> allHits = (HashSet<DBSpecies>) nonUniqueException.getAllHits(); 262 System.out.println(allHits.size()); 263 Iterator<DBSpecies> iterator = allHits.iterator(); 264 spG3 = iterator.next(); 265 spG4 = iterator.next(); 266 } 267 268 //DBSpecies spG4 = eRegG.getSpeciesByAlias("gca_000195955"); 269 270 DAGene geneG1_15 = spG.getGeneByStableID("fgenesh2_kg.1__2__AT1G02190.2", "15"); 271 DAGene geneG1_19 = spG.getGeneByStableID("fgenesh2_kg.1__2__AT1G02190.2", "19"); 272 DAGene geneG2_15 = spG2.getGeneByStableID("AT1G78060", "15"); 273 DAGene geneG2_19 = spG2.getGeneByStableID("AT1G78060", "19"); 274 //DAGene geneG3 = spG3.getGeneByStableID("EBMYCG00000000423", "15"); 275 DAGene geneG3 = spG3.getGeneByStableID("RVBD_0301"); 276 DAGene geneG4 = spG4.getGeneByStableID("Rv0301"); 277// List<DAGene> geneG3 = spG3.getGenesForExactName("lysU", "18"); 278// List<DAGene> geneG4 = spG4.getGenesForExactName("EBG00313316", "18"); 279 280 genes.add(geneG1_15); 281 genes.add(geneG1_19); 282 genes.add(geneG2_15); 283 genes.add(geneG2_19); 284 if (geneG3 != null) {genes.add(geneG3);} 285 if (geneG4 != null) {genes.add(geneG4);} 286 287 288 289 //showing we can get genes, transcripts and translations by stableID 290 //from a species 291 292 293// DBSpecies hs = eReg.getSpeciesByAlias("human"); 294// System.out.println("*** HUMAN ***"); 295// DAGene gene = hs.getGeneByStableID("ENSG00000139618"); 296// 297// DATranscript t = sp.getTranscriptByStableID("ENST00000380152" ); 298// DATranslation tr = sp.getTranslationByStableID("ENSP00000369497" ); 299// 300// //note the queries used pre v67 need to link to the relevant stableid table 301// DAGene gene60 = hs.getGeneByStableID("ENSG00000139618", "60"); 302// DATranscript t60 = sp.getTranscriptByStableID("ENST00000380152","60" ); 303// DATranslation tr60 = sp.getTranslationByStableID("ENSP00000369497","60" ); 304// 305// 306// // can lazy load a gene to a transcript 307// DAGene gene2 = t.getGene(); 308// // can lazy load a transcript to a translation 309// DATranscript t2 = tr.getTranscript(); 310// 311// System.out.println("Gene VegaID: "+gene.getVegaID()); 312// System.out.println("Gene retrieved from transcript: VegaID: "+gene2.getVegaID()); 313// System.out.println("Transcript VegaID: "+t.getVegaID()); 314// System.out.println("Transcript retrieved from translation: VegaID: "+t2.getVegaID()); 315// System.out.println("Translation VegaID: "+tr.getVegaID()); 316// 317// 318// if (gene.getStableID().equals(gene2.getStableID())) { 319// System.out.println("The two genes ARE the same (according to their stableIDS)"); 320// } else { 321// System.out.println("The two genes are NOT the same (according to their stableIDs)"); 322// } 323// if (gene.getStableID().equals(gene60.getStableID())) { 324// System.out.println("The genes pre and post v67 ARE the same (according to their stableIDS)"); 325// } else { 326// System.out.println("The genes pre and post v67 are NOT the same (according to their stableIDs)"); 327// } 328// if (t.getStableID().equals(t2.getStableID())) { 329// System.out.println("The two transcripts ARE the same (according to their stableIDS)"); 330// } else { 331// System.out.println("The two transcripts are NOT the same (according to their stableIDs)"); 332// } 333// if (t.getStableID().equals(t60.getStableID())) { 334// System.out.println("The transcripts pre and post v67 ARE the same (according to their stableIDS)"); 335// } else { 336// System.out.println("The transcripts pre and post v67 are NOT the same (according to their stableIDs)"); 337// } 338// if (tr.getStableID().equals(tr60.getStableID())) { 339// System.out.println("The translations pre and post v67 ARE the same (according to their stableIDS)"); 340// } else { 341// System.out.println("The translations pre and post v67 are NOT the same (according to their stableIDs)"); 342// } 343// 344// System.out.println("*** PIG ***"); 345// sp = eReg.getSpeciesByAlias("pig"); 346// g = sp.getGeneByStableID("ENSSSCG00000007520"); 347// //t = sp.getTranscriptByStableID("ENSSSCT00000034146" ); 348// tr = sp.getTranslationByStableID("ENSSSCP00000028626" ); 349// t = tr.getTranscript(); 350// 351// System.out.println("Gene VegaID: "+g.getVegaID()); 352// System.out.println("Transcript VegaID: "+t.getVegaID()); 353// System.out.println("Translation VegaID: "+tr.getVegaID()); 354// 355// System.out.println("*** Pyrococcus ***");//there won't be any Vega annotations 356// 357// DBCollectionSpecies sp1 = (DBCollectionSpecies) eRegG.getSpeciesByAlias("Pyrococcus abyssi"); 358// 359// DAGene gene1 = sp1.getGeneByStableID("EBPYRG00000002639", "15"); 360// DATranscript t1 = sp1.getTranscriptByStableID("EBPYRT00000002639" ,"15"); 361// DATranslation tr1 = sp1.getTranslationByStableID("EBPYRP00000002570","15" ); 362// DATranscript t12 = tr1.getTranscript(); 363// DAGene gene12 = t1.getGene(); 364// 365// System.out.println("Gene VegaID: "+gene1.getVegaID()); 366// System.out.println("Gene retrieved from Transcript VegaID: "+gene12.getVegaID()); 367// System.out.println("Transcript VegaID: "+t1.getVegaID()); 368// System.out.println("Transcript retrieved from translation: VegaID: "+t12.getVegaID()); 369// System.out.println("Translation VegaID: "+tr1.getVegaID()); 370// 371// 372// if (gene1.getStableID().equals(gene12.getStableID())) { 373// System.out.println("The two genes ARE the same (according to their stableIDS)"); 374// } else { 375// System.out.println("The two genes are NOT the same (according to their stableIDs)"); 376// } 377// if (t1.getStableID().equals(t12.getStableID())) { 378// System.out.println("The two transcripts ARE the same (according to their stableIDS)"); 379// } else { 380// System.out.println("The two transcripts are NOT the same (according to their stableIDs)"); 381// } 382// 383 System.out.println("\n\n*************************\nCOMPLETED FUNCTIONAL TEST\n*************************\n"); 384 385 } 386}