/**
 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * Project hosted at: http://jensembl.sourceforge.net
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
 */
package uk.ac.roslin.ensembl.demo;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
import uk.ac.roslin.ensembl.config.EnsemblDBType;
import uk.ac.roslin.ensembl.config.FeatureType;
import uk.ac.roslin.ensembl.dao.database.*;
import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory;
import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
import uk.ac.roslin.ensembl.datasourceaware.core.DACoordinateSystem;
import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
import uk.ac.roslin.ensembl.model.Coordinate;
import uk.ac.roslin.ensembl.model.Mapping;
import uk.ac.roslin.ensembl.model.core.Gene;

/**
 * Demo / functional test: retrieving genes.
 *
 * <p>Genes are 'feature' annotations; when genes are retrieved they are mapped
 * on to a chromosome. The program prints everything it retrieves to standard
 * output and finishes with a "COMPLETED FUNCTIONAL TEST" banner.
 */
public class Genes {

    /**
     * Runs the demo against the Ensembl Bacteria and Ensembl data sources.
     *
     * @param args command line arguments (not used)
     * @throws Exception if any of the registry, database or lazy-loading calls fails
     */
    public static void main(String[] args) throws Exception {

        final Coordinate.Strand reverse = Coordinate.Strand.REVERSE_STRAND;

        DBRegistry bactRegistry = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);

        DBCollectionSpecies sp1 = (DBCollectionSpecies) bactRegistry.getSpeciesByAlias("pyrococcus_abyssi_ge5");

        // The chromosome is fetched through the species here; the core factory's
        // chromosome DAO is an alternative route to the same kind of object.
        DAChromosome chr = sp1.getChromosomeByName("Chromosome");

        printChromosomeSummary(chr);

        // lets look at all the features that might be mapped on a chromosome
        // (very few implemented so far)
        System.out.println("\n------------\nFeatures\n-------------");
        printFeatureCoordinateSystems(sp1);

        // ---- gene 1: built from a factory and a stable ID only ----
        DAGene gene;
        DAOCoreFactory f = ((DBCollectionCoreDatabase) sp1.getDatabaseByTypeAndVersion(
                EnsemblDBType.collection_core,
                "" + bactRegistry.getHighestReleaseVersion())).getCoreFactory(sp1);
        gene = new DAGene(f);
        gene.setStableID("PAB1143");
        // a gene with a factory and stableID can lazyload everything else....

        printGeneAnalysis(gene);
        printGeneHeader(gene);
        printGeneProperties(gene);
        printGeneSchemaAndStatus(gene);

        printGeneMappings(gene);

        System.out.println("\nGenes mapped on the chromosome:...");
        printGenesOnChromosome(chr);

        // ---- gene 2: fetched by internal database ID from release 15 ----
        // v1.15 removed the ability to get a gene by internal db id from a species,
        // so the gene DAO of the core factory is used instead.
        // NOTE(review): chr was obtained from the current-release registry, whereas
        // the genes below come from a release-15 registry - confirm that their
        // mappings really end up on this chromosome object.
        bactRegistry = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA, 15);

        sp1 = (DBCollectionSpecies) bactRegistry.getSpeciesByAlias("Pyrococcus abyssi");

        gene = (DAGene) ((DBCollectionCoreDatabase) sp1.getDatabaseByTypeAndVersion(
                EnsemblDBType.collection_core, "15")).getCoreFactory(sp1).getGeneDAO().getGeneByID(3555);

        printGeneHeader(gene);
        printGeneProperties(gene);
        printGeneAnalysis(gene);
        printGeneSchemaAndStatus(gene);

        printCanonicalTranscript(gene);

        printGeneMappings(gene);

        // this single gene will have been added to the chromosome
        System.out.println("\nGenes mapped on the chromosome:...");
        printGenesOnChromosome(chr);

        // ---- gene 3: fetched by stable ID through the species ----
        gene = sp1.getGeneByStableID("EBPYRG00000003245", "15");

        printGeneHeader(gene);

        // This listing deliberately uses a different layout from printMappingDetails
        // and additionally reports the coordinate system of each target.
        System.out.println("Gene's loaded mappings...");
        for (Mapping m : gene.getLoadedMappings()) {
            System.out.println("HASHES " + m.getSourceHashID() + " " + m.getTargetHashID());
            System.out.println("Mapping source: " + m.getSource().getId());
            System.out.println(describeTarget(m));
            System.out.println("target coords:" + m.getTargetCoordinates().getStart() + "-"
                    + m.getTargetCoordinates().getEnd() + "("
                    + m.getTargetCoordinates().getStrand().toString() + ")");
            System.out.println("CS: " + ((DADNASequence) m.getTarget()).getCoordSystem().getType().toString());
        }

        System.out.println("");
        System.out.println("Gene's anotation level mappings...");
        for (Mapping m : gene.getAnnotationLevelMappings()) {
            printMappingDetails(m);
        }
        System.out.println("");

        // two genes will have been added to the chromosome
        System.out.println("\nGenes now mapped on the chromosome:...");
        printGenesOnChromosome(chr);

        // ---- region queries on the chromosome ----
        System.out.println("");

        List<? extends Gene> genes = chr.getGenesOnRegion(1, 500000, reverse);
        System.out.println("");
        printRegionQueryResult(chr, genes);

        printRegionQueryResult(chr, chr.getGenesOnRegion(1, 1000000, reverse));
        printRegionQueryResult(chr, chr.getGenesOnRegion(500000, 1000000, reverse));
        printRegionQueryResult(chr, chr.getGenesOnRegion(1010000, 1020000, reverse));
        printRegionQueryResult(chr, chr.getGenesOnRegion(1030000, 1040000, reverse));
        printRegionQueryResult(chr, chr.getGenesOnRegion(1050000, 1060000, reverse));

        for (Mapping m : chr.getLoadedMappings(FeatureType.gene)) {
            System.out.println(((DAGene) m.getTarget()).getStableID()
                    + " " + m.getSourceHashID() + " " + m.getTargetHashID());
        }

        // ---- which parts of the chromosome have been queried for genes so far ----
        System.out.println("gaps in the gene mappings: ");
        for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getGaps()) {
            printGap(cd);
        }

        System.out.println("gaps in the gene mappings for the whole chromosome: ");
        Coordinate cx = new Coordinate(chr.getBioBegin(), chr.getBioEnd(), 1);
        for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(cx)) {
            printGap(cd);
        }

        reportCoverage(chr, new Coordinate(900000, 1200000, 1), "testing 900000-1200000 query");
        reportCoverage(chr, new Coordinate(1, 900000, 1), "testing 1-900000 query");

        genes = chr.getGenesOnRegion(1, 10000, reverse);

        System.out.println(genes.size() + " genes");

        // ---- the same lazy-loading exercise against two human core releases ----
        DBRegistry ensemblRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
        DBSpecies hs = ensemblRegistry.getSpeciesByAlias("human");

        List<String> tester = new ArrayList<String>();
        tester.add("69");
        tester.add("72");

        for (String str : tester) {
            f = ((DBSingleSpeciesCoreDatabase) hs.getDatabaseByTypeAndVersion(
                    EnsemblDBType.core, str)).getCoreFactory();
            gene = new DAGene(f);
            gene.setStableID("ENSG00000139618");
            // a gene with a factory and stableID can lazyload everything else....

            // Kept inline: this section prints the analysis description before the
            // display label, unlike printGeneAnalysis.
            System.out.println("getAnalysisId " + gene.getAnalysisID());
            System.out.println("getAnalysisDescription " + gene.getAnalysis().getDescription());
            System.out.println("getAnalysis " + gene.getAnalysis().getDisplayLabel());

            printGeneHeader(gene);
            printGeneProperties(gene);
            printGeneSchemaAndStatus(gene);
        }

        System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");
    }

    /**
     * Prints identifying information, coordinates and the first 100 bases
     * (forward and reverse complement) of a chromosome.
     *
     * @param chr the chromosome to describe
     * @throws Exception if any of the chromosome accessors fails
     */
    private static void printChromosomeSummary(DAChromosome chr) throws Exception {
        System.out.println("species: " + chr.getSpecies().getSpeciesBinomial());
        System.out.println("chr: " + chr.getChromosomeName());
        System.out.println("chrID: " + chr.getId());
        System.out.println("chr DB version: " + chr.getDBVersion());

        System.out.println(chr.getBioBegin());
        System.out.println(chr.getLength());
        System.out.println(chr.getDBSeqLength());
        System.out.println(chr.getBioEnd());
        // NOTE(review): getDBSeqLength() is printed a second time here; kept so the
        // program output is unchanged - confirm whether the repeat is intentional.
        System.out.println(chr.getDBSeqLength());
        System.out.println(chr.getCoordSystem().getId());
        System.out.println(chr.getCoordSystem().getType().toString());

        System.out.println(chr.getSequenceAsString(1, 100));
        System.out.println(chr.getReverseComplementSequenceAsString(1, 100));
    }

    /**
     * For every known feature type, prints the coordinate systems that the most
     * recent core database of the species reports for it, or "NO INFORMATION"
     * when none are reported.
     *
     * @param species the species whose most recent core database is queried
     * @throws Exception if the database lookup fails
     */
    private static void printFeatureCoordinateSystems(DBCollectionSpecies species) throws Exception {
        for (FeatureType type : FeatureType.getAllTypes()) {
            System.out.println(type.toString());

            Set<DACoordinateSystem> systems = (species.getMostRecentCoreDatabase()).getCSForFeature(species, type);

            if (systems != null && !systems.isEmpty()) {
                for (DACoordinateSystem cs : systems) {
                    System.out.println("\tcs-id: " + cs.getId() + " - " + "cs-type: " + cs.getType().toString());
                }
            } else {
                System.out.println("NO INFORMATION");
            }
            System.out.println("");
        }
    }

    /**
     * Prints the analysis ID, analysis display label and analysis description of a gene.
     *
     * @param gene the gene to describe
     * @throws Exception if any of the gene accessors fails
     */
    private static void printGeneAnalysis(DAGene gene) throws Exception {
        System.out.println("getAnalysisId " + gene.getAnalysisID());
        System.out.println("getAnalysis " + gene.getAnalysis().getDisplayLabel());
        System.out.println("getAnalysisDescription " + gene.getAnalysis().getDescription());
    }

    /**
     * Prints the headline of a gene: IDs, biotype, description, species and dates.
     *
     * @param gene the gene to describe
     * @throws Exception if any of the gene accessors fails
     */
    private static void printGeneHeader(DAGene gene) throws Exception {
        System.out.println("gene: " + gene.getId() + " (" + gene.getStableID() + ") " + gene.getBiotype());
        System.out.println("\t" + gene.getDescription());
        System.out.println("Species: " + gene.getSpecies().getCommonName());
        System.out.println("Created " + gene.getCreationDate().toString());
        System.out.println("Modified " + gene.getModificationDate().toString());
    }

    /**
     * Prints assembly, biotype, description, DB version, display name and ID of a gene,
     * each labelled with the name of the getter that produced it.
     *
     * @param gene the gene to describe
     * @throws Exception if any of the gene accessors fails
     */
    private static void printGeneProperties(DAGene gene) throws Exception {
        System.out.println("getAssembly " + gene.getAssembly());
        System.out.println("getBiotype " + gene.getBiotype());
        System.out.println("getDescription " + gene.getDescription());
        System.out.println("getDBVersion " + gene.getDBVersion());
        System.out.println("getDisplayName " + gene.getDisplayName());
        System.out.println("getId " + gene.getId());
    }

    /**
     * Prints the schema version and status of a gene.
     *
     * @param gene the gene to describe
     * @throws Exception if any of the gene accessors fails
     */
    private static void printGeneSchemaAndStatus(DAGene gene) throws Exception {
        System.out.println("getSchemaVersion " + gene.getSchemaVersion());
        System.out.println("getStatus " + gene.getStatus());
    }

    /**
     * Prints the canonical transcript and canonical translation details of a gene,
     * including the protein and RNA sequences of the canonical translation.
     *
     * @param gene the gene to describe
     * @throws Exception if any of the gene, transcript or translation accessors fails
     */
    private static void printCanonicalTranscript(DAGene gene) throws Exception {
        System.out.println("getCanonicalTranscriptID " + gene.getCanonicalTranscriptID());
        System.out.println("getCanonicalTranscript().getBiotype() "
                + gene.getCanonicalTranscript().getBiotype());
        System.out.println("getCanonicalTranscript().getDisplayName() "
                + gene.getCanonicalTranscript().getDisplayName());
        System.out.println("getCanonicalTranscript().getStableID() "
                + gene.getCanonicalTranscript().getStableID());
        System.out.println("getCanonicalTranscript().getCanonicalTranslationID() "
                + gene.getCanonicalTranscript().getCanonicalTranslationID());
        System.out.println("gene.getCanonicalTranslation().getId() "
                + gene.getCanonicalTranslation().getId());
        System.out.println("getCanonicalTranslation().getStableID() "
                + gene.getCanonicalTranslation().getStableID());
        System.out.println("getCanonicalTranscript().getCanonicalTranslation().getProteinSequenceAsString() "
                + gene.getCanonicalTranscript().getCanonicalTranslation().getProteinSequenceAsString());
        System.out.println("getCanonicalTranscript().getCanonicalTranslation().getRNASequenceAsString() "
                + gene.getCanonicalTranscript().getCanonicalTranslation().getRNASequenceAsString());
    }

    /**
     * Prints the loaded mappings of a gene followed by its annotation level mappings,
     * each list preceded by a heading and followed by an empty line.
     *
     * @param gene the gene whose mappings are listed
     * @throws Exception if retrieving or reading the mappings fails
     */
    private static void printGeneMappings(DAGene gene) throws Exception {
        System.out.println("Gene's loaded mappings...");
        for (Mapping m : gene.getLoadedMappings()) {
            printMappingDetails(m);
        }
        System.out.println("");
        System.out.println("Gene's anotation level mappings...");
        for (Mapping m : gene.getAnnotationLevelMappings()) {
            printMappingDetails(m);
        }
        System.out.println("");
    }

    /**
     * Prints the hash IDs, source, target and target coordinates of one mapping.
     *
     * @param m the mapping to describe
     * @throws Exception if any of the mapping accessors fails
     */
    private static void printMappingDetails(Mapping m) throws Exception {
        System.out.println("HASHES " + m.getSourceHashID() + " " + m.getTargetHashID());
        System.out.println("Mapping: " + m.getSource().getClass().getSimpleName()
                + " id: " + m.getSource().getId());
        System.out.println(describeTarget(m));
        System.out.println("\tTargetCoords: " + m.getTargetCoordinates().getStart()
                + " - " + m.getTargetCoordinates().getEnd()
                + "[" + m.getTargetCoordinates().getStrand() + "]");
    }

    /**
     * Builds the "Target:" line for a mapping: simple class name, ID and type of the target.
     *
     * @param m the mapping whose target is described
     * @return the formatted line, starting with a tab character
     * @throws Exception if any of the mapping accessors fails
     */
    private static String describeTarget(Mapping m) throws Exception {
        // " type: " carries a leading space; without it the ID and the label ran
        // together in the output (e.g. "id: 123type: ...").
        return "\tTarget: " + m.getTarget().getClass().getSimpleName()
                + " id: " + m.getTarget().getId()
                + " type: " + m.getTarget().getType();
    }

    /**
     * Lists every gene mapping currently loaded on the chromosome: class name,
     * stable ID, ID, hash ID and Java hash code of each mapped gene.
     *
     * @param chr the chromosome whose loaded gene mappings are listed
     * @throws Exception if retrieving or reading the mappings fails
     */
    private static void printGenesOnChromosome(DAChromosome chr) throws Exception {
        for (Mapping m : chr.getLoadedMappings(FeatureType.gene)) {
            System.out.println(m.getTarget().getClass().getSimpleName()
                    + " gene stableID: "
                    + ((DAGene) m.getTarget()).getStableID()
                    + " id: " + m.getTarget().getId());
            System.out.println("Gene on chromosome: HashID: " + m.getTarget().getHashID()
                    + " VMID:" + m.getTarget().hashCode());
        }
    }

    /**
     * Prints how many genes a region query returned and how many gene mappings
     * are loaded on the chromosome at that point.
     *
     * @param chr   the chromosome that was queried
     * @param genes the genes returned by the region query
     * @throws Exception if retrieving the loaded mappings fails
     */
    private static void printRegionQueryResult(DAChromosome chr, List<? extends Gene> genes) throws Exception {
        System.out.println(genes.size() + " returned genes");
        System.out.println("total gene mappings: " + chr.getLoadedMappings(FeatureType.gene).size());
    }

    /**
     * Prints one gap as "gap start - end".
     *
     * @param gap the coordinate describing the gap
     */
    private static void printGap(Coordinate gap) {
        System.out.println("gap " + gap.getStart() + " - " + gap.getEnd());
    }

    /**
     * Reports whether a query region is covered by the chromosome's mapped gene
     * regions, lists any uncovered stretches and prints their count.
     *
     * @param chr     the chromosome whose mapped gene regions are checked
     * @param query   the region to test
     * @param heading the line printed before the report
     * @throws Exception if retrieving the mapped regions fails
     */
    private static void reportCoverage(DAChromosome chr, Coordinate query, String heading) throws Exception {
        System.out.println(heading);
        System.out.println("test lies within known region: "
                + chr.getMappedRegions().get(FeatureType.gene).containsCoordinateWithoutGaps(query));

        if (chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(query).isEmpty()) {
            System.out.println("test region is fully covered");
        } else {
            System.out.println("test region is not fully covered");
            for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(query)) {
                printGap(cd);
            }
        }

        System.out.println("test has regions not covered: "
                + chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(query).size());
    }
}