/**
 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * Project hosted at: http://jensembl.sourceforge.net
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
 */
package uk.ac.roslin.ensembl.demo;


import java.io.File;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
import uk.ac.roslin.ensembl.config.EnsemblDBType;
import uk.ac.roslin.ensembl.dao.database.DBDatabase;
import uk.ac.roslin.ensembl.dao.database.DBRegistry;
import uk.ac.roslin.ensembl.dao.database.DBSingleSpeciesCoreDatabase;
import uk.ac.roslin.ensembl.dao.database.DBSpecies;
import uk.ac.roslin.ensembl.dao.database.factory.DBDAOSingleSpeciesCoreFactory;
import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
import uk.ac.roslin.ensembl.model.core.Chromosome;
import uk.ac.roslin.ensembl.model.core.Gene;
import uk.ac.roslin.ensembl.model.database.Database;
import uk.ac.roslin.ensembl.model.database.SingleSpeciesCoreDatabase;


/**
 * Demonstrates a typical connection to the Ensembl (Vertebrate) datasource,
 * Registry autoconfiguration and basic data retrieval functions.
 *
 * <p>Running {@link #main(String[])} contacts the configured datasource and
 * prints everything it retrieves to standard output.
 */
public class EnsemblConnection {

    /** Stable ID of the gene that is mapped across every human core database. */
    private static final String GENE_STABLE_ID = "ENSG00000153551";

    /**
     * Walks through the demo: registry initialisation, species listing, session
     * aliases, chromosome and gene retrieval, direct Database/factory access and,
     * finally, mapping one gene across all human core databases.
     *
     * @param args command line arguments (not used)
     * @throws Exception if any registry, database or file access call fails
     */
    public static void main(String[] args) throws Exception {

        // We specify the DataSource for connection and initialise the registry;
        // this reads all the databases and species from the source.
        long start = System.currentTimeMillis();
        DBRegistry ensembldbRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
        System.out.println("Initialising Registry took: " + elapsedSeconds(start) + " seconds");

        System.out.println(ensembldbRegistry.getVersionReport());

        // Print a detailed report on this registry.
        System.out.println("ensembldb registry report: ");
        System.out.println("************************** ");
        File reportFile = ensembldbRegistry.getRegistryReport();
        List<String> reportLines =
                Files.readAllLines(Paths.get(reportFile.getCanonicalPath()), Charset.forName("UTF-8"));
        for (String reportLine : reportLines) {
            System.out.println(reportLine);
        }

        // Just print details on the species present.
        System.out.println("species count : " + ensembldbRegistry.getSpecies().size());
        int speciesNumber = 1;
        for (DBSpecies species : ensembldbRegistry.getSpecies()) {
            System.out.println(speciesNumber++ + ": " + species.getSpeciesBinomial());
            for (DBDatabase coreDb : species.getDatabasesByType(EnsemblDBType.core)) {
                System.out.println(coreDb.getdBName() + " : " + coreDb.getBuild());
            }
        }

        // Get a species.
        DBSpecies human = ensembldbRegistry.getSpeciesByAlias("human");

        // You can add aliases to the hashmap read from the datasource meta tables...
        try {
            ensembldbRegistry.addSessionAlias("homo_sapiens", "Bob");
            System.out.println("added alias Bob ok");
        } catch (Exception exception) {
            System.out.println("failed to add alias Bob for homo_sapiens");
        }

        // ...but aliases must be to the ensembl database style name, so this
        // attempt is expected to end up in the catch block.
        try {
            ensembldbRegistry.addSessionAlias("homo sapiens", "Norman");
            System.out.println("added alias Norman ok - but shouldnt have");
        } catch (Exception exception) {
            System.out.println("failed to add alias Norman - this is correct behaviour");
            System.out.println("The correct database style name for humans is: "
                    + human.getDatabaseStyleName());
        }

        // Get a chromosome by name - the current release version.
        DAChromosome ch20 = human.getChromosomeByName("20");

        // Get a chromosome by name and release version
        // (shown for illustration only; the result is not used further).
        DAChromosome ch20_65 = human.getChromosomeByName("20", "65");

        // Get all the chromosomes at once.
        // Note: for humans this also gets the patches.
        TreeMap<String, DAChromosome> chrs = human.getChromosomes();
        for (Map.Entry<String, DAChromosome> entry : chrs.entrySet()) {
            System.out.println("Human Chromosome: " + entry.getKey()
                    + " length: " + entry.getValue().getLength());
        }

        // Get some genes on a chromosome (illustration only; result not used further).
        List<DAGene> genesOnRegion = ch20.getGenesOnRegion(1, 1000000);

        // Get all the genes on a chromosome...
        List<DAGene> allGenes = ch20.getGenesOnRegion(ch20.getBioBegin(), ch20.getBioEnd());
        // ...or, alternatively (illustration only; result not used further):
        List<DAGene> allGenes2 = ch20.getGenesOnRegion(1, ch20.getLength());

        // allGenes was fetched from chromosome 20, so report it as such.
        System.out.println("count of genes on human chromosome 20: " + allGenes.size());

        // You can get and query Database objects - but this is not typically necessary
        // as these data access methods are wrapped by the datasource-aware layer.
        SingleSpeciesCoreDatabase db =
                (SingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("human");

        System.out.println("current human top level: " + db.getTopLevelCoordSystem().getId());
        System.out.println("current human chromosome level: " + db.getChromosomeLevelCoordSystem().getId());
        System.out.println("current human sequence level: " + db.getSequenceLevelCoordSystem().getId());

        // You can also get and use the factory object for a database that is responsible
        // for correctly configuring the data access queries.
        DBDAOSingleSpeciesCoreFactory fact =
                ((DBSingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("chicken")).getCoreFactory();
        DBDAOSingleSpeciesCoreFactory fact60 =
                ((DBSingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("chicken", "60")).getCoreFactory();
        DBDAOSingleSpeciesCoreFactory fact61 =
                ((DBSingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("chicken", "61")).getCoreFactory();

        DAChromosome chr = fact.getChromosomeDAO().getChromosomeByName("25");
        DAChromosome chr60 = fact60.getChromosomeDAO().getChromosomeByName("25");
        DAChromosome chr61 = fact61.getChromosomeDAO().getChromosomeByName("25");

        printChromosomeReport(chr);
        printChromosomeReport(chr60);
        printChromosomeReport(chr61);

        System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");

        System.out.println("Mapping all versions of " + GENE_STABLE_ID);

        StringBuilder sb = new StringBuilder();
        start = System.currentTimeMillis();

        // One output line per human core database: release, assembly and, when the
        // gene is found there, its chromosome mapping.
        for (Database d : human.getDatabasesByType(EnsemblDBType.core)) {
            sb.append("Ensembl Release:" + d.getSchemaVersion() + " Assembly:" + d.getBuild());
            Gene g2 = human.getGeneByStableID(GENE_STABLE_ID, d.getSchemaVersion());
            if (g2 != null) {
                sb.append("\tChromosome:" + ((Chromosome) g2.getChromosomeMapping().getTarget()).getName()
                        + " " + g2.getChromosomeMapping().getTargetCoordinates().toString());
            } else {
                sb.append("\tNot Present");
            }
            sb.append("\n");
        }

        System.out.println("Mapping all versions of " + GENE_STABLE_ID + " took: "
                + elapsedSeconds(start) + " seconds");
        System.out.println(sb.toString());
    }

    /**
     * Returns the whole number of seconds elapsed since {@code startMillis}.
     *
     * @param startMillis a start time previously read from {@link System#currentTimeMillis()}
     * @return elapsed time in seconds, truncated by integer division
     */
    private static long elapsedSeconds(long startMillis) {
        return (System.currentTimeMillis() - startMillis) / 1000;
    }

    /**
     * Prints name, database version, length, coordinate system IDs, sequence
     * region ID and type of a chromosome to standard output.
     *
     * @param chromosome the chromosome to describe
     * @throws Exception if any of the underlying data access calls fails
     */
    private static void printChromosomeReport(DAChromosome chromosome) throws Exception {
        System.out.println("Chromosome:\n\tName: " + chromosome.getChromosomeName()
                + " db version-" + chromosome.getDBVersion());
        System.out.println("\tLength: " + chromosome.getDBSeqLength());
        System.out.println("\tcoord sys ID: " + chromosome.getCoordSystem().getId());
        System.out.println("\tseq level coord sys ID: "
                + ((DBSingleSpeciesCoreDatabase) chromosome.getDaoFactory().getDatabase())
                        .getSequenceLevelCoordSystem().getId());
        System.out.println("\tseq region ID :" + chromosome.getId());
        System.out.println("\tseq type :" + chromosome.getType().toString());
    }

}