001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024import java.io.File; 025import java.util.List; 026import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 027import uk.ac.roslin.ensembl.config.EnsemblDBType; 028import uk.ac.roslin.ensembl.config.FeatureType; 029import uk.ac.roslin.ensembl.config.RegistryConfiguration; 030import uk.ac.roslin.ensembl.dao.database.DBRegistry; 031import uk.ac.roslin.ensembl.model.Mapping; 032import uk.ac.roslin.ensembl.model.MappingSet; 033import uk.ac.roslin.ensembl.model.compara.HomologyPairRelationship; 034import uk.ac.roslin.ensembl.model.core.Chromosome; 035import uk.ac.roslin.ensembl.model.core.Gene; 036import uk.ac.roslin.ensembl.model.core.Species; 037import uk.ac.roslin.ensembl.model.database.Database; 038import uk.ac.roslin.ensembl.model.database.Registry; 039import uk.ac.roslin.ensembl.model.variation.Variation; 040 041public class BioinformaticsPublicationExamples { 042 043 044 //Reproduces the code examples shown in the JEnsembl publication in 'Bioinformatics'. 045 public static void main(String[] args) throws Exception { 046 047 //2a. Examples from Ensembl Registry. 048 049// DBRegistry eReg = new DBRegistry(DataSource.ENSEMBLDB); 050 051//RegistryConfiguration conf = new RegistryConfiguration(); 052//conf.setDBByFile(new File("dbConn.properties")); 053//conf.setSchemaByFile(new File("schema.properties")); 054//eReg = new DBRegistry(conf, true); 055 056 057// // Get species Gallus gallus by alias 058// DBSpecies sp = eReg.getSpeciesByAlias("chicken"); 059// 060// // Get the default (i.e. current) or specific (v60) version of chromosome 2 061// DAChromosome chr2 = sp.getChromosomeByName("2"); 062// DAChromosome chr2v60 = sp.getChromosomeByName("2","60"); 063// 064// // Get all the genes on chromosome 2 065// List<DAGene> genes = chr2.getGenesOnRegion(1, 1000000); 066// 067// // Chromosomal mappings can be retrieved from a gene, as can gene homologies 068// DAGene g = sp.getGeneByStableID("ENSGALG00000009011"); 069// MappingSet cms = g.getLoadedMappings(EnsemblCoordSystemType.chromosome); 070// List<DAHomologyPairRelationship> homos = g.getHomologies(); 071// 072// // lazy loading DNA sequence is retrieved from Ensembl assembly mappings 073// String seq = chr2.getSequenceAsString(1, 10000); 074// 075//// 076// List<DAVariation> variations = chr2.getVariationsOnRegion(1, 1000000); 077// 078// DBRegistry ensemblgenomesRegistry = new DBRegistry(DataSource.ENSEMBLGENOMES); 079// 080// DBCollectionSpecies pumi = 081// (DBCollectionSpecies) ensemblgenomesRegistry.getSpeciesByAlias("Bacillus pumilus (strain SAFR-032)"); 082// 083// 084// DBCollectionSpecies sp1 = (DBCollectionSpecies) ensemblgenomesRegistry.getSpeciesByAlias("p_abyssi"); 085// DBCollectionSpecies ecoli = (DBCollectionSpecies) ensemblgenomesRegistry.getSpeciesByAlias("e_coli_k12"); 086// 087// 088// System.out.println("DB for Bacillus pumilus SAFR-032"); 089// 090// TreeSet<Integer> versions = new TreeSet<Integer>(); 091// 092// for (DBCollectionDatabase d : pumi.getDatabasesByType(EnsemblDBType.collection_core)) { 093// 094// System.out.println("core version " + d.getDBVersion()); 095// versions.add(Integer.parseInt(d.getDBVersion())); 096// 097// } 098// 099// 100// TreeMap<String, DAChromosome> chrs = ecoli.getChromosomes(); 101// 102// 103// 104// // DAChromosome chrP = ((DBCollectionCoreDatabase) pumi.getDatabaseByTypeAndVersion(EnsemblDBType.collection_core, versions.last().toString())) 105//// .getCoreFactory(pumi).getChromosomeDAO().getChromosomeByName("Chromosome"); 106// 107// 108// for (DAChromosome c : chrs.values()) { 109// System.out.println(c.getSpecies().getSpeciesBinomial() + " ID: " + c.getId() 110// + " Name: " + c.getChromosomeName() + " version: " + c.getDBVersion()); 111// } 112// 113 114 115 // Connect to default Ensembl datasource 116 Registry eReg = null; 117 Registry eReg2 = null; 118 eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 119 120 //alternately Connect to locally configured datasource 121 RegistryConfiguration conf = new RegistryConfiguration(); 122 conf.setDBByFile(new File("src/main/resources/dbConn.properties")); 123 conf.setSchemaByFile(new File("src/main/resources/schema.properties")); 124 eReg2 = DBRegistry.createRegistryForConfiguration(conf); 125 126 // Get species Gallus gallus by alias 127 Species sp = eReg.getSpeciesByAlias("chicken"); 128 129 // Get default (current) or specific (v60) version of chromosome 2 130 Chromosome chr2 = sp.getChromosomeByName("2"); 131 Chromosome chr2v60 = sp.getChromosomeByName("2", "60"); 132 133 // Get all the genes on chromosome 2 134 List<? extends Gene> genes = chr2.getGenesOnRegion(1, chr2.getLength()); 135 136 // Or get them as mappings with coordinates 137 MappingSet mps = chr2.getLoadedMappings(FeatureType.gene); 138 139 // Chromosome mappings can be retrieved from a gene, as can gene 140 //homologies 141 Gene g = sp.getGeneByStableID("ENSGALG00000009011"); 142 Mapping m = g.getChromosomeMapping(); 143 List<? extends HomologyPairRelationship> homols = g.getHomologies(); 144 145 // Transparent lazy loading of DNA sequence using Ensembl assembly 146 //mappings 147 String seq = chr2.getSequenceAsString(1, 1000000); 148 149 // Variations (e.g. SNPs) can be found for a DNA Region 150 List<? extends Variation> vars = chr2.getVariationsOnRegion(1, 1000000); 151 152 // Accessing data from a multi-species collection 153 eReg2 = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA,22); 154 Species ecoli = eReg2.getSpeciesByAlias("Escherichia coli str. K12 substr. MG1655"); 155 Chromosome chr = ecoli.getChromosomeByName("Chromosome"); 156 157 System.out.println("Mapping all versions of ENSG00000153551"); 158 159 StringBuilder sb = new StringBuilder(); 160 Species human = eReg.getSpeciesByAlias("human"); 161 long start = System.currentTimeMillis(); 162 163 for (Database d : human.getDatabasesByType(EnsemblDBType.core)) { 164 sb.append("Ensembl Release:" + d.getSchemaVersion() + " Assembly:" + d.getBuild()); 165 Gene g2 = human.getGeneByStableID("ENSG00000153551", d.getSchemaVersion()); 166 if (g2 != null) { 167 sb.append("\tChromosome:" + ((Chromosome) g2.getChromosomeMapping().getTarget()).getName() 168 + " " + g2.getChromosomeMapping().getTargetCoordinates().toString()); 169 } else { 170 sb.append("\tNot Present"); 171 } 172 sb.append("\n"); 173 } 174 175 System.out.println("Mapping all versions of ENSG00000153551 took: " + (System.currentTimeMillis() - start) / 1000 + " seconds"); 176 System.out.println(sb.toString()); 177 } 178}