001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022/* 023 * To change this template, choose Tools | Templates 024 * and open the template in the editor. 025 */ 026package uk.ac.roslin.ensembl.demo; 027 028import java.util.*; 029import uk.ac.roslin.ensembl.config.AssemblyExceptionType; 030import uk.ac.roslin.ensembl.config.DBConnection; 031import uk.ac.roslin.ensembl.config.EnsemblDBType; 032import uk.ac.roslin.ensembl.dao.database.*; 033import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship; 034import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome; 035import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence; 036import uk.ac.roslin.ensembl.datasourceaware.core.DAGene; 037import uk.ac.roslin.ensembl.model.Coordinate; 038import uk.ac.roslin.ensembl.model.Mapping; 039import uk.ac.roslin.ensembl.model.MappingSet; 040import uk.ac.roslin.ensembl.model.variation.Variation; 041 042/** 043 * 044 * @author tpaterso 045 */ 046public class LogicFromArkMAP { 047 048 /* 049 * ArkMAP is a downloadable map drawing application the uses the JEnsembl API 050 * to download gene-annotated chromosome maps from Ensembl datasources. 051 * The application integrates JEnsembl data retrieval with ArkDB map drawing 052 * code which uses the Java Swing API. Salient features of Ensembl data retrieval 053 * are combined in this demonstration code (Getting a basic Ensembl map, 054 * Getting gene homologies, Getting SNP variations on an Ensembl map, 055 * Finding regions of conserved synteny for a selected chromosomal region, 056 * Displaying maps of assembly exceptions and haplotypes.) 057 */ 058 public static void main(String[] args) throws Exception { 059 060 // **************** GETTING A BASIC ENSEMBL MAP ********************* // 061 062 //the user selects which source to use 063 // from DataSource.ENSEMBLDB,DataSource.ENSEMBLGENOMES or DataSource.ENSEMBLBACTERIA 064 DBConnection.DataSource source = DBConnection.DataSource.ENSEMBLDB; 065 066 // a registry is made ( and cached in the App ) 067 DBRegistry registry = DBRegistry.createRegistryForDataSource(source); 068 069 //the collection of available species is used as the basis for a user selection widget 070 Collection<DBSpecies> species = registry.getSpecies(); 071 072 //if we have chosen to work with DataSource.ENSEMBLBACTERIA 073 // instead we do 074 //Collection<? extends DBSpecies> species = registry.getCollectionSpecies(); 075 076 // the chosen species is actually selected from the list 077 DBSpecies currentSpecies = registry.getSpeciesByAlias("human"); 078 079 080 //the list of available core databases is used for a release/version selection widget 081 // actually the widget is built using a sorted collection of strings representing build/version details 082 TreeSet<? extends DBDatabase> dbs; 083 if (currentSpecies instanceof DBCollectionSpecies) { 084 dbs = currentSpecies.getDatabasesByType(EnsemblDBType.collection_core); 085 } else { 086 dbs = currentSpecies.getDatabasesByType(EnsemblDBType.core); 087 } 088 089 090 //again the database (i.e. version) is actually selected by the user, here we just get one from the registry 091 DBDatabase currentDB = registry.getDatabase("human", EnsemblDBType.core, "72"); 092 093 //or for bacteria.. 094 //DBDatabase currentDB = registry.getDatabase("Bacillus pumilus (strain SAFR-032)", EnsemblDBType.collection_corecore, "14"); 095 096 //the list of chromosomes for that species is retireved and used as the basis of a selection widget 097 TreeMap<String, DAChromosome> chromosomes = new TreeMap<String, DAChromosome>(); 098 099 List<DAChromosome> temp = null; 100 if (currentDB instanceof DBSingleSpeciesCoreDatabase) { 101 temp = ((DBSingleSpeciesCoreDatabase) currentDB).getChromosomes(); 102 } else { 103 temp = ((DBCollectionCoreDatabase) currentDB).getChromosomes(currentSpecies); 104 } 105 for (DAChromosome c : temp) { 106 chromosomes.put(c.getChromosomeName(), c); 107 } 108 109 //the user selects a chromosome 110 DAChromosome currentChromosome = chromosomes.get("3"); 111 //in ArkMAP the user can specify to look at particular coordinates 112 //this is not used to retrieve partial chromosomes map 113 // but does limit the extent of the chromosome searched for genes 114 //genes = chr.getGenesOnRegion(requestStart, requestStop); 115 116 //note that aswell as the genes - we also get Exceptions to show on the map 117 118 //if this is a real chromosome we actually load all the genes 119 //but if we are just creating a map of an assembly exception here we would restrict this 120 //to the extent of the patch 121 int start = currentChromosome.getBioBegin(); 122 int stop = currentChromosome.getBioEnd(); 123 124 //force the lazy load the genes 125 List<DAGene> genesOnRegion = currentChromosome.getGenesOnRegion(start, stop); 126 //force the lazy load all types of exeptions 127 currentChromosome.getAssemblyExceptions(AssemblyExceptionType.PATCH_FIX); 128 129 // the chromosome is then used to make a map - which is zoomed to the specified coordinates for display 130 //EnsemblMap map = new EnsemblMap( currentChromosome, start, stop ); 131 132 // the map is made by creating an EnsemblMapping object for each gene mapping on the chromosome 133 // and making an EnsemblExceptionMapping for each AssemblException mapped on the chromosome 134 135 /* 136 * 137 public EnsemblMap(DAChromosome _chr) throws DAOException { 138 139 this.chr = _chr; 140 this.start = chr.getBioBegin(); 141 this.stop = chr.getBioEnd(); 142 143 try { 144 this.setAnalysis(Analyses.getAnalysis(this.chr.getDaoFactory().getDatabase())); 145 } catch (Exception e) {} 146 147 mapName = 148 this.chr.getSpecies().getSpeciesBinomial() + " (" 149 + this.chr.getSpecies().getShortName() + ")" + " chromosome " 150 + this.chr.getChromosomeName(); 151 152 //these must already be loaded 153 for (Mapping m : this.getEnsemblChr().getLoadedMappings(FeatureType.gene)) { 154 addEnsemblMapping(m); 155 } 156 for (AssemblyExceptionType t : AssemblyExceptionType.values()) { 157 if (!t.equals(AssemblyExceptionType.PAR)) { 158 for (Mapping m : this.getEnsemblChr().getAssemblyExceptions(t)) { 159 addEnsemblExceptionMapping(m, t); 160 } 161 } 162 } 163 } 164 * 165 */ 166 167 // ***************** GETTING GENE HOMOLOGIES ************************ // 168 169 //if the user selects an Ensembl gene on a Map 170 //they can search for homologies - on alll species or just a selected species 171 //these results are shown in a table 172 List<DAHomologyPairRelationship> homologies; 173 DBSpecies selectedSpecies = null; 174 DAGene selectedGene = currentSpecies.getGeneByStableID("ENSG00000153551", "72"); 175 176 177 try { 178 if (selectedSpecies != null) { 179 homologies = selectedGene.getHomologies(selectedSpecies); 180 //note that this actually does a query to get all the homologies and then filters the result 181 //so is no faster than below 182 } else { 183 homologies = selectedGene.getHomologies(); 184 } 185 } catch (Exception e) { 186 System.out.println("Error in thread to get homologues of a Gene Ensembl DataSource: " + e.toString()); 187 } 188 189 190 191 // ************* GETTING SNP VARIATIONS ON AN ENSEMBL MAP *********** // 192 Coordinate zoomCoord = new Coordinate(32900000, 33000000); 193 194 195 try { 196 197 List<? extends Variation> vars = currentChromosome.getVariationsOnRegion(zoomCoord); 198 199 if (vars == null || vars.isEmpty()) { 200 throw new Exception("No Variations found on " + currentChromosome.getSpecies().getCommonName() 201 + " chromosome " + currentChromosome.getChromosomeName() + " (release " + currentChromosome.getDBVersion() + ")"); 202 } 203 204 //the App makes a new map - as above for this region of the chromosome 205 //EnsemblMap map = new EnsemblMap(currentChromosome, zoomCoord.getStart(), zoomCoord.getEnd()); 206 207 //we then make ArkDB API objects for the Marker and the Mapping and 208 //add these to the EnsemblMap object 209 for (Variation v : vars) { 210 211 Mapping mp = (Mapping) v.getLoadedMappings().first(); 212 Coordinate coord = mp.getTargetCoordinates(); 213 214 //ArkDB code 215 /* 216 * 217 DatasourceAwareMarker mkr = new DatasourceAwareMarker(); 218 mkr.setDbsnpID(v.getName()); 219 mkr.setAccession(v.getId().toString()); 220 mkr.setMarkerType("SNP"); 221 * 222 */ 223 224 // if the variation has a 'synonym' set - this equals the search name, and there is a different 'name' 225 // if the 'synonym' is not set, the search name = 'name' 226 227 //we have a synonym 228 if (v.getSynonym() != null && !v.getSynonym().isEmpty()) { 229 //ArkDB code 230 //mkr.setName(v.getSynonym()); 231 } else { 232 //ArkDB code 233 //mkr.setName(v.getName()); 234 } 235 236 //ArkDB code 237// DatasourceAwareMapping mpp = new DatasourceAwareMapping(); 238// mpp.setMarker(mkr); 239 240 double begin = coord.getStart().doubleValue(); 241 double end = coord.getEnd().doubleValue(); 242 243 if (end < begin) { 244 double d = end; 245 end = begin; 246 begin = d; 247 } 248 //ArkDB code 249// mpp.setMarkerStart(begin); 250// mpp.setMarkerEnd(end); 251// mpp.setMarkerMiddle(begin + (end-begin)/2 ); 252// mpp.setMap(map); 253 254 //ArkMAP code - we add each mapping to the map 255// map.addMapping(mpp); 256// snpMappings.add(mpp); 257 258 } 259 260 } catch (Exception e) { 261 } 262 263 // *************** FIND REGIONS OF CONSERVED SYNTENY ******************// 264 // *************** FOR A SELECTED REGION OF A CHROMOSOME *************// 265 266 //ArrayList<EnsemblMap> maps = new ArrayList<EnsemblMap>(); 267 268 HashMap<DADNASequence, MappingSet> syntenies; 269 Coordinate searchCoord = new Coordinate(30000000, 35000000); 270 selectedSpecies = registry.getSpeciesByAlias("chimp"); 271 272 273 try { 274 275 syntenies = currentChromosome.getRegionsOfConservedSynteny(searchCoord, selectedSpecies); 276 277 if (syntenies != null && !syntenies.isEmpty()) { 278 279 for (DADNASequence seq : syntenies.keySet()) { 280 if (seq instanceof DAChromosome) { 281 Coordinate coords = syntenies.get(seq).getExtent(); 282 283 284 // making a map of the whole chromsome 285 //- but just going to initially display the bit with conserved synteny 286 //EnsemblMap map = new EnsemblMap((DAChromosome) seq); 287 288 //we then pass the map back together with the Coordinates to display 289 // the chromosome is searched for genes and exceptions before 290 //display 291 292 } 293 } 294 } 295 } catch (Exception e) { 296 System.out.println("Error in thread to get syntenic regions: " + e.toString()); 297 } 298 299 300 // ************ DISPLAY THE MAP OF AN ASSEMBLY EXCEPTION *************// 301 302 //ArkMAP displays the position of Assembly Exceptions on the EnsemblMaps as 303 //special 'EnsemblExceptionMappings' . 304 //These are colour coded depending on the type of exception (e.g for Human v68 305 //chromosome 17 there are AssemblyPatches, AssemblyNovel and Haplotypes 306 //displayed) 307 //if a user selects a Marker that is an AssemblyException 308 //they get the option to display this Exception as a new map 309 310 DAChromosome patchChromosome = null; 311 Coordinate patchCoordinates = null; 312 313 /* 314 * 315 if (mapping instanceof EnsemblMappingView ) { 316 EnsemblMapping ming = ((EnsemblMappingView) mapping).getMapping(); 317 if (ming instanceof EnsemblExceptionMapping) { 318 EnsemblExceptionMapping exMapping = (EnsemblExceptionMapping) ming; 319 patchChromosome = exMapping.getPatchChromosome(); 320 patchCoordinates = exMapping.getPatchChromosomeCoordinates(); } } 321 * 322 */ 323 324 //only the region of the patch/exception is real sequence, so we only display this region, 325 //and we only fetch the genes on this region by lazy load 326 327 /* 328 patchChromosome.getGenesOnRegion (patchCoordinates.getStart(), patchCoordinates.getEnd()); 329 */ 330 331 //redundant here.. 332 /* 333 patchChromosome.getAssemblyExceptions (AssemblyExceptionType.PATCH_FIX); 334 */ 335 336 //Make the new map for display 337 //EnsemblMap map = new EnsemblMap( patchChromosome, patchCoordinates.getStart(), patchCoordinates.getStart() ); 338 339 340 341 } 342}