001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024import java.util.HashMap; 025import java.util.List; 026import java.util.TreeMap; 027import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 028import uk.ac.roslin.ensembl.dao.database.DBRegistry; 029import uk.ac.roslin.ensembl.dao.database.DBSpecies; 030import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship; 031import uk.ac.roslin.ensembl.datasourceaware.core.DAAssembledDNASequence; 032import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome; 033import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence; 034import uk.ac.roslin.ensembl.datasourceaware.core.DAGene; 035import uk.ac.roslin.ensembl.model.Coordinate; 036import uk.ac.roslin.ensembl.model.Mapping; 037import uk.ac.roslin.ensembl.model.MappingSet; 038 039 040public class SpeciesWithNoChromosomes { 041 042 //some genome assemblies in Ensembl have not yet reached chromosome map maturity 043 //we can also query species for 'Fragments' which are AssembledDNASequences at the 044 //'top' (highest ranked) coordinate system. 045 //Theses are handled just like chromosomes in that they can be searched for 046 //features such as genes , and can have regions of conserved syntemy discovered. 047 //Like Chromosomes - these 'fragments' are cached for each species/release version 048 //so behave as singletons 049 public static void main(String[] args) throws Exception { 050 051 DBRegistry reg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 052 DBSpecies speciesByAlias = reg.getSpeciesByAlias("northern tree shrew"); 053 TreeMap<String, DAChromosome> chromosomes = speciesByAlias.getChromosomes("74"); 054 System.out.println("The shrew has "+chromosomes.size()+ " chromosomes."); 055 056 TreeMap<String, DAAssembledDNASequence> topLevelAssembledDNASequences = speciesByAlias.getFragments("74"); 057 System.out.println("The shrew has "+topLevelAssembledDNASequences.size()+ " fragments."); 058 059 for (String s:topLevelAssembledDNASequences.keySet() ) { 060 System.out.println(s+" - "+topLevelAssembledDNASequences.get(s).getHashID() 061 + " "+topLevelAssembledDNASequences.get(s).getDBSeqLength()); 062 } 063 DAAssembledDNASequence value = topLevelAssembledDNASequences.firstEntry().getValue(); 064 065 066 for (DAGene g :value.getGenesOnRegion(1, value.getDBSeqLength()) ) { 067 068 System.out.println("GENE "+g.getStableID()+" "+g.getDisplayName() +" ["+g.getDescription()+"]"); 069 for (Mapping m:g.getLoadedMappings()) { 070 System.out.println(""+m.getTarget().getHashID()+" "+m.getTargetCoordinates()); 071 } 072 System.out.println(""); 073 List<DAHomologyPairRelationship> homologies = g.getHomologies(); 074 075 for (DAHomologyPairRelationship hp: homologies) { 076 System.out.println("HOMOLOGY "+ 077 hp.getTarget().getSpecies().getShortName()+ 078 " "+hp.getTarget().getStableID()+ 079 " "+hp.getTarget().getDisplayName()); 080 081 } 082 System.out.println(""); 083 } 084 085 086 DBSpecies lemur = reg.getSpeciesByAlias("Mouse Lemur"); 087 HashMap<DADNASequence, MappingSet> regionsOfConservedSynteny = value.getRegionsOfConservedSynteny(new Coordinate(1, value.getDBSeqLength()), lemur); 088 089 for (DADNASequence s:regionsOfConservedSynteny.keySet() ) { 090 System.out.println(""+s.getHashID()); 091 } 092 093 094 //currently asking for fragments in a species that has chromosome will return AssembledDNASequences 095 //not Chromosomes... 096 DBSpecies hs = reg.getSpeciesByAlias("human"); 097 DBSpecies ch = reg.getSpeciesByAlias("chimp"); 098 DAAssembledDNASequence chr1 = hs.getFragmentByName("1"); 099 regionsOfConservedSynteny = chr1.getRegionsOfConservedSynteny(new Coordinate(100000,1000000), ch); 100 101 for (DADNASequence s:regionsOfConservedSynteny.keySet() ) { 102 System.out.print(s.getHashID()+" - "); 103 System.out.println(regionsOfConservedSynteny.get(s).size()+" mappings"); 104 } 105 106 //the fragment routines also work for collection species 107 reg = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA); 108 DBSpecies coli = reg.getSpeciesByAlias("Escherichia coli str. K12 substr. MG1655"); 109 TreeMap<String, DAAssembledDNASequence> fragments = coli.getFragments(); 110 for (String s:fragments.keySet() ) { 111 System.out.println(s+" - "+fragments.get(s).getHashID() 112 + " "+fragments.get(s).getDBSeqLength()); 113 } 114 115 } 116 117}