001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.util.HashMap;
025import java.util.List;
026import java.util.TreeMap;
027import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
028import uk.ac.roslin.ensembl.dao.database.DBRegistry;
029import uk.ac.roslin.ensembl.dao.database.DBSpecies;
030import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
031import uk.ac.roslin.ensembl.datasourceaware.core.DAAssembledDNASequence;
032import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
033import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
034import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
035import uk.ac.roslin.ensembl.model.Coordinate;
036import uk.ac.roslin.ensembl.model.Mapping;
037import uk.ac.roslin.ensembl.model.MappingSet;
038
039
040public class SpeciesWithNoChromosomes {
041    
042    //some genome assemblies in Ensembl have not yet reached chromosome map maturity 
043    //we can also query species for 'Fragments' which are AssembledDNASequences at the
044    //'top' (highest ranked) coordinate system.
045    //Theses are handled just like chromosomes in that they can be searched for 
046    //features such as genes , and can have regions of conserved syntemy discovered.
047    //Like Chromosomes - these 'fragments' are cached for each species/release version 
048    //so behave as singletons
049    public static void main(String[] args) throws Exception {
050                
051        DBRegistry reg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
052        DBSpecies speciesByAlias = reg.getSpeciesByAlias("northern tree shrew");
053        TreeMap<String, DAChromosome> chromosomes = speciesByAlias.getChromosomes("74");
054        System.out.println("The shrew has "+chromosomes.size()+ " chromosomes.");
055        
056        TreeMap<String, DAAssembledDNASequence> topLevelAssembledDNASequences = speciesByAlias.getFragments("74");
057        System.out.println("The shrew has "+topLevelAssembledDNASequences.size()+ " fragments.");
058        
059        for (String s:topLevelAssembledDNASequences.keySet() ) {
060            System.out.println(s+" - "+topLevelAssembledDNASequences.get(s).getHashID()
061                    + " "+topLevelAssembledDNASequences.get(s).getDBSeqLength());
062        }
063        DAAssembledDNASequence value = topLevelAssembledDNASequences.firstEntry().getValue();
064        
065        
066        for (DAGene g :value.getGenesOnRegion(1, value.getDBSeqLength()) ) {
067            
068            System.out.println("GENE "+g.getStableID()+" "+g.getDisplayName() +" ["+g.getDescription()+"]");
069            for (Mapping m:g.getLoadedMappings()) {
070                System.out.println(""+m.getTarget().getHashID()+" "+m.getTargetCoordinates());
071            }
072            System.out.println("");
073            List<DAHomologyPairRelationship> homologies = g.getHomologies();
074            
075            for (DAHomologyPairRelationship hp: homologies) {
076                System.out.println("HOMOLOGY "+
077                hp.getTarget().getSpecies().getShortName()+
078                        " "+hp.getTarget().getStableID()+
079                        " "+hp.getTarget().getDisplayName());
080                       
081            } 
082            System.out.println("");
083     }
084        
085        
086        DBSpecies lemur = reg.getSpeciesByAlias("Mouse Lemur");
087        HashMap<DADNASequence, MappingSet> regionsOfConservedSynteny = value.getRegionsOfConservedSynteny(new Coordinate(1, value.getDBSeqLength()), lemur);
088
089        for (DADNASequence s:regionsOfConservedSynteny.keySet() ) {
090            System.out.println(""+s.getHashID());
091        }
092        
093        
094        //currently asking for fragments in a species that has chromosome will return AssembledDNASequences
095        //not Chromosomes...
096        DBSpecies hs = reg.getSpeciesByAlias("human");
097        DBSpecies ch = reg.getSpeciesByAlias("chimp");
098        DAAssembledDNASequence chr1 = hs.getFragmentByName("1");
099        regionsOfConservedSynteny = chr1.getRegionsOfConservedSynteny(new Coordinate(100000,1000000), ch);
100        
101        for (DADNASequence s:regionsOfConservedSynteny.keySet() ) {
102            System.out.print(s.getHashID()+"  -   ");
103            System.out.println(regionsOfConservedSynteny.get(s).size()+" mappings");
104        }  
105        
106        //the fragment routines also work for collection species
107        reg = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);
108        DBSpecies coli = reg.getSpeciesByAlias("Escherichia coli str. K12 substr. MG1655");
109        TreeMap<String, DAAssembledDNASequence> fragments = coli.getFragments();
110        for (String s:fragments.keySet() ) {
111            System.out.println(s+" - "+fragments.get(s).getHashID()
112                    + " "+fragments.get(s).getDBSeqLength());
113        }
114        
115    }
116    
117}