001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024
025import java.io.File;
026import java.nio.charset.Charset;
027import java.nio.file.Files;
028import java.nio.file.Paths;
029import java.util.List;
030import java.util.TreeMap;
031import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
032import uk.ac.roslin.ensembl.config.EnsemblDBType;
033import uk.ac.roslin.ensembl.dao.database.DBDatabase;
034import uk.ac.roslin.ensembl.dao.database.DBRegistry;
035import uk.ac.roslin.ensembl.dao.database.DBSingleSpeciesCoreDatabase;
036import uk.ac.roslin.ensembl.dao.database.DBSpecies;
037import uk.ac.roslin.ensembl.dao.database.factory.DBDAOSingleSpeciesCoreFactory;
038import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
039import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
040import uk.ac.roslin.ensembl.model.core.Chromosome;
041import uk.ac.roslin.ensembl.model.core.Gene;
042import uk.ac.roslin.ensembl.model.database.Database;
043import uk.ac.roslin.ensembl.model.database.SingleSpeciesCoreDatabase;
044
045
046public class EnsemblConnection {
047
048    //Demonstrating typical connection to the Ensembl (Vertebrate) datasource, 
049    //Registry autoconfiguration and basic data retrieval functions.
050    public  static void main(String[] args) throws Exception {
051
052
053        //we specify the Datasource for connection, and initialize the registry
054        //this reads all the databases and species from the source
055        
056        long start = System.currentTimeMillis();
057        DBRegistry ensembldbRegistry =DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);      
058        System.out.println("Initialising Registry took: " + (System.currentTimeMillis() - start) / 1000 + " seconds");
059        
060        System.out.println(ensembldbRegistry.getVersionReport());
061        
062        //print a detailed report on this 
063        System.out.println("ensembldb registry report: ");
064        System.out.println("************************** ");
065        File f = ensembldbRegistry.getRegistryReport();
066        List<String> lines = Files.readAllLines(Paths.get(f.getCanonicalPath()), Charset.forName("UTF-8")); 
067        for (String l:lines) {
068            System.out.println(l);
069        }
070        
071        
072        //just print details on species present
073        System.out.println("species count : "+ensembldbRegistry.getSpecies().size());
074        int i = 1;
075        for (DBSpecies s: ensembldbRegistry.getSpecies()) {
076            System.out.println(i++ +": "+s.getSpeciesBinomial());
077            for(DBDatabase d : s.getDatabasesByType(EnsemblDBType.core)) {
078                System.out.println(d.getdBName()+ " : "+d.getBuild());
079            }
080        }
081        
082        
083        
084        //get a species
085        DBSpecies human = ensembldbRegistry.getSpeciesByAlias("human");
086        
087                //you can add aliases to the hashmap read from the datasource meta tables...
088        try {
089            ensembldbRegistry.addSessionAlias("homo_sapiens", "Bob");
090            System.out.println("added alias Bob ok");
091        } catch (Exception exception) {
092            System.out.println("failed to add alias Bob for homo_sapiens");
093        }
094
095        // aliases must be to the ensembl database style name
096        try {
097            ensembldbRegistry.addSessionAlias("homo sapiens", "Norman");
098            System.out.println("added alias Norman ok - but shouldnt have");
099        } catch (Exception exception) {
100            System.out.println("failed to add alias Norman - this is correct behaviour");
101            System.out.println("The correct database style name for humans is: "
102                    + human.getDatabaseStyleName());
103        }
104        
105        
106        //get a chromosome by name - the current release version
107        DAChromosome ch20 = human.getChromosomeByName("20");
108        
109        //get a chromosome by name and release version 
110        DAChromosome ch20_65 = human.getChromosomeByName("20", "65");
111        
112        //get all the chromosomes at once
113        //Note for humans this also get the patches
114        TreeMap<String, DAChromosome> chrs = human.getChromosomes();
115
116        for(String s: chrs.keySet()) {
117               System.out.println("Human Chromosome: "+s+" length: "+chrs.get(s).getLength());
118         }
119        
120        //get some genes on a chromosome
121        List<DAGene> genesOnRegion = ch20.getGenesOnRegion(1,1000000);
122        
123        // get all the genes on a chromosome
124        List<DAGene> allGenes = ch20.getGenesOnRegion(ch20.getBioBegin(), ch20.getBioEnd());
125        //or
126        List<DAGene> allGenes2 = ch20.getGenesOnRegion(1, ch20.getLength());
127        
128        System.out.println("count of genes on human chromosome 1: "+allGenes.size());
129        
130        //you can get and query Database objects - but this is not typically necessary
131        //as these data access methods are wrapped by the datasource-aware layer
132        SingleSpeciesCoreDatabase db = (SingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("human");
133
134        System.out.println("current human top level: "+db.getTopLevelCoordSystem().getId());
135        System.out.println("current human chromosome level: "+db.getChromosomeLevelCoordSystem().getId());
136        System.out.println("current human sequence level: "+db.getSequenceLevelCoordSystem().getId());
137
138        //you can also get and use the factory object for a database that is responsible 
139        //for correctly configuring the dataaccess queries
140
141
142        
143        DBDAOSingleSpeciesCoreFactory fact = ((DBSingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("chicken")).getCoreFactory();
144        DBDAOSingleSpeciesCoreFactory fact60 = ((DBSingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("chicken","60")).getCoreFactory();
145        DBDAOSingleSpeciesCoreFactory fact61 = ((DBSingleSpeciesCoreDatabase) ensembldbRegistry.getDatabase("chicken","61")).getCoreFactory();
146
147        DAChromosome chr = fact.getChromosomeDAO().getChromosomeByName("25");
148        DAChromosome chr60 = fact60.getChromosomeDAO().getChromosomeByName("25");
149        DAChromosome chr61 = fact61.getChromosomeDAO().getChromosomeByName("25");
150
151        System.out.println("Chromosome:\n\tName: "+chr.getChromosomeName()+ " db version-"+chr.getDBVersion());
152        System.out.println("\tLength: "+chr.getDBSeqLength());
153        System.out.println("\tcoord sys ID: "+chr.getCoordSystem().getId());
154        System.out.println("\tseq level coord sys ID: "+((DBSingleSpeciesCoreDatabase) chr.getDaoFactory().getDatabase()).getSequenceLevelCoordSystem().getId());
155        System.out.println("\tseq region ID :"+chr.getId());
156        System.out.println("\tseq type :"+chr.getType().toString());
157
158        System.out.println("Chromosome:\n\tName: "+chr60.getChromosomeName()+" db version-"+chr60.getDBVersion());
159        System.out.println("\tLength: "+chr60.getDBSeqLength());
160        System.out.println("\tcoord sys ID: "+chr60.getCoordSystem().getId());
161        System.out.println("\tseq level coord sys ID: "+((DBSingleSpeciesCoreDatabase) chr60.getDaoFactory().getDatabase()).getSequenceLevelCoordSystem().getId());
162        System.out.println("\tseq region ID :"+chr60.getId());
163        System.out.println("\tseq type :"+chr60.getType().toString());
164
165        System.out.println("Chromosome:\n\tName: "+chr61.getChromosomeName()+ " db version-"+chr61.getDBVersion());
166        System.out.println("\tLength: "+chr61.getDBSeqLength());
167        System.out.println("\tcoord sys ID: "+chr61.getCoordSystem().getId());
168        System.out.println("\tseq level coord sys ID: "+ ((DBSingleSpeciesCoreDatabase) chr61.getDaoFactory().getDatabase()).getSequenceLevelCoordSystem().getId());
169        System.out.println("\tseq region ID :"+chr61.getId());
170        System.out.println("\tseq type :"+chr61.getType().toString());
171
172        System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");
173
174        
175        
176        System.out.println("Mapping all versions of ENSG00000153551");
177        
178        StringBuilder sb = new StringBuilder();
179        start = System.currentTimeMillis();
180        
181        for (Database d : human.getDatabasesByType(EnsemblDBType.core)) {
182            sb.append("Ensembl Release:" + d.getSchemaVersion() + " Assembly:" + d.getBuild());
183            Gene g2 = human.getGeneByStableID("ENSG00000153551", d.getSchemaVersion());
184            if (g2 != null) {
185                sb.append("\tChromosome:" + ((Chromosome) g2.getChromosomeMapping().getTarget()).getName()
186                        + " " + g2.getChromosomeMapping().getTargetCoordinates().toString());
187            } else {
188                sb.append("\tNot Present");
189            }
190            sb.append("\n");
191        }
192
193       System.out.println("Mapping all versions of ENSG00000153551 took: " + (System.currentTimeMillis() - start) / 1000 + " seconds");
194       System.out.println(sb.toString());
195    }
196
197}