001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.io.File;
025import java.util.List;
026import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
027import uk.ac.roslin.ensembl.config.EnsemblDBType;
028import uk.ac.roslin.ensembl.config.FeatureType;
029import uk.ac.roslin.ensembl.config.RegistryConfiguration;
030import uk.ac.roslin.ensembl.dao.database.DBRegistry;
031import uk.ac.roslin.ensembl.model.Mapping;
032import uk.ac.roslin.ensembl.model.MappingSet;
033import uk.ac.roslin.ensembl.model.compara.HomologyPairRelationship;
034import uk.ac.roslin.ensembl.model.core.Chromosome;
035import uk.ac.roslin.ensembl.model.core.Gene;
036import uk.ac.roslin.ensembl.model.core.Species;
037import uk.ac.roslin.ensembl.model.database.Database;
038import uk.ac.roslin.ensembl.model.database.Registry;
039import uk.ac.roslin.ensembl.model.variation.Variation;
040
041public class BioinformaticsPublicationExamples {
042    
043    
044    //Reproduces the code examples shown in the JEnsembl publication in 'Bioinformatics'.
045    public static void main(String[] args) throws Exception {
046
047        //2a. Examples from Ensembl Registry.
048
049//        DBRegistry eReg = new DBRegistry(DataSource.ENSEMBLDB);
050
051//RegistryConfiguration conf = new RegistryConfiguration();
052//conf.setDBByFile(new File("dbConn.properties"));
053//conf.setSchemaByFile(new File("schema.properties"));
054//eReg = new DBRegistry(conf, true);
055
056
057//        // Get species Gallus gallus by alias 
058//        DBSpecies sp = eReg.getSpeciesByAlias("chicken");
059//
060//        // Get the default (i.e. current) or specific (v60) version of chromosome 2
061//        DAChromosome chr2 = sp.getChromosomeByName("2");
062//        DAChromosome chr2v60 = sp.getChromosomeByName("2","60");
063//
064//        // Get all the genes on chromosome 2
065//        List<DAGene> genes = chr2.getGenesOnRegion(1, 1000000);
066//
067//        // Chromosomal mappings can be retrieved from a gene, as can gene homologies
068//        DAGene g = sp.getGeneByStableID("ENSGALG00000009011");
069//        MappingSet cms = g.getLoadedMappings(EnsemblCoordSystemType.chromosome);
070//        List<DAHomologyPairRelationship> homos = g.getHomologies();
071//
072//        // lazy loading DNA sequence is retrieved from Ensembl assembly mappings 
073//        String seq = chr2.getSequenceAsString(1, 10000);
074//        
075////        
076//        List<DAVariation> variations = chr2.getVariationsOnRegion(1, 1000000);
077//
078//        DBRegistry ensemblgenomesRegistry = new DBRegistry(DataSource.ENSEMBLGENOMES);
079//
080//        DBCollectionSpecies pumi =
081//                (DBCollectionSpecies) ensemblgenomesRegistry.getSpeciesByAlias("Bacillus pumilus (strain SAFR-032)");
082//
083//
084//        DBCollectionSpecies sp1 = (DBCollectionSpecies) ensemblgenomesRegistry.getSpeciesByAlias("p_abyssi");
085//        DBCollectionSpecies ecoli = (DBCollectionSpecies) ensemblgenomesRegistry.getSpeciesByAlias("e_coli_k12");
086//
087//
088//        System.out.println("DB for Bacillus pumilus SAFR-032");
089//
090//        TreeSet<Integer> versions = new TreeSet<Integer>();
091//
092//        for (DBCollectionDatabase d : pumi.getDatabasesByType(EnsemblDBType.collection_core)) {
093//
094//            System.out.println("core version " + d.getDBVersion());
095//            versions.add(Integer.parseInt(d.getDBVersion()));
096//
097//        }
098//
099//
100//        TreeMap<String, DAChromosome> chrs = ecoli.getChromosomes();
101//
102//
103//
104//        //         DAChromosome chrP = ((DBCollectionCoreDatabase) pumi.getDatabaseByTypeAndVersion(EnsemblDBType.collection_core, versions.last().toString()))
105////                 .getCoreFactory(pumi).getChromosomeDAO().getChromosomeByName("Chromosome");
106//
107//
108//        for (DAChromosome c : chrs.values()) {
109//            System.out.println(c.getSpecies().getSpeciesBinomial() + " ID: " + c.getId()
110//                    + " Name: " + c.getChromosomeName() + " version: " + c.getDBVersion());
111//        }
112//
113
114
115        // Connect to default Ensembl datasource
116        Registry eReg = null;
117        Registry eReg2 = null;
118        eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
119
120        //alternately Connect to locally configured datasource
121        RegistryConfiguration conf = new RegistryConfiguration();        
122        conf.setDBByFile(new File("src/main/resources/dbConn.properties"));
123        conf.setSchemaByFile(new File("src/main/resources/schema.properties"));     
124        eReg2 = DBRegistry.createRegistryForConfiguration(conf);
125        
126        // Get species Gallus gallus by alias
127        Species sp = eReg.getSpeciesByAlias("chicken");
128
129        // Get default (current) or specific (v60) version of chromosome 2
130        Chromosome chr2 = sp.getChromosomeByName("2");
131        Chromosome chr2v60 = sp.getChromosomeByName("2", "60");
132
133        // Get all the genes on chromosome 2
134        List<? extends Gene> genes = chr2.getGenesOnRegion(1, chr2.getLength());
135
136        // Or get them as mappings with coordinates
137        MappingSet mps = chr2.getLoadedMappings(FeatureType.gene);
138
139        // Chromosome mappings can be retrieved from a gene, as can gene
140        //homologies 
141        Gene g = sp.getGeneByStableID("ENSGALG00000009011");
142        Mapping m = g.getChromosomeMapping();
143        List<? extends HomologyPairRelationship> homols = g.getHomologies();
144
145        // Transparent lazy loading of DNA sequence using Ensembl assembly
146        //mappings 
147        String seq = chr2.getSequenceAsString(1, 1000000);
148
149        // Variations (e.g. SNPs) can be found for a DNA Region
150        List<? extends Variation> vars = chr2.getVariationsOnRegion(1, 1000000);
151
152        // Accessing data from a multi-species collection
153        eReg2 = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA,22);
154        Species ecoli = eReg2.getSpeciesByAlias("Escherichia coli str. K12 substr. MG1655");
155        Chromosome chr = ecoli.getChromosomeByName("Chromosome");
156
157        System.out.println("Mapping all versions of ENSG00000153551");
158        
159        StringBuilder sb = new StringBuilder();
160        Species human = eReg.getSpeciesByAlias("human");
161        long start = System.currentTimeMillis();
162        
163        for (Database d : human.getDatabasesByType(EnsemblDBType.core)) {
164            sb.append("Ensembl Release:" + d.getSchemaVersion() + " Assembly:" + d.getBuild());
165            Gene g2 = human.getGeneByStableID("ENSG00000153551", d.getSchemaVersion());
166            if (g2 != null) {
167                sb.append("\tChromosome:" + ((Chromosome) g2.getChromosomeMapping().getTarget()).getName()
168                        + " " + g2.getChromosomeMapping().getTargetCoordinates().toString());
169            } else {
170                sb.append("\tNot Present");
171            }
172            sb.append("\n");
173        }
174
175       System.out.println("Mapping all versions of ENSG00000153551 took: " + (System.currentTimeMillis() - start) / 1000 + " seconds");
176       System.out.println(sb.toString());        
177    }
178}