001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022/*
023 * To change this template, choose Tools | Templates
024 * and open the template in the editor.
025 */
026package uk.ac.roslin.ensembl.demo;
027
028import java.util.*;
029import uk.ac.roslin.ensembl.config.AssemblyExceptionType;
030import uk.ac.roslin.ensembl.config.DBConnection;
031import uk.ac.roslin.ensembl.config.EnsemblDBType;
032import uk.ac.roslin.ensembl.dao.database.*;
033import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
034import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
035import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
036import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
037import uk.ac.roslin.ensembl.model.Coordinate;
038import uk.ac.roslin.ensembl.model.Mapping;
039import uk.ac.roslin.ensembl.model.MappingSet;
040import uk.ac.roslin.ensembl.model.variation.Variation;
041
042/**
043 *
044 * @author tpaterso
045 */
046public class LogicFromArkMAP {
047
048    /*
049     * ArkMAP is a downloadable map drawing application the uses the JEnsembl API 
050     * to download gene-annotated chromosome maps from Ensembl datasources. 
051     * The application integrates JEnsembl data retrieval with ArkDB map drawing 
052     * code which uses the Java Swing API. Salient features of Ensembl data retrieval 
053     * are combined in this demonstration code (Getting a basic Ensembl map, 
054     * Getting gene homologies, Getting SNP variations on an Ensembl map, 
055     * Finding regions of conserved synteny for a selected chromosomal region, 
056     * Displaying maps of assembly exceptions and haplotypes.)
057     */
058    public static void main(String[] args) throws Exception {
059
060        // **************** GETTING A BASIC ENSEMBL MAP ********************* //
061
062        //the user selects which source to use
063        // from DataSource.ENSEMBLDB,DataSource.ENSEMBLGENOMES or DataSource.ENSEMBLBACTERIA
064        DBConnection.DataSource source = DBConnection.DataSource.ENSEMBLDB;
065
066        // a registry is made ( and cached in the App )
067        DBRegistry registry = DBRegistry.createRegistryForDataSource(source);
068
069        //the collection of available species is used as the basis for a user selection widget
070        Collection<DBSpecies> species = registry.getSpecies();
071
072        //if we have chosen to work with DataSource.ENSEMBLBACTERIA
073        // instead we do 
074        //Collection<? extends DBSpecies> species = registry.getCollectionSpecies();
075
076        // the chosen species is actually selected from the list
077        DBSpecies currentSpecies = registry.getSpeciesByAlias("human");
078
079
080        //the list of available core databases is used for a release/version selection widget
081        // actually the widget is built using a sorted collection of strings representing build/version details
082        TreeSet<? extends DBDatabase> dbs;
083        if (currentSpecies instanceof DBCollectionSpecies) {
084            dbs = currentSpecies.getDatabasesByType(EnsemblDBType.collection_core);
085        } else {
086            dbs = currentSpecies.getDatabasesByType(EnsemblDBType.core);
087        }
088
089
090        //again the database (i.e. version) is actually selected by the user, here we just get one from the registry
091        DBDatabase currentDB = registry.getDatabase("human", EnsemblDBType.core, "72");
092
093        //or for bacteria..
094        //DBDatabase currentDB = registry.getDatabase("Bacillus pumilus (strain SAFR-032)", EnsemblDBType.collection_corecore, "14");
095
096        //the list of chromosomes for that species is retireved and used as the basis of a selection widget
097        TreeMap<String, DAChromosome> chromosomes = new TreeMap<String, DAChromosome>();
098
099        List<DAChromosome> temp = null;
100        if (currentDB instanceof DBSingleSpeciesCoreDatabase) {
101            temp = ((DBSingleSpeciesCoreDatabase) currentDB).getChromosomes();
102        } else {
103            temp = ((DBCollectionCoreDatabase) currentDB).getChromosomes(currentSpecies);
104        }
105        for (DAChromosome c : temp) {
106            chromosomes.put(c.getChromosomeName(), c);
107        }
108
109        //the user selects a chromosome
110        DAChromosome currentChromosome = chromosomes.get("3");
111        //in ArkMAP the user can specify to look at particular coordinates
112        //this is not used to retrieve partial chromosomes map
113        // but does limit the extent of the chromosome searched for genes
114        //genes = chr.getGenesOnRegion(requestStart, requestStop);
115
116        //note that aswell as the genes - we also get Exceptions to show on the map
117
118        //if this is a real chromosome we actually load all the genes
119        //but if we are just creating a map of an assembly exception here we would restrict this
120        //to the extent of the patch
121        int start = currentChromosome.getBioBegin();
122        int stop = currentChromosome.getBioEnd();
123
124        //force the lazy load the genes
125        List<DAGene> genesOnRegion = currentChromosome.getGenesOnRegion(start, stop);
126        //force the lazy load all types of exeptions
127        currentChromosome.getAssemblyExceptions(AssemblyExceptionType.PATCH_FIX);
128
129        // the chromosome is then used to make a map - which is zoomed to the specified coordinates for display
130        //EnsemblMap map = new EnsemblMap( currentChromosome, start, stop );
131
132        // the map is made by creating an EnsemblMapping object for each gene mapping on the chromosome
133        // and making an EnsemblExceptionMapping for each AssemblException mapped on the chromosome
134         
135        /*
136        *
137        public EnsemblMap(DAChromosome _chr) throws DAOException {
138
139            this.chr = _chr;
140            this.start = chr.getBioBegin();
141            this.stop = chr.getBioEnd();
142
143            try {
144                this.setAnalysis(Analyses.getAnalysis(this.chr.getDaoFactory().getDatabase()));
145            } catch (Exception e) {}
146
147            mapName =
148                    this.chr.getSpecies().getSpeciesBinomial() + " ("
149                    + this.chr.getSpecies().getShortName() + ")" + " chromosome "
150                    + this.chr.getChromosomeName(); 
151
152            //these must already be loaded 
153            for (Mapping m : this.getEnsemblChr().getLoadedMappings(FeatureType.gene)) {
154            addEnsemblMapping(m);
155            }
156            for (AssemblyExceptionType t : AssemblyExceptionType.values()) {
157                if (!t.equals(AssemblyExceptionType.PAR)) {
158                    for (Mapping m : this.getEnsemblChr().getAssemblyExceptions(t)) {
159                        addEnsemblExceptionMapping(m, t);
160                    }
161                }
162            }
163        }
164        * 
165        */
166        
167        // ***************** GETTING GENE HOMOLOGIES ************************ //
168        
169        //if the user selects an Ensembl gene on a Map
170        //they can search for homologies -  on alll species or just a selected species
171        //these results are shown in a table 
172        List<DAHomologyPairRelationship> homologies;
173        DBSpecies selectedSpecies = null;
174        DAGene selectedGene = currentSpecies.getGeneByStableID("ENSG00000153551", "72");
175
176
177        try {
178            if (selectedSpecies != null) {
179                homologies = selectedGene.getHomologies(selectedSpecies);
180                //note that this actually does a query to get all the homologies and then filters the result
181                //so is no faster than below
182            } else {
183                homologies = selectedGene.getHomologies();
184            }
185        } catch (Exception e) {
186            System.out.println("Error in thread to get homologues of a Gene Ensembl DataSource: " + e.toString());
187        }
188        
189        
190        
191        // ************* GETTING SNP VARIATIONS ON AN ENSEMBL MAP *********** //
192        Coordinate zoomCoord = new Coordinate(32900000, 33000000);
193
194
195        try {
196
197            List<? extends Variation> vars = currentChromosome.getVariationsOnRegion(zoomCoord);
198
199            if (vars == null || vars.isEmpty()) {
200                throw new Exception("No Variations found on " + currentChromosome.getSpecies().getCommonName()
201                        + " chromosome " + currentChromosome.getChromosomeName() + " (release " + currentChromosome.getDBVersion() + ")");
202            }
203
204            //the App makes a new map  - as above for this region of the chromosome
205            //EnsemblMap map = new EnsemblMap(currentChromosome, zoomCoord.getStart(), zoomCoord.getEnd());
206
207            //we then make ArkDB API objects for the Marker and the Mapping and 
208            //add these to the EnsemblMap object
209            for (Variation v : vars) {
210
211                Mapping mp = (Mapping) v.getLoadedMappings().first();
212                Coordinate coord = mp.getTargetCoordinates();
213
214                //ArkDB code
215                /*
216                 * 
217                    DatasourceAwareMarker mkr = new DatasourceAwareMarker();
218                    mkr.setDbsnpID(v.getName());
219                    mkr.setAccession(v.getId().toString());     
220                    mkr.setMarkerType("SNP");   
221                 * 
222                 */
223                
224                // if the variation has a 'synonym' set - this equals the search name, and there is a different 'name'
225                // if the 'synonym' is not set, the search name = 'name'
226
227                //we have a synonym
228                if (v.getSynonym() != null && !v.getSynonym().isEmpty()) {
229                    //ArkDB code
230                    //mkr.setName(v.getSynonym()); 
231                } else {
232                    //ArkDB code
233                    //mkr.setName(v.getName());
234                }
235
236                //ArkDB code
237//                    DatasourceAwareMapping mpp = new DatasourceAwareMapping();
238//                    mpp.setMarker(mkr);
239
240                double begin = coord.getStart().doubleValue();
241                double end = coord.getEnd().doubleValue();
242
243                if (end < begin) {
244                    double d = end;
245                    end = begin;
246                    begin = d;
247                }
248                //ArkDB code
249//                    mpp.setMarkerStart(begin);
250//                    mpp.setMarkerEnd(end);
251//                    mpp.setMarkerMiddle(begin + (end-begin)/2 );
252//                    mpp.setMap(map);
253
254                //ArkMAP code - we add each mapping to the map
255//                    map.addMapping(mpp);
256//                    snpMappings.add(mpp);
257
258            }
259
260        } catch (Exception e) {
261        }
262
263        // *************** FIND REGIONS OF CONSERVED SYNTENY ******************//
264        // *************** FOR A SELECTED REGION OF A CHROMOSOME *************//
265
266        //ArrayList<EnsemblMap> maps = new ArrayList<EnsemblMap>();
267
268        HashMap<DADNASequence, MappingSet> syntenies;
269        Coordinate searchCoord = new Coordinate(30000000, 35000000);
270        selectedSpecies = registry.getSpeciesByAlias("chimp");
271
272
273        try {
274
275            syntenies = currentChromosome.getRegionsOfConservedSynteny(searchCoord, selectedSpecies);
276
277            if (syntenies != null && !syntenies.isEmpty()) {
278
279                for (DADNASequence seq : syntenies.keySet()) {
280                    if (seq instanceof DAChromosome) {
281                        Coordinate coords = syntenies.get(seq).getExtent();
282
283
284                        // making a map of the whole chromsome  
285                        //- but just going to initially display the bit with conserved synteny
286                        //EnsemblMap map = new EnsemblMap((DAChromosome) seq);
287
288                        //we then pass the map back together with the Coordinates to display
289                        // the chromosome is searched for genes and exceptions before
290                        //display
291
292                    }
293                }
294            }
295        } catch (Exception e) {
296            System.out.println("Error in thread to get syntenic regions: " + e.toString());
297        }
298        
299        
300    // ************ DISPLAY THE MAP OF AN ASSEMBLY EXCEPTION *************//
301        
302    //ArkMAP displays the position of Assembly Exceptions on the EnsemblMaps as 
303    //special 'EnsemblExceptionMappings' .
304    //These  are colour coded depending on the type of exception (e.g for Human v68 
305    //chromosome 17 there are AssemblyPatches, AssemblyNovel and Haplotypes 
306    //displayed)
307    //if a user selects a Marker that is an AssemblyException  
308    //they get the option to display this Exception as a new map
309        
310    DAChromosome patchChromosome = null;
311    Coordinate patchCoordinates = null;
312
313    /*
314     * 
315      if (mapping instanceof EnsemblMappingView ) { 
316          EnsemblMapping ming = ((EnsemblMappingView) mapping).getMapping(); 
317          if (ming instanceof EnsemblExceptionMapping) { 
318                EnsemblExceptionMapping exMapping = (EnsemblExceptionMapping) ming; 
319                patchChromosome = exMapping.getPatchChromosome(); 
320                patchCoordinates = exMapping.getPatchChromosomeCoordinates(); } }
321     *
322     */
323    
324    //only the region of the patch/exception is real sequence, so we only display this region,
325    //and we only fetch the genes on this region by lazy load
326    
327    /*
328    patchChromosome.getGenesOnRegion (patchCoordinates.getStart(), patchCoordinates.getEnd());
329    */
330    
331    //redundant here..
332    /*
333    patchChromosome.getAssemblyExceptions (AssemblyExceptionType.PATCH_FIX);
334    */
335    
336    //Make the new map for display
337    //EnsemblMap map = new EnsemblMap( patchChromosome, patchCoordinates.getStart(), patchCoordinates.getStart() );
338
339    
340    
341    }
342}