001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.util.ArrayList;
025import java.util.List;
026import java.util.Set;
027import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
028import uk.ac.roslin.ensembl.config.EnsemblDBType;
029import uk.ac.roslin.ensembl.config.FeatureType;
030import uk.ac.roslin.ensembl.dao.database.*;
031import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory;
032import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
033import uk.ac.roslin.ensembl.datasourceaware.core.DACoordinateSystem;
034import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
035import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
036import uk.ac.roslin.ensembl.model.Coordinate;
037import uk.ac.roslin.ensembl.model.Mapping;
038import uk.ac.roslin.ensembl.model.core.Gene;
039
040public class Genes {
041
042    //retreiving genes
043    //genes are 'feature' annotations - 
044    //when genes are retrieved - they are mapped on to a chromosome
045    
046    public  static void main(String[] args) throws Exception {
047
048        DBRegistry bactRegistry = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);
049        
050        DBCollectionSpecies sp1 = (DBCollectionSpecies) bactRegistry.getSpeciesByAlias("pyrococcus_abyssi_ge5");
051        //DBDAOCollectionCoreFactory f =(DBDAOCollectionCoreFactory) ((DBCollectionCoreDatabase) sp1.getMostRecentCoreDatabase()).getCoreFactory(sp1);
052
053        DAChromosome chr = sp1.getChromosomeByName("Chromosome");
054        //DAChromosome chr = f.getChromosomeDAO().getChromosomeByName("Chromosome");
055
056        System.out.println("species: "+chr.getSpecies().getSpeciesBinomial());
057        System.out.println("chr: "+chr.getChromosomeName());
058        System.out.println("chrID: "+chr.getId());
059        System.out.println("chr DB version: "+chr.getDBVersion());
060
061        System.out.println(chr.getBioBegin());
062        System.out.println(chr.getLength());
063        System.out.println(chr.getDBSeqLength());
064        System.out.println(chr.getBioEnd());
065        System.out.println(chr.getDBSeqLength());
066        System.out.println(chr.getCoordSystem().getId());
067        System.out.println(chr.getCoordSystem().getType().toString());
068
069        System.out.println(chr.getSequenceAsString(1, 100));
070        System.out.println(chr.getReverseComplementSequenceAsString(1, 100));
071
072        //lets look at all the features that might be mapped on a chromosome ( very few implemented so far:)
073       System.out.println("\n------------\nFeatures\n-------------");
074
075        for (FeatureType t : FeatureType.getAllTypes()) {
076            System.out.println(t.toString());
077
078            Set<DACoordinateSystem> s = (sp1.getMostRecentCoreDatabase()).getCSForFeature(sp1, t);
079
080            if (s != null && !s.isEmpty()) {
081
082                for (DACoordinateSystem cs : s) {
083                    System.out.println("\tcs-id: " + cs.getId() + " - " + "cs-type: " + cs.getType().toString());
084                }
085            } else {
086                System.out.println("NO INFORMATION");
087            }
088            System.out.println("");
089        }
090
091        DAGene gene;
092        DAOCoreFactory f = ((DBCollectionCoreDatabase)sp1.getDatabaseByTypeAndVersion(
093                    EnsemblDBType.collection_core, ""+bactRegistry.getHighestReleaseVersion())).getCoreFactory(sp1);
094        gene = new DAGene(f);
095        gene.setStableID("PAB1143");
096        //a gene with a factory and stableID can lazyload everything else....
097        
098         System.out.println("getAnalysisId "+gene.getAnalysisID());
099                  System.out.println("getAnalysis "+gene.getAnalysis().getDisplayLabel());  
100         System.out.println("getAnalysisDescription "+gene.getAnalysis().getDescription());     
101         System.out.println("gene: "+gene.getId()+" ("+gene.getStableID()+") "+gene.getBiotype());
102         System.out.println("\t"+gene.getDescription());
103         System.out.println("Species: "+gene.getSpecies().getCommonName());
104         System.out.println("Created "+gene.getCreationDate().toString());
105         System.out.println("Modified "+gene.getModificationDate().toString());
106         System.out.println("getAssembly "+gene.getAssembly());
107         System.out.println("getBiotype "+gene.getBiotype());
108         System.out.println("getDescription "+gene.getDescription());
109         System.out.println("getDBVersion "+gene.getDBVersion());
110         System.out.println("getDisplayName "+gene.getDisplayName());
111         System.out.println("getId "+gene.getId());
112
113         System.out.println("getSchemaVersion "+gene.getSchemaVersion());
114         System.out.println("getStatus "+gene.getStatus());
115        
116        
117         System.out.println("Gene's loaded mappings...");
118         for (Mapping m : gene.getLoadedMappings()) {
119             System.out.println("HASHES "+m.getSourceHashID()+" "+m.getTargetHashID());
120             System.out.println("Mapping: "+m.getSource().getClass().getSimpleName() + " id: "+ m.getSource().getId() );
121             System.out.println("\tTarget: "+m.getTarget().getClass().getSimpleName() +" id: "+m.getTarget().getId() 
122                     + "type: "+m.getTarget().getType() );
123             System.out.println("\tTargetCoords: "+m.getTargetCoordinates().getStart()
124                     +" - "+m.getTargetCoordinates().getEnd() + "["+m.getTargetCoordinates().getStrand() +"]");
125
126         }
127         System.out.println("");
128         System.out.println("Gene's anotation level mappings...");
129          for (Mapping m : gene.getAnnotationLevelMappings() ) {
130                           System.out.println("HASHES "+m.getSourceHashID()+" "+m.getTargetHashID());
131             System.out.println("Mapping: "+m.getSource().getClass().getSimpleName() + " id: "+ m.getSource().getId() );
132             System.out.println("\tTarget: "+m.getTarget().getClass().getSimpleName() +" id: "+m.getTarget().getId() 
133                     + "type: "+m.getTarget().getType() );
134             System.out.println("\tTargetCoords: "+m.getTargetCoordinates().getStart()
135                     +" - "+m.getTargetCoordinates().getEnd() + "["+m.getTargetCoordinates().getStrand() +"]");
136
137          }
138         System.out.println("");
139         
140         System.out.println("\nGenes mapped on the chromosome:...");
141         for (Mapping m : chr.getLoadedMappings(FeatureType.gene)) {
142            System.out.println(m.getTarget().getClass().getSimpleName()
143                    + " gene stableID: "
144                    + ((DAGene) m.getTarget()).getStableID()
145                    + " id: " + m.getTarget().getId());
146            System.out.println("Gene on chromosome: HashID: " + m.getTarget().getHashID()
147                    + " VMID:" + m.getTarget().hashCode());
148        }
149         
150         
151         
152         
153         
154        //v1.15 removed the ability to get a gene by internal db id from a species 
155        // DAGene gene =   sp1.getGeneByID(3555, "15");
156
157         bactRegistry = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA, 15);
158        
159         sp1 = (DBCollectionSpecies) bactRegistry.getSpeciesByAlias("Pyrococcus abyssi");
160        
161         gene = (DAGene) ((DBCollectionCoreDatabase)sp1.getDatabaseByTypeAndVersion(
162                    EnsemblDBType.collection_core, "15")).getCoreFactory(sp1).getGeneDAO().getGeneByID(3555);
163
164         System.out.println("gene: "+gene.getId()+" ("+gene.getStableID()+") "+gene.getBiotype());
165         System.out.println("\t"+gene.getDescription());
166         System.out.println("Species: "+gene.getSpecies().getCommonName());
167         System.out.println("Created "+gene.getCreationDate().toString());
168         System.out.println("Modified "+gene.getModificationDate().toString());
169         System.out.println("getAssembly "+gene.getAssembly());
170         System.out.println("getBiotype "+gene.getBiotype());
171         System.out.println("getDescription "+gene.getDescription());
172         System.out.println("getDBVersion "+gene.getDBVersion());
173         System.out.println("getDisplayName "+gene.getDisplayName());
174         System.out.println("getId "+gene.getId());
175         System.out.println("getAnalysisId "+gene.getAnalysisID());
176                  System.out.println("getAnalysis "+gene.getAnalysis().getDisplayLabel());  
177         System.out.println("getAnalysisDescription "+gene.getAnalysis().getDescription());
178         System.out.println("getSchemaVersion "+gene.getSchemaVersion());
179         System.out.println("getStatus "+gene.getStatus());
180
181
182         System.out.println("getCanonicalTranscriptID "+gene.getCanonicalTranscriptID());
183         System.out.println("getCanonicalTranscript().getBiotype() "+gene.getCanonicalTranscript().getBiotype());
184         System.out.println("getCanonicalTranscript().getDisplayName() "+gene.getCanonicalTranscript().getDisplayName());
185         System.out.println("getCanonicalTranscript().getStableID() "+gene.getCanonicalTranscript().getStableID());
186         System.out.println("getCanonicalTranscript().getCanonicalTranslationID() "+gene.getCanonicalTranscript().getCanonicalTranslationID());
187                  System.out.println("gene.getCanonicalTranslation().getId() "+gene.getCanonicalTranslation().getId());
188                  System.out.println("getCanonicalTranslation().getStableID() "+gene.getCanonicalTranslation().getStableID());
189         System.out.println("getCanonicalTranscript().getCanonicalTranslation().getProteinSequenceAsString() "+gene.getCanonicalTranscript().getCanonicalTranslation().getProteinSequenceAsString());
190         System.out.println("getCanonicalTranscript().getCanonicalTranslation().getRNASequenceAsString() "+gene.getCanonicalTranscript().getCanonicalTranslation().getRNASequenceAsString());
191
192
193         System.out.println("Gene's loaded mappings...");
194         for (Mapping m : gene.getLoadedMappings()) {
195             System.out.println("HASHES "+m.getSourceHashID()+" "+m.getTargetHashID());
196             System.out.println("Mapping: "+m.getSource().getClass().getSimpleName() + " id: "+ m.getSource().getId() );
197             System.out.println("\tTarget: "+m.getTarget().getClass().getSimpleName() +" id: "+m.getTarget().getId() 
198                     + "type: "+m.getTarget().getType() );
199             System.out.println("\tTargetCoords: "+m.getTargetCoordinates().getStart()
200                     +" - "+m.getTargetCoordinates().getEnd() + "["+m.getTargetCoordinates().getStrand() +"]");
201
202         }
203         System.out.println("");
204         System.out.println("Gene's anotation level mappings...");
205          for (Mapping m : gene.getAnnotationLevelMappings() ) {
206                           System.out.println("HASHES "+m.getSourceHashID()+" "+m.getTargetHashID());
207             System.out.println("Mapping: "+m.getSource().getClass().getSimpleName() + " id: "+ m.getSource().getId() );
208             System.out.println("\tTarget: "+m.getTarget().getClass().getSimpleName() +" id: "+m.getTarget().getId() 
209                     + "type: "+m.getTarget().getType() );
210             System.out.println("\tTargetCoords: "+m.getTargetCoordinates().getStart()
211                     +" - "+m.getTargetCoordinates().getEnd() + "["+m.getTargetCoordinates().getStrand() +"]");
212
213          }
214         System.out.println("");
215        //this single gene will have been added to the chromosome
216        System.out.println("\nGenes mapped on the chromosome:...");
217         for (Mapping m : chr.getLoadedMappings(FeatureType.gene)) {
218            System.out.println(m.getTarget().getClass().getSimpleName()
219                    + " gene stableID: "
220                    + ((DAGene) m.getTarget()).getStableID()
221                    + " id: " + m.getTarget().getId());
222            System.out.println("Gene on chromosome: HashID: " + m.getTarget().getHashID()
223                    + " VMID:" + m.getTarget().hashCode());
224        }
225        
226         gene =   sp1.getGeneByStableID("EBPYRG00000003245", "15");
227         //gene = f.getGeneDAO().getGeneByStableID("EBPYRG00000003245");
228         
229         System.out.println("gene: "+gene.getId()+" ("+gene.getStableID()+") "+gene.getBiotype());
230         System.out.println("\t"+gene.getDescription());
231         System.out.println("Species: "+gene.getSpecies().getCommonName());
232         System.out.println("Created "+gene.getCreationDate().toString());
233         System.out.println("Modified "+gene.getModificationDate().toString());
234         
235         System.out.println("Gene's loaded mappings...");
236         for (Mapping m : gene.getLoadedMappings()) {
237             System.out.println("HASHES "+m.getSourceHashID()+" "+m.getTargetHashID());
238            System.out.println("Mapping source: "+m.getSource().getId());
239             System.out.println("\tTarget: "+m.getTarget().getClass().getSimpleName() +" id: "+m.getTarget().getId()
240                     + "type: "+m.getTarget().getType() );
241            System.out.println("target coords:"+m.getTargetCoordinates().getStart()+"-"
242                     +m.getTargetCoordinates().getEnd() +"("
243                     +m.getTargetCoordinates().getStrand().toString() +")");
244             System.out.println("CS: "+((DADNASequence) m.getTarget()).getCoordSystem().getType().toString());
245         }
246         
247         System.out.println("");
248         System.out.println("Gene's anotation level mappings...");
249          for (Mapping m : gene.getAnnotationLevelMappings() ) {
250                           System.out.println("HASHES "+m.getSourceHashID()+" "+m.getTargetHashID());
251             System.out.println("Mapping: "+m.getSource().getClass().getSimpleName() + " id: "+ m.getSource().getId() );
252             System.out.println("\tTarget: "+m.getTarget().getClass().getSimpleName() +" id: "+m.getTarget().getId() 
253                     + "type: "+m.getTarget().getType() );
254             System.out.println("\tTargetCoords: "+m.getTargetCoordinates().getStart()
255                     +" - "+m.getTargetCoordinates().getEnd() + "["+m.getTargetCoordinates().getStrand() +"]");
256
257          }
258        System.out.println("");
259        //two genes will have been added to the chromosome
260         System.out.println("\nGenes now  mapped on the chromosome:...");         
261         for (Mapping m : chr.getLoadedMappings(FeatureType.gene)) {
262            System.out.println(m.getTarget().getClass().getSimpleName()
263                    + " gene stableID: "
264                    + ((DAGene) m.getTarget()).getStableID()
265                    + " id: " + m.getTarget().getId());
266            System.out.println("Gene on chromosome: HashID: " + m.getTarget().getHashID()
267                    + " VMID:" + m.getTarget().hashCode());
268        }
269         
270         System.out.println("");
271         Coordinate coord = new Coordinate();
272
273         coord.setStart(1);
274         coord.setEnd(1000000);
275
276         List<? extends Gene> genes = chr.getGenesOnRegion(1, 500000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
277         System.out.println("");
278         System.out.println(genes.size()+ " returned  genes");
279         System.out.println("total gene mappings: "+chr.getLoadedMappings(FeatureType.gene).size());
280
281         genes = chr.getGenesOnRegion(1,1000000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
282         System.out.println(genes.size()+ " returned genes");
283         System.out.println("total gene mappings: "+chr.getLoadedMappings(FeatureType.gene).size());
284
285         genes = chr.getGenesOnRegion(500000,1000000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
286         System.out.println(genes.size()+ " returned genes");
287         System.out.println("total gene mappings: "+chr.getLoadedMappings(FeatureType.gene).size());
288
289         genes = chr.getGenesOnRegion(1010000, 1020000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
290         System.out.println(genes.size()+ " returned genes");
291         System.out.println("total gene mappings: "+chr.getLoadedMappings(FeatureType.gene).size());
292
293         genes = chr.getGenesOnRegion(1030000, 1040000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
294         System.out.println(genes.size()+ " returned genes");
295         System.out.println("total gene mappings: "+chr.getLoadedMappings(FeatureType.gene).size());
296
297         genes = chr.getGenesOnRegion(1050000, 1060000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
298         System.out.println(genes.size()+ " returned genes");
299         System.out.println("total gene mappings: "+chr.getLoadedMappings(FeatureType.gene).size());
300
301
302         for (Mapping m : chr.getLoadedMappings(FeatureType.gene)) {
303
304             System.out.println( ((DAGene) m.getTarget()).getStableID()+" "+m.getSourceHashID()+" "+m.getTargetHashID());
305
306         }
307
308
309         System.out.println("gaps in the gene mappings: ");
310//         for (Coordinate cd : Coordinate.getCoordinateGaps(chr.getMappedRegions().get(FeatureType.gene))) {
311         for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getGaps()) {
312             System.out.println("gap "+cd.getStart()+" - "+cd.getEnd());
313         }
314         System.out.println("gaps in the gene mappings for the whole chromosome: ");
315         Coordinate cx = new Coordinate(chr.getBioBegin(), chr.getBioEnd(),1);
316//         for (Coordinate cd : cx.getGaps(chr.getMappedRegions().get(FeatureType.gene))) {
317         for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(cx)) {
318             System.out.println("gap "+cd.getStart()+" - "+cd.getEnd());
319         }
320
321         Coordinate test = new Coordinate(900000,1200000,1);
322
323         System.out.println("testing 900000-1200000 query");
324//         System.out.println("test lies within known region: "+test.liesWithinCoordinateSetWithoutGaps(chr.getMappedRegions().get(FeatureType.gene)));
325         System.out.println("test lies within known region: "+chr.getMappedRegions().get(FeatureType.gene).containsCoordinateWithoutGaps(test));
326
327//         if (test.getGaps(chr.getMappedRegions().get(FeatureType.gene)).isEmpty()) {
328         if (chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(test).isEmpty()) {
329
330             System.out.println("test region is fully covered");
331
332         } else {
333             System.out.println("test region is not fully covered");
334 //            for (Coordinate cd : test.getGaps(chr.getMappedRegions().get(FeatureType.gene))) {
335             for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(test)) {
336                System.out.println("gap "+cd.getStart()+" - "+cd.getEnd());
337             }
338         }
339
340//         System.out.println("test has regions not covered: "+test.getGaps(chr.getMappedRegions().get(FeatureType.gene)).size());
341         System.out.println("test has regions not covered: "+chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(test).size());
342
343         test = new Coordinate(1,900000,1);
344
345         System.out.println("testing 1-900000 query");
346
347//         System.out.println("test lies within known region: "+test.liesWithinCoordinateSetWithoutGaps(chr.getMappedRegions().get(FeatureType.gene)));
348         System.out.println("test lies within known region: "+chr.getMappedRegions().get(FeatureType.gene).containsCoordinateWithoutGaps(test));
349
350//         if (test.getGaps(chr.getMappedRegions().get(FeatureType.gene)).isEmpty()) {
351         if (chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(test).isEmpty()) {
352             System.out.println("test region is fully covered");
353         } else {
354             System.out.println("test region is not fully covered");
355             for (Coordinate cd : chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(test)) {
356                System.out.println("gap "+cd.getStart()+" - "+cd.getEnd());
357             }
358         }
359
360 //        System.out.println("test has regions not covered: "+test.getGaps(chr.getMappedRegions().get(FeatureType.gene)).size());
361         System.out.println("test has regions not covered: "+chr.getMappedRegions().get(FeatureType.gene).getUncoveredRegions(test).size());
362
363
364         genes = chr.getGenesOnRegion(1, 10000, uk.ac.roslin.ensembl.model.Coordinate.Strand.REVERSE_STRAND);
365
366         System.out.println(genes.size()+ " genes");
367         
368         DBRegistry ensemblRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
369         DBSpecies hs = ensemblRegistry.getSpeciesByAlias("human");
370         
371         List<String> tester = new ArrayList<String>();
372         tester.add("69");
373         tester.add("72");
374
375         for (String str:tester)
376         {
377             f= ((DBSingleSpeciesCoreDatabase)hs.getDatabaseByTypeAndVersion(
378                    EnsemblDBType.core, str)).getCoreFactory();
379        gene = new DAGene(f);
380        gene.setStableID("ENSG00000139618");
381        //a gene with a factory and stableID can lazyload everything else....
382        
383         System.out.println("getAnalysisId "+gene.getAnalysisID());
384         System.out.println("getAnalysisDescription "+gene.getAnalysis().getDescription());   
385         System.out.println("getAnalysis "+gene.getAnalysis().getDisplayLabel());  
386         System.out.println("gene: "+gene.getId()+" ("+gene.getStableID()+") "+gene.getBiotype());
387         System.out.println("\t"+gene.getDescription());
388         System.out.println("Species: "+gene.getSpecies().getCommonName());
389         System.out.println("Created "+gene.getCreationDate().toString());
390         System.out.println("Modified "+gene.getModificationDate().toString());
391         System.out.println("getAssembly "+gene.getAssembly());
392         System.out.println("getBiotype "+gene.getBiotype());
393         System.out.println("getDescription "+gene.getDescription());
394         System.out.println("getDBVersion "+gene.getDBVersion());
395         System.out.println("getDisplayName "+gene.getDisplayName());
396         System.out.println("getId "+gene.getId());
397
398         System.out.println("getSchemaVersion "+gene.getSchemaVersion());
399         System.out.println("getStatus "+gene.getStatus());
400         }
401
402       System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");
403
404
405    }
406
407
408
409
410
411}