001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.util.ArrayList;
025import java.util.Collection;
026import java.util.HashSet;
027import java.util.Iterator;
028import java.util.List;
029import org.biojava3.core.sequence.transcription.TranscriptionEngine;
030import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
031import uk.ac.roslin.ensembl.config.EnsemblCoordSystemType;
032import uk.ac.roslin.ensembl.dao.database.DBRegistry;
033import uk.ac.roslin.ensembl.dao.database.DBSpecies;
034import uk.ac.roslin.ensembl.datasourceaware.core.*;
035import uk.ac.roslin.ensembl.exception.NonUniqueException;
036import uk.ac.roslin.ensembl.model.Mapping;
037import uk.ac.roslin.ensembl.model.core.Exon;
038
039public class ExonsTranscriptionAndTranslation {
040
041    
042    //demonstrates integration of BioJava3 transcription and translation functions
043    //and JEnsembls retrieval and stitching together of exon sequences
044    //uses BioJava transcription engines for translation
045    //the datasource is queried to use the correct codon table if specified
046    //show works with chordate, plant and bacterial genes
047
048    
049    public static void main(String[] args) throws Exception {
050
051        DBRegistry eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
052        
053        //note by default translation by this default engine is configured to convert 
054        //non-Methionine initiations to Methionine
055        TranscriptionEngine te = eReg.getDefaultTranscriptionEngine();
056        
057       /* valid codes are
058         * 1 - UNIVERSAL
059         * 2 - VERTEBRATE_MITOCHONDRIAL
060         * 3 - YEAST_MITOCHONDRIAL
061         * 4 - MOLD_MITOCHONDRIAL
062         * 5 - INVERTEBRATE_MITOCHONDRIAL
063         * 6 - CILIATE_NUCLEAR
064         * 9 - ECHINODERM_MITOCHONDRIAL
065         * 10 - EUPLOTID_NUCLEAR
066         * 11 - BACTERIAL
067         * 12 - ALTERNATIVE_YEAST_NUCLEAR
068         * 13 - ASCIDIAN_MITOCHONDRIAL
069         * 14 - FLATWORM_MITOCHONDRIAL
070         * 15 - BLEPHARISMA_MACRONUCLEAR
071         * 16 - 2CHLOROPHYCEAN_MITOCHONDRIAL
072         * 21 - TREMATODE_MITOCHONDRIAL
073         * 23 - SCENEDESMUS_MITOCHONDRIAL 
074         */
075        TranscriptionEngine te2 = eReg.getTranscriptionEngine(2);
076        TranscriptionEngine te30 = eReg.getTranscriptionEngine(30);
077
078        
079        DBSpecies cow = eReg.getSpeciesByAlias("cow");
080        DAGene g = cow.getGeneByStableID("ENSBTAG00000021527");
081        Mapping mapping = g.getChromosomeMapping();
082        
083        //if the genes are not annotated on the chromosome level
084       // Mapping mapping = gene.getAnnotationLevelMappings().first();
085
086
087        System.out.println("Gene: "+ g.getStableID());
088        System.out.println("\tversion: "+ g.getDBVersion());
089        System.out.println("\tchr start: "+mapping.getTargetCoordinates().getStart());
090        System.out.println("\tchr stop: "+mapping.getTargetCoordinates().getEnd());
091        System.out.println("\tassembly: "+g.getAssembly());
092        System.out.println("\tdescription: "+ g.getDescription());
093        System.out.println("\tsymbol: "+g.getDisplayName());
094        System.out.println("\tstrand: "+mapping.getTargetCoordinates().getStrand());
095        System.out.println("\ttaxonID: "+g.getSpecies().getTaxonomyID());
096        System.out.println("\tstatus: "+g.getStatus());
097        System.out.println("\ttype: "+ g.getBiotype());
098
099        
100        //look at all the transcripts
101        
102        System.out.println("\tTranscript Count: "+ g.getTranscripts().size());
103        for (DATranscript t : g.getTranscripts()) {
104            System.out.println("\t\tTranscript: "+ t.getStableID());
105            System.out.println("\t\t "+ t.getDisplayName());
106            System.out.println("\t\t "+ t.getStatus());
107            System.out.println("\t\t "+ t.getBiotype());
108            System.out.println("\t\t "+ t.getDescription());
109            System.out.println("\t\t "+ t.getGene().getStableID());
110            System.out.println("\t\tCanonical ?  "+ t.isCanonical());
111            System.out.println("\t\tXREF: "+ t.getDisplayXRef().getDBDisplayName());
112            System.out.println("\t\tXREF: "+ t.getDisplayXRef().getDisplayID());
113            System.out.println("\t\tXREF: "+ t.getDisplayXRef().getInfoType());
114            System.out.println("\t\tXREF: "+ t.getDisplayXRef().getInfo());
115            for (Mapping m:t.getLoadedMappings(EnsemblCoordSystemType.chromosome)) {
116                System.out.println("\t\tMapping: "+m.getTargetHashID());
117                System.out.println("\t\t\tCoords: "+m.getTargetCoordinates().toString());
118            }
119
120            // look at all the exons of the transcript 
121            
122            System.out.println("EXONS");
123            for (DAExon e : t.getExons()) {
124            System.out.println("\t\tRank: "+ e.getRank());
125            System.out.println("\t\tStableID: "+ e.getStableID());
126            System.out.println("\t\tID: "+ e.getId());
127            System.out.println("\t\tstart phase: "+ e.getPhase());
128            System.out.println("\t\tend phase: "+ e.getEndPhase());
129            System.out.println("\t\tcurrent: "+ e.isCurrent());
130            System.out.println("\t\tconstitutive: "+ e.isConstitutive());
131            
132            //get locations of exon
133            for (Mapping m:e.getLoadedMappings(EnsemblCoordSystemType.chromosome)) {
134                System.out.println("\t\tMapping: "+m.getTargetHashID());
135                System.out.println("\t\t\tCoords: "+m.getTargetCoordinates().toString());
136            }
137            }
138
139
140
141
142        }
143
144//        
145        List<DAGene> genes = new ArrayList<DAGene>();
146//
147        DBSpecies sp = eReg.getSpeciesByAlias("human");
148        
149        //chr1
150        //DAGene ge = sp.getGeneByStableID("ENSG00000197049","68");//this id retired in 69
151        DAGene ge = sp.getGeneByStableID("ENSG00000185495");
152        System.out.println("GENE ENSG00000185495 \tversion: "+ ge.getDBVersion());
153
154        
155         if (ge.getCanonicalTranscript().isTranslated()) {
156             System.out.println(ge.getCanonicalTranslation().getTranslatedSequenceAsString());
157             System.out.println("");
158             //checks whether the codon table is specified by the datasource
159             System.out.println(ge.getCanonicalTranslation().getProteinSequence().getSequenceAsString());
160             System.out.println("");
161         }
162
163        DATranscript tt = (DATranscript) sp.getTranscriptByStableID("ENST00000400701", "76");
164        
165        System.out.println("A Transcript on chr X with out of phase start sequence: ENST00000400701"
166        +"\nNB: only in release 76!!");
167        
168         Collection<DAExon> exons = (Collection<DAExon>) tt.getExons();
169            
170            System.out.println("EXONS:...");
171
172            for (DAExon ex : exons) {
173                System.out.print(ex.getRank());
174                System.out.print("\t" + ex.getStableID());
175                System.out.print("\t" + ex.getId());
176                System.out.print("\t" + ex.getPhase());
177                System.out.print("\t" + ex.getEndPhase());
178                
179                
180                //not necessarily on a  chromosome :)
181                //for (Mapping m : ex.getLoadedMappings(EnsemblCoordSystemType.chromosome)) {
182                for (Mapping m : ex.getLoadedMappings()) {
183                    System.out.println("\t" +((DADNASequence) m.getTarget()).getName() +": "+ m.getTargetCoordinates().toString());
184                }
185            }
186
187
188            if (tt.isTranslated()) {
189
190                DATranslation trl = (DATranslation) tt.getCanonicalTranslation();
191
192                Exon e = trl.getFirstExon();
193                Exon e2 = trl.getLastExon();
194                Integer i = trl.getFirstExonStart();
195                Integer ii = trl.getLastExonEnd();
196
197                System.out.println("\nTranslateable Sequence:");
198                System.out.println(trl.getTranslatedSequenceAsString());
199                System.out.println("");
200
201                System.out.println("PROTEIN: ");
202                System.out.println(trl.getProteinSequence().getSequenceAsString());
203
204            } else {
205                System.out.println("\n"+tt.getStableID() + ": NOT TRANSLATED");
206            }
207        
208        //MT
209        DAGene g8 = sp.getGeneByStableID("ENSG00000198899");    
210            
211        //all chr1
212        
213        DAGene g5 = sp.getGeneByStableID("ENSG00000197049","56");
214        DAGene g1 = sp.getGeneByStableID("ENSG00000237330");
215        DAGene g6 = sp.getGeneByStableID("ENSG00000237330","56");
216        
217        //retired after 67
218        DAGene g2 = sp.getGeneByStableID("ENSG00000238916", "67");
219
220        DBSpecies sp2 = eReg.getSpeciesByAlias("cow");
221        DAGene g3 = sp2.getGeneByStableID("ENSBTAG00000021527", "68");
222        DAGene g4 = sp2.getGeneByStableID("ENSBTAG00000021531","68");
223        
224        DBSpecies sp3 = eReg.getSpeciesByAlias("chicken");
225        DAGene g7 = sp3.getGeneByStableID("ENSGALG00000021950","68");
226        
227        
228
229        genes.add(g8);
230        genes.add(g7);
231        genes.add(g);
232        genes.add(g5);
233        genes.add(g1);
234        genes.add(g6);
235        genes.add(g2);
236        genes.add(g3);
237        genes.add(g4);
238        
239        DBRegistry eRegG = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLGENOMES);
240        DBRegistry bRegG = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);
241        //DBRegistry bRegG = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA, 19);
242       
243        DBSpecies spG = eRegG.getSpeciesByAlias("Arabidopsis lyrata");
244        DBSpecies spG2 = eRegG.getSpeciesByAlias("Arabidopsis thaliana");
245        
246        //there are two rcords with this sp/strain name - but with different gca ids
247        
248        DBSpecies spG3 = null;
249        DBSpecies spG4 = null;
250        
251        //we need to try to add a 'begins with' function to look up aliases here 
252        //- the aliases keep on changing and becoming more specific
253        
254        try {
255            //spG3 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv");
256            spG3 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv (ASM27773v2)");
257//            spG4 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv (Myco_tube_H37Rv_V2)");
258            spG4 = bRegG.getSpeciesByAlias("Mycobacterium tuberculosis H37Rv (ASM19595v2)");
259        } catch (NonUniqueException nonUniqueException) {
260            System.out.print("Mycobacterium tuberculosis H37Rv is not unique....");
261            HashSet<DBSpecies> allHits = (HashSet<DBSpecies>) nonUniqueException.getAllHits();
262            System.out.println(allHits.size());
263            Iterator<DBSpecies> iterator = allHits.iterator();
264            spG3 = iterator.next();
265            spG4 = iterator.next();
266        }
267         
268        //DBSpecies spG4 = eRegG.getSpeciesByAlias("gca_000195955");
269        
270        DAGene geneG1_15 = spG.getGeneByStableID("fgenesh2_kg.1__2__AT1G02190.2", "15");
271        DAGene geneG1_19 = spG.getGeneByStableID("fgenesh2_kg.1__2__AT1G02190.2", "19");
272        DAGene geneG2_15 = spG2.getGeneByStableID("AT1G78060", "15");
273        DAGene geneG2_19 = spG2.getGeneByStableID("AT1G78060", "19");
274        //DAGene geneG3 = spG3.getGeneByStableID("EBMYCG00000000423", "15");
275        DAGene geneG3 = spG3.getGeneByStableID("RVBD_0301");
276        DAGene geneG4 = spG4.getGeneByStableID("Rv0301");
277//        List<DAGene>  geneG3 = spG3.getGenesForExactName("lysU", "18");
278//        List<DAGene>  geneG4 = spG4.getGenesForExactName("EBG00313316", "18");
279        
280        genes.add(geneG1_15);
281        genes.add(geneG1_19);
282        genes.add(geneG2_15);
283        genes.add(geneG2_19);
284        if (geneG3 != null) {genes.add(geneG3);}
285        if (geneG4 != null) {genes.add(geneG4);}
286        
287        
288        
289        //showing we can get genes, transcripts and translations by stableID 
290        //from a species
291
292        
293//        DBSpecies hs = eReg.getSpeciesByAlias("human");
294//        System.out.println("*** HUMAN ***");
295//        DAGene gene = hs.getGeneByStableID("ENSG00000139618");
296//       
297//        DATranscript t = sp.getTranscriptByStableID("ENST00000380152" );
298//        DATranslation tr = sp.getTranslationByStableID("ENSP00000369497" );
299//        
300//        //note the queries used pre v67 need to link to the relevant  stableid table
301//        DAGene gene60 = hs.getGeneByStableID("ENSG00000139618", "60");
302//        DATranscript t60 = sp.getTranscriptByStableID("ENST00000380152","60" );        
303//        DATranslation tr60 = sp.getTranslationByStableID("ENSP00000369497","60" );
304//        
305//        
306//        // can lazy load a gene to a transcript
307//        DAGene gene2 = t.getGene();
308//        // can lazy load a transcript to a translation 
309//        DATranscript t2 = tr.getTranscript();
310//        
311//        System.out.println("Gene VegaID: "+gene.getVegaID());
312//        System.out.println("Gene retrieved from transcript: VegaID: "+gene2.getVegaID());
313//        System.out.println("Transcript VegaID: "+t.getVegaID());
314//        System.out.println("Transcript retrieved from translation: VegaID: "+t2.getVegaID());
315//        System.out.println("Translation VegaID: "+tr.getVegaID());
316//
317//        
318//        if (gene.getStableID().equals(gene2.getStableID())) {
319//            System.out.println("The two genes ARE the same (according to their stableIDS)");
320//        } else {
321//            System.out.println("The two genes are NOT the same (according to their stableIDs)");
322//        }
323//        if (gene.getStableID().equals(gene60.getStableID())) {
324//            System.out.println("The  genes pre and post v67 ARE the same (according to their stableIDS)");
325//        } else {
326//            System.out.println("The genes pre and post v67 are NOT the same (according to their stableIDs)");
327//        }
328//        if (t.getStableID().equals(t2.getStableID())) {
329//            System.out.println("The two transcripts ARE the same (according to their stableIDS)");
330//        } else {
331//            System.out.println("The two transcripts are NOT the same (according to their stableIDs)");
332//        }
333//        if (t.getStableID().equals(t60.getStableID())) {
334//            System.out.println("The  transcripts pre and post v67 ARE the same (according to their stableIDS)");
335//        } else {
336//            System.out.println("The  transcripts pre and post v67 are NOT the same (according to their stableIDs)");
337//        }
338//        if (tr.getStableID().equals(tr60.getStableID())) {
339//            System.out.println("The  translations pre and post v67 ARE the same (according to their stableIDS)");
340//        } else {
341//            System.out.println("The  translations pre and post v67 are NOT the same (according to their stableIDs)");
342//        }
343//        
344//        System.out.println("*** PIG ***");
345//        sp = eReg.getSpeciesByAlias("pig");
346//         g = sp.getGeneByStableID("ENSSSCG00000007520");
347//         //t = sp.getTranscriptByStableID("ENSSSCT00000034146" );
348//         tr = sp.getTranslationByStableID("ENSSSCP00000028626" );
349//         t = tr.getTranscript();
350//         
351//        System.out.println("Gene VegaID: "+g.getVegaID());
352//        System.out.println("Transcript VegaID: "+t.getVegaID());
353//        System.out.println("Translation VegaID: "+tr.getVegaID());
354//
355//        System.out.println("*** Pyrococcus ***");//there won't be any Vega annotations
356// 
357//        DBCollectionSpecies sp1 = (DBCollectionSpecies) eRegG.getSpeciesByAlias("Pyrococcus abyssi");
358//        
359//        DAGene gene1 = sp1.getGeneByStableID("EBPYRG00000002639", "15");
360//        DATranscript t1 = sp1.getTranscriptByStableID("EBPYRT00000002639" ,"15");
361//        DATranslation tr1 = sp1.getTranslationByStableID("EBPYRP00000002570","15" );
362//        DATranscript t12 = tr1.getTranscript();
363//        DAGene gene12 = t1.getGene();
364//        
365//        System.out.println("Gene VegaID: "+gene1.getVegaID());
366//        System.out.println("Gene retrieved from Transcript VegaID: "+gene12.getVegaID());
367//        System.out.println("Transcript VegaID: "+t1.getVegaID());
368//        System.out.println("Transcript retrieved from translation: VegaID: "+t12.getVegaID());
369//        System.out.println("Translation VegaID: "+tr1.getVegaID());
370//
371//
372//        if (gene1.getStableID().equals(gene12.getStableID())) {
373//            System.out.println("The two genes ARE the same (according to their stableIDS)");
374//        } else {
375//            System.out.println("The two genes are NOT the same (according to their stableIDs)");
376//        }        
377//        if (t1.getStableID().equals(t12.getStableID())) {
378//            System.out.println("The two transcripts ARE the same (according to their stableIDS)");
379//        } else {
380//            System.out.println("The two transcripts are NOT the same (according to their stableIDs)");
381//        }
382//        
383        System.out.println("\n\n*************************\nCOMPLETED FUNCTIONAL TEST\n*************************\n");
384        
385   }
386}