001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.io.BufferedWriter;
025import java.io.FileWriter;
026import java.util.TreeMap;
027import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
028import uk.ac.roslin.ensembl.dao.database.DBRegistry;
029import uk.ac.roslin.ensembl.dao.database.DBSpecies;
030import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
031import uk.ac.roslin.ensembl.datasourceaware.core.DAExon;
032import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
033import uk.ac.roslin.ensembl.datasourceaware.core.DATranscript;
034import uk.ac.roslin.ensembl.model.Mapping;
035
036/*
037 * request from Paul S on 20120412
038 * >
039 * I am trying to extract some annotation information out of the ensembl DB with the help of JEnsembl.
040I need the following data:
041
042StableGeneID (e.g. ENSBTAG00000021527)
043ChrStart
044ChrStop 
045Assembly (e.g. BTau_4.0)
046Chromosome 
047Gene description
048Gene Symbol (e.g. ALB)
049Strand
050Taxon ID
051Gene Status
052Gene Type (e.g. protein-coding)
053
054+ all transcripts of every gene
055StableTranscriptID (ENSBTAT00000028690)
056ChrStart
057ChrStop
058
059+ all exons of every transcript
060ChrStart
061ChrStop
062
063I tried serveral approaches but I could not figure out how to receive for example the transcripts and "StableTranscriptId"s.
064I already took a look at your example code.
065Is it possible to extract these information with JEnsembl? Could you give me an example?
066This would be very helpful.
067
068 * 
069 * initial response:
070 * >At the moment I think it can do most of those requests - but not all (e.g. I haven't implemented exons yet!)
071  * 
 * then I added exon functionality - see rt5044
073 * 
074 * and replied on 20120503
075 * >
076 have done some more work with JEnsembl and the code now does most of
077the things that you were interested in (I hope...:)
078
079if you are still interested in trying it, I have made a new release
080version of the code on sourceforge
081
082https://sourceforge.net/projects/jensembl/files/Releases/1.09/
083
084I'd appreciate any feedback if you do try it out.
085
086to get the information on chromosomes, genes, transcripts, exons ....
087see code below, 
088
089 * 
090 */
091public class UserSetzermann {
092
093    // fetching all the genes, transcripts and exons on all the chromosomes
094    // and writing detials out to files
095    //note: here we default to most recent currently configured release version
096    
097    // i have put breaks in so we only do it for one chromosome in this script
098    public static void main(String[] args) throws Exception {
099
100
101        DBRegistry eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
102        DBSpecies cow = eReg.getSpeciesByAlias("cow");
103        String taxonID = cow.getTaxonomyID();
104        
105        //could specify release verion here - 
106        // or we could get a particular chromosome by Name and version 
107        TreeMap<String, DAChromosome> chrHash = cow.getChromosomes();
108        String assembly = chrHash.get(chrHash.firstKey()).getAssembly();
109        
110        
111        FileWriter fstream = new FileWriter("genes.txt");
112        BufferedWriter outFile = new BufferedWriter(fstream);
113        
114        outFile.append("GeneID|start|stop|strand|assembly|chromosome|description|"
115                +"DisplayName|taxonID|Status|Biotype\r\n"
116                );
117        
118        for (DAChromosome chr: chrHash.values() ) {
119            for (DAGene g: chr.getGenesOnRegion(chr.getBioBegin(), chr.getBioEnd())) {
120                    
121                Mapping m = g.getChromosomeMapping(chr);
122                
123                outFile.append(g.getStableID()).append("|");
124                if (m != null && m.getTargetCoordinates()!= null) {
125                    outFile.append(m.getTargetCoordinates().getStart().toString()).append("|");
126                    outFile.append(m.getTargetCoordinates().getEnd().toString()).append("|");
127                    outFile.append(m.getTargetCoordinates().getStrand().toString()).append("|");
128                } else {
129                    outFile.append(" | | |");
130                }
131                outFile.append(assembly).append("|");
132                outFile.append(chr.getName()).append("|");
133                outFile.append(g.getDescription()).append("|");
134                outFile.append(g.getDisplayName()).append("|");
135                outFile.append(taxonID).append("|");
136                outFile.append(g.getStatus()).append("|");
137                outFile.append(g.getBiotype()).append("\r\n");
138            }
139            //so we only do it for one chromosome :)
140            break;
141        }
142        
143        outFile.close();
144        
145        fstream = new FileWriter("transcripts.txt");
146        outFile = new BufferedWriter(fstream);
147
148        outFile.append("geneID|transcriptID|start|stop\r\n"); 
149              
150        for (DAChromosome chr: chrHash.values() ) {
151           
152            for (DAGene g : chr.getGenesOnRegion(chr.getBioBegin(), chr.getBioEnd())) {
153                for (DATranscript t: g.getTranscripts()) {
154                    
155                    Mapping m = t.getChromosomeMapping(chr);
156                    outFile.append(g.getStableID()).append("|");
157                    outFile.append(t.getStableID());
158                    if (m!= null&& m.getTargetCoordinates()!=null) {
159                        outFile.append("|").append(m.getTargetCoordinates().getStart().toString()).append("|") ;
160                        outFile.append(m.getTargetCoordinates().getEnd().toString()); 
161                    }
162                    outFile.append("\r\n");
163                }
164            }
165            //so we only do it for one chromosome :)
166            break;
167        }
168        
169
170        outFile.close();
171        
172        fstream = new FileWriter("exons.txt");
173        outFile = new BufferedWriter(fstream);
174        
175        outFile.append("geneID|transcriptID|exonID|rank|start|stop|constitutive\r\n"); 
176        
177        for (DAChromosome chr: chrHash.values() ) {
178            for (DAGene g : chr.getGenesOnRegion(chr.getBioBegin(), chr.getBioEnd())) {
179                for (DATranscript t: g.getTranscripts()) {
180                    for (DAExon e : t.getExons()) {
181                        Mapping m = e.getChromosomeMapping(chr); 
182                        outFile.append(g.getStableID()).append("|");
183                        outFile.append(t.getStableID()).append("|");
184                        outFile.append(e.getStableID()).append("|");
185                        outFile.append(e.getRank().toString()).append("|");
186                        if (m!= null&& m.getTargetCoordinates()!=null) {
187                            outFile.append(m.getTargetCoordinates().getStart().toString()).append("|") ;
188                            outFile.append(m.getTargetCoordinates().getEnd().toString()).append("|") ; 
189                        }
190                        outFile.append(e.isConstitutive().toString()).append("\r\n");                        
191                      
192                    }
193                }
194            }
195            //so we only do it for one chromosome :)
196            break;
197        }
198        
199        outFile.close();
200
201        System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");
202
203    }
204}