/**
 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * Project hosted at: http://jensembl.sourceforge.net
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
 */
package uk.ac.roslin.ensembl.demo;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.TreeMap;
import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
import uk.ac.roslin.ensembl.dao.database.DBRegistry;
import uk.ac.roslin.ensembl.dao.database.DBSpecies;
import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
import uk.ac.roslin.ensembl.datasourceaware.core.DAExon;
import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
import uk.ac.roslin.ensembl.datasourceaware.core.DATranscript;
import uk.ac.roslin.ensembl.model.Mapping;

/*
 * request from Paul S on 20120412
 * >
 * I am trying to extract some annotation information out of the ensembl DB with the help of JEnsembl.
I need the following data:

StableGeneID (e.g. ENSBTAG00000021527)
ChrStart
ChrStop
Assembly (e.g. BTau_4.0)
Chromosome
Gene description
Gene Symbol (e.g. ALB)
Strand
Taxon ID
Gene Status
Gene Type (e.g. protein-coding)

+ all transcripts of every gene
StableTranscriptID (ENSBTAT00000028690)
ChrStart
ChrStop

+ all exons of every transcript
ChrStart
ChrStop

I tried serveral approaches but I could not figure out how to receive for example the transcripts and "StableTranscriptId"s.
I already took a look at your example code.
Is it possible to extract these information with JEnsembl? Could you give me an example?
This would be very helpful.

 *
 * initial response:
 * >At the moment I think it can do most of those requests - but not all (e.g. I haven't implemented exons yet!)
 *
 * then i added exon functionality - see rt5044
 *
 * and replied on 20120503
 * >
 have done some more work with JEnsembl and the code now does most of
the things that you were interested in (I hope...:)

if you are still interested in trying it, I have made a new release
version of the code on sourceforge

https://sourceforge.net/projects/jensembl/files/Releases/1.09/

I'd appreciate any feedback if you do try it out.

to get the information on chromosomes, genes, transcripts, exons ....
see code below,

 *
 */

/**
 * Demo that exports gene, transcript and exon details for the species
 * registered under the alias "cow" into three pipe-delimited text files
 * ({@code genes.txt}, {@code transcripts.txt} and {@code exons.txt}) created
 * in the current working directory.
 *
 * <p>Every row is terminated with {@code \r\n}. Each file starts with a header
 * row, and every data row carries the same number of columns as that header:
 * a coordinate that cannot be resolved is written as a single blank.
 */
public class UserSetzermann {

    /** Text written into a column whose value is not available. */
    private static final String MISSING = " ";

    /**
     * When {@code true} each export stops after the first chromosome returned
     * by the species; set to {@code false} to export every chromosome.
     */
    private static final boolean FIRST_CHROMOSOME_ONLY = true;

    /**
     * Fetches the genes, transcripts and exons on the chromosomes of the "cow"
     * species and writes their details out to files.
     *
     * <p>Note: no release version is specified here, so whatever the registry
     * uses by default applies.
     *
     * @param args ignored
     * @throws Exception if the registry, the data source or the file system
     *         reports a failure
     * @throws IllegalStateException if the species or its chromosomes cannot
     *         be obtained
     */
    public static void main(String[] args) throws Exception {

        DBRegistry eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
        DBSpecies cow = eReg.getSpeciesByAlias("cow");
        if (cow == null) {
            throw new IllegalStateException("No species registered for alias 'cow'");
        }
        String taxonID = cow.getTaxonomyID();

        // could specify release version here -
        // or we could get a particular chromosome by name and version
        TreeMap<String, DAChromosome> chrHash = cow.getChromosomes();
        if (chrHash == null || chrHash.isEmpty()) {
            // firstKey() below would otherwise fail with a bare NoSuchElementException
            throw new IllegalStateException("No chromosomes available for species 'cow'");
        }
        String assembly = chrHash.get(chrHash.firstKey()).getAssembly();

        writeGenes(chrHash, assembly, taxonID);
        writeTranscripts(chrHash);
        writeExons(chrHash);

        System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");

    }

    /** Writes one row per gene to {@code genes.txt}. */
    private static void writeGenes(TreeMap<String, DAChromosome> chromosomes,
            String assembly, String taxonID) throws Exception {

        BufferedWriter outFile = new BufferedWriter(new FileWriter("genes.txt"));
        try {
            outFile.append("GeneID|start|stop|strand|assembly|chromosome|description|"
                    + "DisplayName|taxonID|Status|Biotype\r\n");

            for (DAChromosome chr : chromosomes.values()) {
                for (DAGene g : chr.getGenesOnRegion(chr.getBioBegin(), chr.getBioEnd())) {

                    Mapping m = g.getChromosomeMapping(chr);

                    outFile.append(g.getStableID()).append("|");
                    if (hasCoordinates(m)) {
                        outFile.append(field(m.getTargetCoordinates().getStart())).append("|");
                        outFile.append(field(m.getTargetCoordinates().getEnd())).append("|");
                        outFile.append(field(m.getTargetCoordinates().getStrand())).append("|");
                    } else {
                        outFile.append(" | | |");
                    }
                    outFile.append(assembly).append("|");
                    outFile.append(chr.getName()).append("|");
                    outFile.append(g.getDescription()).append("|");
                    outFile.append(g.getDisplayName()).append("|");
                    outFile.append(taxonID).append("|");
                    outFile.append(g.getStatus()).append("|");
                    outFile.append(g.getBiotype()).append("\r\n");
                }
                if (FIRST_CHROMOSOME_ONLY) {
                    break;
                }
            }
        } finally {
            // close (and flush) even when the export fails part-way through
            outFile.close();
        }
    }

    /** Writes one row per transcript of every gene to {@code transcripts.txt}. */
    private static void writeTranscripts(TreeMap<String, DAChromosome> chromosomes)
            throws Exception {

        BufferedWriter outFile = new BufferedWriter(new FileWriter("transcripts.txt"));
        try {
            outFile.append("geneID|transcriptID|start|stop\r\n");

            for (DAChromosome chr : chromosomes.values()) {
                for (DAGene g : chr.getGenesOnRegion(chr.getBioBegin(), chr.getBioEnd())) {
                    for (DATranscript t : g.getTranscripts()) {

                        Mapping m = t.getChromosomeMapping(chr);

                        outFile.append(g.getStableID()).append("|");
                        outFile.append(t.getStableID()).append("|");
                        if (hasCoordinates(m)) {
                            outFile.append(field(m.getTargetCoordinates().getStart())).append("|");
                            outFile.append(field(m.getTargetCoordinates().getEnd()));
                        } else {
                            // keep the start/stop columns so the row matches the header
                            outFile.append(MISSING).append("|").append(MISSING);
                        }
                        outFile.append("\r\n");
                    }
                }
                if (FIRST_CHROMOSOME_ONLY) {
                    break;
                }
            }
        } finally {
            outFile.close();
        }
    }

    /** Writes one row per exon of every transcript to {@code exons.txt}. */
    private static void writeExons(TreeMap<String, DAChromosome> chromosomes)
            throws Exception {

        BufferedWriter outFile = new BufferedWriter(new FileWriter("exons.txt"));
        try {
            outFile.append("geneID|transcriptID|exonID|rank|start|stop|constitutive\r\n");

            for (DAChromosome chr : chromosomes.values()) {
                for (DAGene g : chr.getGenesOnRegion(chr.getBioBegin(), chr.getBioEnd())) {
                    for (DATranscript t : g.getTranscripts()) {
                        for (DAExon e : t.getExons()) {

                            Mapping m = e.getChromosomeMapping(chr);

                            outFile.append(g.getStableID()).append("|");
                            outFile.append(t.getStableID()).append("|");
                            outFile.append(e.getStableID()).append("|");
                            outFile.append(field(e.getRank())).append("|");
                            if (hasCoordinates(m)) {
                                outFile.append(field(m.getTargetCoordinates().getStart())).append("|");
                                outFile.append(field(m.getTargetCoordinates().getEnd())).append("|");
                            } else {
                                // without these placeholders "constitutive" would land in the start column
                                outFile.append(MISSING).append("|").append(MISSING).append("|");
                            }
                            outFile.append(field(e.isConstitutive())).append("\r\n");
                        }
                    }
                }
                if (FIRST_CHROMOSOME_ONLY) {
                    break;
                }
            }
        } finally {
            outFile.close();
        }
    }

    /** Tells whether the mapping exists and carries target coordinates. */
    private static boolean hasCoordinates(Mapping m) {
        return m != null && m.getTargetCoordinates() != null;
    }

    /** Renders a column value, substituting the blank placeholder for {@code null}. */
    private static String field(Object value) {
        return value == null ? MISSING : value.toString();
    }
}