/**
 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * Project hosted at: http://jensembl.sourceforge.net
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
 */
package uk.ac.roslin.ensembl.demo;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import uk.ac.roslin.ensembl.config.DBConnection;
import uk.ac.roslin.ensembl.dao.compara.HomologyDAO;
import uk.ac.roslin.ensembl.dao.database.DBRegistry;
import uk.ac.roslin.ensembl.dao.database.DBSpecies;
import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
import uk.ac.roslin.ensembl.datasourceaware.core.DAExon;
import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
import uk.ac.roslin.ensembl.datasourceaware.core.DATranscript;
import uk.ac.roslin.ensembl.model.Coordinate;
import uk.ac.roslin.ensembl.model.Mapping;
import uk.ac.roslin.ensembl.model.MappingSet;
import uk.ac.roslin.ensembl.model.core.CollectionSpecies;
import uk.ac.roslin.ensembl.model.core.Exon;
import uk.ac.roslin.ensembl.model.core.Species;
import uk.ac.roslin.ensembl.model.core.Translation;

/**
 * Scratch pad for trying out registry, species, gene and transcript look-ups.
 *
 * <p>The live part of {@link #main(String[])} queries an ENSEMBLBACTERIA registry
 * (release 22) and an ENSEMBLDB registry. Earlier experiments, including a
 * raw-versus-buffered file writing comparison that uses the private helpers at the
 * bottom of this class, are kept in disabled (commented-out) form.
 */
public class scratch {

    /** Number of repetitions used by the disabled write comparison in {@code main}. */
    private static final int ITERATIONS = 5;
    /** 1024 squared, used to express a byte count in megabytes. */
    private static final double MEG = (Math.pow(1024, 2));
    /** Number of copies of {@link #RECORD} built by the disabled write comparison. */
    private static final int RECORD_COUNT = 4000000;
    /** The single line of text that is written repeatedly. */
    private static final String RECORD = "Help I am trapped in a fortune cookie factory\n";
    /** Length of {@link #RECORD} in bytes, as encoded by {@code String.getBytes()}. */
    private static final int RECSIZE = RECORD.getBytes().length;

    /**
     * Runs the currently enabled experiments.
     *
     * <p>Many of the results are assigned to locals that are never read afterwards;
     * the calls are kept so that the same look-ups are still performed.
     *
     * @param args command line arguments (not used)
     * @throws Exception if any look-up outside the two guarded sections fails
     */
    public static void main(String[] args) throws Exception {

//        DBRegistry reg = DBRegistry.createRegistryForDataSource(DBConnection.DataSource.ENSEMBLDB);
//        Set<String> ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasBeginning("chimp");
//        ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasContaining("himp");
//        DBSpecies sp = reg.getSpeciesByAlias("human");
//        reg.addSessionAlias(sp, "chimp");
//        ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasBeginning("chim");
//        ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasContaining("himp");
//        Collection<? extends Species> setOfSpeciesByAlias = reg.getSetOfSpeciesByAlias("chimp");
//
//        Set<? extends Species> speciesForAliasBeginning = reg.getSpeciesForAliasBeginning("chimp");
//
//

        // Alias look-ups against the bacterial registry, by prefix ("Escher") and by substring ("scher").
        DBRegistry breg = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DBConnection.DataSource.ENSEMBLBACTERIA, 22);
        Collection<String> s = breg.getEnsemblNamesForAliasBeginning("Escher");
        Collection<String> a = breg.getGCAssemblyAccessionsForAliasBeginning("Escher");
        s = breg.getEnsemblNamesForAliasContaining("scher");
        a = breg.getGCAssemblyAccessionsForAliasContaining("scher");
        Set<? extends Species> speciesForAliasBeginning = breg.getSpeciesForAliasBeginning("Escher");
        Set<CollectionSpecies> oldStyleBacterialSpeciesForAliasBeginning = (Set<CollectionSpecies>) breg.getOldStyleBacterialSpeciesForAliasBeginning("Escher");
        Set<CollectionSpecies> bacterialSpeciesForAliasBeginning = (Set<CollectionSpecies>) breg.getBacterialSpeciesForAliasBeginning("Escher");

        Set<? extends Species> speciesForAliasContaining = breg.getSpeciesForAliasContaining("scher");
        Set<CollectionSpecies> oldStyleBacterialSpeciesForAliasContaining = (Set<CollectionSpecies>) breg.getOldStyleBacterialSpeciesForAliasContaining("scher");
        Set<CollectionSpecies> bacterialSpeciesForAliasContaining = (Set<CollectionSpecies>) breg.getBacterialSpeciesForAliasContaining("scher");

        // Same stable ID fetched for two release versions; a failure for "21" is only reported.
        DBSpecies spb = breg.getSpeciesByAlias("bacillus_pumilus_safr_032");
        DAGene gene = spb.getGeneByStableID("BPUM_0001", "22");
        try {
            gene = spb.getGeneByStableID("BPUM_0001", "21");
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }

        Collection<? extends Species> setOfEverySpeciesByAlias = breg.getSetOfEverySpeciesByAlias("k-12");

        System.out.println("there are " + setOfEverySpeciesByAlias.size() + " species with name 'k-12'.");

        Collection<? extends Species> setOfSpeciesByAlias = breg.getSetOfSpeciesByAlias("k-12");

        // Single-species look-up for the same alias; a failure is only reported.
        try {
            spb = breg.getSpeciesByAlias("k-12");
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }

        // Print the RNA sequence of every exon, then every protein translation, of one human transcript.
        DBRegistry reg = DBRegistry.createRegistryForDataSource(DBConnection.DataSource.ENSEMBLDB);
        DBSpecies sp = reg.getSpeciesByAlias("human");
        DATranscript t = sp.getTranscriptByStableID("ENST00000534020");
        System.out.println("transcript");
        int count = 0;
        for (Exon x : t.getExons()) {
            System.out.print("Exon " + ++count);
            System.out.println(((DAExon) x).getRNASequenceAsString());
        }
        Collection<? extends Translation> translations = t.getTranslations();
        for (Translation translation : translations) {
            System.out.println("translation");
            System.out.println(translation.getProteinSequence());
        }

        // Everything below is disabled; it is the only code that calls writeRaw/writeBuffered.
/*      //DBRegistry eReg = DBRegistry.createRegistryForDataSource(DBConnection.DataSource.ENSEMBLDB);
        //DBSpecies mouse = eReg.getSpeciesByAlias("cow");
        //DAGene gene = mouse.getGeneByStableID("ENSBTAG00000021527");

//        DAGene gene2 = new DAGene();
//        gene2.setStableID("ENSBTAG00000021531");
//        gene2.setDaoFactory(gene.getDaoFactory());
//        Integer homoeologueCount = gene2.getHomoeologueCount();
//        Integer paralogueCount = gene2.getParalogueCount();

        DBRegistry eReg = DBRegistry.createRegistryForDataSourceCurrentRelease(DBConnection.DataSource.ENSEMBLBACTERIA);
        Set<DBSpecies> panComparaSpecies = eReg.getPanComparaSpecies(null);
        String release = ""+eReg.getHighestReleaseVersion();

        //System.out.println("DBVersion "+release +" pan compara species: ["+panComparaSpecies.size() +" in total]:");
        for (DBSpecies sp: panComparaSpecies) {
            System.out.println("\t"+sp.getSpeciesBinomial()+
                    "\t\t"+sp.getComparaName(release));
        }
//        System.out.println("");

        DBSpecies sp = eReg
                .getSpeciesByAlias("bacillus_pumilus_safr_032");

        //from release17 - only 100 odd species are in compara - and this is now the pancompara - not a bacterial compara
        DAGene gene = sp.getGeneByStableID("BPUM_0001");

        DAGene gene2 = new DAGene();
        gene2.setStableID("BPUM_0002");
        gene2.setDaoFactory(gene.getDaoFactory());
        Integer homoeologueCount = gene2.getHomoeologueCount();
        Integer paralogueCount = gene2.getParalogueCount();

        sp = eReg
                .getSpeciesByAlias("mannheimia_haemolytica_serotype_a2_str_ovine");

        gene = sp.getGeneByStableID("COI_1025");

        gene2 = new DAGene();
        gene2.setStableID("COI_1024");
        gene2.setDaoFactory(gene.getDaoFactory());
        homoeologueCount = gene2.getHomoeologueCount();
        paralogueCount = gene2.getParalogueCount();


        DBSpecies human = eReg.getSpeciesByAlias("human");
        DBSpecies chimp = eReg.getSpeciesByAlias("chimpanzee");

        DAChromosome h1_74 = human.getChromosomeByName("1", "76");
        HashMap<DADNASequence, MappingSet> regionsOfConservedSynteny74 = h1_74.getRegionsOfConservedSynteny(new Coordinate(1400000,1600000), chimp);

        assertNotNull(regionsOfConservedSynteny74);

        assertFalse(regionsOfConservedSynteny74.isEmpty());


        DBSpecies finch = eReg.getSpeciesByAlias("zebra finch");
        DBSpecies chicken = eReg.getSpeciesByAlias("chicken");

        DAChromosome cChr3 = chicken.getChromosomeByName("3");
        HomologyDAO dao = cChr3.getComparaFactory().getHomologyDAO();

        HashMap<DADNASequence, MappingSet> syntenies =
                (HashMap<DADNASequence, MappingSet>)
                dao.getRegionsOfConservedSynteny(cChr3, new Coordinate(1, 5000000), finch);


        int size = 0;
        int targetsize = 0;
        for (MappingSet ms: syntenies.values()) {
            size += ms.size();

            for (Mapping m: ms) {
                for (DAHomologyPairRelationship h: ((DAGene)m.getTarget()).getHomologiesWithoutLazyLoad()) {
                    targetsize++;
                }
            }

        }
        System.out.println("target count: "+targetsize);
        System.out.println("homologue count: "+size);



        List<String> records = new ArrayList<String>(RECORD_COUNT);
        size = 0;
        for (int i = 0; i < RECORD_COUNT; i++) {
            records.add(RECORD);
            size += RECSIZE;
        }
        System.out.println(records.size() + " 'records'");
        System.out.println(size / MEG + " MB");

        for (int i = 0; i < ITERATIONS; i++) {
            System.out.println("\nIteration " + i);

            writeRaw(records);
            writeBuffered(records, 8192);
            writeBuffered(records, (int) MEG);
            writeBuffered(records, 4 * (int) MEG);
        }*/
    }

    /**
     * Writes all records to a temporary file through an unbuffered {@link FileWriter}
     * and prints the elapsed time.
     *
     * @param records the strings to write, in order
     * @throws IOException if the temporary file cannot be created or written
     */
    private static void writeRaw(List<String> records) throws IOException {
        File file = File.createTempFile("foo", ".txt");
        try {
            FileWriter writer = new FileWriter(file);
            System.out.print("Writing raw... ");
            // write() closes the writer on success and on failure
            write(records, writer);
        } finally {
            // comment this out if you want to inspect the files afterward
            file.delete();
        }
    }

    /**
     * Writes all records to a temporary file through a {@link BufferedWriter} of the
     * given size and prints the elapsed time.
     *
     * @param records the strings to write, in order
     * @param bufSize buffer size passed to the {@link BufferedWriter} constructor
     * @throws IOException if the temporary file cannot be created or written
     * @throws IllegalArgumentException if {@code bufSize} is not positive
     */
    private static void writeBuffered(List<String> records, int bufSize) throws IOException {
        File file = File.createTempFile("foo", ".txt");
        try {
            FileWriter writer = new FileWriter(file);
            BufferedWriter bufferedWriter;
            try {
                bufferedWriter = new BufferedWriter(writer, bufSize);
            } catch (IllegalArgumentException e) {
                // the wrapper was never created, so release the underlying file handle here
                writer.close();
                throw e;
            }

            System.out.print("Writing buffered (buffer size: " + bufSize + ")... ");
            // write() closes the writer on success and on failure
            write(records, bufferedWriter);
        } finally {
            // comment this out if you want to inspect the files afterward
            file.delete();
        }
    }

    /**
     * Writes every record to the writer, flushes and closes it, then prints the
     * elapsed wall-clock time in seconds.
     *
     * <p>The writer is closed even when writing or flushing fails, so callers can
     * delete the underlying file without an open handle being left behind.
     *
     * @param records the strings to write, in order
     * @param writer the destination; always closed by this method
     * @throws IOException if writing, flushing or closing fails
     */
    private static void write(List<String> records, Writer writer) throws IOException {
        long start = System.currentTimeMillis();
        try {
            for (String record : records) {
                writer.write(record);
            }
            writer.flush();
        } finally {
            writer.close();
        }
        // taken after close(), so the measured span still includes flush and close
        long end = System.currentTimeMillis();
        System.out.println((end - start) / 1000f + " seconds");
    }
}