/**
 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * Project hosted at: http://jensembl.sourceforge.net
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.io.BufferedWriter;
025import java.io.File;
026import java.io.FileWriter;
027import java.io.IOException;
028import java.io.Writer;
029import java.util.ArrayList;
030import java.util.Collection;
031import java.util.HashMap;
032import java.util.List;
033import java.util.Set;
034import static org.junit.Assert.assertFalse;
035import static org.junit.Assert.assertNotNull;
036import uk.ac.roslin.ensembl.config.DBConnection;
037import uk.ac.roslin.ensembl.dao.compara.HomologyDAO;
038import uk.ac.roslin.ensembl.dao.database.DBRegistry;
039import uk.ac.roslin.ensembl.dao.database.DBSpecies;
040import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
041import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
042import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
043import uk.ac.roslin.ensembl.datasourceaware.core.DAExon;
044import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
045import uk.ac.roslin.ensembl.datasourceaware.core.DATranscript;
046import uk.ac.roslin.ensembl.model.Coordinate;
047import uk.ac.roslin.ensembl.model.Mapping;
048import uk.ac.roslin.ensembl.model.MappingSet;
049import uk.ac.roslin.ensembl.model.core.CollectionSpecies;
050import uk.ac.roslin.ensembl.model.core.Exon;
051import uk.ac.roslin.ensembl.model.core.Species;
052import uk.ac.roslin.ensembl.model.core.Translation;
053
054public class scratch {
055
056
057private static final int ITERATIONS = 5;
058private static final double MEG = (Math.pow(1024, 2));
059private static final int RECORD_COUNT = 4000000;
060private static final String RECORD = "Help I am trapped in a fortune cookie factory\n";
061private static final int RECSIZE = RECORD.getBytes().length;
062
063public static void main(String[] args) throws Exception {
064    
065    
066//    DBRegistry reg = DBRegistry.createRegistryForDataSource(DBConnection.DataSource.ENSEMBLDB);
067//    Set<String> ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasBeginning("chimp");
068//    ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasContaining("himp");
069//        DBSpecies sp = reg.getSpeciesByAlias("human");
070//        reg.addSessionAlias(sp, "chimp");
071//        ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasBeginning("chim");
072//        ensemblNameForAliasStartingWith = reg.getEnsemblNamesForAliasContaining("himp");
073//    Collection<? extends Species> setOfSpeciesByAlias = reg.getSetOfSpeciesByAlias("chimp");
074//
075//    Set<? extends Species> speciesForAliasBeginning = reg.getSpeciesForAliasBeginning("chimp");
076//        
077//        
078    
079    DBRegistry breg = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DBConnection.DataSource.ENSEMBLBACTERIA,22);
080    Collection<String> s = breg.getEnsemblNamesForAliasBeginning("Escher");
081    Collection<String> a = breg.getGCAssemblyAccessionsForAliasBeginning("Escher");
082     s = breg.getEnsemblNamesForAliasContaining("scher");
083     a = breg.getGCAssemblyAccessionsForAliasContaining("scher");
084    Set<? extends Species> speciesForAliasBeginning = breg.getSpeciesForAliasBeginning("Escher");
085    Set<CollectionSpecies> oldStyleBacterialSpeciesForAliasBeginning = (Set<CollectionSpecies>) breg.getOldStyleBacterialSpeciesForAliasBeginning("Escher");
086    Set<CollectionSpecies> bacterialSpeciesForAliasBeginning = (Set<CollectionSpecies>) breg.getBacterialSpeciesForAliasBeginning("Escher");
087    
088    
089    Set<? extends Species> speciesForAliasContaining = breg.getSpeciesForAliasContaining("scher");
090    Set<CollectionSpecies> oldStyleBacterialSpeciesForAliasContaining = (Set<CollectionSpecies>) breg.getOldStyleBacterialSpeciesForAliasContaining("scher");
091    Set<CollectionSpecies> bacterialSpeciesForAliasContaining = (Set<CollectionSpecies>) breg.getBacterialSpeciesForAliasContaining("scher");
092    
093    
094    
095    DBSpecies spb =  breg.getSpeciesByAlias("bacillus_pumilus_safr_032");
096    DAGene gene = spb.getGeneByStableID("BPUM_0001", "22");
097    try {
098        gene = spb.getGeneByStableID("BPUM_0001", "21");
099    } catch (Exception e) {
100        System.out.println(e.getMessage());
101    }
102    
103    
104        
105        Collection<? extends Species> setOfEverySpeciesByAlias = breg.getSetOfEverySpeciesByAlias("k-12");
106        
107        System.out.println("there are "+setOfEverySpeciesByAlias.size()+" species with name 'k-12'.");
108        
109        Collection<? extends Species> setOfSpeciesByAlias = breg.getSetOfSpeciesByAlias("k-12");   
110        
111   try {
112         spb = breg.getSpeciesByAlias("k-12"); 
113    } catch (Exception e) {
114        System.out.println(e.getMessage());
115    }
116        
117    
118    DBRegistry reg = DBRegistry.createRegistryForDataSource(DBConnection.DataSource.ENSEMBLDB);
119        DBSpecies sp = reg.getSpeciesByAlias("human");
120        DATranscript t = sp.getTranscriptByStableID("ENST00000534020");
121        System.out.println("transcript");
122        int count = 0;
123        for (Exon x:t.getExons() ) {
124            System.out.print("Exon "+ ++count);
125            System.out.println(((DAExon)x).getRNASequenceAsString());
126        }
127        Collection<? extends Translation> translations = t.getTranslations();
128        for (Translation translation : translations) {
129            System.out.println("translation");
130            System.out.println(translation.getProteinSequence());
131        }
132
133    
134    
135/*       //DBRegistry eReg = DBRegistry.createRegistryForDataSource(DBConnection.DataSource.ENSEMBLDB);
136       //DBSpecies mouse = eReg.getSpeciesByAlias("cow");
137       //DAGene gene = mouse.getGeneByStableID("ENSBTAG00000021527");
138    
139//       DAGene gene2 = new DAGene();
140//       gene2.setStableID("ENSBTAG00000021531");
141//       gene2.setDaoFactory(gene.getDaoFactory());
142//        Integer homoeologueCount = gene2.getHomoeologueCount();
143//        Integer paralogueCount = gene2.getParalogueCount();
144       
145        DBRegistry eReg = DBRegistry.createRegistryForDataSourceCurrentRelease(DBConnection.DataSource.ENSEMBLBACTERIA);
146        Set<DBSpecies> panComparaSpecies = eReg.getPanComparaSpecies(null);
147        String release = ""+eReg.getHighestReleaseVersion();
148        
149        //System.out.println("DBVersion "+release +" pan compara species: ["+panComparaSpecies.size() +" in total]:");
150        for (DBSpecies sp: panComparaSpecies) {
151            System.out.println("\t"+sp.getSpeciesBinomial()+
152                    "\t\t"+sp.getComparaName(release));
153        }
154//        System.out.println("");
155
156        DBSpecies sp =  eReg
157                .getSpeciesByAlias("bacillus_pumilus_safr_032");       
158
159        //from release17 - only 100 odd species are in compara - and this is now the pancompara - not a bacterial compara
160        DAGene gene = sp.getGeneByStableID("BPUM_0001");
161        
162        DAGene gene2 = new DAGene();
163        gene2.setStableID("BPUM_0002");
164        gene2.setDaoFactory(gene.getDaoFactory());
165        Integer homoeologueCount = gene2.getHomoeologueCount();
166        Integer paralogueCount = gene2.getParalogueCount();        
167    
168         sp =  eReg
169                .getSpeciesByAlias("mannheimia_haemolytica_serotype_a2_str_ovine");
170         
171        gene = sp.getGeneByStableID("COI_1025");
172
173        gene2 = new DAGene();
174        gene2.setStableID("COI_1024");
175        gene2.setDaoFactory(gene.getDaoFactory());
176        homoeologueCount = gene2.getHomoeologueCount();
177        paralogueCount = gene2.getParalogueCount();
178       
179    
180        DBSpecies human = eReg.getSpeciesByAlias("human");
181        DBSpecies chimp = eReg.getSpeciesByAlias("chimpanzee");    
182        
183        DAChromosome h1_74 = human.getChromosomeByName("1", "76");
184        HashMap<DADNASequence, MappingSet> regionsOfConservedSynteny74 = h1_74.getRegionsOfConservedSynteny(new Coordinate(1400000,1600000), chimp);
185        
186        assertNotNull(regionsOfConservedSynteny74);
187        
188        assertFalse(regionsOfConservedSynteny74.isEmpty());
189        
190        
191        DBSpecies finch = eReg.getSpeciesByAlias("zebra finch");
192        DBSpecies chicken = eReg.getSpeciesByAlias("chicken");
193
194        DAChromosome cChr3 = chicken.getChromosomeByName("3");
195        HomologyDAO dao = cChr3.getComparaFactory().getHomologyDAO();
196        
197        HashMap<DADNASequence, MappingSet> syntenies =
198                (HashMap<DADNASequence, MappingSet>)
199                    dao.getRegionsOfConservedSynteny(cChr3, new Coordinate(1, 5000000), finch);
200        
201        
202        int size = 0;
203        int targetsize = 0;
204        for (MappingSet ms: syntenies.values()) {
205          size += ms.size();  
206          
207           for (Mapping m: ms) {
208               for (DAHomologyPairRelationship h: ((DAGene)m.getTarget()).getHomologiesWithoutLazyLoad()) {
209                   targetsize++;
210               }
211           }
212
213        }
214        System.out.println("target count: "+targetsize);
215        System.out.println("homologue count: "+size);
216        
217    
218    
219    List<String> records = new ArrayList<String>(RECORD_COUNT);
220     size = 0;
221    for (int i = 0; i < RECORD_COUNT; i++) {
222        records.add(RECORD);
223        size += RECSIZE;
224    }
225    System.out.println(records.size() + " 'records'");
226    System.out.println(size / MEG + " MB");
227
228    for (int i = 0; i < ITERATIONS; i++) {
229        System.out.println("\nIteration " + i);
230
231        writeRaw(records);
232        writeBuffered(records, 8192);
233        writeBuffered(records, (int) MEG);
234        writeBuffered(records, 4 * (int) MEG);
235    }*/
236}
237
238private static void writeRaw(List<String> records) throws IOException {
239    File file = File.createTempFile("foo", ".txt");
240    try {
241        FileWriter writer = new FileWriter(file);
242        System.out.print("Writing raw... ");
243        write(records, writer);
244    } finally {
245        // comment this out if you want to inspect the files afterward
246        file.delete();
247    }
248}
249
250private static void writeBuffered(List<String> records, int bufSize) throws IOException {
251    File file = File.createTempFile("foo", ".txt");
252    try {
253        FileWriter writer = new FileWriter(file);
254        BufferedWriter bufferedWriter = new BufferedWriter(writer, bufSize);
255
256        System.out.print("Writing buffered (buffer size: " + bufSize + ")... ");
257        write(records, bufferedWriter);
258    } finally {
259        // comment this out if you want to inspect the files afterward
260        file.delete();
261    }
262}
263
264private static void write(List<String> records, Writer writer) throws IOException {
265    long start = System.currentTimeMillis();
266    for (String record: records) {
267        writer.write(record);
268    }
269    writer.flush();
270    writer.close();
271    long end = System.currentTimeMillis();
272    System.out.println((end - start) / 1000f + " seconds");
273}
274}
275