001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.util.Collection;
025import java.util.HashSet;
026import java.util.List;
027import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
028import uk.ac.roslin.ensembl.dao.database.DBCollection;
029import uk.ac.roslin.ensembl.dao.database.DBCollectionSpecies;
030import uk.ac.roslin.ensembl.dao.database.DBRegistry;
031import uk.ac.roslin.ensembl.dao.database.DBSpecies;
032import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
033import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
034import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
035import uk.ac.roslin.ensembl.exception.NonUniqueException;
036import uk.ac.roslin.ensembl.model.core.Species;
037
038
039public class SpeciesVersions {
040    
041    public  static void main(String[] args) throws Exception {
042
043       
044        DBRegistry greg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLGENOMES);
045        
046        Collection<? extends Species> setOfEverySpeciesByAlias = greg.getSetOfEverySpeciesByAlias("Zymoseptoria tritici");
047        
048        System.out.println("there are "+setOfEverySpeciesByAlias.size()+" species with name 'Zymoseptoria tritici'.");
049        
050        Collection<? extends Species> setOfSpeciesByAlias = greg.getSetOfSpeciesByAlias("Zymoseptoria tritici");       
051        
052        System.out.println("there are "+setOfSpeciesByAlias.size()+" CURRENT species with name 'Zymoseptoria tritici'.");
053
054        //a Fungus that has a rename in v17
055        
056        DBSpecies f = greg.getSpeciesByAlias("Zymoseptoria tritici");
057        
058        System.out.println("f.getAssemblyAccession(\"21\") "+f.getAssemblyAccession("21"));
059        System.out.println("f.getAssemblyAccessionStem(\"21\") "+f.getAssemblyAccessionStem("21"));
060        System.out.println("f.getAssemblyAccessionStem() "+f.getAssemblyAccessionStem());
061        
062        System.out.println("f.getAssemblyAccession(\"17\") "+f.getAssemblyAccession("17"));
063        System.out.println("f.getAssemblyAccessionStem(\"17\") "+f.getAssemblyAccessionStem("17"));
064        System.out.println("f.getAssemblyAccessionStem() "+f.getAssemblyAccessionStem());
065                
066        System.out.println("f.getAssemblyAccession(\"16\") "+f.getAssemblyAccession("16"));
067        System.out.println("f.getAssemblyAccessionStem(\"16\") "+f.getAssemblyAccessionStem("16"));   
068        
069        System.out.println("f.getAssemblyName(\"21\") "+f.getAssemblyName("21"));
070        System.out.println("f.getAssemblyName(\"17\") "+f.getAssemblyName("17"));
071        System.out.println("f.getAssemblyName(\"16\") "+f.getAssemblyName("16"));
072        
073        System.out.println("f.getComparaName(\"21\") "+f.getComparaName("21"));
074        System.out.println("f.getComparaName(\"17\") "+f.getComparaName("17"));
075        System.out.println("f.getComparaName(\"16\") "+f.getComparaName("16"));// note old name used
076        System.out.println("f.getComparaName(\"15\") "+f.getComparaName("15"));
077        System.out.println("f.getComparaName(\"14\") "+f.getComparaName("14"));
078        
079        //bacteria - in collections - note major change in v17 to compara names 
080        //and use of assembly accession stem to assert species identity
081        
082        DBRegistry reg = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);
083//        
084//        setOfEverySpeciesByAlias = reg.getSetOfEverySpeciesByAlias("Wolbachia sp. wRi");
085//        System.out.println("there are "+setOfEverySpeciesByAlias.size()+" species with name 'Wolbachia sp. wRi'.");
086//        setOfSpeciesByAlias = reg.getSetOfSpeciesByAlias("Wolbachia sp. wRi");
087//        System.out.println("there are "+setOfSpeciesByAlias.size()+" CURRENT species with name 'Wolbachia sp. wRi'.");        
088//        setOfEverySpeciesByAlias = reg.getSetOfEverySpeciesByAlias("Escherichia coli ATCC 33849");
089//        System.out.println("there are "+setOfEverySpeciesByAlias.size()+" species with name 'Escherichia coli ATCC 33849'.");
090//        setOfSpeciesByAlias = reg.getSetOfSpeciesByAlias("Escherichia coli ATCC 33849");
091//        System.out.println("there are "+setOfSpeciesByAlias.size()+" CURRENT species with name 'Escherichia coli ATCC 33849'.");        
092//        setOfEverySpeciesByAlias = reg.getSetOfEverySpeciesByAlias("Shigella dysenteriae Sd197");
093//        System.out.println("there are "+setOfEverySpeciesByAlias.size()+" species with name 'Shigella dysenteriae Sd197'.");
094//        setOfSpeciesByAlias = reg.getSetOfSpeciesByAlias("Shigella dysenteriae Sd197");
095//        System.out.println("there are "+setOfSpeciesByAlias.size()+" CURRENT species with name 'Shigella dysenteriae Sd197'.");        
096//        setOfEverySpeciesByAlias = reg.getSetOfEverySpeciesByAlias("Escherichia coli str. K-12 substr. MG1655");
097//        System.out.println("there are "+setOfEverySpeciesByAlias.size()+" species with name 'Escherichia coli str. K-12 substr. MG1655'.");
098//        setOfSpeciesByAlias = reg.getSetOfSpeciesByAlias("Escherichia coli str. K-12 substr. MG1655");
099//        System.out.println("there are "+setOfSpeciesByAlias.size()+" CURRENT species with name 'Escherichia coli str. K-12 substr. MG1655'.");        
100//        
101//        
102//        DBSpecies s = reg.getSpeciesByAlias("Wolbachia sp. wRi");
103//        try {
104//            DBSpecies s2 = reg.getSpeciesByAlias("Escherichia coli ATCC 33849");
105//        } catch (NonUniqueException e) {
106//            System.out.println("Multiple species called Escherichia coli ATCC 33849:");
107//           for (DBSpecies sp : (HashSet<DBSpecies>) e.getAllHits()) {
108//               System.out.println(sp.getSpeciesBinomial()+" "+sp.getSchemaVersions().toString());
109//           }
110//        }
111//        DBSpecies s3 = reg.getSpeciesByAlias("Shigella dysenteriae Sd197");
112//        DBSpecies s4 = reg.getSpeciesByAlias("Escherichia coli str. K-12 substr. MG1655");
113//        
114//        System.out.println("s4.getAssemblyAccession(\"21\") "+s4.getAssemblyAccession("21"));
115//        System.out.println("s4.getAssemblyAccession(\"19\") "+s4.getAssemblyAccession("19"));
116//        System.out.println("s4.getAssemblyAccession(\"18\") "+s4.getAssemblyAccession("18"));
117//        System.out.println("s4.getAssemblyAccession(\"17\") "+s4.getAssemblyAccession("17"));
118//        System.out.println("s4.getAssemblyAccession(\"16\") "+s4.getAssemblyAccession("16"));
119//        System.out.println("s4.getAssemblyAccession(\"15\") "+s4.getAssemblyAccession("15"));
120//        System.out.println("s4.getAssemblyAccession(\"14\") "+s4.getAssemblyAccession("14"));
121//        System.out.println("s4.getAssemblyAccession(\"13\") "+s4.getAssemblyAccession("13"));
122//        
123//        System.out.println("s4.getAssemblyName(\"21\") "+s4.getAssemblyName("21"));
124//        System.out.println("s4.getAssemblyName(\"19\") "+s4.getAssemblyName("19"));
125//        System.out.println("s4.getAssemblyName(\"18\") "+s4.getAssemblyName("18"));
126//        System.out.println("s4.getAssemblyName(\"17\") "+s4.getAssemblyName("17"));
127//        System.out.println("s4.getAssemblyName(\"16\") "+s4.getAssemblyName("16"));
128//        System.out.println("s4.getAssemblyName(\"15\") "+s4.getAssemblyName("15"));
129//        System.out.println("s4.getAssemblyName(\"14\") "+s4.getAssemblyName("14"));
130//        System.out.println("s4.getAssemblyName(\"13\") "+s4.getAssemblyName("13"));
131//        
132//        System.out.println("s4.getComparaName(\"21\") "+s4.getComparaName("21"));
133//        System.out.println("s4.getComparaName(\"19\") "+s4.getComparaName("19"));
134//        System.out.println("s4.getComparaName(\"18\") "+s4.getComparaName("18"));
135//        System.out.println("s4.getComparaName(\"17\") "+s4.getComparaName("17"));
136//        System.out.println("s4.getComparaName(\"16\") "+s4.getComparaName("16"));
137//        System.out.println("s4.getComparaName(\"15\") "+s4.getComparaName("15"));
138//        System.out.println("s4.getComparaName(\"14\") "+s4.getComparaName("14"));
139//        System.out.println("s4.getComparaName(\"13\") "+s4.getComparaName("13"));
140
141       DBRegistry reg2 = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
142       
143       //vertebrate species - with a rename
144       
145       DBSpecies s5 = reg2.getSpeciesByAlias("Orangutan");
146                
147        System.out.println("s5.getAssemblyAccession(null) "+s5.getAssemblyAccession(null));
148        System.out.println("s5.getAssemblyAccessionStem() "+s5.getAssemblyAccessionStem());
149        System.out.println("s5.getAssemblyAccession(\"74\") "+s5.getAssemblyAccession("74"));
150        System.out.println("s5.getAssemblyAccession(\"72\") "+s5.getAssemblyAccession("72"));
151        System.out.println("s5.getAssemblyAccessionStem(\"72\") "+s5.getAssemblyAccessionStem("72"));
152        System.out.println("s5.getAssemblyAccession(\"71\") "+s5.getAssemblyAccession("71"));
153        System.out.println("s5.getAssemblyAccessionStem(\"71\") "+s5.getAssemblyAccessionStem("71"));
154        System.out.println("s5.getAssemblyAccession(\"70\") "+s5.getAssemblyAccession("70"));
155        System.out.println("s5.getAssemblyAccessionStem(\"70\") "+s5.getAssemblyAccessionStem("70"));
156        System.out.println("s5.getAssemblyAccessionStem() "+s5.getAssemblyAccessionStem());
157        System.out.println("s5.getAssemblyAccession(\"62\") "+s5.getAssemblyAccession("62"));
158        System.out.println("s5.getAssemblyAccessionStem(\"62\") "+s5.getAssemblyAccessionStem("62"));
159        System.out.println("s5.getAssemblyAccession(\"61\") "+s5.getAssemblyAccession("61"));
160        System.out.println("s5.getAssemblyAccessionStem(\"61\") "+s5.getAssemblyAccessionStem("61"));
161        System.out.println("s5.getAssemblyName(null) "+s5.getAssemblyName(null));
162        System.out.println("s5.getAssemblyName(\"74\") "+s5.getAssemblyName("74"));
163        System.out.println("s5.getAssemblyName(\"72\") "+s5.getAssemblyName("72"));
164        System.out.println("s5.getAssemblyName(\"71\") "+s5.getAssemblyName("71"));
165        System.out.println("s5.getAssemblyName(\"70\") "+s5.getAssemblyName("70"));
166        System.out.println("s5.getAssemblyName(\"62\") "+s5.getAssemblyName("62"));
167        System.out.println("s5.getAssemblyName(\"61\") "+s5.getAssemblyName("61"));
168        System.out.println("s5.getComparaName(\"71\") "+s5.getComparaName("71"));
169        System.out.println("s5.getComparaName(\"70\") "+s5.getComparaName("70"));
170        System.out.println("s5.getComparaName(\"62\") "+s5.getComparaName("62"));
171        System.out.println("s5.getComparaName(\"61\") "+s5.getComparaName("61"));
172        
173        //back to bacteria
174        
175//        DAChromosome chr = s4.getChromosomeByName("Chromosome", "16");
176////        HashMap<DADNASequence, MappingSet> regionsOfConservedSynteny 
177////                = chr.getRegionsOfConservedSynteny(new Coordinate(1,20000), s3);
178//        
179//        
180//        DAGene bsdnaA16 = s3.getGenesForExactName("dnaA", "16").get(0);
181//        DAGene bsdnaA17 = s3.getGenesForExactName("dnaA", "17").get(0);
182//        DAGene bsdnaA18 = s3.getGenesForExactName("dnaA", "18").get(0);
183//        DAGene bsdnaA19 = s3.getGenesForExactName("dnaA", "19").get(0);
184//        DAGene bsdnaA21 = s3.getGenesForExactName("dnaA", "21").get(0);
185//        List<DAHomologyPairRelationship> homologies16 = bsdnaA16.getHomologies();
186//        
187//        System.out.println("v16...");
188//        for (DAHomologyPairRelationship hpr: homologies16) {
189//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
190//            
191//        }
192//        
193//        List<DAHomologyPairRelationship> homologies17 = bsdnaA17.getHomologies();
194//        
195//        System.out.println("v17...");
196//        for (DAHomologyPairRelationship hpr: homologies17) {
197//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
198//            
199//        }
200//        List<DAHomologyPairRelationship> homologies18 = bsdnaA18.getHomologies();
201//        
202//        System.out.println("v18...");
203//        for (DAHomologyPairRelationship hpr: homologies18) {
204//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
205//            
206//        }
207//        List<DAHomologyPairRelationship> homologies19 = bsdnaA19.getHomologies();
208//        System.out.println("v19...");
209//        for (DAHomologyPairRelationship hpr: homologies19) {
210//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
211//            
212//        }
213//        List<DAHomologyPairRelationship> homologies21 = bsdnaA21.getHomologies();
214//        System.out.println("v21...");
215//        for (DAHomologyPairRelationship hpr: homologies21) {
216//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
217//            
218//        }
219//        
220//        List<DAHomologyPairRelationship> homologies16ek = bsdnaA16.getHomologies(s4);
221//                System.out.println("v16ek...");
222//        for (DAHomologyPairRelationship hpr: homologies16ek) {
223//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
224//        }
225//        
226//        
227//        List<DAHomologyPairRelationship> homologies17ek = bsdnaA17.getHomologies(s4);
228//                System.out.println("v17ek...");
229//        for (DAHomologyPairRelationship hpr: homologies17ek) {
230//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
231//        }
232//        
233//        List<DAHomologyPairRelationship> homologies18ek = bsdnaA18.getHomologies(s4);
234//                System.out.println("v18ek...");
235//        for (DAHomologyPairRelationship hpr: homologies18ek) {
236//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
237//        }
238//        
239//        List<DAHomologyPairRelationship> homologies19ek = bsdnaA19.getHomologies(s4);
240//                System.out.println("v19ek...");
241//        for (DAHomologyPairRelationship hpr: homologies19ek) {
242//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
243//        }
244//        List<DAHomologyPairRelationship> homologies21ek = bsdnaA21.getHomologies(s4);
245//                System.out.println("v21ek...");
246//        for (DAHomologyPairRelationship hpr: homologies21ek) {
247//            System.out.println(hpr.getTargetProperties().getSpeciesName() + "\t"+hpr.getTargetProperties().getGeneID());
248//        }
249//        
250        System.out.println("");
251        
252//                DBSpecies hs = reg.getSpeciesByAlias("vitis_vinifera");
253//        
254//                DAChromosome chr15 = hs.getChromosomeByName("1", "15");
255//                DAChromosome chr17 = hs.getChromosomeByName("1", "17");
256//                DAChromosome chr21 = hs.getChromosomeByName("1", "21");
257//                System.out.println("chr15 version "+chr15.getDBVersion());
258//                System.out.println("schema schema "+chr15.getSchemaVersion());
259//                System.out.println("chr17 version "+chr17.getDBVersion());
260//                System.out.println("chr17 schema "+chr17.getSchemaVersion());
261//                System.out.println("chr21 version "+chr21.getDBVersion());
262//                System.out.println("chr21 schema "+chr21.getSchemaVersion());
263//                System.out.println("reg most recent schema  "+reg.getMostRecentEnsemblVersion());
264//                System.out.println("reg highest release "+reg.getHighestReleaseVersion());
265        
266        
267        
268                for(DBSpecies sp: reg.getSpecies()) {
269                     System.out.println(sp.getSpeciesBinomial()
270                             +"\n\t'highest'="+sp.getHighestDBRelease()
271                             +" "+sp.getAssemblyName(""+sp.getHighestDBRelease())
272                             +" "+sp.getAssemblyAccession(""+sp.getHighestDBRelease())
273                             +" "+sp.getAssemblyAccessionStem(""+sp.getHighestDBRelease())
274        
275                             +"\n\t'null'"
276                             +" "+sp.getAssemblyName(null)
277                             +" "+sp.getAssemblyAccession(null)
278                             +" "+sp.getAssemblyAccessionStem(null)
279                             
280                             +"\n\t'highest-1'="+(sp.getHighestDBRelease()-1)
281                             +" "+sp.getAssemblyName(""+(sp.getHighestDBRelease()-1))
282                             +" "+sp.getAssemblyAccession(""+(sp.getHighestDBRelease()-1))
283                             +" "+sp.getAssemblyAccessionStem(""+(sp.getHighestDBRelease()-1))
284        
285                             );
286                }
287        
288//                DBSpecies K12 = reg.getSpeciesByAlias("Escherichia coli K12");
289//                DBSpecies ZP = reg.getSpeciesByAlias("zunongwangia_profunda_sm_a87");
290//                DBCollectionSpecies mg = (DBCollectionSpecies) reg.getSpeciesByAlias("escherichia_coli_str_k_12_substr_mg1655");
291               Collection<DBCollection> collectionRegistries = reg.getCollections();
292        
293                int x = 0;
294        
295                for (DBCollection c : collectionRegistries) {
296                   if (c.getCollectionName().startsWith("bacteria_")) {
297        
298                    System.out.println(c.getCollectionName()+" v"+c.getDBVersion()+" "+c.getSpecies().size() + " species");
299                    x += c.getSpecies().size() ;
300        //                for (DBCollectionSpecies sp: c.getSpecies()) {
301        //                    System.out.println(sp.getSpeciesBinomial());
302        //
303        //
304        //               }
305                    }
306                }
307        
308                System.out.println("toatl number of species in 'bacteria_%' collections = "+x);
309        
310                System.out.println("\ngetGCAccessionedCollectionSpecies() count "+ reg.getGCAccessionedCollectionSpecies().size());
311                System.out.println("getNamedCollectionSpecies() count "+ reg.getNamedCollectionSpecies().size());
312
313        
314                System.out.println("\nGCAccessionedCollectionSpecies\n-----------------------------");
315               for (DBCollectionSpecies csp: reg.getGCAccessionedCollectionSpecies() ) {
316        //           if (dbstylebugnames.contains(csp.getDatabaseStyleName())) {
317        //               System.out.println("duplicate: " +csp.getDatabaseStyleName());
318        //           } else {
319        //               dbstylebugnames.add(csp.getDatabaseStyleName());
320        //           }
321        
322                   if(csp.getAssemblyAccessionStem()==null || csp.getAssemblyAccessionStem().isEmpty()) {
323                       System.out.println("");
324                   }
325        
326                   System.out.println(csp.getDatabaseStyleName()+"\t"+csp.getAssemblyAccessionStem());
327        
328               }
329               System.out.println("\n\nNamedCollectionSpecies\n-----------------------------");
330               for (DBCollectionSpecies csp: reg.getNamedCollectionSpecies() ) {
331        //           if (dbstylebugnames.contains(csp.getDatabaseStyleName())) {
332        //               System.out.println("duplicate: " +csp.getDatabaseStyleName());
333        //           } else {
334        //               dbstylebugnames.add(csp.getDatabaseStyleName());
335        //           }
336        
337                   System.out.println(csp.getDatabaseStyleName()+"\t"+csp.getAssemblyAccessionStem());
338               }
339        
340               System.out.println("\nCOLLECTIONS\n------------------");
341               
342                for (DBCollection c : collectionRegistries) {
343                    System.out.println("COLLECTION: "+c.getCollectionName()+" VERSION: "+c.getDBVersion());
344                    System.out.println("-----------------------------------");
345                    for (DBCollectionSpecies sp: c.getSpecies()) {
346                        System.out.println("version:"+c.getDBVersion()+"\tID"+sp.getDBSpeciesID(c.getDBVersion())
347                                +"\tDatabaseStyleName:"+sp.getDatabaseStyleName()
348                                +"\tURLName:"+sp.getUrlName()
349                                +"\tAssemblyName (default):"+sp.getAssemblyName(null)
350                                +"\tAssemblyAccession (default):"+sp.getAssemblyAccession(null)
351                                +"\tAssemblyAccessionStem (default):"+sp.getAssemblyAccessionStem());
352        
353                        int y = Integer.parseInt(c.getDBVersion()) -1 ;
354        
355                        if (!sp.getDBVersions().contains(""+y)) {
356                            System.out.println("version:"+y+" NOT PRESENT");
357                        } else {
358                        
359                        System.out.println("version:"+y+"\tID"+sp.getDBSpeciesID(""+ y )
360        
361                                +"\tDatabaseStyleName "+sp.getDatabaseStyleName()
362                                +"\tURLName "+sp.getUrlName()
363                                +"\tAssemblyName ("+y+") "+sp.getAssemblyName(""+y)
364                                +"\tAssemblyAccession ("+y+")" +sp.getAssemblyAccession(""+y)
365                                +"\tAssemblyAccessionStem ("+y+")) "+sp.getAssemblyAccessionStem(""+y));
366                        }
367                    }
368                    System.out.println("");
369        //            if( c.getSpecies().contains(mg)) {
370        //                System.out.println(c.getCollectionName()+" "+c.toString());
371        //            }
372                }
373        
374               
375        
376                
377      
378        
379        
380    }    
381}
382
383