001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.util.HashMap;
025import java.util.List;
026import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
027import uk.ac.roslin.ensembl.dao.database.DBRegistry;
028import uk.ac.roslin.ensembl.dao.database.DBSpecies;
029import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
030import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
031import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
032import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
033import uk.ac.roslin.ensembl.model.Coordinate;
034import uk.ac.roslin.ensembl.model.Mapping;
035import uk.ac.roslin.ensembl.model.MappingSet;
036
037/**
038 *
039 * @author tpaterso
040 */
041public class BacterialSyntenies {
042
043    //demonstrating ability to find regions of conserved synteny between 
044    //one species and another
045    
046    /*
047    to find bacteria that have chromosome coords  but not used in pan homology....
048    
049       select version from bacteria_12_collection_core_23_76_1.coord_system  where name='chromosome' 
050       and version not in ( select assembly from ensembl_compara_pan_homology_23_76.genome_db)  ;
051            e.g.
052            GCA_000007465.2
053            GCA_000007545.1
054            GCA_000009005.1
055            GCA_000009165.1
056            GCA_000009525.1
057            GCA_000009925.1
058            GCA_000010585.1   
059     */
060    
061    /*
062    from release 24 'whats new'
063    
064    Note: Those accessing MySQL databases directly should note that the reference 
065    bacterial genomes used for the pan-taxonomic compara are now in a separate 
066    collection, bacteria_0. 
067    */
068    
069    public static void main(String[] args) throws Exception {
070
071
072        DBRegistry eReg = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);     
073        //DBRegistry eReg = DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA, 23);      
074        HashMap<DADNASequence, MappingSet> syntenies;
075   
076        DBSpecies K12 = eReg.getSpeciesByAlias("escherichia_coli_str_k_12_substr_mg1655");
077//        DBSpecies out1 = eReg.getSpeciesByAlias("enterococcus_faecalis_v583_ente_faec_v583_v1");
078//        DBSpecies out2 = eReg.getSpeciesByAlias("Yersinia pestis Angola");
079//        DBSpecies in = eReg.getSpeciesByAlias("Shigella dysenteriae Sd197");
080        
081        DBSpecies out1 = eReg.getCSpeciesByGCAccessionStem("GCA_000007545");
082        //DBSpecies out2 = eReg.getCSpeciesByGCAccessionStem("GCA_000007545");
083        DBSpecies in = eReg.getCSpeciesByGCAccessionStem("GCA_000012005");
084
085        System.out.println(K12.getCommonName());
086        for (String s :K12.getDBVersions()) {
087            System.out.println("\t"+K12.getAssemblyAccessionStem());
088            System.out.println("\tK12 v"+s+" is in pan compara: "+K12.isInPanCompara(s));
089        }
090
091        System.out.println(out1.getCommonName());
092        for (String s :out1.getDBVersions()) {
093            System.out.println("\tGCA_000007545 v"+s+" is in pan compara: "+out1.isInPanCompara(s));
094        }
095//        System.out.println(out2.getCommonName());
096//        for (String s :out2.getDBVersions()) {
097//            System.out.println("\tGCA_000007545 v"+s+" is in pan compara: "+out2.isInPanCompara(s));
098//        }
099        System.out.println(in.getCommonName());
100        for (String s :in.getDBVersions()) {
101            System.out.println("\tGCA_000006625 v"+s+" is in pan compara: "+in.isInPanCompara(s));
102        }
103        
104        
105
106        DAChromosome kChr = K12.getChromosomeByName("Chromosome");
107        
108
109        
110        syntenies = kChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), out1);
111        System.out.println(kChr.getSpecies().getCommonName()+" version"+ kChr.getDBVersion()
112                +" has "+syntenies.size()+" syntenic regions with "+out1.getCommonName());
113        
114//        syntenies = kChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), out2);
115//        System.out.println(kChr.getSpecies().getCommonName()+" version"+ kChr.getDBVersion()
116//                +" has "+syntenies.size()+" syntenic regions with "+out2.getCommonName());
117        
118        syntenies = kChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), in);
119        System.out.println(kChr.getSpecies().getCommonName()+" version"+ kChr.getDBVersion()
120                +" has "+syntenies.size()+" syntenic regions with "+in.getCommonName());
121        
122        
123        
124        
125        DAChromosome jChr = out1.getChromosomeByName("Chromosome");
126        
127        syntenies = jChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), K12);
128        System.out.println(jChr.getSpecies().getCommonName()+" version"+ jChr.getDBVersion()
129                +" has "+syntenies.size()+" syntenic regions with "+K12.getCommonName());            
130          
131        
132//        syntenies = jChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), out2);
133//        System.out.println(jChr.getSpecies().getCommonName()+" version"+ jChr.getDBVersion()
134//                +" has "+syntenies.size()+" syntenic regions with "+out2.getCommonName());            
135        
136        syntenies = jChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), in);
137        System.out.println(jChr.getSpecies().getCommonName()+" version"+ jChr.getDBVersion()
138                +" has "+syntenies.size()+" syntenic regions with "+in.getCommonName());            
139        
140        //DAChromosome pChr = out2.getChromosomeByName("Chromosome");
141        
142//        syntenies = pChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), K12);
143//        System.out.println(pChr.getSpecies().getCommonName()+" version"+ pChr.getDBVersion()
144//                +" has "+syntenies.size()+" syntenic regions with "+K12.getCommonName());         
145//           
146//        syntenies = pChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), out1);
147//        System.out.println(pChr.getSpecies().getCommonName()+" version"+ pChr.getDBVersion()
148//                +" has "+syntenies.size()+" syntenic regions with "+out1.getCommonName());  
149//        
150//        syntenies = pChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), in);
151//        System.out.println(pChr.getSpecies().getCommonName()+" version"+ pChr.getDBVersion()
152//                +" has "+syntenies.size()+" syntenic regions with "+in.getCommonName());         
153//        
154        
155        DAChromosome dChr = in.getChromosomeByName("Chromosome");
156        
157        syntenies = dChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), K12);
158        System.out.println(dChr.getSpecies().getCommonName()+" version"+ dChr.getDBVersion()
159                +" has "+syntenies.size()+" syntenic regions with "+K12.getCommonName());         
160         
161        
162        syntenies = dChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), out1);
163        System.out.println(dChr.getSpecies().getCommonName()+" version"+ dChr.getDBVersion()
164                +" has "+syntenies.size()+" syntenic regions with "+out1.getCommonName());  
165        
166//        syntenies = dChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), out2);
167//        System.out.println(dChr.getSpecies().getCommonName()+" version"+ dChr.getDBVersion()
168//                +" has "+syntenies.size()+" syntenic regions with "+out2.getCommonName());         
169//        
170        
171//        HashMap<DADNASequence, MappingSet> syntenies =
172//                (HashMap<DADNASequence, MappingSet>)
173//                    dao.getRegionsOfConservedSynteny(kChr, new Coordinate(1, 10000), dec);
174
175        System.out.println("_________________\nK12 syntenic region\n");
176        
177        syntenies = dChr.getRegionsOfConservedSynteny(new Coordinate(1, 10000), K12);
178
179        for ( DADNASequence dna: syntenies.keySet()) {
180
181
182            System.out.println(dna.getCoordSystem().getType().toString()
183                        +" "
184                        +dna.getName()+ " "+syntenies.get(dna).getExtent().toString());
185
186            for (Mapping mp : syntenies.get(dna)) {
187                System.out.println("\t"+((DAGene) mp.getTarget()).getStableID()
188                        +" : "+mp.getSourceCoordinates().toString());
189            }
190
191        }
192
193        List<DAGene> genes = dChr.getGenesOnRegion(1, 10000);
194
195        for (DAGene g : genes) {
196
197            System.out.println(g.getSpecies().getCommonName()+" Gene "+g.getStableID()
198                    +" "+K12.getCommonName()+ " homologues:");
199
200
201
202            for (DAHomologyPairRelationship hpr : g.getHomologies(K12)) {
203
204                DAGene target = hpr.getTarget();
205                MappingSet mappings = target.getAnnotationLevelMappings();
206
207                for (Mapping m : mappings) {
208
209                System.out.println("\t"+hpr.getType().toString()+" : "
210                        +target.getStableID()+" ["+m.getTargetType().toString()
211                        +" "+((DADNASequence) m.getTarget()).getName()
212                        +":"+m.getTargetCoordinates().toString()+"]");
213                }
214            }
215        }
216
217
218        System.out.println("\n\n*************************\nCOMPLETED FUNCTIONAL TEST\n*************************\n");
219
220    }
221
222}