/**
 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * Project hosted at: http://jensembl.sourceforge.net
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
 */
package uk.ac.roslin.ensembl.demo;

import java.util.HashMap;
import java.util.List;
import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
import uk.ac.roslin.ensembl.dao.database.DBRegistry;
import uk.ac.roslin.ensembl.dao.database.DBSpecies;
import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship;
import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
import uk.ac.roslin.ensembl.model.Coordinate;
import uk.ac.roslin.ensembl.model.Mapping;
import uk.ac.roslin.ensembl.model.MappingSet;

/**
 * Demonstrates the ability to find regions of conserved synteny between
 * one species and another, using three bacterial genomes from the
 * Ensembl Bacteria data source.
 *
 * @author tpaterso
 */
public class BacterialSyntenies {

    /*
     To find bacteria that have chromosome coords but are not used in pan homology:

       select version from bacteria_12_collection_core_23_76_1.coord_system where name='chromosome'
       and version not in ( select assembly from ensembl_compara_pan_homology_23_76.genome_db) ;

     e.g.
       GCA_000007465.2
       GCA_000007545.1
       GCA_000009005.1
       GCA_000009165.1
       GCA_000009525.1
       GCA_000009925.1
       GCA_000010585.1
     */

    /*
     From release 24 'what's new':

     Note: Those accessing MySQL databases directly should note that the reference
     bacterial genomes used for the pan-taxonomic compara are now in a separate
     collection, bacteria_0.
     */

    /** Registry alias of the reference species whose chromosome is queried first. */
    private static final String K12_ALIAS = "escherichia_coli_str_k_12_substr_mg1655";

    /**
     * Assembly accession stem of the second species. GCA_000007545.1 appears in the
     * list above of assemblies not used in pan homology; the run prints the actual status.
     */
    private static final String OUT_ACCESSION_STEM = "GCA_000007545";

    /**
     * Assembly accession stem of the third species. The same constant drives both the
     * registry lookup and the printed label, so the two cannot drift apart.
     */
    private static final String IN_ACCESSION_STEM = "GCA_000012005";

    /** Name under which each species' chromosome is requested. */
    private static final String CHROMOSOME_NAME = "Chromosome";

    /** First base of the region examined on every chromosome. */
    private static final int REGION_START = 1;

    /** Last base of the region examined on every chromosome. */
    private static final int REGION_END = 10000;

    /**
     * Runs the demonstration: reports the pan compara status of each species, counts
     * the syntenic regions for each ordered pair of species, then lists the syntenic
     * regions and gene homologues between the third species and K12.
     *
     * @param args ignored
     * @throws Exception any failure raised by the registry or data-access calls is
     *                   propagated unchanged
     */
    public static void main(String[] args) throws Exception {

        // To pin a release instead of using the current one:
        //   DBRegistry.createRegistryForDataSourceAtReleaseVersion(DataSource.ENSEMBLBACTERIA, 23)
        DBRegistry eReg = DBRegistry.createRegistryForDataSourceCurrentRelease(DataSource.ENSEMBLBACTERIA);

        // Species can alternatively be resolved by alias, e.g.
        // "enterococcus_faecalis_v583_ente_faec_v583_v1", "Yersinia pestis Angola",
        // "Shigella dysenteriae Sd197".
        DBSpecies k12 = eReg.getSpeciesByAlias(K12_ALIAS);
        DBSpecies out1 = eReg.getCSpeciesByGCAccessionStem(OUT_ACCESSION_STEM);
        DBSpecies in = eReg.getCSpeciesByGCAccessionStem(IN_ACCESSION_STEM);

        // K12 additionally reports its assembly accession stem for every database version.
        System.out.println(k12.getCommonName());
        for (String version : k12.getDBVersions()) {
            System.out.println("\t" + k12.getAssemblyAccessionStem());
            System.out.println("\tK12 v" + version + " is in pan compara: " + k12.isInPanCompara(version));
        }
        printPanComparaStatus(out1, OUT_ACCESSION_STEM);
        printPanComparaStatus(in, IN_ACCESSION_STEM);

        DAChromosome kChr = k12.getChromosomeByName(CHROMOSOME_NAME);
        reportSyntenyCount(kChr, out1);
        reportSyntenyCount(kChr, in);

        DAChromosome jChr = out1.getChromosomeByName(CHROMOSOME_NAME);
        reportSyntenyCount(jChr, k12);
        reportSyntenyCount(jChr, in);

        DAChromosome dChr = in.getChromosomeByName(CHROMOSOME_NAME);
        // Keep this result: it is listed in detail below, so no second query is needed.
        HashMap<DADNASequence, MappingSet> synteniesWithK12 = reportSyntenyCount(dChr, k12);
        reportSyntenyCount(dChr, out1);

        System.out.println("_________________\nK12 syntenic region\n");
        printSyntenyDetails(synteniesWithK12);

        printHomologues(dChr, k12);

        System.out.println("\n\n*************************\nCOMPLETED FUNCTIONAL TEST\n*************************\n");
    }

    /**
     * Prints the species' common name followed by one line per database version
     * stating whether that version is in pan compara.
     *
     * @param species the species to report on
     * @param label   text identifying the species on each version line
     * @throws Exception propagated from the species accessors
     */
    private static void printPanComparaStatus(DBSpecies species, String label) throws Exception {
        System.out.println(species.getCommonName());
        for (String version : species.getDBVersions()) {
            System.out.println("\t" + label + " v" + version + " is in pan compara: "
                    + species.isInPanCompara(version));
        }
    }

    /**
     * Queries the regions of conserved synteny between the fixed region of
     * {@code chromosome} and {@code target}, and prints how many were found.
     *
     * @param chromosome the chromosome whose region is queried
     * @param target     the species to compare against
     * @return the synteny map returned by the query, for optional further reporting
     * @throws Exception propagated from the synteny query
     */
    private static HashMap<DADNASequence, MappingSet> reportSyntenyCount(
            DAChromosome chromosome, DBSpecies target) throws Exception {
        HashMap<DADNASequence, MappingSet> syntenies =
                chromosome.getRegionsOfConservedSynteny(new Coordinate(REGION_START, REGION_END), target);
        System.out.println(chromosome.getSpecies().getCommonName() + " version" + chromosome.getDBVersion()
                + " has " + syntenies.size() + " syntenic regions with " + target.getCommonName());
        return syntenies;
    }

    /**
     * Prints each syntenic sequence (coordinate system type, name and extent)
     * followed by the stable ID and source coordinates of every mapping on it.
     *
     * @param syntenies synteny map as returned by {@code getRegionsOfConservedSynteny}
     * @throws Exception propagated from the model accessors
     */
    private static void printSyntenyDetails(HashMap<DADNASequence, MappingSet> syntenies) throws Exception {
        for (DADNASequence dna : syntenies.keySet()) {
            MappingSet mappings = syntenies.get(dna);

            System.out.println(dna.getCoordSystem().getType().toString()
                    + " "
                    + dna.getName() + " " + mappings.getExtent().toString());

            for (Mapping mp : mappings) {
                // The mapping targets are cast to genes, exactly as the data is consumed here.
                System.out.println("\t" + ((DAGene) mp.getTarget()).getStableID()
                        + " : " + mp.getSourceCoordinates().toString());
            }
        }
    }

    /**
     * For every gene on the fixed region of {@code chromosome}, prints its homologues
     * in {@code target} together with each homologue's annotation-level mappings.
     *
     * @param chromosome the chromosome whose genes are listed
     * @param target     the species in which homologues are looked up
     * @throws Exception propagated from the gene and homology queries
     */
    private static void printHomologues(DAChromosome chromosome, DBSpecies target) throws Exception {
        List<DAGene> genes = chromosome.getGenesOnRegion(REGION_START, REGION_END);

        for (DAGene gene : genes) {
            System.out.println(gene.getSpecies().getCommonName() + " Gene " + gene.getStableID()
                    + " " + target.getCommonName() + " homologues:");

            for (DAHomologyPairRelationship hpr : gene.getHomologies(target)) {
                DAGene homologue = hpr.getTarget();
                MappingSet mappings = homologue.getAnnotationLevelMappings();

                for (Mapping m : mappings) {
                    System.out.println("\t" + hpr.getType().toString() + " : "
                            + homologue.getStableID() + " [" + m.getTargetType().toString()
                            + " " + ((DADNASequence) m.getTarget()).getName()
                            + ":" + m.getTargetCoordinates().toString() + "]");
                }
            }
        }
    }

}