001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024 025import java.util.List; 026import uk.ac.roslin.ensembl.config.AssemblyExceptionType; 027import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 028import uk.ac.roslin.ensembl.dao.database.DBRegistry; 029import uk.ac.roslin.ensembl.dao.database.DBSpecies; 030import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome; 031import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence; 032import uk.ac.roslin.ensembl.datasourceaware.core.DAGene; 033import uk.ac.roslin.ensembl.datasourceaware.core.GapSequence; 034import uk.ac.roslin.ensembl.model.Mapping; 035import uk.ac.roslin.ensembl.model.MappingSet; 036 037public class AssemblyExceptions { 038 039 //The human genome assembly has assembly updates associated with release versions 040 //between major Genome builds. These are represented as Novel, Patch and Haplotype Exceptions. 041 //(Note: Ensembl treats PseudoAutosomalRegions in a similar fashion, but JEnsembl 042 //integrates this information into the standard Chromosome Model). 043 public static void main(String[] args) throws Exception { 044 045 046 DBRegistry eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 047 048 DBSpecies sp = eReg.getSpeciesByAlias("human"); 049 050 System.out.println("current chr 17"); 051 052 DAChromosome chr = sp.getChromosomeByName("17"); 053 MappingSet assemblyFixes = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_FIX); 054 MappingSet assemblyNovel = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_NOVEL); 055 MappingSet haplotypes = chr.getAssemblyExceptions(AssemblyExceptionType.HAP); 056 057 System.out.println(assemblyFixes.size()+ " ASSEMBLY PATCH FIXES"); 058 System.out.println("-----------------------"); 059 060 for (Mapping m : assemblyFixes) { 061 062 063 System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates() 064 +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates()); 065 System.out.println("_______________________________________"); 066 067 068 DADNASequence s = (DADNASequence ) m.getTarget(); 069 List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd()); 070 071 for (DAGene g : genesOnRegion) { 072 System.out.println(g.getStableID()+" "+g.getDisplayName()); 073 System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates()); 074 } 075 System.out.println(""); 076 System.out.println("START:"); 077 078 if (m.getTargetCoordinates().getStart()<20) { 079 080 System.out.println("PATCH: " 081 +GapSequence.getGapString(21-m.getTargetCoordinates().getStart()) 082 +s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20)); 083 } else { 084 System.out.println("PATCH: "+s.getSequenceAsString(m.getTargetCoordinates().getStart()-20, m.getTargetCoordinates().getStart()+20)); 085 } 086 if (m.getSourceCoordinates().getStart()<20) { 087 088 System.out.println("CHR : " 089 +GapSequence.getGapString(21-m.getSourceCoordinates().getStart()) 090 +s.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20)); 091 } else { 092 System.out.println("CHR : "+s.getSequenceAsString(m.getSourceCoordinates().getStart()-20, m.getSourceCoordinates().getStart()+20)); 093 } 094 System.out.println("END:"); 095 System.out.println("PATCH: "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()+20)); 096 System.out.println("CHR : "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()+20)); 097 System.out.println(""); 098 } 099 100 101 System.out.println(""); 102 System.out.println(assemblyNovel.size()+ " ASSEMBLY PATCH NOVEL"); 103 System.out.println("-----------------------"); 104 105 106 for (Mapping m : assemblyNovel) { 107 108 109 System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates() 110 +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates()); 111 System.out.println("_______________________________________"); 112 113 114 DADNASequence s = (DADNASequence ) m.getTarget(); 115 List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd()); 116 117 for (DAGene g : genesOnRegion) { 118 System.out.println(g.getStableID()+" "+g.getDisplayName()); 119 System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates()); 120 } 121 System.out.println(""); 122 System.out.println("START:"); 123 System.out.println("NOVEL: "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20)); 124 System.out.println("CHR : "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20)); 125 System.out.println("END:"); 126 System.out.println("NOVEL: "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd())); 127 System.out.println("CHR : "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd())); 128 System.out.println(""); 129 } 130 131 System.out.println(""); 132 System.out.println(haplotypes.size()+ " HAPLOTYPES"); 133 System.out.println("-------------"); 134 135 136 for (Mapping m : assemblyNovel) { 137 138 139 System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates() 140 +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates()); 141 System.out.println("_______________________________________"); 142 143 144 DADNASequence s = (DADNASequence ) m.getTarget(); 145 List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd()); 146 147 for (DAGene g : genesOnRegion) { 148 System.out.println(g.getStableID()+" "+g.getDisplayName()); 149 System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates()); 150 } 151 System.out.println(""); 152 System.out.println("START:"); 153 System.out.println("HAP: "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20)); 154 System.out.println("CHR: "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20)); 155 System.out.println("END:"); 156 System.out.println("HAP: "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd())); 157 System.out.println("CHR: "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd())); 158 System.out.println(""); 159 } 160 161 System.out.println("v75 chr 17"); 162 163 chr = sp.getChromosomeByName("17", "75"); 164 assemblyFixes = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_FIX); 165 assemblyNovel = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_NOVEL); 166 haplotypes = chr.getAssemblyExceptions(AssemblyExceptionType.HAP); 167 168 System.out.println(assemblyFixes.size()+ " ASSEMBLY PATCH FIXES"); 169 System.out.println("-----------------------"); 170 171 for (Mapping m : assemblyFixes) { 172 173 174 System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates() 175 +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates()); 176 System.out.println("_______________________________________"); 177 178 179 DADNASequence s = (DADNASequence ) m.getTarget(); 180 List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd()); 181 182 for (DAGene g : genesOnRegion) { 183 System.out.println(g.getStableID()+" "+g.getDisplayName()); 184 System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates()); 185 } 186 System.out.println(""); 187 System.out.println("START:"); 188 189 if (m.getTargetCoordinates().getStart()<20) { 190 191 System.out.println("PATCH: " 192 +GapSequence.getGapString(21-m.getTargetCoordinates().getStart()) 193 +s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20)); 194 } else { 195 System.out.println("PATCH: "+s.getSequenceAsString(m.getTargetCoordinates().getStart()-20, m.getTargetCoordinates().getStart()+20)); 196 } 197 if (m.getSourceCoordinates().getStart()<20) { 198 199 System.out.println("CHR : " 200 +GapSequence.getGapString(21-m.getSourceCoordinates().getStart()) 201 +s.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20)); 202 } else { 203 System.out.println("CHR : "+s.getSequenceAsString(m.getSourceCoordinates().getStart()-20, m.getSourceCoordinates().getStart()+20)); 204 } 205 System.out.println("END:"); 206 System.out.println("PATCH: "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()+20)); 207 System.out.println("CHR : "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()+20)); 208 System.out.println(""); 209 } 210 211 212 System.out.println(""); 213 System.out.println(assemblyNovel.size()+ " ASSEMBLY PATCH NOVEL"); 214 System.out.println("-----------------------"); 215 216 217 for (Mapping m : assemblyNovel) { 218 219 220 System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates() 221 +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates()); 222 System.out.println("_______________________________________"); 223 224 225 DADNASequence s = (DADNASequence ) m.getTarget(); 226 List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd()); 227 228 for (DAGene g : genesOnRegion) { 229 System.out.println(g.getStableID()+" "+g.getDisplayName()); 230 System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates()); 231 } 232 System.out.println(""); 233 System.out.println("START:"); 234 System.out.println("NOVEL: "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20)); 235 System.out.println("CHR : "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20)); 236 System.out.println("END:"); 237 System.out.println("NOVEL: "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd())); 238 System.out.println("CHR : "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd())); 239 System.out.println(""); 240 } 241 242 System.out.println(""); 243 System.out.println(haplotypes.size()+ " HAPLOTYPES"); 244 System.out.println("-------------"); 245 246 247 for (Mapping m : assemblyNovel) { 248 249 250 System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates() 251 +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates()); 252 System.out.println("_______________________________________"); 253 254 255 DADNASequence s = (DADNASequence ) m.getTarget(); 256 List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd()); 257 258 for (DAGene g : genesOnRegion) { 259 System.out.println(g.getStableID()+" "+g.getDisplayName()); 260 System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates()); 261 } 262 System.out.println(""); 263 System.out.println("START:"); 264 System.out.println("HAP: "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20)); 265 System.out.println("CHR: "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20)); 266 System.out.println("END:"); 267 System.out.println("HAP: "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd())); 268 System.out.println("CHR: "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd())); 269 System.out.println(""); 270 } 271 272 273 System.out.println("\n\n*************************\nCOMPLETED FUNCTIONAL TEST\n*************************\n"); 274 275 } 276 277 278}