001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024import org.biojava3.core.sequence.Strand; 025 026import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 027import uk.ac.roslin.ensembl.dao.database.DBRegistry; 028import uk.ac.roslin.ensembl.dao.database.DBSingleSpeciesCoreDatabase; 029import uk.ac.roslin.ensembl.dao.database.DBSpecies; 030import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome; 031import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence; 032import uk.ac.roslin.ensembl.model.Coordinate; 033import uk.ac.roslin.ensembl.model.Mapping; 034 035public class ComponentAssembledSequencesOfChromosomes { 036 037 // demonstration of how to get sequences and complementary seequences from a chromosome: 038 // the transparent assembly of a chromosome from its component sequences 039 //we look at the internals of a chromosome assembly - made up of mapped component DADNASequences 040 041 public static void main(String[] args) throws Exception { 042 043 044 DBRegistry ensembldbRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 045 DBSpecies sp = ensembldbRegistry.getSpeciesByAlias("chicken"); 046 047 DAChromosome chr = sp.getChromosomeByName("25"); 048 System.out.println("current version:"+chr.getDBVersion()); 049 System.out.println("chr type: " + chr.getType().toString()); 050 System.out.println("Chromosome:\n\tName: " + chr.getChromosomeName() + " db version-" + chr.getDBVersion()); 051 System.out.println("\tLength: " + chr.getDBSeqLength()); 052 System.out.println("\tcoord sys ID: " + chr.getCoordSystem().getId()); 053 System.out.println("\tseq level coord sys ID: " + ((DBSingleSpeciesCoreDatabase) chr.getDaoFactory().getDatabase()).getSequenceLevelCoordSystem().getId()); 054 System.out.println("\tseq region ID :" + chr.getId()); 055 System.out.println("\tseq type :" + chr.getType().toString()); 056 057 System.out.println("chromosome sequence 70000-70100\n" 058 + chr.getSequenceAsString(70000, 70100)); 059 060 //JEnsembl discourages use of the BioJava Strand specification and 061 //encourages use of theless ambiguous use of 'ReverseComplement methods 062 System.out.println("(deprecated) chromosome sequence 70000-70100\n" 063 + chr.getSequenceAsString(70000, 70100, Strand.POSITIVE)); 064 065 System.out.println("chromosome rev complement sequence 70000-70100\n" 066 + chr.getReverseComplementSequenceAsString(70000, 70100)); 067 068 System.out.println("(deprecated) chromosome sequence 70000-70100, Strand.NEGATIVE\n" 069 + chr.getSequenceAsString(70000, 70100, Strand.NEGATIVE)); 070 071 System.out.println(); 072 073 System.out.println("chromosome sequence 1-50\n" 074 + chr.getSequenceAsString(1, 50)); 075 System.out.println("chromosome rev complement sequence 1-50\n" 076 + chr.getReverseComplementSequenceAsString(1, 50)); 077 078 System.out.println(); 079 System.out.println("chromosome sequence 1750-1800\n" 080 + chr.getSequenceAsString(1750, 1800)); 081 System.out.println("chromosome ReverseComplement sequence 1750-1800\n" 082 + chr.getReverseComplementSequenceAsString(1750, 1800)); 083 084 System.out.println(); 085 System.out.println("chromosome sequence 142250, 142300\n" 086 + chr.getSequenceAsString(142250, 142300)); 087 System.out.println("chromosome ReverseComplement sequence 142250, 142300\n" 088 + chr.getReverseComplementSequenceAsString(142250, 142300)); 089 090 System.out.println(); 091 System.out.println("chromosome sequence 142250, 142400\n" 092 + chr.getSequenceAsString(142250, 142400)); 093 System.out.println("chromosome ReverseComplement sequence 142250, 142400\n" 094 + chr.getReverseComplementSequenceAsString(142250, 142400)); 095 096 System.out.println(); 097 098 099 //examining the complete assembly 100 101 for (Mapping mapping : chr.getCompleteAssembly().getStitchedMappings()) { 102 System.out.println(((DADNASequence) mapping.getTarget()).getName() 103 + " : " + mapping.getTargetCoordinates().getStart() 104 + " - " + mapping.getTargetCoordinates().getEnd() 105 + " " + (mapping.getTargetCoordinates().getStrand() == Coordinate.Strand.REVERSE_STRAND ? "reverse" : "") 106 + " (source: " + mapping.getSourceCoordinates().getStart() 107 + " - " + mapping.getSourceCoordinates().getEnd() 108 + ")"); 109 } 110 111 System.out.println("--------------------------------------------------------"); 112 113 114 //looking at the component sequences at the very beginning of the chromosome 115 116 for (Mapping mapping : chr.getCompleteAssembly().getStitchedMappings(1, 3000)) { 117 System.out.println(((DADNASequence) mapping.getTarget()).getName() 118 + " : " + mapping.getTargetCoordinates().getStart() 119 + " - " + mapping.getTargetCoordinates().getEnd() 120 + " " + (mapping.getTargetCoordinates().getStrand() == Coordinate.Strand.REVERSE_STRAND ? "reverse" : "") 121 + " (source: " + mapping.getSourceCoordinates().getStart() 122 + " - " + mapping.getSourceCoordinates().getEnd() 123 + ")"); 124 125 System.out.println("mapped sequence length: " + ((DADNASequence) mapping.getTarget()).getDBSeqLength()); 126 System.out.println("mapped sequence start: " + ((DADNASequence) mapping.getTarget()).getBioBegin()); 127 System.out.println("mapped sequence end: " + ((DADNASequence) mapping.getTarget()).getBioEnd()); 128 System.out.println("mapped sequence fullseq length: " + ((DADNASequence) mapping.getTarget()).getSequenceAsString().length()); 129 130 if (mapping.getTargetCoordinates().getStrand() == Coordinate.Strand.FORWARD_STRAND) { 131 132 System.out.println("mapped sequence desired (+) fragment:\n" 133 + ((DADNASequence) mapping.getTarget()).getSequenceAsString(mapping.getTargetCoordinates().getStart(), 134 mapping.getTargetCoordinates().getEnd())); 135 } else { 136 137 System.out.println("mapped sequence desired (-) fragment:\n" 138 + ((DADNASequence) mapping.getTarget()).getReverseComplementSequenceAsString(mapping.getTargetCoordinates().getStart(), 139 mapping.getTargetCoordinates().getEnd())); 140 141 } 142 System.out.println("----------------------"); 143 144 } 145 146 147 148 System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n"); 149 150 151 } 152}