001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import org.biojava3.core.sequence.Strand;
025
026import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
027import uk.ac.roslin.ensembl.dao.database.DBRegistry;
028import uk.ac.roslin.ensembl.dao.database.DBSingleSpeciesCoreDatabase;
029import uk.ac.roslin.ensembl.dao.database.DBSpecies;
030import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
031import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
032import uk.ac.roslin.ensembl.model.Coordinate;
033import uk.ac.roslin.ensembl.model.Mapping;
034
035public class ComponentAssembledSequencesOfChromosomes {
036
037    // demonstration of how to get sequences and complementary seequences from a chromosome:
038    // the transparent assembly of a chromosome from its component sequences
039    //we look at the internals of  a chromosome assembly - made up of mapped component DADNASequences
040    
041    public static void main(String[] args) throws Exception {
042
043
044        DBRegistry ensembldbRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
045        DBSpecies sp = ensembldbRegistry.getSpeciesByAlias("chicken");
046
047        DAChromosome chr = sp.getChromosomeByName("25");
048        System.out.println("current version:"+chr.getDBVersion());
049        System.out.println("chr type: " + chr.getType().toString());
050        System.out.println("Chromosome:\n\tName: " + chr.getChromosomeName() + " db version-" + chr.getDBVersion());
051        System.out.println("\tLength: " + chr.getDBSeqLength());
052        System.out.println("\tcoord sys ID: " + chr.getCoordSystem().getId());
053        System.out.println("\tseq level coord sys ID: " + ((DBSingleSpeciesCoreDatabase) chr.getDaoFactory().getDatabase()).getSequenceLevelCoordSystem().getId());
054        System.out.println("\tseq region ID :" + chr.getId());
055        System.out.println("\tseq type :" + chr.getType().toString());
056
057        System.out.println("chromosome sequence 70000-70100\n"
058                + chr.getSequenceAsString(70000, 70100));
059
060        //JEnsembl discourages use of the BioJava Strand specification and  
061        //encourages use of theless ambiguous use of 'ReverseComplement methods
062        System.out.println("(deprecated) chromosome sequence 70000-70100\n"
063                + chr.getSequenceAsString(70000, 70100, Strand.POSITIVE));
064
065        System.out.println("chromosome rev complement sequence 70000-70100\n"
066                + chr.getReverseComplementSequenceAsString(70000, 70100));
067        
068        System.out.println("(deprecated) chromosome sequence 70000-70100, Strand.NEGATIVE\n"
069                + chr.getSequenceAsString(70000, 70100, Strand.NEGATIVE));
070
071        System.out.println();
072
073        System.out.println("chromosome sequence 1-50\n"
074                + chr.getSequenceAsString(1, 50));
075        System.out.println("chromosome rev complement sequence 1-50\n"
076                + chr.getReverseComplementSequenceAsString(1, 50));
077
078        System.out.println();
079        System.out.println("chromosome sequence 1750-1800\n"
080                + chr.getSequenceAsString(1750, 1800));
081        System.out.println("chromosome ReverseComplement sequence 1750-1800\n"
082                + chr.getReverseComplementSequenceAsString(1750, 1800));
083
084        System.out.println();
085        System.out.println("chromosome sequence 142250, 142300\n"
086                + chr.getSequenceAsString(142250, 142300));
087        System.out.println("chromosome ReverseComplement sequence 142250, 142300\n"
088                + chr.getReverseComplementSequenceAsString(142250, 142300));
089
090        System.out.println();
091        System.out.println("chromosome sequence 142250, 142400\n"
092                + chr.getSequenceAsString(142250, 142400));
093        System.out.println("chromosome ReverseComplement sequence 142250, 142400\n"
094                + chr.getReverseComplementSequenceAsString(142250, 142400));
095
096        System.out.println();
097
098
099        //examining the complete assembly
100
101        for (Mapping mapping : chr.getCompleteAssembly().getStitchedMappings()) {
102            System.out.println(((DADNASequence) mapping.getTarget()).getName()
103                    + " : " + mapping.getTargetCoordinates().getStart()
104                    + " - " + mapping.getTargetCoordinates().getEnd()
105                    + " " + (mapping.getTargetCoordinates().getStrand() == Coordinate.Strand.REVERSE_STRAND ? "reverse" : "")
106                    + " (source: " + mapping.getSourceCoordinates().getStart()
107                    + " - " + mapping.getSourceCoordinates().getEnd()
108                    + ")");
109        }
110
111        System.out.println("--------------------------------------------------------");
112
113
114        //looking at the component sequences at the very beginning of the chromosome
115
116        for (Mapping mapping : chr.getCompleteAssembly().getStitchedMappings(1, 3000)) {
117            System.out.println(((DADNASequence) mapping.getTarget()).getName()
118                    + " : " + mapping.getTargetCoordinates().getStart()
119                    + " - " + mapping.getTargetCoordinates().getEnd()
120                    + " " + (mapping.getTargetCoordinates().getStrand() == Coordinate.Strand.REVERSE_STRAND ? "reverse" : "")
121                    + " (source: " + mapping.getSourceCoordinates().getStart()
122                    + " - " + mapping.getSourceCoordinates().getEnd()
123                    + ")");
124
125            System.out.println("mapped sequence length: " + ((DADNASequence) mapping.getTarget()).getDBSeqLength());
126            System.out.println("mapped sequence start: " + ((DADNASequence) mapping.getTarget()).getBioBegin());
127            System.out.println("mapped sequence end: " + ((DADNASequence) mapping.getTarget()).getBioEnd());
128            System.out.println("mapped sequence fullseq length: " + ((DADNASequence) mapping.getTarget()).getSequenceAsString().length());
129
130            if (mapping.getTargetCoordinates().getStrand() == Coordinate.Strand.FORWARD_STRAND) {
131
132                System.out.println("mapped sequence desired (+) fragment:\n"
133                        + ((DADNASequence) mapping.getTarget()).getSequenceAsString(mapping.getTargetCoordinates().getStart(),
134                        mapping.getTargetCoordinates().getEnd()));
135            } else {
136
137                System.out.println("mapped sequence desired (-) fragment:\n"
138                        + ((DADNASequence) mapping.getTarget()).getReverseComplementSequenceAsString(mapping.getTargetCoordinates().getStart(),
139                        mapping.getTargetCoordinates().getEnd()));
140
141            }
142            System.out.println("----------------------");
143
144        }
145
146
147
148        System.out.println("\n\n*****************************\n* COMPLETED FUNCTIONAL TEST *\n*****************************\n");
149
150
151    }
152}