001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024
025import java.util.List;
026import uk.ac.roslin.ensembl.config.AssemblyExceptionType;
027import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
028import uk.ac.roslin.ensembl.dao.database.DBRegistry;
029import uk.ac.roslin.ensembl.dao.database.DBSpecies;
030import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
031import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence;
032import uk.ac.roslin.ensembl.datasourceaware.core.DAGene;
033import uk.ac.roslin.ensembl.datasourceaware.core.GapSequence;
034import uk.ac.roslin.ensembl.model.Mapping;
035import uk.ac.roslin.ensembl.model.MappingSet;
036
037public class AssemblyExceptions {
038
039    //The human genome assembly has assembly updates associated with release versions 
040    //between major Genome builds. These are represented as Novel, Patch and Haplotype Exceptions.
041    //(Note: Ensembl treats PseudoAutosomalRegions in a similar fashion, but JEnsembl 
042    //integrates this information into the standard Chromosome Model).
043    public static void main(String[] args) throws Exception {
044
045
046        DBRegistry eReg = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
047
048        DBSpecies sp = eReg.getSpeciesByAlias("human");
049        
050        System.out.println("current chr 17");
051        
052        DAChromosome chr = sp.getChromosomeByName("17");
053        MappingSet assemblyFixes = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_FIX);
054        MappingSet assemblyNovel = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_NOVEL);
055        MappingSet haplotypes = chr.getAssemblyExceptions(AssemblyExceptionType.HAP);
056        
057        System.out.println(assemblyFixes.size()+ " ASSEMBLY PATCH FIXES");
058        System.out.println("-----------------------");
059        
060        for (Mapping m : assemblyFixes) {
061            
062            
063            System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates()
064                    +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates());
065            System.out.println("_______________________________________");
066            
067            
068            DADNASequence s = (DADNASequence ) m.getTarget();
069            List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd());
070            
071            for (DAGene g : genesOnRegion) {
072                System.out.println(g.getStableID()+" "+g.getDisplayName());
073                System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates());
074            }
075            System.out.println("");
076            System.out.println("START:");
077            
078            if (m.getTargetCoordinates().getStart()<20) {
079
080                System.out.println("PATCH:  "
081                        +GapSequence.getGapString(21-m.getTargetCoordinates().getStart())
082                        +s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20));
083            } else {
084                System.out.println("PATCH:  "+s.getSequenceAsString(m.getTargetCoordinates().getStart()-20, m.getTargetCoordinates().getStart()+20));
085            }
086            if (m.getSourceCoordinates().getStart()<20) {
087
088                System.out.println("CHR  :  "
089                        +GapSequence.getGapString(21-m.getSourceCoordinates().getStart())
090                        +s.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20));
091            } else {
092                System.out.println("CHR  :  "+s.getSequenceAsString(m.getSourceCoordinates().getStart()-20, m.getSourceCoordinates().getStart()+20));
093            }            
094            System.out.println("END:");
095            System.out.println("PATCH:  "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()+20));
096            System.out.println("CHR  :  "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()+20));
097            System.out.println("");
098        }
099        
100        
101        System.out.println("");
102        System.out.println(assemblyNovel.size()+ " ASSEMBLY PATCH NOVEL");
103        System.out.println("-----------------------");
104        
105        
106        for (Mapping m : assemblyNovel) {
107            
108            
109            System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates()
110                    +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates());
111            System.out.println("_______________________________________");
112            
113            
114            DADNASequence s = (DADNASequence ) m.getTarget();
115            List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd());
116            
117            for (DAGene g : genesOnRegion) {
118                System.out.println(g.getStableID()+" "+g.getDisplayName());
119                System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates());
120            }
121            System.out.println("");
122            System.out.println("START:");
123            System.out.println("NOVEL:  "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20));
124            System.out.println("CHR  :  "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20));
125            System.out.println("END:");
126            System.out.println("NOVEL:  "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()));
127            System.out.println("CHR  :  "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()));
128            System.out.println("");
129        }
130        
131        System.out.println("");
132        System.out.println(haplotypes.size()+ " HAPLOTYPES");
133        System.out.println("-------------");
134        
135        
136        for (Mapping m : assemblyNovel) {
137            
138            
139            System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates()
140                    +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates());
141            System.out.println("_______________________________________");
142            
143            
144            DADNASequence s = (DADNASequence ) m.getTarget();
145            List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd());
146            
147            for (DAGene g : genesOnRegion) {
148                System.out.println(g.getStableID()+" "+g.getDisplayName());
149                System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates());
150            }
151            System.out.println("");
152            System.out.println("START:");
153            System.out.println("HAP:  "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20));
154            System.out.println("CHR:  "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20));
155            System.out.println("END:");
156            System.out.println("HAP:  "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()));
157            System.out.println("CHR:  "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()));
158            System.out.println("");
159        }
160        
161                System.out.println("v75 chr 17");
162        
163        chr = sp.getChromosomeByName("17", "75");
164         assemblyFixes = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_FIX);
165         assemblyNovel = chr.getAssemblyExceptions(AssemblyExceptionType.PATCH_NOVEL);
166         haplotypes = chr.getAssemblyExceptions(AssemblyExceptionType.HAP);
167        
168        System.out.println(assemblyFixes.size()+ " ASSEMBLY PATCH FIXES");
169        System.out.println("-----------------------");
170        
171        for (Mapping m : assemblyFixes) {
172            
173            
174            System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates()
175                    +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates());
176            System.out.println("_______________________________________");
177            
178            
179            DADNASequence s = (DADNASequence ) m.getTarget();
180            List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd());
181            
182            for (DAGene g : genesOnRegion) {
183                System.out.println(g.getStableID()+" "+g.getDisplayName());
184                System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates());
185            }
186            System.out.println("");
187            System.out.println("START:");
188            
189            if (m.getTargetCoordinates().getStart()<20) {
190
191                System.out.println("PATCH:  "
192                        +GapSequence.getGapString(21-m.getTargetCoordinates().getStart())
193                        +s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20));
194            } else {
195                System.out.println("PATCH:  "+s.getSequenceAsString(m.getTargetCoordinates().getStart()-20, m.getTargetCoordinates().getStart()+20));
196            }
197            if (m.getSourceCoordinates().getStart()<20) {
198
199                System.out.println("CHR  :  "
200                        +GapSequence.getGapString(21-m.getSourceCoordinates().getStart())
201                        +s.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20));
202            } else {
203                System.out.println("CHR  :  "+s.getSequenceAsString(m.getSourceCoordinates().getStart()-20, m.getSourceCoordinates().getStart()+20));
204            }            
205            System.out.println("END:");
206            System.out.println("PATCH:  "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()+20));
207            System.out.println("CHR  :  "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()+20));
208            System.out.println("");
209        }
210        
211        
212        System.out.println("");
213        System.out.println(assemblyNovel.size()+ " ASSEMBLY PATCH NOVEL");
214        System.out.println("-----------------------");
215        
216        
217        for (Mapping m : assemblyNovel) {
218            
219            
220            System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates()
221                    +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates());
222            System.out.println("_______________________________________");
223            
224            
225            DADNASequence s = (DADNASequence ) m.getTarget();
226            List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd());
227            
228            for (DAGene g : genesOnRegion) {
229                System.out.println(g.getStableID()+" "+g.getDisplayName());
230                System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates());
231            }
232            System.out.println("");
233            System.out.println("START:");
234            System.out.println("NOVEL:  "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20));
235            System.out.println("CHR  :  "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20));
236            System.out.println("END:");
237            System.out.println("NOVEL:  "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()));
238            System.out.println("CHR  :  "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()));
239            System.out.println("");
240        }
241        
242        System.out.println("");
243        System.out.println(haplotypes.size()+ " HAPLOTYPES");
244        System.out.println("-------------");
245        
246        
247        for (Mapping m : assemblyNovel) {
248            
249            
250            System.out.println(chr.getChromosomeName()+ ": "+m.getSourceCoordinates()
251                    +" - "+((DADNASequence)m.getTarget()).getName()+ ": "+m.getTargetCoordinates());
252            System.out.println("_______________________________________");
253            
254            
255            DADNASequence s = (DADNASequence ) m.getTarget();
256            List<DAGene> genesOnRegion = s.getGenesOnRegion(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getEnd());
257            
258            for (DAGene g : genesOnRegion) {
259                System.out.println(g.getStableID()+" "+g.getDisplayName());
260                System.out.println("\t"+ g.getChromosomeMapping((DAChromosome)s).getTargetCoordinates());
261            }
262            System.out.println("");
263            System.out.println("START:");
264            System.out.println("HAP:  "+s.getSequenceAsString(m.getTargetCoordinates().getStart(), m.getTargetCoordinates().getStart()+20));
265            System.out.println("CHR:  "+chr.getSequenceAsString(m.getSourceCoordinates().getStart(), m.getSourceCoordinates().getStart()+20));
266            System.out.println("END:");
267            System.out.println("HAP:  "+s.getSequenceAsString(m.getTargetCoordinates().getEnd()-20, m.getTargetCoordinates().getEnd()));
268            System.out.println("CHR:  "+chr.getSequenceAsString(m.getSourceCoordinates().getEnd()-20, m.getSourceCoordinates().getEnd()));
269            System.out.println("");
270        }
271        
272
273    System.out.println("\n\n*************************\nCOMPLETED FUNCTIONAL TEST\n*************************\n");
274
275    }
276    
277
278}