001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.datasourceaware.core; 023 024import java.util.Iterator; 025import java.util.LinkedList; 026import org.biojava3.core.sequence.DNASequence; 027import uk.ac.roslin.ensembl.exception.DAOException; 028import uk.ac.roslin.ensembl.exception.RangeException; 029import uk.ac.roslin.ensembl.model.Coordinate; 030import uk.ac.roslin.ensembl.model.Mapping; 031import uk.ac.roslin.ensembl.model.MappingSet; 032import uk.ac.roslin.ensembl.model.core.AssembledDNASequence; 033import uk.ac.roslin.ensembl.model.core.Assembly; 034import uk.ac.roslin.ensembl.model.core.Chromosome; 035 036/** 037 * 038 * @author paterson 039 */ 040public class DAAssembly implements Assembly { 041 042 DAAssembledDNASequence parent = null; 043 protected MappingSet componentMappings = null; 044 protected MappingSet pARMappings = null; 045 protected MappingSet stitchedComponentMappings = null; 046 protected Integer assemblyStart = null; 047 protected Integer assemblyStop = null; 048 protected boolean hasPAR = false; 049 050 public static DAAssembly getAssembly(DAAssembledDNASequence parent) { 051 return new DAAssembly(parent); 052 } 053 054 public static DAAssembly getAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) { 055 return new DAAssembly(parent, start, stop); 056 } 057 public DAAssembly() { 058 } 059 060 /** 061 * Private constructor that sets the assembly to hold an AssembledDNASequence. If this 062 * parent sequence is a chromosome, 'hasPAR' indicates whether it is asserted to be 063 * a sex chromosome with a pseudoautosomal region (e.g. mammalian 'Y'); 064 * @param parent 065 */ 066 private DAAssembly(DAAssembledDNASequence parent) { 067 this.parent = parent; 068 if (parent instanceof Chromosome) { 069 hasPAR = ((Chromosome) parent).isPAR(); 070 } 071 } 072 073 /** 074 * Private constructor that sets the assembly to hold an AssembledDNASequence, 075 * and sets the desired range of the assembly. If this 076 * parent sequence is a chromosome, 'hasPAR' indicates whether it is asserted to be 077 * a sex chromosome with a pseudoautosomal region (e.g. mammalian 'Y'); 078 * @param parent 079 */ 080 private DAAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) { 081 this.parent = parent; 082 if (parent instanceof Chromosome) { 083 hasPAR = ((Chromosome) parent).isPAR(); 084 } 085 this.assemblyStart = start; 086 this.assemblyStop = stop; 087 } 088 089 /** 090 * Returns the DAAssembledDNASequence that this assembly holds (its 'Parent'). 091 */ 092 @Override 093 public DAAssembledDNASequence getParent() { 094 return parent; 095 } 096 097 /** 098 * Sets the DAAssembledDNASequence that this assembly holds (its 'Parent'). 099 */ 100 @Override 101 public void setParent(AssembledDNASequence parent) { 102 this.parent = (DAAssembledDNASequence) parent; 103 } 104 105 @Override 106 public Integer getAssemblyStart() { 107 if (assemblyStart == null) { 108 try { 109 assemblyStart = (parent.getBioBegin() != null) ? parent.getBioBegin() : 1; 110 } catch (Exception e) { 111 } 112 } 113 if (assemblyStart == null) { 114 assemblyStart = 1; 115 } 116 return assemblyStart; 117 } 118 119 @Override 120 public void setAssemblyStart(Integer assemblyStart) { 121 this.assemblyStart = assemblyStart; 122 } 123 124 @Override 125 public Integer getAssemblyStop() { 126 if (assemblyStop != null || parent == null) { 127 return assemblyStop; 128 } 129 assemblyStop = parent.getBioEnd(); 130 return assemblyStop; 131 } 132 133 @Override 134 public void setAssemblyStop(Integer assemblyStop) { 135 this.assemblyStop = assemblyStop; 136 } 137 138 /** 139 * Private DA method to get all of the mappings of all of the DNASequences that make up this 140 * assembly. These are added to a local MappingSet 'componentMappings'. If the assembly 141 * is of a Chromosome with a PAR, the mappings from the cognate pseudoautosome are 142 * also fetched and added to 'componentMappings'. 143 * @throws DAOException 144 */ 145 private void setMappings() throws DAOException { 146 componentMappings = new MappingSet(); 147 try { 148 componentMappings = (MappingSet) parent.getDaoFactory().getAssemblyDAO().getComponentMappingsByStartStop(parent, this.getAssemblyStart(), this.getAssemblyStop()); 149 } catch (Exception e) { 150 throw new DAOException("Failed to create the stitched mappings for a DAAssembly", e); 151 } 152 153 if (parent instanceof Chromosome && hasPAR) { 154 155 try { 156 pARMappings = (MappingSet) parent.getDaoFactory().getAssemblyDAO().getPARMappingsByStartStop((Chromosome) parent, this.getAssemblyStart(), this.getAssemblyStop()); 157 } catch (Exception e) { 158 throw new DAOException("Failed to fetch all the assembly mappings for a PseudoAutosomal Region of a DAChromosome", e); 159 } 160 161 for (Mapping m : pARMappings) { 162 163 //remove any mappings in component mappings 164 165 //i dont think i have to do this - there should be no mappings for the PAR regions 166 167 //and then add the parMappings 168 169 componentMappings.add(m); 170 171 } 172 173 174 } 175 176 } 177 178 /** 179 * Returns the MappingSet 'componentMappings' of all of the mappings of all of the DNASequences that make up this 180 * assembly. Triggers a lazy load if not already set. 181 * @throws DAOException 182 */ 183 @Override 184 public MappingSet getMappings() throws DAOException { 185 if (this.componentMappings == null) { 186 this.setMappings(); 187 } 188 return this.componentMappings; 189 } 190 191 /** 192 * Returns the MappingSet 'stitchedComponentMappings' of all of the mappings 193 * of all of the DNASequences that make up this assembly, that have been 'edited', 194 * i.e. stitched together into a single continuous gap free assembly. 195 * Triggers method 'stitchComponents' if not yet set . 196 * @throws DAOException 197 */ 198 @Override 199 public MappingSet getStitchedMappings() throws DAOException { 200 if (this.stitchedComponentMappings == null) { 201 this.stitchedComponentMappings = this.stitchComponents(); 202 } 203 return this.stitchedComponentMappings; 204 } 205 206 /** 207 * For a given range, returns a subset of the MappingSet 'stitchedComponentMappings' of all of the mappings 208 * of all of the DNASequences that make up this assembly, that have been 'edited', 209 * i.e. stitched together into a single continuous gap free assembly. 210 * Triggers method 'stitchComponents' if not yet set . 211 * @throws DAOException 212 */ 213 public MappingSet getStitchedMappings(Integer start, Integer stop) throws DAOException, RangeException { 214 215 if (start== null|| start==0 || stop == null || stop ==0) { 216 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world." 217 +" Use -1 for one base upstream or +1 for the first base."); 218 } 219 220 MappingSet out = new MappingSet(); 221 Integer begin = start; 222 Integer end = stop; 223 if (begin>end) { 224 begin = stop; 225 end = start; 226 } 227 228 if (parent==null) { 229 return out; 230 } 231 232 if (begin < this.getAssemblyStart() || end > this.getAssemblyStop()) { 233 throw new RangeException("The specified range is outwith the extent of the Assembly."); 234 } 235 236 if (this.getStitchedMappings().isEmpty()) { 237 return out; 238 } 239 240 for (Mapping mapping : this.getStitchedMappings()) { 241 242 243 if (mapping.getSourceCoordinates().getStart() > end) { 244 break; 245 } else if (mapping.getSourceCoordinates().getEnd() < begin) { 246 continue; 247 } else { 248 Integer upstream = mapping.getSourceCoordinates().getStart() - begin; 249 Integer downstream = mapping.getSourceCoordinates().getEnd() - end; 250 251 if (upstream >= 0 && downstream <= 0) { 252 out.add((Mapping) mapping); 253 continue; 254 } 255 Mapping newMapping = new Mapping(); 256 newMapping.setSource(mapping.getSource()); 257 newMapping.setSourceCoordinates(mapping.getSourceCoordinates()); 258 newMapping.setTarget(mapping.getTarget()); 259 newMapping.setTargetCoordinates(mapping.getTargetCoordinates()); 260 261 if (upstream < 0) { 262 newMapping.getTargetCoordinates().setStart(mapping.getTargetCoordinates().getStart() - upstream); 263 newMapping.getSourceCoordinates().setStart(begin); 264 } 265 if (downstream > 0) { 266 newMapping.getTargetCoordinates().setEnd(mapping.getTargetCoordinates().getEnd() - downstream); 267 newMapping.getSourceCoordinates().setEnd(end); 268 } 269 out.add(newMapping); 270 continue; 271 } 272 } 273 274 return out; 275 } 276 277 /** 278 * Protected method to join together (stitch) all of the component sub-sequences 279 * of the assembly, together with GapSequences to form a single continuous 280 * ungapped assembly, held in the MappingSet 'stitchedComponentMappings'; 281 * @throws DAOException 282 */ 283 protected MappingSet stitchComponents() throws DAOException { 284 285 MappingSet out = new MappingSet(); 286 Integer assStart = this.getAssemblyStart(); 287 Integer assEnd = this.getAssemblyStop(); 288 if (assStart ==null || assEnd == null || this.getMappings()==null || this.getMappings().isEmpty()) { 289 return out; 290 } 291 Integer currentPosition = assStart; 292// //the no worry scenario - probably this is all that i want to do 293// if (currentPosition > 0) { 294 295 for (Mapping mapping : this.getMappings()) { 296 297 Integer sourceStart = mapping.getSourceCoordinates().getStart(); 298 Integer sourceEnd = mapping.getSourceCoordinates().getEnd(); 299 DADNASequence source = (DADNASequence) mapping.getSource(); 300 DADNASequence target = (DADNASequence) mapping.getTarget(); 301 Integer distanceLeft = assEnd - currentPosition; 302 303 //skip ones completely before the desired start 304 if (sourceStart > assEnd || sourceEnd < currentPosition) { 305 continue; 306 } else if (sourceStart <= currentPosition) { 307 308 //we overlap the start 309 Integer overlap = currentPosition - sourceStart; 310 Mapping m = new Mapping(); 311 m.setSource(source); 312 m.setTarget(target); 313 314 //if we also overlap the end 315 if (sourceEnd > assEnd) { 316 m.setTargetCoordinates( 317 mapping.getTargetCoordinates().getStart() + overlap, 318 mapping.getTargetCoordinates().getStart() + overlap + distanceLeft, 319 mapping.getTargetCoordinates().getStrand()); 320 m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND); 321 out.add(m); 322 //we're done 323 currentPosition = assEnd + 1; 324 break; 325 } else { 326 m.setTargetCoordinates( 327 mapping.getTargetCoordinates().getStart() + overlap, 328 mapping.getTargetCoordinates().getEnd(), 329 mapping.getTargetCoordinates().getStrand()); 330 m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND); 331 out.add(m); 332 if (sourceEnd == assEnd) { 333 //we're done 334 currentPosition = assEnd + 1; 335 break; 336 } else { 337 currentPosition = sourceEnd + 1; 338 continue; 339 } 340 } 341 342 343 344 } else if (currentPosition < sourceStart) { 345 346 //plug a gap 347 Integer gapLength = sourceStart - currentPosition; 348 Mapping m = new Mapping(); 349 m.setSource(source); 350 m.setTarget(GapSequence.makeGap(gapLength)); 351 m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND); 352 m.setSourceCoordinates(currentPosition, currentPosition + gapLength - 1, Coordinate.Strand.FORWARD_STRAND); 353 out.add(m); 354 currentPosition = currentPosition + gapLength; 355 356 357 //if we are within bounds 358 if (assEnd >= sourceEnd) { 359 Mapping m1 = new Mapping(); 360 m1.setSource(source); 361 m1.setTarget(target); 362 m1.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND); 363 m1.setTargetCoordinates(mapping.getTargetCoordinates()); 364 out.add(m1); 365 if (assEnd == sourceEnd) { 366 // we're done 367 currentPosition = assEnd + 1; 368 break; 369 } else { 370 currentPosition = sourceEnd + 1; 371 continue; 372 } 373 } //if we run past the desired assEnd 374 else { 375 Mapping m2 = new Mapping(); 376 m2.setSource(source); 377 m2.setTarget(target); 378 m2.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND); 379 m2.setTargetCoordinates(1, assEnd - sourceStart + 1, mapping.getTargetCoordinates().getStrand()); 380 out.add(m2); 381 //we're done 382 currentPosition = assEnd + 1; 383 break; 384 } 385 386 } 387 388 } 389 390 if (currentPosition <= assEnd) { 391 //plug a terminal gap 392 Integer gapLength = assEnd - currentPosition + 1; 393 Mapping m = new Mapping(); 394 m.setSource(parent); 395 m.setTarget(GapSequence.makeGap(gapLength)); 396 m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND); 397 m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND); 398 out.add(m); 399 400 } 401 402// } 403// //if we have to worry about zero 404// //TODO not sure that this is working yet 405// else { 406// for (Mapping mapping : this.getMappings()) { 407// 408// Integer sourceStart = mapping.getSourceCoordinates().getStart(); 409// Integer sourceEnd = mapping.getSourceCoordinates().getEnd(); 410// DADNASequence source = (DADNASequence) mapping.getSource(); 411// DADNASequence target = (DADNASequence) mapping.getTarget(); 412// Integer distanceLeft = assEnd - currentPosition; 413// 414// //skip ones completely before the desired assStart 415// if (sourceStart > assEnd || sourceEnd < currentPosition) { 416// continue; 417// } else if (sourceStart <= currentPosition) { 418// 419// //we overlap the assStart 420// Integer overlap = currentPosition - sourceStart; 421// Mapping m = new Mapping(); 422// m.setSource(source); 423// m.setTarget(target); 424// 425// //if we also overlap the assEnd 426// if (sourceEnd > assEnd) { 427// m.setTargetCoordinates( 428// mapping.getTargetCoordinates().getStart() + overlap, 429// mapping.getTargetCoordinates().getStart() + overlap + distanceLeft, 430// mapping.getTargetCoordinates().getStrand()); 431// m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND); 432// out.add(m); 433// //we're done 434// currentPosition = assEnd + 1; 435// break; 436// } else { 437// m.setTargetCoordinates( 438// mapping.getTargetCoordinates().getStart() + overlap, 439// mapping.getTargetCoordinates().getEnd(), 440// mapping.getTargetCoordinates().getStrand()); 441// m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND); 442// out.add(m); 443// if (sourceEnd == assEnd) { 444// //we're done 445// currentPosition = assEnd + 1; 446// break; 447// } else { 448// currentPosition = sourceEnd + 1; 449// //we dont use ZERO 450// if (currentPosition ==0) { 451// currentPosition = 1; 452// } 453// continue; 454// } 455// } 456// 457// 458// 459// } else if (currentPosition < sourceStart) { 460// 461// //plug a gap 462// Integer gapLength = 0; 463// if ( sourceStart>0 && currentPosition <0) { 464// //the length is shorter where it spans the -1/+1 junction 465// gapLength = sourceStart - currentPosition-1; 466// } else { 467// gapLength = sourceStart - currentPosition; 468// } 469// 470// Mapping m = new Mapping(); 471// m.setSource(source); 472// m.setTarget(GapSequence.makeGap(gapLength)); 473// m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND); 474// //the gap length should be correct here so that currentPosition + gapLength - 1 will never = 0 475// m.setSourceCoordinates(currentPosition, currentPosition + gapLength - 1, Coordinate.Strand.FORWARD_STRAND); 476// out.add(m); 477// currentPosition = currentPosition + gapLength; 478// 479// //we dont use ZERO 480// if (currentPosition==0) { 481// currentPosition=1; 482// } 483// 484// //if we are within bounds 485// if (assEnd >= sourceEnd) { 486// Mapping m1 = new Mapping(); 487// m1.setSource(source); 488// m1.setTarget(target); 489// m1.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND); 490// m1.setTargetCoordinates(mapping.getTargetCoordinates()); 491// out.add(m1); 492// if (assEnd == sourceEnd) { 493// // we're done 494// currentPosition = assEnd + 1; 495// break; 496// } else { 497// currentPosition = sourceEnd + 1; 498// continue; 499// } 500// } //if we run past the desired assEnd 501// else { 502// Mapping m2 = new Mapping(); 503// m2.setSource(source); 504// m2.setTarget(target); 505// m2.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND); 506// //surely i cant get a ZERO FOR END here! 507// m2.setTargetCoordinates(1, assEnd - sourceStart + 1, mapping.getTargetCoordinates().getStrand()); 508// out.add(m2); 509// //we're done 510// currentPosition = assEnd + 1; 511// break; 512// } 513// 514// } 515// 516// } 517// 518// if (currentPosition <= assEnd) { 519// //plug a terminal gap 520// Integer gapLength = 0; 521// if (currentPosition<0 && assEnd>0 ) { 522// //if we are spanning the -1/+1 junction the distance is shorter! 523// gapLength = assEnd - currentPosition; 524// } else { 525// gapLength = assEnd - currentPosition + 1; 526// } 527// Mapping m = new Mapping(); 528// m.setSource(parent); 529// m.setTarget(GapSequence.makeGap(gapLength)); 530// m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND); 531// m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND); 532// out.add(m); 533// 534// } 535// 536// 537// } 538 539 return out; 540 } 541 542 /** 543 * Returns a String representation of the reverse complement of the assembly sequence for the given range. 544 * If there is no sequence for this assembly null is returned. 545 * If the given range is greater than the assembly coordinates, a RangeException 546 * is thrown (try using getPaddedReverseComplementSequenceAsString(int,int) instead). 547 * @param start 548 * @param stop 549 * @throws RangeException 550 * @throws DAOException 551 */ 552 public String getReverseComplementSequenceAsString(Integer start, Integer stop) throws RangeException, DAOException { 553 String seq = this.getSequenceAsString(start, stop); 554 if (seq==null) { 555 return null; 556 } 557 DNASequence temp = new DNASequence(seq); 558 return temp.getReverseComplement().getSequenceAsString(); 559 } 560 561 /** 562 * Returns a String representation of the reverse complement of theassembly sequence for the given range. 563 * If there is no sequence for this assembly null is returned. 564 * If the given range is greater than the assembly coordinates, the result is padded 565 * upstream and/or downstream. 566 * @param start 567 * @param stop 568 * @throws RangeException 569 * @throws DAOException 570 */ 571 public String getPaddedReverseComplementSequenceAsString(Integer start, Integer stop) throws DAOException { 572 String seq = this.getPaddedSequenceAsString(start, stop); 573 if (seq==null) { 574 return null; 575 } 576 DNASequence temp = new DNASequence(seq); 577 return temp.getReverseComplement().getSequenceAsString(); 578 } 579 580 /** 581 * Returns a String representation of the assembly sequence for the given range. 582 * If there is no sequence for this assembly null is returned. 583 * If the given range is greater than the assembly coordinates, a RangeException 584 * is thrown (try using getPaddedSequenceAsString(int,int) instead). 585 * @param start 586 * @param stop 587 * @throws RangeException 588 * @throws DAOException 589 */ 590 public String getSequenceAsString(Integer start, Integer stop) throws RangeException, DAOException { 591 if (this.parent == null || this.parent.getLength() == 0 || this.getAssemblyStop() == null || this.getAssemblyStart() == null) { 592 return null; 593 } 594 if (start== null|| start==0 || stop == null || stop ==0) { 595 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world." 596 +" Use -1 for one base upstream or +1 for the first base."); 597 } 598 599 StringBuilder sb = new StringBuilder(); 600 String out = ""; 601 602 //range of sequence I want 603 Integer begin = start; 604 Integer end = stop; 605 if (begin>end) { 606 begin = stop; 607 end = start; 608 } 609 610 //make sure range is not greater than the assembly extent 611 if ( end > this.getAssemblyStop()) { 612 throw new RangeException("Requested range greater than assembly range."); 613 } 614 if ( begin < this.getAssemblyStart()) { 615 throw new RangeException("Requested range greater than assembly range."); 616 } 617 618 Integer nextStart = begin; 619 620 for (Mapping mapping : this.getStitchedMappings()) { 621 622 if (nextStart > end) { 623 //we're done 624 break; 625 } 626 627 ////////////////////////////////////// 628 if (mapping.getSourceCoordinates().getStart() > end) { 629 //we're past the end - so quit - shouldn't get here cos of previous test 630 break; 631 } else if (mapping.getSourceCoordinates().getEnd() < nextStart) { 632 //we're not yet at the position we want so skip 633 continue; 634 } else { 635 636 //this mapping is at least partially in range 637 638 if (mapping.getTargetCoordinates().getStrand().equals(uk.ac.roslin.ensembl.model.Coordinate.Strand.FORWARD_STRAND)) { 639 640 // desiredStart should be == targetCoord.start 641 // because we are walking along the assembled components that have had gaps inserted. 642 //[i.e. at this point nextStart should == sourceCoord.start] 643 Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() + (nextStart - mapping.getSourceCoordinates().getStart()); 644 645 //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range 646 Integer desiredOutputEnd = mapping.getTargetCoordinates().getStart() + (end - mapping.getSourceCoordinates().getStart()); 647 648 //the assEnd point that we are looking for may be beyond the assEnd of this source->target compoment mapping 649 //so obviously we dont want to read further 650 if (desiredOutputEnd > mapping.getTargetCoordinates().getEnd()) { 651 desiredOutputEnd = mapping.getTargetCoordinates().getEnd(); 652 } else { 653 //this will complete the sequence 654 } 655 656 nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1; 657 658 DADNASequence seq = (DADNASequence) mapping.getTarget(); 659 660 sb.append(seq.getSequenceAsString( 661 desiredOutputStart, desiredOutputEnd)); 662 } else { 663 664 665 //this is the assStart relative to the direction that we are walking - so should be correct 666 // because we are walking along the source mappings and the assembled components that have had gaps inserted. 667 // therefore the desired assEnd should be == targetCoord.assEnd 668 //[i.e. at this point nextStart should == sourceCoord.assStart] 669 Integer desiredOutputEnd = mapping.getTargetCoordinates().getEnd() - (Math.abs(mapping.getSourceCoordinates().getStart() - nextStart)); 670 671 672 //this is the assEnd relative to the direction that we are walking 673 //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range 674 //in this case this will be going UPSTREAM in the mapped component 675 //Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() - (Math.abs(assEnd - mapping.getSourceCoordinates().getEnd() )); 676 Integer desiredOutputStart = desiredOutputEnd - (Math.abs(end - nextStart )); 677 678 679 // if we are attempting to walk beyond the extent of this mapping... 680 if (desiredOutputStart < mapping.getTargetCoordinates().getStart()) { 681 desiredOutputStart = mapping.getTargetCoordinates().getStart(); 682 } else { 683 //this will complete the sequence 684 } 685 686 nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1; 687 688 DADNASequence seq = (DADNASequence) mapping.getTarget(); 689 690 sb.append(seq.getReverseComplementSequenceAsString(desiredOutputStart, desiredOutputEnd)); 691 692 } 693 } 694 } 695 return sb.toString(); 696 } 697 698 /** 699 * Returns a String representation of the assembly sequence for the given range. 700 * If there is no sequence for this assembly null is returned. 701 * If the given range is greater than the assembly coordinates, the result is padded 702 * upstream and/or downstream. 703 * @param start 704 * @param stop 705 * @return String representation of the Assembly Sequence 706 * @throws DAOException 707 */ 708 public String getPaddedSequenceAsString(Integer start, Integer stop) throws DAOException { 709 if (this.parent == null || this.parent.getLength() == 0 ) { 710 return null; 711 } 712 if (start== null|| start==0 || stop == null || stop ==0) { 713 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world." 714 +" Use -1 for one base upstream or +1 for the first base."); 715 } 716 717 StringBuilder sb = new StringBuilder(); 718 String out = ""; 719 int downstream = 0, upstream = 0; 720 721 //range of sequence I want 722 Integer begin = start; 723 Integer end = stop; 724 if (begin>end) { 725 begin = stop; 726 end = start; 727 } 728 729 //make sure range is not greater than the assembly extent 730 if ( this.getAssemblyStop() == null || end > this.getAssemblyStop()) { 731 //calculate Downstream padding & reset assEnd 732 downstream = end - this.getAssemblyStop(); 733 end = this.getAssemblyStop(); 734 } 735 if ( this.getAssemblyStart() == null || begin < this.getAssemblyStart()) { 736 //calculate upstream padding & reset begin 737 if (begin<0) { 738 upstream = this.getAssemblyStart()-(begin+1); 739 } else { 740 upstream = this.getAssemblyStart()-begin; 741 } 742 begin = this.getAssemblyStart(); 743 } 744 745 Integer nextStart = begin; 746 747 for (Mapping mapping : this.getStitchedMappings()) { 748 749 if (nextStart > end) { 750 //we're done 751 break; 752 } 753 754 ////////////////////////////////////// 755 if (mapping.getSourceCoordinates().getStart() > end) { 756 //we're past the assEnd - so quit - shouldn't get here cos of previous test 757 break; 758 } else if (mapping.getSourceCoordinates().getEnd() < nextStart) { 759 //we're not yet at the position we want so skip 760 continue; 761 } else { 762 763 //this mapping is at least partially in range 764 765 if (mapping.getTargetCoordinates().getStrand().equals(uk.ac.roslin.ensembl.model.Coordinate.Strand.FORWARD_STRAND)) { 766 767 // desiredStart should be == targetCoord.assStart 768 // because we are walking along the assembled components that have had gaps inserted. 769 //[i.e. at this point nextStart should == sourceCoord.assStart] 770 Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() + (nextStart - mapping.getSourceCoordinates().getStart()); 771 772 //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range 773 Integer desiredOutputEnd = mapping.getTargetCoordinates().getStart() + (end - mapping.getSourceCoordinates().getStart()); 774 775 //the assEnd point that we are looking for may be beyond the assEnd of this source->target compoment mapping 776 //so obviously we dont want to read further 777 if (desiredOutputEnd > mapping.getTargetCoordinates().getEnd()) { 778 desiredOutputEnd = mapping.getTargetCoordinates().getEnd(); 779 } else { 780 //this will complete the sequence 781 } 782 783 nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1; 784 785 DADNASequence seq = (DADNASequence) mapping.getTarget(); 786 787 sb.append(seq.getSequenceAsString( 788 desiredOutputStart, desiredOutputEnd)); 789 } else { 790 791 792 //this is the assStart relative to the direction that we are walking - so should be correct 793 // because we are walking along the source mappings and the assembled components that have had gaps inserted. 794 // therefore the desired assEnd should be == targetCoord.assEnd 795 //[i.e. at this point nextStart should == sourceCoord.assStart] 796 Integer desiredOutputEnd = mapping.getTargetCoordinates().getEnd() - (Math.abs(mapping.getSourceCoordinates().getStart() - nextStart)); 797 798 799 //this is the assEnd relative to the direction that we are walking 800 //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range 801 //in this case this will be going UPSTREAM in the mapped component 802 //Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() - (Math.abs(assEnd - mapping.getSourceCoordinates().getEnd() )); 803 Integer desiredOutputStart = desiredOutputEnd - (Math.abs(end - nextStart )); 804 805 806 // if we are attempting to walk beyond the extent of this mapping... 807 if (desiredOutputStart < mapping.getTargetCoordinates().getStart()) { 808 desiredOutputStart = mapping.getTargetCoordinates().getStart(); 809 } else { 810 //this will complete the sequence 811 } 812 813 nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1; 814 815 DADNASequence seq = (DADNASequence) mapping.getTarget(); 816 817 sb.append(seq.getReverseComplementSequenceAsString(desiredOutputStart, desiredOutputEnd)); 818 819 } 820 } 821 } 822 return pad(sb,upstream,downstream); 823 } 824 825 /** 826 * Private method to pad a string representation of a sequence with given 827 * numbers of upstream and downstream unknown bases. 828 * @param seq String: the sequence top be padded. 829 * @param upstream int: the number of upstream unknown bases to be prepended. 830 * @param downstream int: the number of downstream unknown bases to be appended. 831 */ 832 private String pad(StringBuilder seq, int upstream, int downstream) { 833 seq.insert(0, GapSequence.getGapString(upstream)).append(GapSequence.getGapString(downstream)); 834 return seq.toString(); 835 } 836 837}