001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.datasourceaware.core;
023
024import java.util.Iterator;
025import java.util.LinkedList;
026import org.biojava3.core.sequence.DNASequence;
027import uk.ac.roslin.ensembl.exception.DAOException;
028import uk.ac.roslin.ensembl.exception.RangeException;
029import uk.ac.roslin.ensembl.model.Coordinate;
030import uk.ac.roslin.ensembl.model.Mapping;
031import uk.ac.roslin.ensembl.model.MappingSet;
032import uk.ac.roslin.ensembl.model.core.AssembledDNASequence;
033import uk.ac.roslin.ensembl.model.core.Assembly;
034import uk.ac.roslin.ensembl.model.core.Chromosome;
035
036/**
037 *
038 * @author paterson
039 */
040public class DAAssembly implements Assembly {
041
042    DAAssembledDNASequence parent = null;
043    protected MappingSet componentMappings = null;
044    protected MappingSet pARMappings = null;
045    protected MappingSet stitchedComponentMappings = null;
046    protected Integer assemblyStart = null;
047    protected Integer assemblyStop = null;
048    protected boolean hasPAR = false;
049
050    public static DAAssembly getAssembly(DAAssembledDNASequence parent) {
051        return new DAAssembly(parent);
052    }
053
054    public static DAAssembly getAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) {
055        return new DAAssembly(parent, start, stop);
056    }
057    public DAAssembly() {
058    }
059
060    /**
061     * Private constructor that sets the assembly to hold an AssembledDNASequence. If this 
062     * parent sequence is a chromosome, 'hasPAR' indicates whether it is asserted to be 
063     * a sex chromosome with a pseudoautosomal region (e.g. mammalian 'Y'); 
064     * @param parent 
065     */
066    private DAAssembly(DAAssembledDNASequence parent) {
067        this.parent = parent;
068        if (parent instanceof Chromosome) {
069            hasPAR = ((Chromosome) parent).isPAR();
070        }
071    }
072
073    /**
074     * Private constructor that sets the assembly to hold an AssembledDNASequence, 
075     * and sets the desired range of the assembly. If this 
076     * parent sequence is a chromosome, 'hasPAR' indicates whether it is asserted to be 
077     * a sex chromosome with a pseudoautosomal region (e.g. mammalian 'Y'); 
078     * @param parent 
079     */    
080    private DAAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) {
081        this.parent = parent;
082        if (parent instanceof Chromosome) {
083            hasPAR = ((Chromosome) parent).isPAR();
084        }
085        this.assemblyStart = start;
086        this.assemblyStop = stop;
087    }
088
089    /**
090     * Returns the DAAssembledDNASequence that this assembly holds (its 'Parent'). 
091     */
092    @Override
093    public DAAssembledDNASequence getParent() {
094        return parent;
095    }
096
097    /**
098     * Sets the DAAssembledDNASequence that this assembly holds (its 'Parent'). 
099     */
100    @Override
101    public void setParent(AssembledDNASequence parent) {
102        this.parent = (DAAssembledDNASequence) parent;
103    }
104
105    @Override
106    public Integer getAssemblyStart() {
107        if (assemblyStart == null) {
108            try {
109                assemblyStart = (parent.getBioBegin() != null) ? parent.getBioBegin() : 1;
110            } catch (Exception e) {
111            }
112        }
113        if (assemblyStart == null) {
114            assemblyStart = 1;
115        }
116        return assemblyStart;
117    }
118
119    @Override
120    public void setAssemblyStart(Integer assemblyStart) {
121        this.assemblyStart = assemblyStart;
122    }
123
124    @Override
125    public Integer getAssemblyStop() {
126        if (assemblyStop != null || parent == null) {
127            return assemblyStop;
128        }
129        assemblyStop = parent.getBioEnd();
130        return assemblyStop;
131    }
132
133    @Override
134    public void setAssemblyStop(Integer assemblyStop) {
135        this.assemblyStop = assemblyStop;
136    }
137
138    /**
139     * Private DA method to get all of the mappings of all of the DNASequences that make up this
140     * assembly. These are added to a local MappingSet 'componentMappings'. If the assembly
141     * is of a Chromosome with a PAR, the mappings from the cognate pseudoautosome are
142     * also fetched and added to 'componentMappings'.
143     * @throws DAOException 
144     */
145    private void setMappings() throws DAOException {
146        componentMappings = new MappingSet();
147        try {
148            componentMappings = (MappingSet) parent.getDaoFactory().getAssemblyDAO().getComponentMappingsByStartStop(parent, this.getAssemblyStart(), this.getAssemblyStop());
149        } catch (Exception e) {
150            throw new DAOException("Failed to create the stitched mappings for a DAAssembly", e);
151        }
152
153        if (parent instanceof Chromosome && hasPAR) {
154
155            try {
156                pARMappings = (MappingSet) parent.getDaoFactory().getAssemblyDAO().getPARMappingsByStartStop((Chromosome) parent, this.getAssemblyStart(), this.getAssemblyStop());
157            } catch (Exception e) {
158                throw new DAOException("Failed to fetch all the assembly  mappings for a PseudoAutosomal Region of a DAChromosome", e);
159            }
160
161            for (Mapping m : pARMappings) {
162
163                //remove any mappings in component mappings
164
165                //i dont think i have to do this - there should be no mappings for the PAR regions
166
167                //and then add the parMappings
168
169                componentMappings.add(m);
170
171            }
172
173
174        }
175
176    }
177
178    /**
179     * Returns the MappingSet 'componentMappings' of all of the  mappings of all of the DNASequences that make up this
180     * assembly. Triggers a lazy load if not already set.
181     * @throws DAOException 
182     */
183    @Override
184    public MappingSet getMappings() throws DAOException {
185        if (this.componentMappings == null) {
186            this.setMappings();
187        }
188        return this.componentMappings;
189    }
190
191    /**
192     * Returns the MappingSet 'stitchedComponentMappings' of all of the  mappings 
193     * of all of the DNASequences that make up this assembly, that have been 'edited', 
194     * i.e. stitched together into a single continuous gap free assembly. 
195     * Triggers  method 'stitchComponents' if not yet set .
196     * @throws DAOException 
197     */
198    @Override
199    public MappingSet getStitchedMappings() throws DAOException {
200        if (this.stitchedComponentMappings == null) {
201            this.stitchedComponentMappings = this.stitchComponents();
202        }
203        return this.stitchedComponentMappings;
204    }
205
206    /**
207     * For a given range, returns a subset of the MappingSet 'stitchedComponentMappings' of all of the  mappings 
208     * of all of the DNASequences that make up this assembly, that have been 'edited', 
209     * i.e. stitched together into a single continuous gap free assembly. 
210     * Triggers  method 'stitchComponents' if not yet set .
211     * @throws DAOException 
212     */    
213    public MappingSet getStitchedMappings(Integer start, Integer stop) throws DAOException, RangeException {
214        
215        if (start== null|| start==0 || stop == null ||  stop ==0) {
216            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."
217                    +" Use -1 for one base upstream or +1 for the first base.");
218        }  
219
220        MappingSet out = new MappingSet();
221        Integer begin = start;
222        Integer end = stop;
223        if (begin>end) {
224            begin = stop;
225            end = start;
226        } 
227        
228        if (parent==null) {
229            return out;
230        }
231        
232        if (begin < this.getAssemblyStart() ||  end > this.getAssemblyStop()) {
233            throw new RangeException("The specified range is outwith the extent of the Assembly.");
234        }  
235
236        if (this.getStitchedMappings().isEmpty()) {
237            return out;
238        }
239
240        for (Mapping mapping : this.getStitchedMappings()) {
241
242
243            if (mapping.getSourceCoordinates().getStart() > end) {
244                break;
245            } else if (mapping.getSourceCoordinates().getEnd() < begin) {
246                continue;
247            } else {
248                Integer upstream = mapping.getSourceCoordinates().getStart() - begin;
249                Integer downstream = mapping.getSourceCoordinates().getEnd() - end;
250
251                if (upstream >= 0 && downstream <= 0) {
252                    out.add((Mapping) mapping);
253                    continue;
254                }
255                Mapping newMapping = new Mapping();
256                newMapping.setSource(mapping.getSource());
257                newMapping.setSourceCoordinates(mapping.getSourceCoordinates());
258                newMapping.setTarget(mapping.getTarget());
259                newMapping.setTargetCoordinates(mapping.getTargetCoordinates());
260
261                if (upstream < 0) {
262                    newMapping.getTargetCoordinates().setStart(mapping.getTargetCoordinates().getStart() - upstream);
263                    newMapping.getSourceCoordinates().setStart(begin);
264                }
265                if (downstream > 0) {
266                    newMapping.getTargetCoordinates().setEnd(mapping.getTargetCoordinates().getEnd() - downstream);
267                    newMapping.getSourceCoordinates().setEnd(end);
268                }
269                out.add(newMapping);
270                continue;
271            }
272        }
273
274        return out;
275    }
276
277    /**
278     * Protected method to join together (stitch) all of the component sub-sequences 
279     * of the assembly, together with GapSequences to form a single continuous
280     * ungapped assembly, held in the MappingSet 'stitchedComponentMappings';
281     * @throws DAOException 
282     */
283    protected MappingSet stitchComponents() throws DAOException {
284
285        MappingSet out = new MappingSet();
286        Integer assStart = this.getAssemblyStart();
287        Integer assEnd = this.getAssemblyStop();
288        if (assStart ==null || assEnd == null || this.getMappings()==null || this.getMappings().isEmpty()) {
289            return out;
290        }
291        Integer currentPosition = assStart;
292//        //the no worry scenario - probably this is all that i want to do
293//        if (currentPosition > 0) {
294
295            for (Mapping mapping : this.getMappings()) {
296
297                Integer sourceStart = mapping.getSourceCoordinates().getStart();
298                Integer sourceEnd = mapping.getSourceCoordinates().getEnd();
299                DADNASequence source = (DADNASequence) mapping.getSource();
300                DADNASequence target = (DADNASequence) mapping.getTarget();
301                Integer distanceLeft = assEnd - currentPosition;
302
303                //skip ones completely before the desired start
304                if (sourceStart > assEnd || sourceEnd < currentPosition) {
305                    continue;
306                } else if (sourceStart <= currentPosition) {
307
308                    //we overlap the start
309                    Integer overlap = currentPosition - sourceStart;
310                    Mapping m = new Mapping();
311                    m.setSource(source);
312                    m.setTarget(target);
313
314                    //if we also overlap the end
315                    if (sourceEnd > assEnd) {
316                        m.setTargetCoordinates(
317                                mapping.getTargetCoordinates().getStart() + overlap,
318                                mapping.getTargetCoordinates().getStart() + overlap + distanceLeft,
319                                mapping.getTargetCoordinates().getStrand());
320                        m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND);
321                        out.add(m);
322                        //we're done
323                        currentPosition = assEnd + 1;
324                        break;
325                    } else {
326                        m.setTargetCoordinates(
327                                mapping.getTargetCoordinates().getStart() + overlap,
328                                mapping.getTargetCoordinates().getEnd(),
329                                mapping.getTargetCoordinates().getStrand());
330                        m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
331                        out.add(m);
332                        if (sourceEnd == assEnd) {
333                            //we're done
334                            currentPosition = assEnd + 1;
335                            break;
336                        } else {
337                            currentPosition = sourceEnd + 1;
338                            continue;
339                        }
340                    }
341
342
343
344                } else if (currentPosition < sourceStart) {
345
346                    //plug a gap
347                    Integer gapLength = sourceStart - currentPosition;
348                    Mapping m = new Mapping();
349                    m.setSource(source);
350                    m.setTarget(GapSequence.makeGap(gapLength));
351                    m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
352                    m.setSourceCoordinates(currentPosition, currentPosition + gapLength - 1, Coordinate.Strand.FORWARD_STRAND);
353                    out.add(m);
354                    currentPosition = currentPosition + gapLength;
355
356
357                    //if we are within bounds
358                    if (assEnd >= sourceEnd) {
359                        Mapping m1 = new Mapping();
360                        m1.setSource(source);
361                        m1.setTarget(target);
362                        m1.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
363                        m1.setTargetCoordinates(mapping.getTargetCoordinates());
364                        out.add(m1);
365                        if (assEnd == sourceEnd) {
366                            // we're done
367                            currentPosition = assEnd + 1;
368                            break;
369                        } else {
370                            currentPosition = sourceEnd + 1;
371                            continue;
372                        }
373                    } //if we run past the desired assEnd
374                    else {
375                        Mapping m2 = new Mapping();
376                        m2.setSource(source);
377                        m2.setTarget(target);
378                        m2.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND);
379                        m2.setTargetCoordinates(1, assEnd - sourceStart + 1, mapping.getTargetCoordinates().getStrand());
380                        out.add(m2);
381                        //we're done
382                        currentPosition = assEnd + 1;
383                        break;
384                    }
385
386                }
387
388            }
389
390            if (currentPosition <= assEnd) {
391                //plug a terminal gap
392                Integer gapLength = assEnd - currentPosition + 1;
393                Mapping m = new Mapping();
394                m.setSource(parent);
395                m.setTarget(GapSequence.makeGap(gapLength));
396                m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
397                m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND);
398                out.add(m);
399
400            }
401
402//        } 
403//        //if  we have to worry about zero
404//        //TODO not sure that this is working yet
405//        else {
406//            for (Mapping mapping : this.getMappings()) {
407//
408//                Integer sourceStart = mapping.getSourceCoordinates().getStart();
409//                Integer sourceEnd = mapping.getSourceCoordinates().getEnd();
410//                DADNASequence source = (DADNASequence) mapping.getSource();
411//                DADNASequence target = (DADNASequence) mapping.getTarget();
412//                Integer distanceLeft = assEnd - currentPosition;
413//
414//                //skip ones completely before the desired assStart
415//                if (sourceStart > assEnd || sourceEnd < currentPosition) {
416//                    continue;
417//                } else if (sourceStart <= currentPosition) {
418//
419//                    //we overlap the assStart
420//                    Integer overlap = currentPosition - sourceStart;
421//                    Mapping m = new Mapping();
422//                    m.setSource(source);
423//                    m.setTarget(target);
424//
425//                    //if we also overlap the assEnd
426//                    if (sourceEnd > assEnd) {
427//                        m.setTargetCoordinates(
428//                                mapping.getTargetCoordinates().getStart() + overlap,
429//                                mapping.getTargetCoordinates().getStart() + overlap + distanceLeft,
430//                                mapping.getTargetCoordinates().getStrand());
431//                        m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND);
432//                        out.add(m);
433//                        //we're done
434//                        currentPosition = assEnd + 1;
435//                        break;
436//                    } else {
437//                        m.setTargetCoordinates(
438//                                mapping.getTargetCoordinates().getStart() + overlap,
439//                                mapping.getTargetCoordinates().getEnd(),
440//                                mapping.getTargetCoordinates().getStrand());
441//                        m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
442//                        out.add(m);
443//                        if (sourceEnd == assEnd) {
444//                            //we're done
445//                            currentPosition = assEnd + 1;
446//                            break;
447//                        } else {
448//                            currentPosition = sourceEnd + 1;
449//                            //we dont use ZERO
450//                            if (currentPosition ==0) {
451//                                currentPosition = 1;
452//                            }
453//                            continue;
454//                        }
455//                    }
456//
457//
458//
459//                } else if (currentPosition < sourceStart) {
460//
461//                    //plug a gap
462//                    Integer gapLength = 0;
463//                    if ( sourceStart>0 && currentPosition <0) {
464//                        //the length is shorter where it spans the -1/+1 junction
465//                        gapLength = sourceStart - currentPosition-1;
466//                    } else {
467//                        gapLength = sourceStart - currentPosition;
468//                    }
469//                    
470//                    Mapping m = new Mapping();
471//                    m.setSource(source);
472//                    m.setTarget(GapSequence.makeGap(gapLength));
473//                    m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
474//                    //the gap length should be correct here so that currentPosition + gapLength - 1 will never = 0
475//                    m.setSourceCoordinates(currentPosition, currentPosition + gapLength - 1, Coordinate.Strand.FORWARD_STRAND);
476//                    out.add(m);
477//                    currentPosition = currentPosition + gapLength;
478//                    
479//                    //we dont use ZERO                    
480//                    if (currentPosition==0) {
481//                        currentPosition=1;
482//                    }
483//
484//                    //if we are within bounds
485//                    if (assEnd >= sourceEnd) {
486//                        Mapping m1 = new Mapping();
487//                        m1.setSource(source);
488//                        m1.setTarget(target);
489//                        m1.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
490//                        m1.setTargetCoordinates(mapping.getTargetCoordinates());
491//                        out.add(m1);
492//                        if (assEnd == sourceEnd) {
493//                            // we're done
494//                            currentPosition = assEnd + 1;
495//                            break;
496//                        } else {
497//                            currentPosition = sourceEnd + 1;
498//                            continue;
499//                        }
500//                    } //if we run past the desired assEnd
501//                    else {
502//                        Mapping m2 = new Mapping();
503//                        m2.setSource(source);
504//                        m2.setTarget(target);
505//                        m2.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND);
506//                        //surely i cant get a ZERO FOR END here!
507//                        m2.setTargetCoordinates(1, assEnd - sourceStart + 1, mapping.getTargetCoordinates().getStrand());
508//                        out.add(m2);
509//                        //we're done
510//                        currentPosition = assEnd + 1;
511//                        break;
512//                    }
513//
514//                }
515//
516//            }
517//
518//            if (currentPosition <= assEnd) {
519//                //plug a terminal gap
520//                Integer gapLength = 0;
521//                if (currentPosition<0 && assEnd>0 ) {
522//                    //if we are spanning the -1/+1 junction the distance is shorter!
523//                    gapLength = assEnd - currentPosition;
524//                } else {
525//                    gapLength = assEnd - currentPosition + 1;
526//                }
527//                Mapping m = new Mapping();
528//                m.setSource(parent);
529//                m.setTarget(GapSequence.makeGap(gapLength));
530//                m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
531//                m.setSourceCoordinates(currentPosition, assEnd, Coordinate.Strand.FORWARD_STRAND);
532//                out.add(m);
533//
534//            }
535//
536//            
537//        }
538
539        return out;
540    }
541
542    /**
543     * Returns a String representation of the reverse complement of the assembly sequence for the given range.
544     * If there is no sequence for this assembly null is returned.
545     * If the given range is greater than the assembly coordinates, a RangeException
546     * is thrown (try using getPaddedReverseComplementSequenceAsString(int,int) instead).
547     * @param start
548     * @param stop
549     * @throws RangeException
550     * @throws DAOException 
551     */
552    public String getReverseComplementSequenceAsString(Integer start, Integer stop) throws RangeException, DAOException {
553        String seq = this.getSequenceAsString(start, stop);
554        if (seq==null) {
555            return null;
556        }
557        DNASequence temp = new DNASequence(seq);
558        return temp.getReverseComplement().getSequenceAsString();
559    }
560    
561    /**
562     * Returns a String representation of the reverse complement of theassembly sequence for the given range.
563     * If there is no sequence for this assembly null is returned.
564     * If the given range is greater than the assembly coordinates, the result is padded
565     * upstream  and/or downstream. 
566     * @param start
567     * @param stop
568     * @throws RangeException
569     * @throws DAOException 
570     */
571    public String getPaddedReverseComplementSequenceAsString(Integer start, Integer stop) throws DAOException {
572        String seq = this.getPaddedSequenceAsString(start, stop);
573        if (seq==null) {
574            return null;
575        }        
576        DNASequence temp = new DNASequence(seq);
577        return temp.getReverseComplement().getSequenceAsString();
578    }
579
580    /**
581     * Returns a String representation of the assembly sequence for the given range.
582     * If there is no sequence for this assembly null is returned.
583     * If the given range is greater than the assembly coordinates, a RangeException
584     * is thrown (try using getPaddedSequenceAsString(int,int) instead).
585     * @param start
586     * @param stop
587     * @throws RangeException
588     * @throws DAOException 
589     */
590    public String getSequenceAsString(Integer start, Integer stop) throws RangeException, DAOException {
591        if (this.parent == null ||  this.parent.getLength() == 0 || this.getAssemblyStop() == null || this.getAssemblyStart() == null) {
592            return null;
593        }        
594        if (start== null|| start==0 || stop == null ||  stop ==0) {
595            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."
596                    +" Use -1 for one base upstream or +1 for the first base.");
597        }  
598
599        StringBuilder sb = new StringBuilder();
600        String out = "";
601
602        //range of sequence I want
603        Integer begin = start;
604        Integer end = stop;
605        if (begin>end) {
606            begin = stop;
607            end = start;
608        }  
609
610        //make sure range is not greater than the assembly extent
611        if (  end > this.getAssemblyStop()) {
612            throw new RangeException("Requested range greater than assembly range.");
613        }
614        if (  begin < this.getAssemblyStart()) {
615            throw new RangeException("Requested range greater than assembly range.");
616        }
617
618        Integer nextStart = begin;
619
620        for (Mapping mapping : this.getStitchedMappings()) {
621
622            if (nextStart > end) {
623                //we're done
624                break;
625            }
626
627            //////////////////////////////////////
628            if (mapping.getSourceCoordinates().getStart() > end) {
629                //we're past the end - so quit - shouldn't get here cos of previous test
630                break;
631            } else if (mapping.getSourceCoordinates().getEnd() < nextStart) {
632                //we're not yet at the position we want so skip
633                continue;
634            } else {
635
636                //this mapping is at least partially in range
637
638                if (mapping.getTargetCoordinates().getStrand().equals(uk.ac.roslin.ensembl.model.Coordinate.Strand.FORWARD_STRAND)) {
639
640                    // desiredStart should be == targetCoord.start 
641                    // because we are walking along the assembled components that have had gaps inserted.
642                    //[i.e. at this point nextStart should == sourceCoord.start]
643                    Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() + (nextStart - mapping.getSourceCoordinates().getStart());
644                    
645                    //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range
646                    Integer desiredOutputEnd = mapping.getTargetCoordinates().getStart() + (end - mapping.getSourceCoordinates().getStart());
647
648                    //the assEnd point that we are looking for may be beyond the assEnd of this source->target compoment mapping 
649                    //so obviously we dont want to read further 
650                    if (desiredOutputEnd > mapping.getTargetCoordinates().getEnd()) {
651                        desiredOutputEnd = mapping.getTargetCoordinates().getEnd();
652                    } else {
653                        //this will complete the sequence
654                    }
655
656                    nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;
657
658                    DADNASequence seq = (DADNASequence) mapping.getTarget();
659
660                    sb.append(seq.getSequenceAsString(
661                            desiredOutputStart, desiredOutputEnd));
662                } else {
663
664                   
665                    //this is the assStart relative to the direction that we are walking - so should be correct
666                    // because we are walking along the source mappings and the assembled components that have had gaps inserted.
667                    // therefore the desired assEnd should be == targetCoord.assEnd 
668                    //[i.e. at this point nextStart should == sourceCoord.assStart]
669                    Integer desiredOutputEnd = mapping.getTargetCoordinates().getEnd() - (Math.abs(mapping.getSourceCoordinates().getStart() - nextStart));
670                    
671                    
672                    //this is the assEnd relative to the direction that we are walking
673                    //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range
674                    //in this case this will be going UPSTREAM in the mapped component
675                    //Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() - (Math.abs(assEnd - mapping.getSourceCoordinates().getEnd() ));
676                    Integer desiredOutputStart =  desiredOutputEnd - (Math.abs(end - nextStart ));
677                    
678
679                    // if we are attempting to walk beyond the extent of this mapping...
680                    if (desiredOutputStart < mapping.getTargetCoordinates().getStart()) {
681                        desiredOutputStart = mapping.getTargetCoordinates().getStart();
682                    } else {
683                        //this will complete the sequence
684                    }
685
686                    nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;
687
688                    DADNASequence seq = (DADNASequence) mapping.getTarget();
689
690                    sb.append(seq.getReverseComplementSequenceAsString(desiredOutputStart, desiredOutputEnd));
691
692                }
693            }
694        }
695        return sb.toString();
696    }
697    
698    /**
699     * Returns a String representation of the assembly sequence for the given range.
700     * If there is no sequence for this assembly null is returned.
701     * If the given range is greater than the assembly coordinates, the result is padded
702     * upstream  and/or downstream. 
703     * @param start
704     * @param stop
705     * @return String representation of the Assembly Sequence
706     * @throws DAOException 
707     */
708    public String getPaddedSequenceAsString(Integer start, Integer stop) throws DAOException {
709        if (this.parent == null ||  this.parent.getLength() == 0 ) {
710            return null;
711        }
712        if (start== null|| start==0 || stop == null ||  stop ==0) {
713            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."
714                    +" Use -1 for one base upstream or +1 for the first base.");
715        }  
716       
717        StringBuilder sb = new StringBuilder();
718        String out = "";
719        int downstream = 0, upstream = 0;
720
721        //range of sequence I want
722        Integer begin = start;
723        Integer end = stop;
724        if (begin>end) {
725            begin = stop;
726            end = start;
727        }        
728
729        //make sure range is not greater than the assembly extent
730        if ( this.getAssemblyStop() == null || end > this.getAssemblyStop()) {
731            //calculate Downstream padding & reset assEnd
732            downstream = end - this.getAssemblyStop();
733            end = this.getAssemblyStop();            
734        }
735        if ( this.getAssemblyStart() == null || begin < this.getAssemblyStart()) {
736           //calculate upstream padding & reset begin
737            if (begin<0) {
738               upstream = this.getAssemblyStart()-(begin+1);
739            } else {
740               upstream = this.getAssemblyStart()-begin; 
741            }
742            begin = this.getAssemblyStart();
743        }
744
745        Integer nextStart = begin;
746
747        for (Mapping mapping : this.getStitchedMappings()) {
748
749            if (nextStart > end) {
750                //we're done
751                break;
752            }
753
754            //////////////////////////////////////
755            if (mapping.getSourceCoordinates().getStart() > end) {
756                //we're past the assEnd - so quit - shouldn't get here cos of previous test
757                break;
758            } else if (mapping.getSourceCoordinates().getEnd() < nextStart) {
759                //we're not yet at the position we want so skip
760                continue;
761            } else {
762
763                //this mapping is at least partially in range
764
765                if (mapping.getTargetCoordinates().getStrand().equals(uk.ac.roslin.ensembl.model.Coordinate.Strand.FORWARD_STRAND)) {
766
767                    // desiredStart should be == targetCoord.assStart 
768                    // because we are walking along the assembled components that have had gaps inserted.
769                    //[i.e. at this point nextStart should == sourceCoord.assStart]
770                    Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() + (nextStart - mapping.getSourceCoordinates().getStart());
771                    
772                    //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range
773                    Integer desiredOutputEnd = mapping.getTargetCoordinates().getStart() + (end - mapping.getSourceCoordinates().getStart());
774
775                    //the assEnd point that we are looking for may be beyond the assEnd of this source->target compoment mapping 
776                    //so obviously we dont want to read further 
777                    if (desiredOutputEnd > mapping.getTargetCoordinates().getEnd()) {
778                        desiredOutputEnd = mapping.getTargetCoordinates().getEnd();
779                    } else {
780                        //this will complete the sequence
781                    }
782
783                    nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;
784
785                    DADNASequence seq = (DADNASequence) mapping.getTarget();
786
787                    sb.append(seq.getSequenceAsString(
788                            desiredOutputStart, desiredOutputEnd));
789                } else {
790
791                   
792                    //this is the assStart relative to the direction that we are walking - so should be correct
793                    // because we are walking along the source mappings and the assembled components that have had gaps inserted.
794                    // therefore the desired assEnd should be == targetCoord.assEnd 
795                    //[i.e. at this point nextStart should == sourceCoord.assStart]
796                    Integer desiredOutputEnd = mapping.getTargetCoordinates().getEnd() - (Math.abs(mapping.getSourceCoordinates().getStart() - nextStart));
797                    
798                    
799                    //this is the assEnd relative to the direction that we are walking
800                    //we want walk along this mapped component as far as we can till our desired stop point, or the assEnd of the mapped component range
801                    //in this case this will be going UPSTREAM in the mapped component
802                    //Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() - (Math.abs(assEnd - mapping.getSourceCoordinates().getEnd() ));
803                    Integer desiredOutputStart =  desiredOutputEnd - (Math.abs(end - nextStart ));
804                    
805
806                    // if we are attempting to walk beyond the extent of this mapping...
807                    if (desiredOutputStart < mapping.getTargetCoordinates().getStart()) {
808                        desiredOutputStart = mapping.getTargetCoordinates().getStart();
809                    } else {
810                        //this will complete the sequence
811                    }
812
813                    nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;
814
815                    DADNASequence seq = (DADNASequence) mapping.getTarget();
816
817                    sb.append(seq.getReverseComplementSequenceAsString(desiredOutputStart, desiredOutputEnd));
818
819                }
820            }
821        }
822        return pad(sb,upstream,downstream);
823    }
824    
825    /**
826     * Private method to pad a string representation of a sequence with given
827     * numbers of upstream and downstream unknown bases.
828     * @param seq String: the sequence top be padded.
829     * @param upstream int: the number of upstream unknown bases to be prepended.
830     * @param downstream int: the number of downstream unknown bases to be appended.
831     */
832    private String pad(StringBuilder seq, int upstream, int downstream) {
833        seq.insert(0, GapSequence.getGapString(upstream)).append(GapSequence.getGapString(downstream));
834        return seq.toString();
835    }    
836
837}