001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.model;
023
024import java.io.Serializable;
025import java.util.TreeSet;
026
027/**
028 * Extension of a TreeSet for holding COordinates ordered using their Comparator 
029 * which orders Coordinate objects by the plus strand start, or the plus strand
030 * end if the starts are identical.
031 * Utility methods for this set take account of the DNA coordinate dogma that
032 * there is no base 0 (ZERO) (i.e. ... -3,-2,-1,+1,+2,+3...).
033 * 
034 */
035public class CoordinateSet extends TreeSet<Coordinate>  implements Serializable {
036
037    /**
038     * Public Constructor for a new Empty instance.
039     */
040    public CoordinateSet() {
041    }
042
043    /**
044     * Returns the very first position of the first Coordinate in this CoordinateSet.
045     * @return Integer
046     */
047    public Integer getStart() {
048        Integer out = null;
049
050        if ( !this.isEmpty()) {
051            out = this.first().getStart();
052        }
053
054        return out;
055    }
056
057    /**
058     * Returns the very last position of the last Coordinate in this CoordinateSet.
059     * @return Integer
060     */    
061    public Integer getEnd() {
062        Integer out = null;
063
064        if ( !this.isEmpty()) {
065
066            out = this.last().getEnd();
067
068            //this check is incase the last Coorinate object does not have the 
069            //highest end
070            for (Coordinate c : this) {
071             if (c.getEnd()>out) {
072                 out = c.getEnd();
073             }
074            }
075        }
076         return out;
077    }
078
079    /**
080     * Returns a Coordinate representing the extent or range of this CoordinateSet
081     * @return Coordinate extent
082     */
083    public Coordinate getRange() {
084         Coordinate out = null;
085         if (  !this.isEmpty()) {
086
087             out = new Coordinate(this.getStart(),this.getEnd() ,1);
088
089         }
090         return out;
091     }
092    
093    /**
094     * Returns the length of the extent/range of this CoordinateSet (accounting for 
095     * any span of the absent ZERO).
096     * @return Integer length
097     */
098    public Integer getLength() {
099        return  this.getRange()!=null?this.getRange().getLength():0;
100    }
101
102    /**
103     * Returns a CoordinateSet containing  Coordinates for all of the gaps in this 
104     * CoordinateSet.
105     * @todo may only be ok for positive coordinates that don't span zero!
106     * @return CoordinateSet of gap Coordinates
107     */
108    public CoordinateSet getGaps(){
109        CoordinateSet gaps = new CoordinateSet();
110        
111        if ( this.isEmpty() || this.getEnd() == null || this.getStart()==null ){
112            return gaps;
113        }
114        
115        //we search allong the set for gaps, without taking acount of the absence 
116        //of ZERO until we come to make the gap coordinates - when we must shift left or right
117        //to omit a ZERO
118        
119        Integer rangeEnd = this.getEnd();
120        Integer currentPosition = this.getStart();
121
122        for (Coordinate c : this) {
123           
124            if (c.getStart()==null || c.getEnd()==null) {
125                continue;
126            }
127
128            Integer s = c.getStart();
129            Integer e = c.getEnd();
130            
131            //if we're already past the extent of this region
132            if (currentPosition>=e) {
133                continue;
134            }
135
136            if (s>currentPosition) {
137                
138                //ignore a gap just between -1 and +1
139                if (currentPosition == 0 && s==1) {
140                    
141                } 
142                
143                //do the hacking to omit point ZERO
144                else {
145                    Coordinate newC =null;
146                    if (currentPosition == 0) {
147                        newC = new Coordinate(1, s-1, 1);
148                    } else if (s==1) {
149                       newC = new Coordinate(currentPosition, -1, 1); 
150                    } 
151                    //no hacking necessary
152                    else {
153                        newC = new Coordinate(currentPosition, s-1, 1);
154                    }    
155                    gaps.add(newC);
156                }
157            }
158
159            if (e>=rangeEnd) {
160                break;
161            }
162
163
164            currentPosition = e+1;
165        }
166        return gaps;
167    }
168
169    /**
170     * Returns whether a test Coordinate is contained in this CoordinateSet range WITHOUT any gaps.
171     * @param test
172     * @return Boolean
173     */
174    public Boolean containsCoordinateWithoutGaps(Coordinate test) {
175        Boolean out = false;
176
177        if (test.getStart()==null || test.getEnd()==null) {
178            return false;
179        }
180        
181        Integer testBegin = test.getStart();
182        Integer testEnd = test.getEnd();
183
184        if (testBegin<this.getStart() || testEnd>this.getEnd()) {
185            out = false;
186        } else if (this.getGaps().isEmpty()) {
187            out = true;
188        } else {
189
190            out = true;
191            for(Coordinate c: this.getGaps()) {
192                if(test.overlaps(c)) {
193                    out = false;
194                    break;
195                }
196            }
197
198        }
199        return out;
200    }
201
202    /**
203     * Returns the CoordinateSet  (ordered set of Coordinates) that define regions 
204     * of the test Coordinate not represented (covered)  in this CoordinateSet.
205     * Hopefully this handles the horror of test ranges spanning the non-existent zero point!
206     * @param test
207     * @return CoordinateSet
208     */
209    public CoordinateSet getUncoveredRegions(Coordinate test) {
210
211        //not sure if this is the correct return for an invalid coordinate
212        if (test.getStart()==null || test.getEnd()==null) {
213            return null;
214        }
215        
216        CoordinateSet gaps = new CoordinateSet();
217
218        if (this.isEmpty()) {
219            gaps.add(test);
220            return gaps;
221        }
222
223        if (this.containsCoordinateWithoutGaps(test)) {
224            //the empty set
225            return gaps;
226        }
227
228        if (!this.getRange().overlaps(test)) {
229            gaps.add(test);
230            return gaps;
231        }
232       
233        Integer testBegin = test.getStart();
234        Integer testEnd = test.getEnd();
235        Integer thisBegin = this.getStart();
236        Integer thisEnd = this.getEnd();
237
238        CoordinateSet thisGaps = this.getGaps();
239
240        //overlap at start
241        if (testBegin<thisBegin) {
242            Coordinate newC = null; 
243            if (thisBegin !=1) {
244                newC = new Coordinate(testBegin, thisBegin-1, 1);
245            } else {
246                newC = new Coordinate(testBegin, -1, 1);
247            }
248            gaps.add(newC);
249        }
250
251        //overlap at end
252        if (testEnd>thisEnd) {
253            Coordinate newC2 = null;
254            if (thisEnd!=-1 ) {
255                    newC2 = new Coordinate(thisEnd+1, testEnd,  1);
256            } else {
257                newC2 = new Coordinate(1, testEnd,  1);
258            }
259            gaps.add(newC2);
260        }
261
262        for (Coordinate cd : thisGaps) {
263
264            //this gap completely contains the test Coordinate
265            if (test.liesWithinCoordinate(cd)) {
266                gaps.add(test);
267                break;
268            }
269
270            // this gap lies completely within the range
271            else if (cd.liesWithinCoordinate(test)) {
272                gaps.add(cd);
273            }
274
275            else if (cd.overlaps(test)) {
276
277                gaps.add(cd.getOverlap(test));
278            }
279
280        }
281
282        return gaps;
283    }
284
285}