001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.model; 023 024import java.io.Serializable; 025import java.util.TreeSet; 026 027/** 028 * Extension of a TreeSet for holding COordinates ordered using their Comparator 029 * which orders Coordinate objects by the plus strand start, or the plus strand 030 * end if the starts are identical. 031 * Utility methods for this set take account of the DNA coordinate dogma that 032 * there is no base 0 (ZERO) (i.e. ... -3,-2,-1,+1,+2,+3...). 033 * 034 */ 035public class CoordinateSet extends TreeSet<Coordinate> implements Serializable { 036 037 /** 038 * Public Constructor for a new Empty instance. 039 */ 040 public CoordinateSet() { 041 } 042 043 /** 044 * Returns the very first position of the first Coordinate in this CoordinateSet. 045 * @return Integer 046 */ 047 public Integer getStart() { 048 Integer out = null; 049 050 if ( !this.isEmpty()) { 051 out = this.first().getStart(); 052 } 053 054 return out; 055 } 056 057 /** 058 * Returns the very last position of the last Coordinate in this CoordinateSet. 059 * @return Integer 060 */ 061 public Integer getEnd() { 062 Integer out = null; 063 064 if ( !this.isEmpty()) { 065 066 out = this.last().getEnd(); 067 068 //this check is incase the last Coorinate object does not have the 069 //highest end 070 for (Coordinate c : this) { 071 if (c.getEnd()>out) { 072 out = c.getEnd(); 073 } 074 } 075 } 076 return out; 077 } 078 079 /** 080 * Returns a Coordinate representing the extent or range of this CoordinateSet 081 * @return Coordinate extent 082 */ 083 public Coordinate getRange() { 084 Coordinate out = null; 085 if ( !this.isEmpty()) { 086 087 out = new Coordinate(this.getStart(),this.getEnd() ,1); 088 089 } 090 return out; 091 } 092 093 /** 094 * Returns the length of the extent/range of this CoordinateSet (accounting for 095 * any span of the absent ZERO). 096 * @return Integer length 097 */ 098 public Integer getLength() { 099 return this.getRange()!=null?this.getRange().getLength():0; 100 } 101 102 /** 103 * Returns a CoordinateSet containing Coordinates for all of the gaps in this 104 * CoordinateSet. 105 * @todo may only be ok for positive coordinates that don't span zero! 106 * @return CoordinateSet of gap Coordinates 107 */ 108 public CoordinateSet getGaps(){ 109 CoordinateSet gaps = new CoordinateSet(); 110 111 if ( this.isEmpty() || this.getEnd() == null || this.getStart()==null ){ 112 return gaps; 113 } 114 115 //we search allong the set for gaps, without taking acount of the absence 116 //of ZERO until we come to make the gap coordinates - when we must shift left or right 117 //to omit a ZERO 118 119 Integer rangeEnd = this.getEnd(); 120 Integer currentPosition = this.getStart(); 121 122 for (Coordinate c : this) { 123 124 if (c.getStart()==null || c.getEnd()==null) { 125 continue; 126 } 127 128 Integer s = c.getStart(); 129 Integer e = c.getEnd(); 130 131 //if we're already past the extent of this region 132 if (currentPosition>=e) { 133 continue; 134 } 135 136 if (s>currentPosition) { 137 138 //ignore a gap just between -1 and +1 139 if (currentPosition == 0 && s==1) { 140 141 } 142 143 //do the hacking to omit point ZERO 144 else { 145 Coordinate newC =null; 146 if (currentPosition == 0) { 147 newC = new Coordinate(1, s-1, 1); 148 } else if (s==1) { 149 newC = new Coordinate(currentPosition, -1, 1); 150 } 151 //no hacking necessary 152 else { 153 newC = new Coordinate(currentPosition, s-1, 1); 154 } 155 gaps.add(newC); 156 } 157 } 158 159 if (e>=rangeEnd) { 160 break; 161 } 162 163 164 currentPosition = e+1; 165 } 166 return gaps; 167 } 168 169 /** 170 * Returns whether a test Coordinate is contained in this CoordinateSet range WITHOUT any gaps. 171 * @param test 172 * @return Boolean 173 */ 174 public Boolean containsCoordinateWithoutGaps(Coordinate test) { 175 Boolean out = false; 176 177 if (test.getStart()==null || test.getEnd()==null) { 178 return false; 179 } 180 181 Integer testBegin = test.getStart(); 182 Integer testEnd = test.getEnd(); 183 184 if (testBegin<this.getStart() || testEnd>this.getEnd()) { 185 out = false; 186 } else if (this.getGaps().isEmpty()) { 187 out = true; 188 } else { 189 190 out = true; 191 for(Coordinate c: this.getGaps()) { 192 if(test.overlaps(c)) { 193 out = false; 194 break; 195 } 196 } 197 198 } 199 return out; 200 } 201 202 /** 203 * Returns the CoordinateSet (ordered set of Coordinates) that define regions 204 * of the test Coordinate not represented (covered) in this CoordinateSet. 205 * Hopefully this handles the horror of test ranges spanning the non-existent zero point! 206 * @param test 207 * @return CoordinateSet 208 */ 209 public CoordinateSet getUncoveredRegions(Coordinate test) { 210 211 //not sure if this is the correct return for an invalid coordinate 212 if (test.getStart()==null || test.getEnd()==null) { 213 return null; 214 } 215 216 CoordinateSet gaps = new CoordinateSet(); 217 218 if (this.isEmpty()) { 219 gaps.add(test); 220 return gaps; 221 } 222 223 if (this.containsCoordinateWithoutGaps(test)) { 224 //the empty set 225 return gaps; 226 } 227 228 if (!this.getRange().overlaps(test)) { 229 gaps.add(test); 230 return gaps; 231 } 232 233 Integer testBegin = test.getStart(); 234 Integer testEnd = test.getEnd(); 235 Integer thisBegin = this.getStart(); 236 Integer thisEnd = this.getEnd(); 237 238 CoordinateSet thisGaps = this.getGaps(); 239 240 //overlap at start 241 if (testBegin<thisBegin) { 242 Coordinate newC = null; 243 if (thisBegin !=1) { 244 newC = new Coordinate(testBegin, thisBegin-1, 1); 245 } else { 246 newC = new Coordinate(testBegin, -1, 1); 247 } 248 gaps.add(newC); 249 } 250 251 //overlap at end 252 if (testEnd>thisEnd) { 253 Coordinate newC2 = null; 254 if (thisEnd!=-1 ) { 255 newC2 = new Coordinate(thisEnd+1, testEnd, 1); 256 } else { 257 newC2 = new Coordinate(1, testEnd, 1); 258 } 259 gaps.add(newC2); 260 } 261 262 for (Coordinate cd : thisGaps) { 263 264 //this gap completely contains the test Coordinate 265 if (test.liesWithinCoordinate(cd)) { 266 gaps.add(test); 267 break; 268 } 269 270 // this gap lies completely within the range 271 else if (cd.liesWithinCoordinate(test)) { 272 gaps.add(cd); 273 } 274 275 else if (cd.overlaps(test)) { 276 277 gaps.add(cd.getOverlap(test)); 278 } 279 280 } 281 282 return gaps; 283 } 284 285}