001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.datasourceaware.core; 023 024import java.lang.ref.SoftReference; 025import java.util.ArrayList; 026import java.util.Iterator; 027import java.util.LinkedHashMap; 028import java.util.List; 029import org.biojava3.core.exceptions.CompoundNotFoundError; 030import org.biojava3.core.sequence.AccessionID; 031import org.biojava3.core.sequence.DNASequence; 032import org.biojava3.core.sequence.Strand; 033import org.biojava3.core.sequence.compound.NucleotideCompound; 034import org.biojava3.core.sequence.storage.ArrayListSequenceReader; 035import org.biojava3.core.sequence.template.CompoundSet; 036import org.biojava3.core.sequence.template.SequenceMixin; 037import org.biojava3.core.sequence.template.SequenceProxyView; 038import org.biojava3.core.sequence.template.SequenceReader; 039import org.biojava3.core.sequence.template.SequenceView; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042import uk.ac.roslin.ensembl.biojava3.EnsemblDNACompoundSet; 043import uk.ac.roslin.ensembl.biojava3.EnsemblDNASequenceReader; 044import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory; 045import uk.ac.roslin.ensembl.exception.DAOException; 046 047/** 048 * 049 * @author paterson 050 */ 051public class DAEnsemblDNASequenceReader implements EnsemblDNASequenceReader { 052 053 protected String sequence; 054 protected EnsemblDNACompoundSet compoundSet; 055 protected DADNASequence dASequence; 056 protected Integer coordSystemID; 057 058 protected Integer seqRegionID; 059 protected Integer length; 060 protected String name; 061 062 protected DAOCoreFactory factory; 063 protected DADNASequence parent; 064 065 066 067 private LinkedHashMap<SoftReference<Integer>, SoftReference<DNASequence>> cache; 068// private SoftReference<Map<Integer,SoftReference<DNASequence>>> cacheMap; 069// private Map<Integer,SoftReference<DNASequence>> cache; 070 private Integer chunkSize = 250000; 071 private boolean usesCache = false; 072 private boolean nonCacheLazyLoaded = false; 073 private Integer chunks = 0; 074 075 076 private SequenceReader<NucleotideCompound> sequenceReader; 077 078 final static Logger LOGGER = LoggerFactory.getLogger(DAEnsemblDNASequenceReader.class); 079 080 public DAEnsemblDNASequenceReader(){ 081 this.setCompoundSet(EnsemblDNACompoundSet.getDNACompoundSet()); 082 } 083 084 public DAEnsemblDNASequenceReader(Integer coordSystemID, Integer seqRegionID, Integer length, String name, DAOCoreFactory factory){ 085 this.setCompoundSet(EnsemblDNACompoundSet.getDNACompoundSet()); 086 this.setCoordSystemID(coordSystemID); 087 this.setLength(length); 088 if (this.getLengthInteger()>2*chunkSize) { 089 usesCache=true; 090 chunks = (this.getLengthInteger()/chunkSize) -1; 091 cache = new LinkedHashMap<SoftReference<Integer> ,SoftReference<DNASequence>>(); 092 } 093 this.setSeqRegionID(seqRegionID); 094 this.setName(name); 095 this.setFactory(factory); 096 } 097 098 private void initCache() { 099 100 } 101 102 private void lazyLoadNonCache() throws DAOException, CompoundNotFoundError { 103 104 String out = null; 105 if (this.getFactory()!=null 106 && this.getFactory().getSequenceDAO()!=null) { 107 out = this.getFactory().getSequenceDAO().getFullSequence(this); 108 setNonCacheLazyLoaded(true); 109 } 110 if (out!=null) { 111 this.setContents(out); 112 } 113 114 } 115 116 public DADNASequence getParent() { 117 return parent; 118 } 119 120 public void setParent(DADNASequence parent) { 121 this.parent = parent; 122 if (parent!=null) { 123 this.setFactory(parent.getDaoFactory()); 124 } 125 } 126 127 //************************************* 128 129 130 131 public DAOCoreFactory getFactory() { 132 if (factory == null) { 133 if (this.getParent() != null) { 134 factory = this.getParent().getDaoFactory(); 135 } 136 } 137 return factory; 138 } 139 140 public void setFactory(DAOCoreFactory factory) { 141 this.factory = factory; 142 } 143 144 public String getName() { 145 return name; 146 } 147 148 public void setName(String name) { 149 this.name = name; 150 } 151 152 public Integer getCoordSystemID() { 153 return coordSystemID; 154 } 155 156 public void setCoordSystemID(Integer coordSystemID) { 157 this.coordSystemID = coordSystemID; 158 } 159 160 public int getLength() { 161 return (length!=null)?length:0; 162 } 163 164 public Integer getLengthInteger() { 165 return length; 166 } 167 168 public void setLength(Integer length) { 169 this.length = length; 170 } 171 172 public Integer getSeqRegionID() { 173 return seqRegionID; 174 } 175 176 public void setSeqRegionID(Integer seqRegionID) { 177 this.seqRegionID = seqRegionID; 178 } 179 180 //***************************** 181 182 public void setCompoundSet(CompoundSet compoundSet) { 183 this.compoundSet = (EnsemblDNACompoundSet) compoundSet; 184 } 185 186 public void setContents(String sequence) { 187 sequenceReader = new ArrayListSequenceReader<NucleotideCompound>(sequence, getCompoundSet()); 188 setNonCacheLazyLoaded(true); 189 length = sequenceReader.getLength(); 190 } 191 192 //****************************** 193 194 public NucleotideCompound getCompoundAt(int position) { 195 triggerLoad(); 196 if (this.getLength()==0 || sequenceReader == null ) { 197 return null; 198 } 199 return sequenceReader.getCompoundAt(position); 200 } 201 202 public int getIndexOf(NucleotideCompound compound) { 203 triggerLoad(); 204 if (this.getLength()==0) { 205 return 0; 206 } 207 return SequenceMixin.indexOf(this, compound); 208 } 209 210 public int getLastIndexOf(NucleotideCompound compound) { 211 triggerLoad(); 212 if (this.getLength()==0) { 213 return 0; 214 } 215 return SequenceMixin.lastIndexOf(this, compound); 216 } 217 218 public String getSequenceAsString() { 219 triggerLoad(); 220 if (this.getLength()==0) { 221 return ""; 222 } 223 return SequenceMixin.toString(this); 224 } 225 226 public String getSequenceAsString(Integer start, Integer end, Strand strand) { 227 triggerLoad(); 228 if (this.getLength()==0) { 229 return ""; 230 } 231 if(Strand.NEGATIVE.equals(strand)) { 232 return getSubSequence(start, end).getInverse().getSequenceAsString(); 233 } 234 else { 235 return getSubSequence(start, end).getSequenceAsString(); 236 } 237 } 238 239 private void triggerLoad() { 240 if (usesCache) { 241 //TODO Need to support caching version 242 } 243 else { 244 if(! nonCacheLazyLoaded) { 245 try { 246 //initialize the sequence 247 this.lazyLoadNonCache(); 248 } catch (DAOException ex) { 249 LOGGER.info("Error in loading data from remote source:\n" + ex.getMessage()); 250 251 } catch (CompoundNotFoundError ex) { 252 LOGGER.info("Error parsing sequence:\n" + ex.getMessage()); 253 254 } 255 } 256 } 257 258 } 259 260 public String getReverseComplementSequenceAsString(Integer start, Integer end) { 261 triggerLoad(); 262 if (this.getLength()==0) { 263 return ""; 264 } 265 return getSubSequence(start, end).getInverse().getSequenceAsString(); 266 } 267 268 public List<NucleotideCompound> getAsList() { 269 triggerLoad(); 270 if (this.getLength()==0) { 271 return new ArrayList<NucleotideCompound>(); 272 } 273 return SequenceMixin.toList(this); 274 } 275 276 public SequenceView<NucleotideCompound> getSubSequence(Integer start, Integer end) { 277 triggerLoad(); 278 if (this.getLength()==0) { 279 return null; 280 } 281 return new SequenceProxyView<NucleotideCompound>(this, start, end); 282 } 283 284 public CompoundSet<NucleotideCompound> getCompoundSet() { 285 return compoundSet; 286 } 287 288 public AccessionID getAccession() { 289 throw new UnsupportedOperationException("Not supported yet."); 290 } 291 292 public int countCompounds(NucleotideCompound... compounds) { 293 triggerLoad(); 294 if (this.getLength()==0) { 295 return 0; 296 } 297 return SequenceMixin.countCompounds(this, compounds); 298 } 299 300 public Iterator<NucleotideCompound> iterator() { 301 triggerLoad(); 302 return (sequenceReader!= null) ? SequenceMixin.createIterator(sequenceReader): 303 new EmptyIterator(); 304 } 305 306 public SequenceView<NucleotideCompound> getInverse() { 307 triggerLoad(); 308 if (this.getLength()==0) { 309 return null; 310 } 311 return SequenceMixin.inverse(this); 312 } 313 314 315 private void setNonCacheLazyLoaded(boolean nonCacheLazyLoaded) { 316 this.nonCacheLazyLoaded = nonCacheLazyLoaded; 317 } 318 319 public static class EmptyIterator<NucleotideCompound> implements 320 Iterator<NucleotideCompound> { 321 322 public EmptyIterator() { 323 324 } 325 326 public boolean hasNext() { 327 return false; 328 } 329 330 public NucleotideCompound next() { 331 return null; 332 } 333 334 public void remove() { 335 336 } 337 338 } 339 340}