001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.datasourceaware.core; 023 024import java.util.*; 025import org.biojava3.core.sequence.RNASequence; 026import org.slf4j.Logger; 027import org.slf4j.LoggerFactory; 028import uk.ac.roslin.ensembl.config.DBConnection; 029import uk.ac.roslin.ensembl.config.ExternalDBType; 030import uk.ac.roslin.ensembl.config.FeatureType; 031import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory; 032import uk.ac.roslin.ensembl.datasourceaware.DAXRef; 033import uk.ac.roslin.ensembl.exception.DAOException; 034import uk.ac.roslin.ensembl.exception.RangeException; 035import uk.ac.roslin.ensembl.model.Coordinate.Strand; 036import uk.ac.roslin.ensembl.model.*; 037import uk.ac.roslin.ensembl.model.core.Exon; 038import uk.ac.roslin.ensembl.model.core.Transcript; 039import uk.ac.roslin.ensembl.model.core.Translation; 040import uk.ac.roslin.ensembl.model.core.VegaFeature; 041 042public class DATranscript extends DAAnalyzedFeature implements Transcript, VegaFeature, XRefed { 043 044 Integer canonicalTranslationID = null; 045 DATranslation canonicalTranslation = null; 046 private TreeMap<Integer, DATranslation> translationHash = null; 047 private String biotype = null; 048 private DAGene gene = null; 049 private boolean canonical = false; 050 private Integer geneID = null; 051 private TreeMap<Integer, DAExon> exonHash = null; 052 MappingSet primaryTranscriptExonMappings = null; 053 MappingSet processedTranscriptExonMappings = null; 054 Integer processedTranscriptLength = null; 055 private DADNASequence processedTranscript = null; 056 final static Logger LOGGER = LoggerFactory.getLogger(DATranscript.class); 057 058 private TreeSet<String> synonyms = null; 059 protected Set<DAXRef> xrefs = new HashSet<DAXRef>(); 060 protected HashMap<ExternalDBType, Set<DAXRef>> typedXRefs = new HashMap<ExternalDBType, Set<DAXRef>>(); 061 boolean xrefsInitialized = false; 062 private String vegaTranscriptID = null; 063 private String ccdsID =null; 064 065 public DATranscript() { 066 super(); 067 } 068 069 /** 070 * 071 * @param daoFactory 072 */ 073 public DATranscript(DAOCoreFactory daoFactory) { 074 super(daoFactory); 075 } 076 077 @Override 078 public ObjectType getType() { 079 return FeatureType.transcript; 080 } 081 082 /** 083 * Returns all of the Translation objects associated with this Transcript 084 * (typically there will only be one, the canonical translation, there may 085 * also be no translation for a transcript). If 086 * the 'translationHash' object has not yet been instantiated, a lazy load 087 * by the TranslationDAO object returned by the DAOFactory is triggered. 088 * @return Collection of Translation objects. 089 * 090 */ 091 @Override 092 public Collection<? extends Translation> getTranslations() { 093 094 List<? extends Translation> out = null; 095 if (this.translationHash != null) { 096 return this.translationHash.values(); 097 } 098 this.reinitialize(); // we need an ID for this method 099 //this clears any existing translations 100 this.translationHash = new TreeMap<Integer, DATranslation>(); 101 102 try { 103 out = this.getDaoFactory().getTranslationDAO().getTranslationsForTranscript(this); 104 105 } catch (Exception e) { 106 LOGGER.info("failed to get Translations", e); 107 } 108 109 return out; 110 } 111 112 /** 113 * Returns the canonical Translation for this Transcript (if there is one). 114 * If not set, a lazy load using 'getTranslations()' is triggered. 115 */ 116 @Override 117 public DATranslation getCanonicalTranslation() { 118 119 if (canonicalTranslation != null) { 120 return this.canonicalTranslation; 121 } else { 122 123 if (this.getDaoFactory()==null || this.getStableID()==null || this.getStableID().isEmpty()) { 124 return null; 125 } 126 this.reinitialize(); // need canonicalTranslationID 127 //prior to version 57 there was no concept of a canonicalTranslation storedin the transcript table 128 if (this.getSchemaVersion()!=null && Integer.parseInt(this.getSchemaVersion())>56 129 && (this.canonicalTranslationID == null || this.canonicalTranslationID.equals(0)) ) { 130 return null; 131 } 132 133 //get all of the transcripts not just the canonical one! 134 this.getTranslations(); 135 return this.canonicalTranslation; 136 } 137 } 138 139 /** 140 * Utility method to reset or initialize the important fields of this Transcript 141 * (which must have a 'stableID' set). 142 * @throws DAOException 143 */ 144 @Override 145 void reinitialize() { 146 if (!this.isLazyloadAllowed() || this.isInitialized()) { 147 return; 148 } 149 try { 150 //nb getDaoFActory() will try and make a factory if we have at least species and ensembl version 151 this.getDaoFactory().getTranscriptDAO().reInitialize(this); 152 } catch (Exception ex) { 153 LOGGER.info("Failed to reinitialize the Transcript from the Database (using its stableID: " 154 +this.stableID+").", ex); 155 } finally { 156 //always set this so dont try again 157 this.setInitialized(true); 158 } 159 } 160 161 public String getBiotype() { 162 this.reinitialize(); 163 return this.biotype; 164 } 165 166 public void setBiotype(String biotype) { 167 this.biotype = biotype; 168 } 169 170 /** 171 * Gets the Gene associated with this transcript, using the stableID for the 172 * gene set on Transcript initialization. 173 */ 174 @Override 175 public DAGene getGene() { 176 this.reinitialize(); // need the geneID 177 if (gene == null && geneID != null) { 178 try { 179 gene = (DAGene) this.getDaoFactory().getGeneDAO().getGeneByID(this.geneID); 180 } catch (Exception e) { 181 LOGGER.info("Error thrown whilst trying to retrieve Gene for a Transcript", e); 182 } 183 } 184 185 return gene; 186 } 187 188 public void setGene(DAGene gene) { 189 this.gene = gene; 190 } 191 192 public Integer getGeneID() { 193 this.reinitialize(); 194 return geneID; 195 } 196 197 public void setGeneID(Integer geneID) { 198 this.geneID = geneID; 199 } 200 201 /** 202 * Returns true if this Transcript has been annotated by Ensembl as 'Canonical', 203 * according to the following rules: 204 * A 'Canonical' Transcript is the longest CCDS model in a gene. 205 * If none is available then the longest coding Ensembl-Havana merged transcript 206 * is chosen. If no merged transcript is present, the longest coding transcript 207 * is used, regardless of their source; this can be either an Ensembl or a 208 * Havana transcript. Finally, if there are no coding transcripts in the gene, 209 * the longest non-coding transcript is selected. 210 */ 211 public boolean isCanonical() { 212 this.reinitialize(); 213 return canonical; 214 } 215 216 public void setCanonical(boolean canonical) { 217 this.canonical = canonical; 218 } 219 220 @Override 221 public String getDisplayName() { 222 this.reinitialize(); 223 return (displayName != null) ? displayName : stableID; 224 } 225 226 /** 227 * Returns a Collection of Exons for this Transcript. These are returned in 228 * (Ranked) order from the exonHash of this Transcript, which will be initialized 229 * by lazy loading. The ordered exons are used to make the 'processedTranscript' 230 * object for this Transcript (the full length transcript being considered 231 * the 'pimaryTranscript' 232 */ 233 @Override 234 public Collection<DAExon> getExons() { 235 this.reinitialize(); //needs transcript id 236 if (exonHash == null) { 237 exonHash = new TreeMap<Integer, DAExon>(); 238 try { 239 Collection<DAExon> out = (Collection<DAExon>) this.getDaoFactory().getExonDAO().getExonsForTranscript(this); 240 // don't use this - use the values ordered in the exonHash 241 //return out; 242 } catch (Exception e) { 243 LOGGER.info("Threw DAOException on trying to populate exons for a transcript", e); 244 } 245 246 } 247 return this.exonHash.values(); 248 } 249 250 /** 251 * Method used by the ExonDAO to add an Exon to the 'exonHash', hashed on the 252 * 'Rank' integer held by the Exon. 253 * @param exon 254 */ 255 public void addExon(Exon exon) { 256 if (exonHash == null) { 257 exonHash = new TreeMap<Integer, DAExon>(); 258 } 259 try { 260 DAExon e = (DAExon) exon; 261 this.exonHash.put(e.getRank(), e); 262 } catch (Exception ex) { 263 LOGGER.info("failed to add an exon to the transcript ", ex); 264 } 265 } 266 267 /** 268 * Returns the internal database ID for the Translation object marked as 'Canonical'. 269 * (Prior to Ensembl57 there was a single translation object for each transcript, 270 * considered as canonical.) 271 */ 272 public Integer getCanonicalTranslationID() { 273 reinitialize(); 274 return canonicalTranslationID; 275 } 276 277 public void setCanonicalTranslationID(Integer canonicalTranslationID) { 278 this.canonicalTranslationID = canonicalTranslationID; 279 } 280 281 /** 282 * Package method to add a Translation to the 'translationHash', 283 * hashed on the internal database ID of the Translation, and to set the 284 * canonicalTranlation/ID where applicable. 285 * @param trl 286 */ 287 protected void addTranslation(DATranslation trl) { 288 if (trl != null && trl.getId() != null) { 289 290 if (this.translationHash==null) { 291 this.translationHash = new TreeMap<Integer, DATranslation>(); 292 } 293 294 trl.setTranscript(this); 295 this.translationHash.put(trl.getId(), trl); 296 297 if(this.getSchemaVersion()!= null && Integer.parseInt(this.getSchemaVersion())<57 ){ 298 trl.setCanonical(true); 299 this.canonicalTranslation = trl; 300 this.canonicalTranslationID = trl.getId(); 301 } else if (trl.getId().equals(this.getCanonicalTranslationID())) { 302 trl.setCanonical(true); 303 this.canonicalTranslation = trl; 304 } 305 } 306 } 307 308 /** 309 * Method used by the TranslationDAO to add Translations to the 'translationHash', 310 * hashed on the internal database ID of the Translation. 311 * @param trls Collection<DATranslation> 312 */ 313 public void addTranslations(Collection<DATranslation> trls) { 314 315 if (trls != null) { 316 for (DATranslation trl : trls) { 317 this.addTranslation(trl); 318 } 319 } 320 } 321 322 /** 323 * Returns whether this Transcript has (at least one, canonical) Translation. 324 */ 325 @Override 326 public boolean isTranslated() { 327 328 if (this.getDaoFactory()==null) { 329 return false; 330 } 331 332 if (this.getStableID()==null || this.getStableID().isEmpty()) { 333 return false; 334 } 335 this.reinitialize(); 336 //prior to version 57 there was no concept of a canonicalTranslation storedin the transcript table 337 if (this.getSchemaVersion()!=null && Integer.parseInt(this.getSchemaVersion())>56 ) { 338 return (this.canonicalTranslationID != null && !this.canonicalTranslationID.equals(0)) ; 339 } else { 340 return (this.getTranslations()!= null && !this.getTranslations().isEmpty()); 341 } 342 } 343 344 /** 345 * Private method to initialize both MappingSets 'primaryTranscriptExonMappings' 346 * and 'processedTranscriptExonMappings' which hold the (ordered) Mappings of 347 * Coordinates on the primary or processed Transcript against the Exons. 348 * @throws DAOException 349 */ 350 private void stitchExons() throws DAOException { 351 352 super.inititializeTopLevel(); 353 354 primaryTranscriptExonMappings = new MappingSet(); 355 processedTranscriptExonMappings = new MappingSet(); 356 357 boolean intranslated = false; 358 String seq = ""; 359 360 Collection<DAExon> exons = this.getExons(); 361 if (exons == null || exons.isEmpty() || this.topLevelTargetCoordinates==null || 362 this.topLevelTargetCoordinates.getStart()==null || 363 this.topLevelTargetCoordinates.getEnd()==null) { 364 return; 365 } 366 367 //@TODO makesafe 368 //@TODO make get from best top level mapping 369 Integer transcriptChrStart = this.topLevelTargetCoordinates.getStart(); 370 Integer transcriptChrEnd = this.topLevelTargetCoordinates.getEnd(); 371 Strand strand = this.topLevelTargetCoordinates.getStrand(); 372 373 int length = 0; 374 int primaryPosition = 1; 375 int processedPosition = 1; 376 377 for (DAExon ex : exons) { 378 Coordinate exonCoord = ex.getTopLevelMappings().first().getTargetCoordinates(); 379 Coordinate primaryTranscriptCoord = null; 380 Coordinate processedTranscriptCoord = null; 381 if (Strand.REVERSE_STRAND.equals(strand)) { 382 primaryPosition = transcriptChrEnd - exonCoord.getEnd() +1; 383 primaryTranscriptCoord = new Coordinate(primaryPosition, primaryPosition+exonCoord.getLength()-1, strand); 384 processedTranscriptCoord = new Coordinate(processedPosition , processedPosition+exonCoord.getLength()-1 , strand); 385 processedPosition += exonCoord.getLength(); 386 } else { 387 primaryPosition = exonCoord.getStart() - transcriptChrStart + 1; 388 primaryTranscriptCoord = new Coordinate(primaryPosition, primaryPosition+exonCoord.getLength()-1, strand); 389 processedTranscriptCoord = new Coordinate(processedPosition, processedPosition+exonCoord.getLength()-1, strand); 390 processedPosition += exonCoord.getLength(); 391 } 392 393 394 Mapping primaryTMappings = new Mapping(); 395 primaryTMappings.setSource(this); 396 primaryTMappings.setTarget(ex); 397 primaryTMappings.setSourceCoordinates(primaryTranscriptCoord); 398 //this is actually the coordinates on the chromosome not the coordinates of the exon ;) 399 //primaryTMappings.setTargetCoordinates(exonCoord); 400 primaryTranscriptExonMappings.add(primaryTMappings); 401 402 Mapping processedTMappings = new Mapping(); 403 processedTMappings.setSource(this); 404 processedTMappings.setTarget(ex); 405 processedTMappings.setSourceCoordinates(processedTranscriptCoord); 406 //processedTMappings.setTargetCoordinates(exonCoord); 407 processedTranscriptExonMappings.add(processedTMappings); 408 409 410 length += exonCoord.getLength(); 411 } 412 413 this.processedTranscriptLength = length; 414 } 415 416 /** 417 * Returns the mapped relative primaryTranscript position for a given Chromosome 418 * (or other Toplevel genomic) position. Wraps the DAFeature 'convertChromosomePositionToFeature' 419 * method. The TopLevel Target should only have a 420 * positive coordinate system. If the TopLevel Target coordinates are found to extend below 1, 421 * a range exception is thrown rather than try to handle this. 422 * @param chromosomePosition 423 */ 424 public Integer convertChromosomeToPrimaryTranscriptPosition(Integer chromosomePosition) { 425 return super.convertChromosomePositionToFeature(chromosomePosition); 426 } 427 428 /** 429 * Returns the mapped Chromosomal position for a given 430 * primaryTranscript position. Wraps the DAFeature 'convertToTargetPosition' 431 * method. Whilst the TopLevel Target should only have a 432 * positive coordinate system (and if not a RangeException will be thrown here), 433 * the query Integer is allowed to be outwith the bounds of the Feature, 434 * and may possibly return a value outwith the bounds of the Chromosome. 435 * @param primaryTranscriptPosition 436 */ 437 public Integer convertPrimaryTranscriptPositionToChromosome(Integer primaryTranscriptPosition) { 438 return super.convertToTargetPosition(primaryTranscriptPosition); 439 } 440 441 /** 442 * Converts an Integer position on the processed (spliced) transcript 443 * to the position on the TopLevel-annotated Target (typically the Chromosome). 444 * The TopLevel Target should only have a 445 * positive coordinate system. However negative values will be returned here 446 * if appropriate. 447 * @param processedTranscriptPosition Integer 448 * @return Integer 449 */ 450 public Integer convertProcessedTranscriptPositionToChromosome(Integer processedTranscriptPosition) throws DAOException { 451 452 if (processedTranscriptPosition==null || processedTranscriptPosition == 0) { 453 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world." 454 +" Use -1 for one base upstream or +1 for the first base."); 455 } 456 457 Integer result = null; 458 459 super.inititializeTopLevel(); 460 461 if (this.topLevelTargetCoordinates==null || 462 this.topLevelTargetCoordinates.getStart()==null || 463 this.topLevelTargetCoordinates.getEnd()==null) { 464 return null; 465 } 466 467 468 Coordinate primaryTranscCoords = this.topLevelTargetCoordinates; 469 470 471 472 473 if (processedTranscriptPosition<0) { 474 if (Strand.REVERSE_STRAND.equals(primaryTranscCoords.getStrand())) { 475 result = primaryTranscCoords.getEnd()-processedTranscriptPosition; 476 //if move from negative to positive 477 if (primaryTranscCoords.getEnd()<0 && result>0) { 478 result++; 479 } 480 } else { 481 result = primaryTranscCoords.getStart()+processedTranscriptPosition; 482 //if move from positive to negative 483 if (primaryTranscCoords.getStart()>0 && result<1) { 484 result--; 485 } 486 } 487 return result; 488 } 489 490 if ( processedTranscriptExonMappings==null ) { 491 this.stitchExons(); 492 } 493 494 if (processedTranscriptExonMappings==null || processedTranscriptExonMappings.isEmpty()) { 495 return null; 496 } 497 498 if ( processedTranscriptPosition>this.getProcessedTranscriptLength()) { 499 500 if (Strand.REVERSE_STRAND.equals(primaryTranscCoords.getStrand())) { 501 result = primaryTranscCoords.getStart() - (+processedTranscriptPosition-this.getProcessedTranscriptLength()); 502 if (primaryTranscCoords.getStart()>0 && result<1) { 503 result--; 504 } 505 506 } else { 507 result = primaryTranscCoords.getEnd()+processedTranscriptPosition-this.getProcessedTranscriptLength(); 508 if(primaryTranscCoords.getEnd()<1 && result>0) { 509 result++; 510 } 511 } 512 return result; 513 } 514 515 516 for (Mapping m: this.processedTranscriptExonMappings) { 517 DAExon ex = (DAExon) m.getTarget(); 518 Coordinate sourceCoordinates = m.getSourceCoordinates(); 519 if (sourceCoordinates.containsPoint(processedTranscriptPosition)) { 520 Coordinate exonCoordinates = ex.getTopLevelMappings().first().getTargetCoordinates(); 521 522 if (Strand.REVERSE_STRAND.equals(exonCoordinates.getStrand())) { 523 result = exonCoordinates.getEnd() - (processedTranscriptPosition- sourceCoordinates.getStart() ); 524 //if move from positive to negative 525 if (exonCoordinates.getEnd()>0 && result<1) { 526 result--; 527 } 528 529 } else { 530 result = exonCoordinates.getStart() + (processedTranscriptPosition -sourceCoordinates.getStart() ); 531 //if move from negative to positive 532 if ( exonCoordinates.getStart()<0 && result>0) { 533 result++; 534 } 535 536 break; 537 } 538 } 539 } 540 return result; 541 } 542 543 /** 544 * Converts an Integer position on the TopLevel-annotated Target (typically the Chromosome) 545 * to the position on the processed (spliced) Transcript. The TopLevel Target should only have a 546 * positive coordinate system. If the TopLevel Target position is less than 1, 547 * a range exception is thrown rather than try to handle this. 548 * Although the chromosomePosition parameter may not be less than 1, the method does cope with 549 * negative coordinate transcript/exon mappings on the chromosome. 550 * @param chromosomePosition Integer 551 * @return Integer 552 */ 553 public Integer convertChromosomeToProcessedTranscriptPosition(Integer chromosomePosition) throws DAOException { 554 555 if (chromosomePosition==null|| chromosomePosition==0 ) { 556 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world." 557 +" Use -1 for one base upstream or +1 for the first base."); 558 } 559 if (chromosomePosition<0) { 560 throw new RangeException("A chromosome has no coordinates lower than 0."); 561 } 562 563 Integer result = null; 564 Integer processedLength = null; 565 566 //calling 'getProcessedTranscriptLength()' forces intialization of the top level mappings and 567 //top level mappings and the stitching together of exons 568 processedLength = this.getProcessedTranscriptLength(); 569 570 571 if (processedLength==null 572 || processedLength==0 573 || this.topLevelTargetCoordinates==null 574 || this.topLevelTargetCoordinates.getStart()==null 575 || this.topLevelTargetCoordinates.getEnd()==null) { 576 return null; 577 } 578 579 Coordinate primaryTranscTargetCoords = this.topLevelTargetCoordinates; 580 581 if (Strand.REVERSE_STRAND.equals(primaryTranscTargetCoords.getStrand()) ) { 582 583 //before transcript start 584 if (chromosomePosition > primaryTranscTargetCoords.getEnd()) { 585 //result = this.getProcessedTranscriptLength() + (chromosomePosition- primaryTranscTargetCoords.getEnd() ); 586 result = - (chromosomePosition -primaryTranscTargetCoords.getEnd()); 587 //if going from -ive to positive, we will have gone back one too many 588 if (chromosomePosition>0 && primaryTranscTargetCoords.getEnd()<1 ) { 589 result++; 590 } 591 return result; 592 } 593 594 if (chromosomePosition.equals( primaryTranscTargetCoords.getEnd())) { 595 return 1; 596 } 597 598 //beyond transcript end 599 if (chromosomePosition < primaryTranscTargetCoords.getStart()) { 600 //result = - (primaryTranscTargetCoords.getStart() - chromosomePosition); 601 result = processedLength + (primaryTranscTargetCoords.getStart()-chromosomePosition ); 602// //if going from positive to negative, we will have gone forward one too many 603// if (chromosomePosition<0 && primaryTranscTargetCoords.getStart()>0) { 604// result--; 605// } 606 return result; 607 } 608 609 610 } else { 611 612 //before transcript start 613 if (chromosomePosition<primaryTranscTargetCoords.getStart()) { 614 result = - (primaryTranscTargetCoords.getStart() - chromosomePosition); 615// //if move from positive to negative 616// if (chromosomePosition<0 && primaryTranscTargetCoords.getStart()>0) { 617// result++; 618// } 619 return result; 620 } 621 622 if (chromosomePosition.equals(primaryTranscTargetCoords.getStart())) { 623 return 1; 624 } 625 626 //beyond transcript end 627 if (chromosomePosition > primaryTranscTargetCoords.getEnd()) { 628 result = processedLength + (chromosomePosition - primaryTranscTargetCoords.getEnd()); 629 //if move from negative to positive 630 if (chromosomePosition>0 && primaryTranscTargetCoords.getEnd() <1) { 631 result--; 632 } 633 return result; 634 } 635 } 636 637 if ( processedTranscriptExonMappings==null) { 638 this.stitchExons(); 639 } 640 641 for (Mapping m: this.processedTranscriptExonMappings) { 642 DAExon ex = (DAExon) m.getTarget(); 643 Coordinate exonCoordinates = ex.getTopLevelMappings().first().getTargetCoordinates(); 644 if (exonCoordinates.containsPoint(chromosomePosition)) { 645 Coordinate sourceCoordinates = m.getSourceCoordinates(); 646 647 if (Strand.REVERSE_STRAND.equals(exonCoordinates.getStrand())) { 648 result = sourceCoordinates.getStart() + (exonCoordinates.getEnd()-chromosomePosition ); 649 } else { 650 result = sourceCoordinates.getStart() + (chromosomePosition-exonCoordinates.getStart() ); 651 if (chromosomePosition>0 && exonCoordinates.getStart()<1) { 652 result--; 653 } 654 } 655 break; 656 } 657 } 658 659 return result; 660 } 661 662 /** 663 * Returns the mapped position on the Primary transcript equivalent to the given 664 * position on the processed transcript 665 * @param processedTranscriptPosition 666 * 667 * @throws DAOException 668 */ 669 public Integer convertProcessedToPrimaryTranscriptPosition(Integer processedTranscriptPosition) throws DAOException { 670 671 if (processedTranscriptPosition==null|| processedTranscriptPosition==0 ) { 672 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."); 673 } 674 675 if (processedTranscriptPosition<0) { 676 return processedTranscriptPosition; 677 } 678 679 Integer chr = this.convertProcessedTranscriptPositionToChromosome(processedTranscriptPosition); 680 681 if (chr==null) { 682 return null; 683 } else { 684 return this.convertChromosomeToPrimaryTranscriptPosition(chr); 685 } 686 } 687 688 /** 689 * Returns the mapped position on the Processed transcript equivalent to the given 690 * position on the Primary (unprocessed) transcript 691 * @param primaryTranscriptPosition 692 * 693 * @throws DAOException 694 */ 695 public Integer convertPrimaryToProcessedTranscriptPosition(Integer primaryTranscriptPosition) throws DAOException { 696 697 if (primaryTranscriptPosition==null|| primaryTranscriptPosition==0 ) { 698 throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."); 699 } 700 701 if (primaryTranscriptPosition<0) { 702 return primaryTranscriptPosition; 703 } 704 705 Integer chr = this.convertPrimaryTranscriptPositionToChromosome(primaryTranscriptPosition); 706 707 if (chr==null) { 708 return null; 709 } else { 710 return this.convertChromosomeToProcessedTranscriptPosition(chr); 711 } 712 } 713 714 715 /** 716 * Returns a DADNASequence object representing the processed (spliced exons) 717 * version of this Transcript. 718 * 719 * @throws DAOException 720 */ 721 public DADNASequence getProcessedTranscript() throws DAOException { 722 723 if (processedTranscript != null ) { 724 return processedTranscript; 725 } 726 727 String seq = ""; 728 729 if ( processedTranscriptExonMappings==null) { 730 this.stitchExons(); 731 } 732 733 for (Mapping m:processedTranscriptExonMappings ) { 734 seq = seq.concat(super.getSequenceAsString(m.getSourceCoordinates().getStart(),m.getSourceCoordinates().getEnd())); 735 } 736 737 if (seq!= null && !seq.isEmpty()) { 738 processedTranscript = new DADNASequence(seq); 739 } 740 return processedTranscript; 741 } 742 743 /** 744 * Returns the length of the processed (spliced exons) 745 * version of this Transcript. 746 * 747 * @throws DAOException 748 */ 749 public Integer getProcessedTranscriptLength() throws DAOException { 750 if (processedTranscriptLength==null) { 751 this.stitchExons(); 752 } 753 return processedTranscriptLength; 754 } 755 756 757 /** 758 * Returns the length of the unprocessed (primary) 759 * version of this Transcript. (Wraps the DAFeature method 'getLength()'). 760 * 761 * @throws DAOException 762 */ 763 public Integer getPrimaryTranscriptLength() { 764 return super.getLength(); 765 } 766 767 /** 768 * Returns a DADNASequence object representing the unprocessed primary Transcript. 769 * (Wraps the DAFeature 'getSequence()' method.) 770 * 771 */ 772 public DADNASequence getPrimaryTranscript() { 773 return super.getSequence(); 774 } 775 776 /** 777 * Use of this deprecated method is equivalent to the preferred method 'getPrimaryTranscripRNASequence'. 778 * 779 * @deprecated 780 */ 781 @Deprecated 782 @Override 783 public RNASequence getRNASequence() { 784 return super.getRNASequence(); 785 } 786 787 /** 788 * Preferred method to explicitly get the RNASequence object representing the 789 * PrimaryTranscript. Wraps DAFeature getRNASequence(). 790 * 791 */ 792 public RNASequence getPrimaryTranscriptRNASequence() { 793 return super.getRNASequence(); 794 } 795 796 /** 797 * Use of this deprecated method is equivalent to the preferred method 'getPrimaryTranscripRNASequenceAsString'. 798 * 799 * @deprecated 800 */ 801 @Deprecated 802 @Override 803 public String getRNASequenceAsString() { 804 return super.getRNASequenceAsString(); 805 } 806 807 /** 808 * Preferred method to explicitly get a String representing the RNA sequence 809 * of the PrimaryTranscript. Wraps DAFeature getRNASequenceAsString(). 810 * 811 */ 812 public String getPrimaryTranscriptRNASequenceAsString() { 813 return super.getRNASequenceAsString(); 814 } 815 816 /** 817 * Use of this deprecated method is equivalent to the preferred method 818 * 'getPrimaryTranscripRNASequenceAsString(int,int)'. 819 * 820 * @deprecated 821 */ 822 @Deprecated 823 @Override 824 public String getRNASequenceAsString(Integer start, Integer stop) throws RangeException { 825 return super.getRNASequenceAsString(start, stop); 826 } 827 828 /** 829 * Preferred method to explicitly get a String representing the RNA sequence 830 * of the PrimaryTranscript, for the given range. 831 * Wraps DAFeature getRNASequenceAsString(int,int). 832 * 833 */ 834 public String getPrimaryTranscriptRNASequenceAsString(Integer start, Integer stop) throws RangeException { 835 return super.getRNASequenceAsString(start, stop); 836 } 837 838 /** 839 * Returns an RNASequence object representing the 840 * Processed (RNA spliced) Transcript. 841 * 842 */ 843 public RNASequence getProcessedTranscriptRNASequence() throws DAOException { 844 845 if (this.getProcessedTranscript()==null || topLevelTargetSequence==null ) { 846 return null; 847 } else { 848 Integer id = topLevelTargetSequence.getCodonTableID(); 849 return this.getProcessedTranscript().getRNASequence(this.getRegistry().getTranscriptionEngine(id)); 850 } 851 852 } 853 854 /** 855 * Returns a String representing the RNA sequence 856 * of the Processed (RNA spliced) Transcript. 857 * 858 */ 859 public String getProcessedTranscriptRNASequenceAsString() throws DAOException { 860 861 if (this.getProcessedTranscript()==null || topLevelTargetSequence==null) { 862 return ""; 863 } 864 865 return this.getProcessedTranscriptRNASequence().getSequenceAsString(); 866 } 867 868 /** 869 * Returns a String representing the RNA sequence 870 * of the Processed (RNA spliced) Transcript, for the given range. 871 * 872 */ 873 public String getProcessedTranscriptRNASequenceAsString(Integer start, Integer stop) throws RangeException, DAOException { 874 875 if (this.getProcessedTranscript()==null || topLevelTargetSequence==null) { 876 return ""; 877 } 878 879 return this.getProcessedTranscriptRNASequence().getSequenceAsString(start,stop, org.biojava3.core.sequence.Strand.POSITIVE); 880 } 881 882 /** 883 * Beware: this method does not trigger lazyloading of typed XRefs. 884 * @param type 885 */ 886 protected Set<DAXRef> getXRefs(ExternalDBType type) { 887 return typedXRefs.get(type); 888 } 889 890 /** 891 * Triggers lazyload if Transcript is not initialized. 892 */ 893 @Override 894 public DAXRef getDisplayXRef() { 895 this.reinitialize(); 896 return displayXRef; 897 } 898 899 public void setDisplayXRef(DAXRef xref) { 900 this.displayXRef = xref; 901 if (xref!=null) { 902 this.xrefs.add(xref); 903 Set s = new HashSet<DAXRef>(); 904 s.add(xref); 905 this.typedXRefs.put(ExternalDBType.DisplayID, s); 906 } 907 } 908 909 /** 910 * Returns any curated VegaID for the Transcript, forcing lazy load if not set, and defaulting 911 * to an empty string if absent (e.g. for all the invertebrate species in EnsemblGenomes). 912 */ 913 @Override 914 public String getVegaID() { 915 916 if (this.vegaTranscriptID!=null) { 917 return this.vegaTranscriptID; 918 } 919 920 List<DAXRef> outList = new ArrayList<DAXRef>(); 921 922 if (this.getDaoFactory()!= null 923 && !this.getDaoFactory().getRegistry().getDatasourceType().equals(DBConnection.DataSource.ENSEMBLDB)) { 924 this.vegaTranscriptID=""; 925 this.addTypedXRefs(ExternalDBType.VegaTranscript, outList); 926 return this.vegaTranscriptID ; 927 } 928 929 this.reinitialize(); 930 931 if (this.vegaTranscriptID==null ) { 932 933 if (this.getDaoFactory()!= null ) { 934 935 for (DAXRef dax : this.getAllXRefs()) { 936 if (dax.getPrimaryAccession()!=null 937 && dax.getPrimaryAccession().startsWith("OTT") 938 && dax.getDB()!= null 939 && (dax.getDB().getDBName().equals(ExternalDBType.VegaTranscript.toString()) 940 || 941 dax.getDB().getDBName().contains("Vega_transcript") 942 || 943 dax.getDB().getDBName().contains("vega_transcript") ) 944 ) { 945 outList.add(dax); 946 } 947 } 948 949 if (outList.isEmpty()) { 950 this.vegaTranscriptID = ""; 951 this.addTypedXRefs(ExternalDBType.VegaTranscript, outList); 952 return this.vegaTranscriptID; 953 } else { 954 //add the factory to the xref - although we shouldn't need to use it as it is fully initialized 955 for (DAXRef xr: outList) { 956 xr.setDaoFactory(this.getDaoFactory()); 957 } 958 this.addTypedXRefs(ExternalDBType.VegaTranscript, outList); 959 } 960 961 if (outList.size()==1) { 962 this.vegaTranscriptID = outList.get(0).getPrimaryAccession().trim(); 963 return this.vegaTranscriptID; 964 } else { 965 //hopefully all the IDs will be the same - but can't guarantee this! 966 String pre = "Multiple Vega IDs: {"; 967 boolean multiple = false; 968 String out = null; 969 String firstID = ""; 970 971 for (XRef x: outList) { 972 if (out==null) { 973 out = x.getPrimaryAccession().trim(); 974 firstID = x.getPrimaryAccession().trim(); 975 } else if (!firstID.equals(x.getPrimaryAccession().trim())) { 976 out = out.concat(", ").concat(x.getPrimaryAccession().trim()); 977 multiple = true; 978 } 979 } 980 981 if (!multiple) { 982 vegaTranscriptID=out; 983 } else { 984 out = out.trim().concat("}"); 985 vegaTranscriptID = pre.concat(out); 986 } 987 return this.vegaTranscriptID; 988 } 989 } 990 } 991 return this.vegaTranscriptID; 992 } 993 994 @Override 995 public Set<DAXRef> getVegaXRefs() { 996 this.getVegaID(); 997 return this.getXRefs(ExternalDBType.VegaTranscript); 998 } 999 1000 @Override 1001 public Set<DAXRef> getCCDSXRefs() { 1002 this.getCcdsID(); 1003 return this.getXRefs(ExternalDBType.CCDS); 1004 } 1005 1006 @Override 1007 public Set<DAXRef> getAllXRefs() { 1008 1009 if (this.xrefsInitialized) { 1010 return xrefs; 1011 } 1012 1013 this.reinitialize(); 1014 1015 if (this.getDaoFactory() != null && this.getId() != null) { 1016 List<DAXRef> result = null; 1017 try { 1018 result = (List<DAXRef>) this.getDaoFactory().getXRefDAO().getAllXRefs(this); 1019 1020 } catch (DAOException ex) { 1021 LOGGER.info("Threw DAOException on trying to get Vega ID for Feature: " + this.getStableID(), ex); 1022 } finally { 1023 this.xrefsInitialized = true; 1024 } 1025 1026 if (result == null || result.isEmpty()) { 1027 return xrefs; 1028 } else { 1029 1030 for (DAXRef xr : result) { 1031 //add the factory to the xref - although we shouldn't need to use it as it is fully initialized 1032 xr.setDaoFactory(this.getDaoFactory()); 1033 /* shouldn't be necessary now */ 1034 //check that we are not creating duplicate External DBs... 1035// ExternalDB db = xr.getDB(); 1036// int originalHashCode = db.originalHashCode(); 1037// db = this.getDaoFactory().getDatabase().validateExternalDB(db); 1038// int checkedHashCode = db.originalHashCode(); 1039// if (originalHashCode-checkedHashCode !=0) { 1040// System.out.println("*** FAILED TO REUSE EXTERNALDB"); 1041// } else { 1042// System.out.println("*** successfully reused externaldb"); 1043// } 1044// xr.setDB(db); 1045 xrefs.add(xr); 1046 } 1047 } 1048 } 1049 return xrefs; 1050 } 1051 1052 @Override 1053 public TreeSet<String> getAllSynonyms() { 1054 1055 if (synonyms!=null) { 1056 return synonyms; 1057 } 1058 this.reinitialize(); 1059 try { 1060 synonyms = this.getDaoFactory().getXRefDAO().getAllSynonyms(this); 1061 } catch (DAOException ex) { 1062 LOGGER.debug("Failed to getAllSynonyms for DAFeature", ex); 1063 } 1064 if (synonyms==null) { 1065 synonyms = new TreeSet<String>(); 1066 } 1067 return synonyms; 1068 } 1069 1070 @Override 1071 public TreeSet<String> getSynonyms(XRef xref) { 1072 return xref.getSynonyms(); 1073 } 1074 1075 protected void addTypedXRefs(ExternalDBType type, Collection<? extends XRef> xrefs) { 1076 if ( this.typedXRefs.get(type)==null) { 1077 this.typedXRefs.put(type, new HashSet<DAXRef>()); 1078 } 1079 this.typedXRefs.get(type).addAll((Collection<DAXRef>)xrefs); 1080 } 1081 1082 /** 1083 * Returns any curated CCDS for this transcript. 1084 * The Consensus CDS (CCDS) project is a collaborative effort to identify a 1085 * core set of human and mouse protein coding regions that are consistently 1086 * annotated and of high quality. The long term goal is to support convergence 1087 * towards a standard set of gene annotations. 1088 */ 1089 @Override 1090 public String getCcdsID() { 1091 if (ccdsID==null) { 1092 setCcdsXRef(); 1093 } 1094 return ccdsID; 1095 } 1096 1097 /* 1098 * Private setter routine for the CCDS ID. Restricted to ENSEMBL database. 1099 * Calls the getTypedXRefs() method for ExternalDBType.CCDS and stores the 1100 * typed XRef results. 1101 */ 1102 private void setCcdsXRef() { 1103 1104 //return immediately if we have already set the list of CCDS XRefs 1105 if (this.getXRefs(ExternalDBType.CCDS)!= null) { 1106 return; 1107 } 1108 1109 List<DAXRef> out = new ArrayList<DAXRef>(); 1110 1111 1112 if (this.getDaoFactory()!= null 1113 && !this.getDaoFactory().getRegistry().getDatasourceType().equals(DBConnection.DataSource.ENSEMBLDB)) { 1114 this.ccdsID=""; 1115 this.addTypedXRefs(ExternalDBType.CCDS, out); 1116 return; 1117 } 1118 this.reinitialize(); 1119 1120 if (this.getDaoFactory()!= null && this.getId()!=null) { 1121 1122 for (DAXRef dax : this.getAllXRefs()) { 1123 if (dax.getDB()!= null && dax.getDB().getDBName().equals(ExternalDBType.CCDS.toString())) { 1124 out.add(dax); 1125 } 1126 } 1127 1128 if ( out.isEmpty() ) { 1129 this.ccdsID = ""; 1130 this.addTypedXRefs(ExternalDBType.CCDS, out); 1131 return; 1132 } else { 1133 1134 //add the factory to the xref - although we shouldn't need to use it as it is fully initialized 1135 for (DAXRef xr: out) { 1136 xr.setDaoFactory(this.getDaoFactory()); 1137 } 1138 1139 this.addTypedXRefs(ExternalDBType.CCDS, out); 1140 } 1141 1142 if (out.size()==1) { 1143 //use the DisplayID in preference to the accession - because this has the version appended 1144 this.ccdsID = out.get(0).getDisplayID().trim(); 1145 } else { 1146 //hopefully there wil be a single, or at least unique ID - but may not be able to guarantee this! 1147 String pre = "Multiple CCDS IDs: {"; 1148 boolean multiple = false; 1149 String o = null; 1150 String firstID = ""; 1151 1152 for (XRef x: out) { 1153 if (o==null) { 1154 //use the DisplayID in preference to the accession - because this has the version appended 1155 o = x.getDisplayID().trim(); 1156 firstID = x.getDisplayID().trim(); 1157 } else if (!firstID.equals(x.getDisplayID().trim())) { 1158 o = o.concat(", ").concat(x.getDisplayID().trim()); 1159 multiple = true; 1160 } 1161 } 1162 o = o.trim().concat("}"); 1163 if (multiple) { 1164 ccdsID = pre.concat(o); 1165 } 1166 } 1167 1168 } else { 1169 this.ccdsID= ""; 1170 } 1171 } 1172 1173}