001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.datasourceaware.core;
023
024import java.util.*;
025import org.biojava3.core.sequence.RNASequence;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028import uk.ac.roslin.ensembl.config.DBConnection;
029import uk.ac.roslin.ensembl.config.ExternalDBType;
030import uk.ac.roslin.ensembl.config.FeatureType;
031import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory;
032import uk.ac.roslin.ensembl.datasourceaware.DAXRef;
033import uk.ac.roslin.ensembl.exception.DAOException;
034import uk.ac.roslin.ensembl.exception.RangeException;
035import uk.ac.roslin.ensembl.model.Coordinate.Strand;
036import uk.ac.roslin.ensembl.model.*;
037import uk.ac.roslin.ensembl.model.core.Exon;
038import uk.ac.roslin.ensembl.model.core.Transcript;
039import uk.ac.roslin.ensembl.model.core.Translation;
040import uk.ac.roslin.ensembl.model.core.VegaFeature;
041
042public class DATranscript extends DAAnalyzedFeature implements Transcript, VegaFeature, XRefed {
043
    // Internal database ID of the translation flagged as canonical (null until set or loaded).
    Integer canonicalTranslationID = null;
    // The canonical translation itself; assigned in addTranslation().
    DATranslation canonicalTranslation = null;
    // Translations of this transcript, keyed on their internal database ID.
    private TreeMap<Integer, DATranslation> translationHash = null;
    private String biotype = null;
    // Owning gene; fetched on demand in getGene() using geneID.
    private DAGene gene = null;
    private boolean canonical = false;
    // Internal database ID of the owning gene.
    private Integer geneID = null;
    // Exons of this transcript, keyed on their rank so values() iterates in rank order.
    private TreeMap<Integer, DAExon> exonHash = null;
    // Exon mappings expressed in primary (unspliced) transcript coordinates; built by stitchExons().
    MappingSet primaryTranscriptExonMappings = null;
    // Exon mappings expressed in processed (spliced) transcript coordinates; built by stitchExons().
    MappingSet processedTranscriptExonMappings = null;
    // Sum of the exon lengths; computed by stitchExons().
    Integer processedTranscriptLength = null;
    // Cached spliced sequence; built on first call to getProcessedTranscript().
    private DADNASequence processedTranscript = null;
    final static Logger LOGGER = LoggerFactory.getLogger(DATranscript.class);

    // NOTE(review): the fields below are not used in the visible part of the file;
    // presumably they back the XRefed / VegaFeature accessors further down - confirm.
    private TreeSet<String> synonyms = null;
    protected  Set<DAXRef> xrefs = new HashSet<DAXRef>();
    protected HashMap<ExternalDBType, Set<DAXRef>> typedXRefs = new HashMap<ExternalDBType, Set<DAXRef>>();
    boolean xrefsInitialized = false;        
    private String vegaTranscriptID = null;
    private String ccdsID =null;
064
065    public DATranscript() {
066        super();
067    }
068
069    /**
070     * 
071     * @param daoFactory 
072     */
073    public DATranscript(DAOCoreFactory daoFactory) {
074        super(daoFactory);
075    }
076
077    @Override
078    public ObjectType getType() {
079        return FeatureType.transcript;
080    }
081
082    /**
083     * Returns all of the Translation objects associated with this Transcript 
084     * (typically there will only be one, the canonical translation, there may 
085     * also be no translation for a transcript). If
086     * the 'translationHash' object has not yet been instantiated, a lazy load 
087     * by the TranslationDAO object returned by the DAOFactory is triggered.
088     * @return Collection of Translation objects.
089     * 
090     */
091    @Override
092    public Collection<? extends Translation> getTranslations() {
093
094        List<? extends Translation> out = null;       
095        if (this.translationHash != null) {
096            return this.translationHash.values();
097        }
098        this.reinitialize(); // we need an ID for this method
099        //this clears any existing translations
100        this.translationHash = new TreeMap<Integer, DATranslation>();
101        
102        try {
103             out = this.getDaoFactory().getTranslationDAO().getTranslationsForTranscript(this);
104
105        } catch (Exception e) {
106            LOGGER.info("failed to get Translations", e);
107        }
108
109        return out;
110    }
111
112    /**
113     * Returns the canonical Translation for this Transcript (if there is one).
114     * If not set, a lazy load using 'getTranslations()' is triggered. 
115     */
116    @Override
117    public DATranslation getCanonicalTranslation() {
118
119        if (canonicalTranslation != null) {
120            return this.canonicalTranslation;
121        } else {
122            
123            if (this.getDaoFactory()==null || this.getStableID()==null || this.getStableID().isEmpty()) {
124                return null;
125            }
126            this.reinitialize(); // need canonicalTranslationID 
127            //prior to version 57 there was no concept of a canonicalTranslation storedin the transcript table
128            if (this.getSchemaVersion()!=null && Integer.parseInt(this.getSchemaVersion())>56 
129                    &&   (this.canonicalTranslationID == null || this.canonicalTranslationID.equals(0)) ) {
130                return null;
131            }
132
133            //get all of the transcripts not just the canonical one!
134            this.getTranslations();
135            return this.canonicalTranslation;
136        }
137    }
138
139    /**
140     * Utility method to reset or initialize the important fields of this Transcript 
141     * (which must have a 'stableID' set).
142     * @throws DAOException 
143     */
144    @Override
145    void reinitialize()  { 
146        if (!this.isLazyloadAllowed() || this.isInitialized()) {
147            return;
148        }
149        try {
150            //nb getDaoFActory() will try and make a factory if we have at least species and ensembl version
151            this.getDaoFactory().getTranscriptDAO().reInitialize(this);
152        } catch (Exception ex) {
153            LOGGER.info("Failed to reinitialize the Transcript from the Database (using its stableID: "
154                    +this.stableID+").", ex);
155        } finally {
156            //always set this so dont try again
157            this.setInitialized(true);
158        }
159    }
160
161    public String getBiotype() {
162        this.reinitialize();
163        return this.biotype;
164    }
165
166    public void setBiotype(String biotype) {
167        this.biotype = biotype;
168    }
169
170    /**
171     * Gets the Gene associated with this transcript, using the stableID for the 
172     * gene set on Transcript initialization.
173     */
174    @Override
175    public DAGene getGene() {
176        this.reinitialize(); // need the geneID
177        if (gene == null && geneID != null) {
178            try {
179                gene = (DAGene) this.getDaoFactory().getGeneDAO().getGeneByID(this.geneID);
180            } catch (Exception e) {
181                LOGGER.info("Error thrown whilst trying to retrieve Gene for a Transcript", e);
182            }
183        }
184
185        return gene;
186    }
187
188    public void setGene(DAGene gene) {
189        this.gene = gene;
190    }
191
192    public Integer getGeneID() {
193        this.reinitialize();
194        return geneID;
195    }
196
197    public void setGeneID(Integer geneID) {
198        this.geneID = geneID;
199    }
200
201    /**
202     * Returns true if this Transcript has been annotated by Ensembl as 'Canonical', 
203     * according to the following rules:
204     * A 'Canonical' Transcript is the longest CCDS model in a gene.
205     * If none is available then the longest coding Ensembl-Havana merged transcript 
206     * is chosen. If no merged  transcript is present, the longest coding transcript
207     * is used, regardless of their source; this can be either an Ensembl or a 
208     * Havana transcript. Finally, if there are no coding transcripts in the gene, 
209     * the longest non-coding transcript is selected.
210     */
211    public boolean isCanonical() {
212        this.reinitialize();
213        return canonical;
214    }
215
216    public void setCanonical(boolean canonical) {
217        this.canonical = canonical;
218    }
219
220    @Override
221    public String getDisplayName() {
222        this.reinitialize();        
223        return (displayName != null) ? displayName : stableID;
224    }
225
226    /**
227     * Returns a Collection of Exons for this Transcript. These are returned in 
228     * (Ranked) order from the exonHash of this Transcript, which will be initialized 
229     * by lazy loading. The ordered exons are used to make the 'processedTranscript' 
230     * object for this Transcript (the full length transcript being considered
231     * the 'pimaryTranscript'
232     */
233    @Override
234    public Collection<DAExon> getExons() {
235        this.reinitialize(); //needs transcript id
236        if (exonHash == null) {
237            exonHash = new TreeMap<Integer, DAExon>();
238            try {
239                Collection<DAExon> out =  (Collection<DAExon>) this.getDaoFactory().getExonDAO().getExonsForTranscript(this);
240                // don't use this - use the values ordered in the exonHash
241                //return out;
242            } catch (Exception e) {
243                LOGGER.info("Threw DAOException on trying to populate exons for a transcript", e);
244            }
245
246        }
247        return this.exonHash.values();
248    }
249
250    /**
251     * Method used by the ExonDAO to add an Exon to the 'exonHash', hashed on the 
252     * 'Rank' integer held by the Exon.
253     * @param exon 
254     */
255    public void addExon(Exon exon) {
256        if (exonHash == null) {
257            exonHash = new TreeMap<Integer, DAExon>();
258        }
259        try {
260            DAExon e = (DAExon) exon;
261            this.exonHash.put(e.getRank(), e);
262        } catch (Exception ex) {
263            LOGGER.info("failed to add an exon to the transcript ", ex);
264        }
265    }
266
267    /**
268     * Returns the internal database ID for the Translation object marked as 'Canonical'.
269     * (Prior to Ensembl57 there was a single translation object for each transcript, 
270     * considered as canonical.)
271     */
272    public Integer getCanonicalTranslationID() {
273        reinitialize();
274        return canonicalTranslationID;
275    }
276
277    public void setCanonicalTranslationID(Integer canonicalTranslationID) {
278        this.canonicalTranslationID = canonicalTranslationID;
279    }
280
281    /**
282     * Package method to add a Translation to the 'translationHash', 
283     * hashed on the internal database ID of the Translation, and to set the 
284     * canonicalTranlation/ID where applicable.
285     * @param trl 
286     */
287    protected void addTranslation(DATranslation trl) {
288        if (trl != null && trl.getId() != null) {
289            
290            if (this.translationHash==null) {
291                this.translationHash = new TreeMap<Integer, DATranslation>();
292            }
293            
294            trl.setTranscript(this);
295            this.translationHash.put(trl.getId(), trl);
296            
297            if(this.getSchemaVersion()!= null && Integer.parseInt(this.getSchemaVersion())<57 ){
298                trl.setCanonical(true);
299                this.canonicalTranslation = trl;
300                this.canonicalTranslationID = trl.getId();
301            } else if (trl.getId().equals(this.getCanonicalTranslationID())) {
302                trl.setCanonical(true);
303                this.canonicalTranslation = trl;
304            }
305        }
306    }
307
308    /**
309     * Method used by the TranslationDAO to add Translations to the 'translationHash', 
310     * hashed on the internal database ID of the Translation. 
311     * @param trls Collection&lt;DATranslation&gt;
312     */
313    public void addTranslations(Collection<DATranslation> trls) {
314
315        if (trls != null) {
316            for (DATranslation trl : trls) {
317                this.addTranslation(trl);
318            }
319        }
320    }
321
322    /**
323     * Returns whether this Transcript has (at least one, canonical) Translation.
324     */
325    @Override
326    public boolean isTranslated() {
327        
328         if (this.getDaoFactory()==null) {
329                return false;
330         }
331                 
332         if  (this.getStableID()==null || this.getStableID().isEmpty()) {
333                return false;
334         }
335        this.reinitialize();
336        //prior to version 57 there was no concept of a canonicalTranslation storedin the transcript table
337        if (this.getSchemaVersion()!=null && Integer.parseInt(this.getSchemaVersion())>56 ) {
338           return (this.canonicalTranslationID != null && !this.canonicalTranslationID.equals(0)) ;
339        } else {
340            return (this.getTranslations()!= null && !this.getTranslations().isEmpty());
341        }
342    }
343    
344    /**
345     * Private method to initialize both MappingSets 'primaryTranscriptExonMappings'
346     * and 'processedTranscriptExonMappings' which hold the (ordered) Mappings of 
347     * Coordinates on the primary or processed Transcript against the Exons. 
348     * @throws DAOException 
349     */
350    private void stitchExons() throws DAOException {
351
352        super.inititializeTopLevel();
353        
354        primaryTranscriptExonMappings = new MappingSet();
355        processedTranscriptExonMappings = new MappingSet();
356        
357        boolean intranslated = false;
358        String seq = "";
359
360        Collection<DAExon> exons = this.getExons();
361        if (exons == null || exons.isEmpty() || this.topLevelTargetCoordinates==null ||
362                this.topLevelTargetCoordinates.getStart()==null ||
363                this.topLevelTargetCoordinates.getEnd()==null) {
364            return;
365        }
366        
367        //@TODO makesafe
368        //@TODO make get from best top level mapping
369        Integer transcriptChrStart = this.topLevelTargetCoordinates.getStart();
370        Integer transcriptChrEnd = this.topLevelTargetCoordinates.getEnd();
371        Strand strand = this.topLevelTargetCoordinates.getStrand();
372        
373        int length = 0;
374        int primaryPosition = 1;
375        int processedPosition = 1;
376        
377        for (DAExon ex : exons) {
378            Coordinate exonCoord = ex.getTopLevelMappings().first().getTargetCoordinates();
379            Coordinate primaryTranscriptCoord = null;
380            Coordinate processedTranscriptCoord = null;
381            if (Strand.REVERSE_STRAND.equals(strand)) {
382                primaryPosition = transcriptChrEnd - exonCoord.getEnd() +1;
383                primaryTranscriptCoord = new Coordinate(primaryPosition, primaryPosition+exonCoord.getLength()-1, strand);     
384                processedTranscriptCoord = new Coordinate(processedPosition , processedPosition+exonCoord.getLength()-1 , strand);
385                processedPosition += exonCoord.getLength();            
386            } else {
387                primaryPosition = exonCoord.getStart() - transcriptChrStart + 1;
388                primaryTranscriptCoord = new Coordinate(primaryPosition, primaryPosition+exonCoord.getLength()-1, strand);
389                processedTranscriptCoord = new Coordinate(processedPosition, processedPosition+exonCoord.getLength()-1, strand);  
390                processedPosition += exonCoord.getLength();
391            }
392           
393            
394            Mapping primaryTMappings = new Mapping();
395            primaryTMappings.setSource(this);
396            primaryTMappings.setTarget(ex);
397            primaryTMappings.setSourceCoordinates(primaryTranscriptCoord);
398            //this is actually the coordinates on the chromosome not the coordinates of the exon ;)
399            //primaryTMappings.setTargetCoordinates(exonCoord);
400            primaryTranscriptExonMappings.add(primaryTMappings);  
401            
402            Mapping processedTMappings = new Mapping();
403            processedTMappings.setSource(this);
404            processedTMappings.setTarget(ex);
405            processedTMappings.setSourceCoordinates(processedTranscriptCoord);
406            //processedTMappings.setTargetCoordinates(exonCoord);
407            processedTranscriptExonMappings.add(processedTMappings);  
408            
409            
410            length += exonCoord.getLength();
411        }
412       
413        this.processedTranscriptLength = length;
414    }
415
416    /**
417     * Returns the mapped relative primaryTranscript position for a given Chromosome 
418     * (or other Toplevel genomic) position. Wraps the DAFeature 'convertChromosomePositionToFeature'
419     * method.  The TopLevel Target should only have  a 
420     * positive coordinate system. If the TopLevel Target coordinates are found to extend below 1, 
421     * a range exception is thrown rather than try to handle this.
422     * @param chromosomePosition
423     */
424    public Integer convertChromosomeToPrimaryTranscriptPosition(Integer chromosomePosition) {
425        return super.convertChromosomePositionToFeature(chromosomePosition);
426    }
427    
428    /**
429     * Returns the mapped Chromosomal position for a given 
430     * primaryTranscript position. Wraps the DAFeature 'convertToTargetPosition'
431     * method.  Whilst the TopLevel Target should only have  a 
432     * positive coordinate system (and if not a RangeException will be thrown here), 
433     * the query Integer is allowed to be outwith the bounds of the Feature,
434     * and may possibly return a value outwith the bounds of the Chromosome.
435     * @param primaryTranscriptPosition
436     */
437    public Integer convertPrimaryTranscriptPositionToChromosome(Integer primaryTranscriptPosition) {
438        return super.convertToTargetPosition(primaryTranscriptPosition);
439    }
440    
441     /**
442     * Converts an Integer position on the processed (spliced) transcript  
443     * to the position on the TopLevel-annotated Target (typically the Chromosome). 
444     * The TopLevel Target should only have  a 
445     * positive coordinate system. However negative values will be returned here 
446     * if appropriate.
447     * @param processedTranscriptPosition Integer
448     * @return Integer
449     */  
450    public Integer convertProcessedTranscriptPositionToChromosome(Integer processedTranscriptPosition) throws DAOException {
451        
452        if (processedTranscriptPosition==null || processedTranscriptPosition == 0) {
453            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."
454                    +" Use -1 for one base upstream or +1 for the first base.");
455        } 
456        
457        Integer result = null;
458        
459        super.inititializeTopLevel();
460        
461        if (this.topLevelTargetCoordinates==null ||
462                this.topLevelTargetCoordinates.getStart()==null ||
463                this.topLevelTargetCoordinates.getEnd()==null) {
464            return null;
465        }
466        
467        
468        Coordinate primaryTranscCoords = this.topLevelTargetCoordinates;
469        
470   
471  
472                
473        if (processedTranscriptPosition<0) {
474            if (Strand.REVERSE_STRAND.equals(primaryTranscCoords.getStrand())) {
475                result = primaryTranscCoords.getEnd()-processedTranscriptPosition;
476                //if move from negative to positive
477                if (primaryTranscCoords.getEnd()<0 && result>0) {
478                    result++;
479                }                
480            } else {
481                result = primaryTranscCoords.getStart()+processedTranscriptPosition;             
482                //if move from positive to negative
483                if (primaryTranscCoords.getStart()>0 && result<1) {
484                    result--;
485                }
486            }
487            return result;
488        }
489
490        if ( processedTranscriptExonMappings==null ) {
491            this.stitchExons();
492        }  
493        
494        if (processedTranscriptExonMappings==null || processedTranscriptExonMappings.isEmpty()) {
495            return null;
496        }
497        
498        if ( processedTranscriptPosition>this.getProcessedTranscriptLength()) {
499            
500            if (Strand.REVERSE_STRAND.equals(primaryTranscCoords.getStrand())) {
501                result = primaryTranscCoords.getStart() - (+processedTranscriptPosition-this.getProcessedTranscriptLength());
502                if (primaryTranscCoords.getStart()>0 && result<1) {
503                    result--;
504                }
505                
506            } else {            
507                result = primaryTranscCoords.getEnd()+processedTranscriptPosition-this.getProcessedTranscriptLength();
508                if(primaryTranscCoords.getEnd()<1 && result>0) {
509                    result++;
510                }
511            }
512            return result;
513        }
514        
515        
516        for (Mapping m: this.processedTranscriptExonMappings) {
517            DAExon ex = (DAExon) m.getTarget();
518            Coordinate sourceCoordinates = m.getSourceCoordinates();
519            if (sourceCoordinates.containsPoint(processedTranscriptPosition)) {
520                Coordinate exonCoordinates = ex.getTopLevelMappings().first().getTargetCoordinates();
521                
522                if (Strand.REVERSE_STRAND.equals(exonCoordinates.getStrand())) {
523                    result =  exonCoordinates.getEnd() - (processedTranscriptPosition- sourceCoordinates.getStart() );
524                    //if move from positive to negative
525                    if (exonCoordinates.getEnd()>0 && result<1) {
526                        result--;
527                    }
528                    
529                } else {
530                    result = exonCoordinates.getStart() + (processedTranscriptPosition -sourceCoordinates.getStart() );
531                    //if move from negative to positive
532                    if ( exonCoordinates.getStart()<0 && result>0) {
533                        result++;
534                    }
535                    
536                    break;
537                }               
538            }
539        }
540        return result;
541    }
542    
543    /**
544     * Converts an Integer position on the TopLevel-annotated Target (typically the Chromosome) 
545     * to the position on the processed (spliced) Transcript. The TopLevel Target should only have  a 
546     * positive coordinate system. If the TopLevel Target position is less than 1, 
547     * a range exception is thrown rather than try to handle this.
548     * Although the  chromosomePosition parameter may not be  less than 1, the method does cope with 
549     * negative coordinate transcript/exon  mappings on the chromosome.
550     * @param chromosomePosition Integer
551     * @return Integer
552     */      
553    public Integer convertChromosomeToProcessedTranscriptPosition(Integer chromosomePosition) throws DAOException {
554        
555        if (chromosomePosition==null|| chromosomePosition==0 ) {
556            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world."
557                    +" Use -1 for one base upstream or +1 for the first base.");            
558        }
559        if (chromosomePosition<0) {
560            throw new RangeException("A chromosome has no coordinates lower than 0.");            
561        }
562
563        Integer result = null;            
564        Integer processedLength = null;
565        
566        //calling 'getProcessedTranscriptLength()' forces intialization of the top level mappings and 
567        //top level mappings and the stitching together of exons
568        processedLength = this.getProcessedTranscriptLength();
569
570        
571        if (processedLength==null 
572            || processedLength==0 
573            || this.topLevelTargetCoordinates==null 
574            || this.topLevelTargetCoordinates.getStart()==null 
575            || this.topLevelTargetCoordinates.getEnd()==null) {
576            return null;
577        }     
578        
579        Coordinate primaryTranscTargetCoords = this.topLevelTargetCoordinates;
580
581        if (Strand.REVERSE_STRAND.equals(primaryTranscTargetCoords.getStrand()) ) {
582            
583            //before transcript start
584            if (chromosomePosition > primaryTranscTargetCoords.getEnd()) {
585                    //result = this.getProcessedTranscriptLength() + (chromosomePosition- primaryTranscTargetCoords.getEnd() );
586                    result =  - (chromosomePosition -primaryTranscTargetCoords.getEnd());
587                    //if going from -ive to positive, we will have gone back  one too many
588                    if (chromosomePosition>0 && primaryTranscTargetCoords.getEnd()<1 ) {
589                        result++;
590                    }
591                    return result;
592                }
593            
594            if (chromosomePosition.equals( primaryTranscTargetCoords.getEnd())) {
595                return 1;
596            }
597                
598            //beyond transcript end
599            if (chromosomePosition < primaryTranscTargetCoords.getStart()) {
600                    //result =  - (primaryTranscTargetCoords.getStart() - chromosomePosition);
601                    result = processedLength + (primaryTranscTargetCoords.getStart()-chromosomePosition );
602//                    //if going from positive to negative, we will have gone forward one too many
603//                    if (chromosomePosition<0 && primaryTranscTargetCoords.getStart()>0) {
604//                        result--;
605//                    }                
606                    return result;    
607                }  
608                
609                
610        } else {
611             
612            //before transcript start
613            if (chromosomePosition<primaryTranscTargetCoords.getStart()) {
614                    result =  - (primaryTranscTargetCoords.getStart() - chromosomePosition);
615//                    //if move from positive to negative
616//                    if (chromosomePosition<0 && primaryTranscTargetCoords.getStart()>0) {
617//                        result++;
618//                    }
619                    return result;
620             }
621            
622             if (chromosomePosition.equals(primaryTranscTargetCoords.getStart())) {
623                 return 1;
624             }
625             
626            //beyond transcript end
627            if (chromosomePosition > primaryTranscTargetCoords.getEnd()) {
628                 result = processedLength + (chromosomePosition - primaryTranscTargetCoords.getEnd());
629                 //if move from negative to positive
630                 if (chromosomePosition>0 && primaryTranscTargetCoords.getEnd() <1) {
631                        result--;
632                 }
633                 return result;
634             }             
635        }
636        
637        if ( processedTranscriptExonMappings==null) {
638            this.stitchExons();
639        }     
640        
641        for (Mapping m: this.processedTranscriptExonMappings) {
642            DAExon ex = (DAExon) m.getTarget();
643            Coordinate exonCoordinates = ex.getTopLevelMappings().first().getTargetCoordinates();
644            if (exonCoordinates.containsPoint(chromosomePosition)) {
645                Coordinate sourceCoordinates = m.getSourceCoordinates();
646                
647                if (Strand.REVERSE_STRAND.equals(exonCoordinates.getStrand())) {
648                    result = sourceCoordinates.getStart() + (exonCoordinates.getEnd()-chromosomePosition );
649                } else {
650                    result = sourceCoordinates.getStart() + (chromosomePosition-exonCoordinates.getStart() );
651                    if (chromosomePosition>0 && exonCoordinates.getStart()<1) {
652                        result--;
653                    }
654                }     
655                break;
656            }
657        }
658
659        return result;
660    }    
661    
662    /**
663     * Returns the mapped position on the Primary transcript equivalent to the given 
664     * position on the processed transcript
665     * @param processedTranscriptPosition
666     * 
667     * @throws DAOException 
668     */
669    public Integer convertProcessedToPrimaryTranscriptPosition(Integer processedTranscriptPosition) throws DAOException {
670                       
671        if (processedTranscriptPosition==null|| processedTranscriptPosition==0 ) {
672            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world.");
673        }          
674        
675        if (processedTranscriptPosition<0) {
676            return processedTranscriptPosition;
677        }
678        
679        Integer chr = this.convertProcessedTranscriptPositionToChromosome(processedTranscriptPosition);
680        
681        if (chr==null) {
682            return null;
683        } else {
684            return this.convertChromosomeToPrimaryTranscriptPosition(chr);
685        }
686    }
687    
688    /**
689     * Returns the mapped position on the Processed transcript equivalent to the given 
690     * position on the Primary (unprocessed) transcript
691     * @param primaryTranscriptPosition
692     * 
693     * @throws DAOException 
694     */
695    public Integer convertPrimaryToProcessedTranscriptPosition(Integer primaryTranscriptPosition) throws DAOException {
696        
697        if (primaryTranscriptPosition==null|| primaryTranscriptPosition==0 ) {
698            throw new IllegalArgumentException("The position 0 is meaningless in the Ensembl DNA world.");
699        }  
700        
701        if (primaryTranscriptPosition<0) {
702            return primaryTranscriptPosition;
703        }        
704        
705        Integer chr = this.convertPrimaryTranscriptPositionToChromosome(primaryTranscriptPosition);
706        
707        if (chr==null) {
708            return null;
709        } else {
710            return this.convertChromosomeToProcessedTranscriptPosition(chr);
711        }
712    }
713    
714    
715    /**
716     * Returns a DADNASequence object representing the processed (spliced exons)
717     * version of this Transcript.
718     * 
719     * @throws DAOException 
720     */
721    public DADNASequence getProcessedTranscript() throws DAOException  {
722        
723        if (processedTranscript != null ) {
724            return processedTranscript;
725        }
726        
727        String seq = "";
728               
729        if ( processedTranscriptExonMappings==null) {
730                this.stitchExons();
731        }  
732        
733        for (Mapping m:processedTranscriptExonMappings ) {          
734            seq = seq.concat(super.getSequenceAsString(m.getSourceCoordinates().getStart(),m.getSourceCoordinates().getEnd()));
735        }
736        
737        if (seq!= null && !seq.isEmpty()) {
738            processedTranscript =  new DADNASequence(seq);
739        }
740        return processedTranscript;
741    }
742
743    /**
744     * Returns the length of the processed (spliced exons)
745     * version of this Transcript.
746     * 
747     * @throws DAOException 
748     */
749    public Integer getProcessedTranscriptLength() throws DAOException {
750        if (processedTranscriptLength==null) {
751            this.stitchExons();
752        }
753        return processedTranscriptLength;
754    }
755    
756    
757    /**
758     * Returns the length of the unprocessed (primary)
759     * version of this Transcript. (Wraps the DAFeature method 'getLength()').
760     * 
761     * @throws DAOException 
762     */
763    public Integer getPrimaryTranscriptLength()  { 
764        return super.getLength();
765    }
766    
767    /**
768     * Returns a DADNASequence object representing the unprocessed primary Transcript.
769     * (Wraps the DAFeature 'getSequence()' method.)
770     * 
771     */
772    public DADNASequence getPrimaryTranscript() {
773        return super.getSequence();
774    }
775    
776    /**
777     * Use of this deprecated method is equivalent to the preferred method 'getPrimaryTranscripRNASequence'.
778     * 
779     * @deprecated
780     */
781    @Deprecated
782    @Override
783    public RNASequence getRNASequence() {
784        return super.getRNASequence();
785    }
786    
787    /**
788     * Preferred method to explicitly get the RNASequence object representing the 
789     * PrimaryTranscript. Wraps DAFeature getRNASequence().
790     * 
791     */
792    public RNASequence getPrimaryTranscriptRNASequence() {
793        return super.getRNASequence();
794    }
795    
796    /**
797     * Use of this deprecated method is equivalent to the preferred method 'getPrimaryTranscripRNASequenceAsString'.
798     * 
799     * @deprecated
800     */    
801    @Deprecated
802    @Override
803    public String getRNASequenceAsString() {
804        return super.getRNASequenceAsString();
805    }
806
807    /**
808     * Preferred method to explicitly get a String representing the RNA sequence 
809     * of the  PrimaryTranscript. Wraps DAFeature getRNASequenceAsString().
810     * 
811     */    
812    public String getPrimaryTranscriptRNASequenceAsString() {
813        return super.getRNASequenceAsString();
814    }
815    
816    /**
817     * Use of this deprecated method is equivalent to the preferred method 
818     * 'getPrimaryTranscripRNASequenceAsString(int,int)'.
819     * 
820     * @deprecated
821     */    
822    @Deprecated
823    @Override
824    public String getRNASequenceAsString(Integer start, Integer stop) throws RangeException  {
825        return super.getRNASequenceAsString(start, stop);
826    }
827
828    /**
829     * Preferred method to explicitly get a String representing the RNA sequence 
830     * of the  PrimaryTranscript, for the given range. 
831     * Wraps DAFeature getRNASequenceAsString(int,int).
832     * 
833     */ 
834    public String getPrimaryTranscriptRNASequenceAsString(Integer start, Integer stop) throws RangeException  {
835        return super.getRNASequenceAsString(start, stop);
836    }
837    
838    /**
839     * Returns an RNASequence object representing the 
840     * Processed (RNA spliced) Transcript. 
841     * 
842     */
843    public RNASequence getProcessedTranscriptRNASequence() throws DAOException {
844        
845        if (this.getProcessedTranscript()==null || topLevelTargetSequence==null ) {
846            return null;
847        } else {
848           Integer id = topLevelTargetSequence.getCodonTableID();
849           return this.getProcessedTranscript().getRNASequence(this.getRegistry().getTranscriptionEngine(id));
850        }        
851
852    }    
853    
854    /**
855     * Returns a String representing the RNA sequence 
856     * of the  Processed (RNA spliced) Transcript. 
857     * 
858     */ 
859    public String getProcessedTranscriptRNASequenceAsString() throws DAOException  {
860        
861          if (this.getProcessedTranscript()==null || topLevelTargetSequence==null) {
862            return "";
863          }
864        
865        return this.getProcessedTranscriptRNASequence().getSequenceAsString();
866    }
867    
868    /**
869     * Returns a String representing the RNA sequence 
870     * of the  Processed (RNA spliced) Transcript, for the given range. 
871     * 
872     */ 
873    public String getProcessedTranscriptRNASequenceAsString(Integer start, Integer stop) throws RangeException, DAOException  {
874        
875          if (this.getProcessedTranscript()==null || topLevelTargetSequence==null) {
876            return "";
877          }
878        
879        return this.getProcessedTranscriptRNASequence().getSequenceAsString(start,stop, org.biojava3.core.sequence.Strand.POSITIVE);
880    }
881    
882    /**
883     * Beware: this method does not trigger lazyloading of typed XRefs. 
884     * @param type
885     */
886    protected Set<DAXRef> getXRefs(ExternalDBType type) {
887        return typedXRefs.get(type);
888    }
889    
890    /** 
891     * Triggers lazyload if Transcript is not initialized.
892     */
893    @Override
894    public DAXRef getDisplayXRef() {
895        this.reinitialize();
896        return displayXRef;
897    }
898
899    public void setDisplayXRef(DAXRef xref) {
900        this.displayXRef = xref;
901        if (xref!=null) {
902            this.xrefs.add(xref);
903            Set s = new HashSet<DAXRef>();
904            s.add(xref);
905            this.typedXRefs.put(ExternalDBType.DisplayID, s);            
906        }
907    }    
908
909    /**
910     * Returns any curated VegaID for the Transcript, forcing lazy load if not set, and defaulting 
911     * to an empty string if absent (e.g. for all the invertebrate species in EnsemblGenomes).
912     */    
913    @Override
914    public String getVegaID() {
915        
916        if (this.vegaTranscriptID!=null) {
917            return this.vegaTranscriptID;
918        }
919        
920        List<DAXRef> outList = new ArrayList<DAXRef>();
921        
922        if (this.getDaoFactory()!= null 
923                && !this.getDaoFactory().getRegistry().getDatasourceType().equals(DBConnection.DataSource.ENSEMBLDB)) {
924            this.vegaTranscriptID="";
925            this.addTypedXRefs(ExternalDBType.VegaTranscript, outList);
926            return this.vegaTranscriptID ;
927        }
928              
929        this.reinitialize();
930        
931        if (this.vegaTranscriptID==null ) {
932            
933            if (this.getDaoFactory()!= null ) {
934
935                  for (DAXRef dax : this.getAllXRefs()) {
936                        if (dax.getPrimaryAccession()!=null
937                                && dax.getPrimaryAccession().startsWith("OTT")
938                                && dax.getDB()!= null  
939                                && (dax.getDB().getDBName().equals(ExternalDBType.VegaTranscript.toString())
940                                    ||
941                                    dax.getDB().getDBName().contains("Vega_transcript") 
942                                    ||
943                                    dax.getDB().getDBName().contains("vega_transcript") )
944                                ) {
945                            outList.add(dax);
946                            }
947                  }
948                
949                if (outList.isEmpty()) {
950                    this.vegaTranscriptID = "";
951                    this.addTypedXRefs(ExternalDBType.VegaTranscript, outList);
952                    return this.vegaTranscriptID;
953                } else {
954                    //add the factory to the xref - although we shouldn't need to use it as it is fully initialized
955                    for (DAXRef xr: outList) {
956                        xr.setDaoFactory(this.getDaoFactory());
957                    }                    
958                    this.addTypedXRefs(ExternalDBType.VegaTranscript, outList);
959                }
960
961                if (outList.size()==1) {
962                    this.vegaTranscriptID = outList.get(0).getPrimaryAccession().trim();
963                    return this.vegaTranscriptID;
964                } else {
965                    //hopefully all the IDs will be the same - but can't guarantee this!
966                    String pre = "Multiple Vega IDs: {";
967                    boolean multiple = false;
968                    String out = null;
969                    String firstID = "";
970                    
971                    for (XRef x: outList) {
972                        if (out==null) {
973                            out = x.getPrimaryAccession().trim();
974                            firstID = x.getPrimaryAccession().trim(); 
975                        } else if (!firstID.equals(x.getPrimaryAccession().trim())) {                        
976                            out = out.concat(", ").concat(x.getPrimaryAccession().trim());
977                            multiple  = true;
978                        }
979                    }
980                    
981                    if (!multiple) {
982                        vegaTranscriptID=out;
983                    } else {
984                        out = out.trim().concat("}");
985                        vegaTranscriptID = pre.concat(out);
986                    }
987                    return this.vegaTranscriptID;
988                }
989            }          
990        }
991        return this.vegaTranscriptID;
992    }
993
994    @Override
995    public Set<DAXRef> getVegaXRefs() {
996        this.getVegaID();
997        return this.getXRefs(ExternalDBType.VegaTranscript);
998    }
999    
1000    @Override 
1001    public Set<DAXRef> getCCDSXRefs() {
1002        this.getCcdsID();
1003        return this.getXRefs(ExternalDBType.CCDS);
1004    }
1005
1006    @Override
1007    public Set<DAXRef> getAllXRefs() {
1008
1009        if (this.xrefsInitialized) {
1010            return xrefs;
1011        }
1012
1013        this.reinitialize();
1014
1015        if (this.getDaoFactory() != null && this.getId() != null) {
1016            List<DAXRef> result = null;
1017            try {
1018                result = (List<DAXRef>) this.getDaoFactory().getXRefDAO().getAllXRefs(this);
1019
1020            } catch (DAOException ex) {
1021                LOGGER.info("Threw DAOException on trying to get Vega ID for Feature: " + this.getStableID(), ex);
1022            } finally {
1023                this.xrefsInitialized = true;
1024            }
1025
1026            if (result == null || result.isEmpty()) {
1027                return xrefs;
1028            } else {
1029                
1030                for (DAXRef xr : result) {
1031                    //add the factory to the xref - although we shouldn't need to use it as it is fully initialized
1032                    xr.setDaoFactory(this.getDaoFactory());
1033                    /* shouldn't be necessary now */  
1034                    //check that we are not creating duplicate External DBs...
1035//                    ExternalDB db = xr.getDB();
1036//                    int originalHashCode = db.originalHashCode();
1037//                    db  = this.getDaoFactory().getDatabase().validateExternalDB(db);
1038//                    int checkedHashCode = db.originalHashCode();
1039//                    if (originalHashCode-checkedHashCode !=0) {
1040//                        System.out.println("*** FAILED TO REUSE EXTERNALDB");
1041//                    } else {
1042//                        System.out.println("*** successfully reused externaldb");
1043//                    }
1044//                    xr.setDB(db);
1045                    xrefs.add(xr);
1046                    }
1047                }
1048        }
1049        return xrefs;
1050    }
1051    
1052    @Override
1053    public TreeSet<String> getAllSynonyms() {
1054
1055        if (synonyms!=null) {
1056            return synonyms;
1057        }
1058        this.reinitialize();
1059        try {
1060            synonyms = this.getDaoFactory().getXRefDAO().getAllSynonyms(this);
1061        } catch (DAOException ex) {
1062            LOGGER.debug("Failed to getAllSynonyms for DAFeature", ex);
1063        }
1064        if (synonyms==null) {
1065            synonyms = new TreeSet<String>();
1066        }
1067        return synonyms;
1068    }
1069    
1070    @Override
1071    public TreeSet<String> getSynonyms(XRef xref) {
1072        return xref.getSynonyms();
1073    }       
1074        
1075    protected void addTypedXRefs(ExternalDBType type, Collection<? extends XRef> xrefs) {
1076        if ( this.typedXRefs.get(type)==null) {
1077            this.typedXRefs.put(type, new HashSet<DAXRef>());
1078        }
1079        this.typedXRefs.get(type).addAll((Collection<DAXRef>)xrefs);
1080    }
1081    
1082    /**
1083     * Returns any curated CCDS for this transcript. 
1084     * The Consensus CDS (CCDS) project is a collaborative effort to identify a 
1085     * core set of human and mouse protein coding regions that are consistently 
1086     * annotated and of high quality. The long term goal is to support convergence 
1087     * towards a standard set of gene annotations.  
1088     */
1089    @Override
1090    public String getCcdsID() {
1091        if (ccdsID==null) {
1092           setCcdsXRef();
1093        }
1094        return ccdsID;
1095    }
1096    
1097    /*
1098     * Private setter routine for the CCDS ID. Restricted to  ENSEMBL database.
1099     * Calls the getTypedXRefs() method for ExternalDBType.CCDS and stores the 
1100     * typed XRef results.
1101     */
1102    private void setCcdsXRef() {
1103        
1104        //return immediately if we have already set the list of CCDS XRefs
1105        if (this.getXRefs(ExternalDBType.CCDS)!= null) {
1106            return;
1107        }
1108        
1109        List<DAXRef> out = new ArrayList<DAXRef>();  
1110        
1111        
1112        if (this.getDaoFactory()!= null 
1113                && !this.getDaoFactory().getRegistry().getDatasourceType().equals(DBConnection.DataSource.ENSEMBLDB)) {
1114            this.ccdsID="";
1115            this.addTypedXRefs(ExternalDBType.CCDS, out);
1116            return;
1117        }
1118        this.reinitialize();
1119        
1120        if (this.getDaoFactory()!= null && this.getId()!=null) {                
1121
1122            for (DAXRef dax : this.getAllXRefs()) {
1123                if (dax.getDB()!= null  && dax.getDB().getDBName().equals(ExternalDBType.CCDS.toString())) {
1124                    out.add(dax);
1125                }
1126            }
1127
1128                if ( out.isEmpty() ) {
1129                    this.ccdsID = "";
1130                    this.addTypedXRefs(ExternalDBType.CCDS, out);
1131                    return;
1132                } else {
1133                    
1134                    //add the factory to the xref - although we shouldn't need to use it as it is fully initialized
1135                    for (DAXRef xr: out) {
1136                        xr.setDaoFactory(this.getDaoFactory());
1137                    }
1138                    
1139                    this.addTypedXRefs(ExternalDBType.CCDS, out);
1140                }
1141
1142                if (out.size()==1) {
1143                    //use the DisplayID in preference to the accession - because this has the version appended
1144                    this.ccdsID = out.get(0).getDisplayID().trim();
1145                } else {
1146                    //hopefully there wil be a single, or at least unique ID - but may not be able to guarantee this!
1147                    String pre = "Multiple CCDS IDs: {";
1148                    boolean multiple = false;
1149                    String o = null;
1150                    String firstID = "";
1151                    
1152                    for (XRef x: out) {
1153                        if (o==null) {
1154                            //use the DisplayID in preference to the accession - because this has the version appended
1155                            o = x.getDisplayID().trim();
1156                            firstID = x.getDisplayID().trim(); 
1157                        } else if (!firstID.equals(x.getDisplayID().trim())) {                        
1158                            o = o.concat(", ").concat(x.getDisplayID().trim());
1159                            multiple  = true;
1160                        }
1161                    }
1162                    o = o.trim().concat("}");
1163                    if (multiple) {                        
1164                        ccdsID = pre.concat(o);
1165                    }
1166                }
1167
1168            } else {
1169            this.ccdsID= "";
1170            }        
1171    }    
1172    
1173}