001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.datasourceaware.core; 023 024import java.util.*; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027import uk.ac.roslin.ensembl.config.DBConnection; 028import uk.ac.roslin.ensembl.config.EnsemblComparaDivision; 029import uk.ac.roslin.ensembl.config.ExternalDBType; 030import uk.ac.roslin.ensembl.config.FeatureType; 031import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory; 032import uk.ac.roslin.ensembl.datasourceaware.DAXRef; 033import uk.ac.roslin.ensembl.datasourceaware.compara.DAHomologyPairRelationship; 034import uk.ac.roslin.ensembl.exception.DAOException; 035import uk.ac.roslin.ensembl.model.ObjectType; 036import uk.ac.roslin.ensembl.model.XRef; 037import uk.ac.roslin.ensembl.model.XRefed; 038import uk.ac.roslin.ensembl.model.core.CollectionSpecies; 039import uk.ac.roslin.ensembl.model.core.Gene; 040import uk.ac.roslin.ensembl.model.core.Species; 041import uk.ac.roslin.ensembl.model.core.Transcript; 042import uk.ac.roslin.ensembl.model.core.VegaFeature; 043 044public class DAGene extends DAAnalyzedFeature implements Gene, VegaFeature, XRefed { 045 046 private String biotype = null; 047 private Integer canonicalTranscriptID = null; 048 private DATranscript canonicalTranscript = null; 049 private TreeMap<Integer, DATranscript> transcriptHash = null; 050 private HashMap<EnsemblComparaDivision, List<DAHomologyPairRelationship>> homologies = 051 new HashMap<EnsemblComparaDivision, List<DAHomologyPairRelationship>>(); 052 private TreeSet<EnsemblComparaDivision> homologySearchesPerformed = new TreeSet<EnsemblComparaDivision>(); 053 054 private TreeSet<String> synonyms = null; 055 protected Set<DAXRef> xrefs = new HashSet<DAXRef>(); 056 protected HashMap<ExternalDBType, Set<DAXRef>> typedXRefs = new HashMap<ExternalDBType, Set<DAXRef>>(); 057 boolean xrefsInitialized = false; 058 private Integer orthologueCount = null; 059 private Integer paralogueCount = null; 060 private Integer homoeologueCount = null; 061 private String vegaGeneID =null; 062 063 final static Logger LOGGER = LoggerFactory.getLogger(DAGene.class); 064 065 public DAGene() { 066 super(); 067 } 068 069 public DAGene(DAOCoreFactory factory) { 070 super(factory); 071 } 072 073 @Override 074 public ObjectType getType() { 075 return FeatureType.gene; 076 } 077 078 @Override 079 public String getBiotype() { 080 if (this.biotype==null) { 081 this.reinitialize(); 082 } 083 return this.biotype; 084 } 085 086 public void setBiotype(String biotype) { 087 this.biotype = biotype; 088 } 089 090 @Override 091 public Collection<DATranscript> getTranscripts() { 092 if (transcriptHash == null) { 093 094 try { 095 transcriptHash = new TreeMap<Integer, DATranscript>(); 096 return (Collection<DATranscript> ) this.getDaoFactory().getTranscriptDAO().getTranscriptsForGene(this); 097 } catch (Exception e) { 098 transcriptHash = new TreeMap<Integer, DATranscript>(); 099 LOGGER.info("Threw DAOException on trying to populate transcripts for a Gene", e); 100 } 101 } 102 103 return this.transcriptHash.values(); 104 } 105 106 public void addTranscript(Transcript transcript) { 107 if (transcriptHash==null) { 108 transcriptHash = new TreeMap<Integer, DATranscript>(); 109 } 110 try { 111 DATranscript t = (DATranscript) transcript; 112 this.transcriptHash.put(t.getId(), t); 113 } catch (Exception e) { 114 LOGGER.info("failed to add a transcript to the Gene", e); 115 } 116 } 117 118 @Override 119 public DATranscript getCanonicalTranscript() { 120 121 if (this.canonicalTranscript != null) { 122 return this.canonicalTranscript; 123 } 124 //force reinitialization 125 if (this.getCanonicalTranscriptID()==null || this.canonicalTranscriptID.equals(0)) { 126 return null; 127 } 128 //get all of the transcripts not just the canonical one! 129 this.getTranscripts(); 130 131 if (this.canonicalTranscript==null && this.canonicalTranscriptID!=null ) { 132 this.canonicalTranscript = this.transcriptHash.get(this.canonicalTranscriptID); 133 } 134 return this.canonicalTranscript; 135 136 } 137 138 public void setCanonicalTranscript (DATranscript transcript) { 139 this.canonicalTranscript = transcript; 140 } 141 142 143 @Override 144 public String getStableID() { 145 //can't reinitialize if we dont have the stableID 146 return stableID; 147 } 148 149 @Override 150 public void setStableID(String stableID) { 151 this.stableID = stableID; 152 } 153 154 public Integer getCanonicalTranscriptID() { 155 if (this.canonicalTranscriptID==null) { 156 this.reinitialize(); 157 } 158 return canonicalTranscriptID; 159 } 160 161 public void setCanonicalTranscriptID(Integer canonicalTranscriptID) { 162 this.canonicalTranscriptID = canonicalTranscriptID; 163 } 164 165 @Override 166 public DATranslation getCanonicalTranslation() { 167 if (this.getCanonicalTranscript() != null) { 168 return this.getCanonicalTranscript().getCanonicalTranslation(); 169 } else { 170 return null; 171 } 172 } 173 174 /** 175 * Note the special case for Bacteria in collections in DB >16 176 * @return 177 */ 178 @Override 179 public EnsemblComparaDivision getComparaDivision() { 180 181 if (this.getSpecies() != null && this.getSpecies() instanceof CollectionSpecies 182 && Integer.parseInt(this.getDBVersion())>16) { 183 return EnsemblComparaDivision.PAN_HOMOLOGY; 184 } else if (this.getSpecies() != null) { 185 return this.getSpecies().getComparaDivision(); 186 } 187 return null; 188 } 189 /** 190 * Returns all Homologies (as a List of HomologyPairRelationships) in the default 191 * ComparaDivision for this Gene. Implemented by 192 * calling getHomologies(EnsemblComparaDivision comparaDivision) with 193 * the default ComparaDivision for this Gene. 194 * 195 */ 196 @Override 197 public List<DAHomologyPairRelationship> getHomologies() { 198 return this.getHomologies(this.getComparaDivision()); 199 } 200 201 /** 202 * Returns all Homologies (as a List of HomologyPairRelationships) in the specified 203 * ComparaDivision for this Gene. 204 * @param comparaDivision 205 * 206 */ 207 @Override 208 public List<DAHomologyPairRelationship> getHomologies(EnsemblComparaDivision comparaDivision) { 209 210 List<DAHomologyPairRelationship> out = new ArrayList<DAHomologyPairRelationship>(); 211 212 if (comparaDivision==null) { 213 return out; 214 } 215 216 if (comparaDivision.equals(EnsemblComparaDivision.PAN_HOMOLOGY) 217 && !this.getSpecies().isInPanCompara(this.getDBVersion())) { 218 return out; 219 } 220 221 if (!this.homologySearchesPerformed.contains(comparaDivision)) { 222 223 if (this.getStableID() == null || this.getStableID().isEmpty()) { 224 return out; 225 } 226 227 try { 228 out = (List<DAHomologyPairRelationship>) this.getComparaFactory(comparaDivision).getHomologyDAO().getHomologiesForGene(this); 229 this.homologySearchesPerformed.add(comparaDivision); 230 //this would over write any existing... maybe ok! 231 this.homologies.put(comparaDivision, out); 232 233// if (this.homologies.get(comparaDivision) == null) { 234// this.homologies.put(comparaDivision, new ArrayList<DAHomologyPairRelationship>()); 235// } 236// this.homologies.get(comparaDivision).addAll(out); 237 238 } catch (Exception ex) { 239 LOGGER.info("failed to get Homologies", ex); 240 } 241 242 } 243 244 if (this.homologies.containsKey(comparaDivision)) { 245 return this.homologies.get(comparaDivision); 246 } else { 247 return new ArrayList<DAHomologyPairRelationship>(); 248 } 249 } 250 251 252 /** 253 * Returns Homologies (as a List of HomologyPairRelationships) in the default 254 * ComparaDivision for this Gene for the specified Species. Implemented by 255 * calling getHomologies(EnsemblComparaDivision comparaDivision, Species sp) with 256 * the default ComparaDivision for this Gene. 257 * @param sp 258 * 259 */ 260 @Override 261 public List<DAHomologyPairRelationship> getHomologies(Species sp) { 262 return this.getHomologies(this.getComparaDivision(), sp); 263 } 264 265 /** 266 * Returns Homologies (as a List of HomologyPairRelationships) in the specified 267 * ComparaDivision for the specified Species. 268 * Cos of the lack of indices it is much faster to implement using the query 269 * to get all homologues than restrict to a single gdb.name in the sql [unless 270 * you do it by a specific subselect]. So to get homologues for a single species do 271 * the whole query and do the filterring in the Java. This allows us to store 272 * all homologies at once too. 273 * @param comparaDivision 274 * @param sp 275 * 276 */ 277 @Override 278 public List<DAHomologyPairRelationship> getHomologies(EnsemblComparaDivision comparaDivision, Species sp) { 279 280 //make sure basic fetch is done 281 this.getHomologies(comparaDivision); 282 283 List<DAHomologyPairRelationship> out = new ArrayList<DAHomologyPairRelationship>(); 284 285 if (homologies.get(comparaDivision)!=null && !homologies.get(comparaDivision).isEmpty()) { 286 for (DAHomologyPairRelationship r : homologies.get(comparaDivision)) { 287 if (r.getTarget().getSpecies()!=null 288 && r.getTarget().getSpecies().equals(sp)) { 289 out.add(r); 290 } 291 } 292 } 293 return out; 294 } 295 296 297 public void addHomology(EnsemblComparaDivision division, DAHomologyPairRelationship homology) { 298 299 if (this.homologies.get(division)==null) { 300 this.homologies.put(division, new ArrayList<DAHomologyPairRelationship>()); 301 } 302 this.homologies.get(division).add(homology); 303 304 } 305 306 public boolean setSearchedHomologies (EnsemblComparaDivision comparaDivision) { 307 return this.homologySearchesPerformed.add(comparaDivision); 308 } 309 310 @Override 311 void reinitialize() { 312 if (!this.isLazyloadAllowed() || this.isInitialized()) { 313 return; 314 } 315 try { 316 //nb getDaoFactory() will try and make a factory if we have at least species and ensembl version 317 this.getDaoFactory().getGeneDAO().reInitialize(this); 318 319 } catch (Exception ex) { 320 LOGGER.info("Failed to reinitialize the Gene from the Database (using its stableID: " 321 +this.stableID+").", ex); 322 } finally { 323 //always set this so dont try again 324 this.setInitialized(true); 325 } 326 } 327 328 @Override 329 public String getDisplayName() { 330 this.reinitialize(); 331 return (displayName!=null) ? displayName : stableID ; 332 } 333 334 /** 335 * Utility method to get all of the current DAHomologyPairRelationships for this Gene 336 * 337 * @return List<DAHomologyPair BinaryRelationship> 338 */ 339 @Override 340 public List<DAHomologyPairRelationship> getHomologiesWithoutLazyLoad() { 341 342 List<DAHomologyPairRelationship> out = new ArrayList<DAHomologyPairRelationship>(); 343 344 for (EnsemblComparaDivision div: homologies.keySet()) { 345 out.addAll(homologies.get(div)); 346 } 347 return out; 348 } 349 350 /** 351 * Returns preloaded XRefs of given type: Note - does not lazy load these 352 * @param type 353 */ 354 protected Set<DAXRef> getXRefs(ExternalDBType type) { 355 return typedXRefs.get(type); 356 } 357 358 @Override 359 public DAXRef getDisplayXRef() { 360 this.reinitialize(); 361 return displayXRef; 362 } 363 364 public void setDisplayXRef(DAXRef xref) { 365 this.displayXRef = xref; 366 if (xref!=null) { 367 this.xrefs.add(xref); 368 Set s = new HashSet<DAXRef>(); 369 s.add(xref); 370 this.typedXRefs.put(ExternalDBType.DisplayID, s); 371 } 372 } 373 374 /** 375 * Returns any curated VegaID for the Gene, forcing lazy load if not set, and defaulting 376 * to an empty string if absent (e.g. for all the invertebrate species in EnsemblGenomes). 377 * @return String the VegaID of this Gene 378 */ 379 @Override 380 public String getVegaID() { 381 382 if (this.vegaGeneID!=null) { 383 return this.vegaGeneID; 384 } 385 386 List<DAXRef> outList = new ArrayList<DAXRef>(); 387 388 if (this.getDaoFactory()!= null 389 && !this.getDaoFactory().getRegistry().getDatasourceType().equals(DBConnection.DataSource.ENSEMBLDB)) { 390 this.vegaGeneID=""; 391 this.addTypedXRefs(ExternalDBType.VegaGene, outList); 392 return vegaGeneID; 393 } 394 395 this.reinitialize(); 396 397 if (this.vegaGeneID==null ) { 398 399 if (this.getDaoFactory()!= null && this.getId()!=null) { 400 401 for (DAXRef dax : this.getAllXRefs()) { 402 if (dax.getPrimaryAccession()!=null 403 && dax.getPrimaryAccession().startsWith("OTT") 404 && dax.getDB()!= null 405 && (dax.getDB().getDBName().equals(ExternalDBType.VegaGene.toString()) 406 || 407 dax.getDB().getDBName().contains("Vega_gene") 408 || 409 dax.getDB().getDBName().contains("vega_gene") ) 410 ) { 411 outList.add(dax); 412 } 413 } 414 415 416 if (outList.isEmpty()) { 417 this.vegaGeneID = ""; 418 this.addTypedXRefs(ExternalDBType.VegaGene, outList); 419 return this.vegaGeneID; 420 } else { 421 //add the factory to the xref - although we shouldn't need to use it as it is fully initialized 422 for (DAXRef xr: outList) { 423 xr.setDaoFactory(this.getDaoFactory()); 424 } 425 this.addTypedXRefs(ExternalDBType.VegaGene, outList); 426 } 427 428 if (outList.size()==1) { 429 this.vegaGeneID = outList.get(0).getPrimaryAccession().trim(); 430 return this.vegaGeneID; 431 } else { 432 //hopefully all the IDs will be the same - but can't guarantee this! 433 String pre = "Multiple Vega IDs: {"; 434 boolean multiple = false; 435 String out = null; 436 String firstID = ""; 437 438 for (XRef x: outList) { 439 if (out==null) { 440 out = x.getPrimaryAccession().trim(); 441 firstID = x.getPrimaryAccession().trim(); 442 } else if (!firstID.equals(x.getPrimaryAccession().trim())) { 443 out = out.concat(", ").concat(x.getPrimaryAccession().trim()); 444 multiple = true; 445 } 446 } 447 448 if (!multiple) { 449 vegaGeneID=out; 450 } else { 451 out = out.trim().concat("}"); 452 vegaGeneID = pre.concat(out); 453 } 454 return this.vegaGeneID; 455 } 456 } 457 } 458 return this.vegaGeneID; 459 } 460 461 @Override 462 public Set<DAXRef> getVegaXRefs() { 463 //getting the VegaID lazyloads both the VegaID and the Vega XRefs 464 this.getVegaID(); 465 return this.getXRefs(ExternalDBType.VegaGene); 466 } 467 468 @Override 469 public Set<DAXRef> getAllXRefs() { 470 471 if (this.xrefsInitialized) { 472 return xrefs; 473 } 474 475 this.reinitialize(); 476 477 if (this.getDaoFactory() != null && this.getId() != null) { 478 List<DAXRef> result = null; 479 try { 480 result = (List<DAXRef>) this.getDaoFactory().getXRefDAO().getAllXRefs(this); 481 482 } catch (DAOException ex) { 483 LOGGER.info("Threw DAOException on trying to get Vega ID for Feature: " + this.getStableID(), ex); 484 } finally { 485 this.xrefsInitialized = true; 486 } 487 488 if (result == null || result.isEmpty()) { 489 return xrefs; 490 } else { 491 492 for (DAXRef xr : result) { 493 //add the factory to the xref - although we shouldn't need to use it as it is fully initialized 494 xr.setDaoFactory(this.getDaoFactory()); 495 //check that we are not creating duplicate External DBs... 496 497 /* shouldn't be necessary now */ 498// ExternalDB db = xr.getDB(); 499// int originalHashCode = db.originalHashCode(); 500// db = this.getDaoFactory().getDatabase().validateExternalDB(db); 501// int checkedHashCode = db.originalHashCode(); 502// if (originalHashCode-checkedHashCode !=0) { 503// System.out.println("*** FAILED TO REUSE EXTERNALDB"); 504// } else { 505// System.out.println("*** successfully reused externaldb"); 506// } 507// xr.setDB(db); 508 xrefs.add(xr); 509 } 510 } 511 } 512 return xrefs; 513 } 514 515 @Override 516 public TreeSet<String> getAllSynonyms() { 517 518 if (synonyms!=null) { 519 return synonyms; 520 } 521 this.reinitialize(); 522 try { 523 synonyms = this.getDaoFactory().getXRefDAO().getAllSynonyms(this); 524 } catch (DAOException ex) { 525 LOGGER.debug("Failed to getAllSynonyms for DAFeature", ex); 526 } 527 if (synonyms==null) { 528 synonyms = new TreeSet<String>(); 529 } 530 return synonyms; 531 } 532 533 @Override 534 public TreeSet<String> getSynonyms(XRef xref) { 535 return xref.getSynonyms(); 536 } 537 538 protected void addTypedXRefs(ExternalDBType type, Collection<? extends XRef> xrefs) { 539 if ( this.typedXRefs.get(type)==null) { 540 this.typedXRefs.put(type, new HashSet<DAXRef>()); 541 } 542 this.typedXRefs.get(type).addAll((Collection<DAXRef>)xrefs); 543 } 544 545 public Integer getOrthologueCount() { 546 if (orthologueCount==null && this.getDaoFactory()!= null 547 && Integer.parseInt(this.getDaoFactory().getEnsemblSchemaVersion()) > 75 ) { 548 this.reinitialize(); 549 } 550 return orthologueCount; 551 } 552 553 public void setOrthologueCount(Integer orthologueCount) { 554 this.orthologueCount = orthologueCount; 555 } 556 557 public Integer getParalogueCount() { 558 if (paralogueCount==null && this.getDaoFactory()!= null 559 && Integer.parseInt(this.getDaoFactory().getEnsemblSchemaVersion()) > 75 ) { 560 this.reinitialize(); 561 } 562 return paralogueCount; 563 } 564 565 public void setParalogueCount(Integer paralogueCount) { 566 this.paralogueCount = paralogueCount; 567 } 568 569 public Integer getHomoeologueCount() { 570 if (homoeologueCount==null && this.getDaoFactory()!= null 571 && Integer.parseInt(this.getDaoFactory().getEnsemblSchemaVersion()) > 75 ) { 572 this.reinitialize(); 573 } 574 return homoeologueCount; 575 } 576 577 public void setHomoeologueCount(Integer homoeologueCount) { 578 this.homoeologueCount = homoeologueCount; 579 } 580 581 582}