001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.dao.database; 023 024import java.io.BufferedWriter; 025import java.io.File; 026import java.io.FileWriter; 027import java.io.IOException; 028import java.io.InputStreamReader; 029import java.io.Reader; 030import java.util.*; 031import java.util.Map.Entry; 032import org.apache.ibatis.session.SqlSession; 033import org.apache.ibatis.session.SqlSessionFactory; 034import org.apache.ibatis.session.SqlSessionFactoryBuilder; 035import org.biojava3.core.sequence.transcription.TranscriptionEngine; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038import uk.ac.roslin.ensembl.config.*; 039import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 040import uk.ac.roslin.ensembl.exception.ConfigurationException; 041import uk.ac.roslin.ensembl.exception.DAOException; 042import uk.ac.roslin.ensembl.exception.NonUniqueException; 043import uk.ac.roslin.ensembl.mapper.DatabaseMapper; 044import uk.ac.roslin.ensembl.model.core.CollectionSpecies; 045import uk.ac.roslin.ensembl.model.core.Species; 046import uk.ac.roslin.ensembl.model.database.*; 047 048public class DBRegistry 049 implements Registry { 050 051 final static Logger LOGGER = LoggerFactory.getLogger(DBRegistry.class); 052 final static String AmbiguitySeparator = "|OR|"; 053 protected SqlSessionFactory sqlMapper; 054 protected Properties configProperties = new Properties(); 055 protected SchemaVersion schemaVersion = null; 056 protected DBConnection dbConnection = null; 057 private String mostRecentEnsemblVersion; 058// private String mostRecentGenomesVersion; 059 //incase the datasource does not include the 'current' dbVersion 060 // the 'schema' Version 061 private int highestEnsemblSchemaVersion = 0; 062 //the DB dbVersion 063 private int highestReleaseVersion = 0; 064// private int currentDatasourceVersion = 0; 065 //the versions of core databases for a species or collection name 066 private HashMap<String, TreeSet<Integer>> databaseVersions = new HashMap<String, TreeSet<Integer>>(); 067 protected TreeSet<Integer> knownSchemaVersions = new TreeSet<Integer>(); 068 //the mybatis configProperties for database connection 069 private String baseMybatis; 070 //to read the mybatis regConfig 071 protected Reader reader; 072 073 //these two hashes hold all the species....keyed on the meta.key = species.productionname 074 //(databasestylename) 075 protected TreeMap<String, DBSpecies> speciesHash = new TreeMap<String, DBSpecies>(); 076 //for collection species without GCAccessions (mostly prior to the use of non-taxonomic collections) 077 protected TreeMap<String, DBCollectionSpecies> oldstyle_collectionspeciesHashByDBName = new TreeMap<String, DBCollectionSpecies>(); 078// //only put species made from new style collections ( or rather, any species with a GCAccession) in here 079 080 protected TreeMap<String, DBCollectionSpecies> collectionspeciesHashByGCAccession = new TreeMap<String, DBCollectionSpecies>(); 081 082 protected List<DBCollection> collections = new ArrayList<DBCollection>(); 083 //we need a separate one of these for each collection (dbVersion) as they will grow 084 protected TreeMap<String, TreeMap<String, DBCollection>> collectionNameDBVersionHash = new TreeMap<String, TreeMap<String, DBCollection>>(); 085 //the aliases are now all added as lower case versions 086 protected TreeMap<String, String> ensemblNameForAliasHash = new TreeMap<String, String>(); 087 088 //new style ( post v16) collection species should all be indexed by GCAccession 089 //hence we need a hash of alias to Accession 090 protected TreeMap<String, String> assemblyAccessionStemForAliasHash = new TreeMap<String, String>(); 091 092 //the databases 093 protected TreeSet<DBDatabase> allDatabases = new TreeSet<DBDatabase>(); 094 protected TreeSet<DBSingleSpeciesDatabase> singleSpeciesDatabases = new TreeSet<DBSingleSpeciesDatabase>(); 095 protected TreeSet<DBComparisonDatabase> comparisonDatabases = new TreeSet<DBComparisonDatabase>(); 096 //HashMap of comparison databases indexed on 'group' , then on 'dbVersion' (not schema dbVersion) 097 protected HashMap<EnsemblComparaDivision, HashMap<String, DBComparisonDatabase>> comparaDBByDivision 098 = new HashMap<EnsemblComparaDivision, HashMap<String, DBComparisonDatabase>>(); 099 protected TreeSet<DBCollectionDatabase> collectionDatabases = new TreeSet<DBCollectionDatabase>(); 100 protected String newline = (System.getProperty("line.separator") != null) ? System.getProperty("line.separator") : "\r\n"; 101 protected StringBuilder tooNewDB = new StringBuilder(); 102 protected StringBuilder unknownDB = new StringBuilder(); 103 protected RegistryConfiguration regConfig = null; 104 DataSource datasourceType; 105 protected boolean alreadyInitialized = false; 106 protected boolean alreadyParsed = false; 107 protected HashMap<String, String> renamedDBs = new HashMap<String, String>(); 108 109 private HashMap<Integer, TranscriptionEngine> transcriptionEngines = new HashMap<Integer, TranscriptionEngine>(); 110 111 public static DBRegistry createEmptyRegistry() { 112 return new DBRegistry(); 113 } 114 115 public static DBRegistry createRegistryForDataSource(DataSource type) throws ConfigurationException, DAOException { 116 if (type == null) { 117 throw new ConfigurationException("Invalid Datasource"); 118 } 119 if (DataSource.ENSEMBLBACTERIA.equals(type)) { 120 throw new ConfigurationException("Ensembl Bacteria data sources are too large to be loaded en masse. " 121 +"Use '.createRegistryForDataSourceCurrentRelease()' to load the current release; " 122 +"or create an unitialized registry to interrogate available versions and then load a single release."); 123 } 124 DBRegistry dbRegistry = new DBRegistry(); 125 dbRegistry.setConfiguration(type); 126 dbRegistry.initialize(null); 127 return dbRegistry; 128 } 129 130 public static DBRegistry createUninitializedRegistryForDataSource(DataSource type) throws ConfigurationException, DAOException { 131 if (type == null) { 132 throw new ConfigurationException("Invalid Datasource"); 133 } 134 DBRegistry dbRegistry = new DBRegistry(); 135 dbRegistry.setConfiguration(type); 136 dbRegistry.parseWithoutInitializing(); 137 return dbRegistry; 138 } 139 140 public static DBRegistry createRegistryForConfiguration(RegistryConfiguration conf) throws ConfigurationException, DAOException { 141 if (conf == null) { 142 throw new ConfigurationException("Invalid Configuration"); 143 } 144 DBRegistry dbRegistry = new DBRegistry(); 145 dbRegistry.setConfiguration(conf); 146 dbRegistry.initialize(null); 147 return dbRegistry; 148 } 149 150 public static DBRegistry createUninitializedRegistryForConfiguration(RegistryConfiguration conf) throws ConfigurationException, DAOException { 151 if (conf == null) { 152 throw new ConfigurationException("Invalid Configuration"); 153 } 154 DBRegistry dbRegistry = new DBRegistry(); 155 dbRegistry.setConfiguration(conf); 156 dbRegistry.parseWithoutInitializing(); 157 return dbRegistry; 158 } 159 160 public static DBRegistry createRegistryForDataSourceCurrentRelease(DataSource type) throws ConfigurationException, DAOException { 161 if (type == null) { 162 throw new ConfigurationException("Invalid Datasource"); 163 } 164 165 // would be simpler just to do 166 //SchemaVersion versionProps = new SchemaVersion(); 167 //versionProps.getCurrentGenomesVersion() 168 //versionProps.getCurrentEnsemblVersion() 169 DBRegistry dbRegistry = new DBRegistry(); 170 dbRegistry.setConfiguration(type); 171 dbRegistry.parseWithoutInitializing(); 172 dbRegistry.knownSchemaVersions.clear(); 173 //allows backpedaling if actual sorce is older than config 174 //(e.g. ensemblgenomes updates a month later than ensembl) 175 dbRegistry.knownSchemaVersions.add(dbRegistry.highestEnsemblSchemaVersion); 176 dbRegistry.initialize(dbRegistry.highestReleaseVersion); 177 return dbRegistry; 178 } 179 180 public static DBRegistry createRegistryForDataSourceAtReleaseVersion(DataSource type, Integer release) throws ConfigurationException, DAOException { 181 if (type == null) { 182 throw new ConfigurationException("Invalid Datasource"); 183 } 184 if (release == null) { 185 throw new ConfigurationException("Invalid schema/release"); 186 } 187 DBRegistry dbRegistry = new DBRegistry(); 188 dbRegistry.setConfiguration(type); 189 dbRegistry.parseWithoutInitializing(); 190 if (type.equals(DataSource.ENSEMBLDB)) { 191 if (!dbRegistry.knownSchemaVersions.contains(release)) { 192 throw new ConfigurationException(release + " not a known schema/release"); 193 } 194 } 195 dbRegistry.initialize(release); 196 return dbRegistry; 197 } 198 199 /** 200 * Parameterless public constructor for DBRegistry object 201 */ 202 private DBRegistry() { 203 } 204 205 public void setConfiguration(RegistryConfiguration conf) throws ConfigurationException, DAOException { 206 this.regConfig = conf; 207 this.renamedDBs = this.regConfig.getDb().getRenamedDBs(); 208 this.datasourceType = regConfig.getType(); 209 this.dbConnection = regConfig.getDb(); 210 this.configProperties.putAll(dbConnection.getConfigurationProperties()); 211 this.schemaVersion = regConfig.getSchema(); 212 this.configProperties.putAll(schemaVersion.getConfigurationProperties()); 213 this.mostRecentEnsemblVersion = this.schemaVersion.getCurrentEnsemblVersion(); 214// this.mostRecentGenomesVersion = this.schemaVersion.getCurrentGenomesVersion(); 215 216// //for ensemble genomes we have a DBrelease_schema_build 217// //but for ensembl we have DBrelease_build, where DBrelease = schema 218// 219// if (this.datasourceType==DataSource.ENSEMBLGENOMES) { 220// this.currentDatasourceVersion = Integer.parseInt(this.mostRecentGenomesVersion); 221// } else { 222// this.currentDatasourceVersion = Integer.parseInt(this.mostRecentEnsemblVersion); 223// } 224 String[] versions = schemaVersion.getRegisteredSchemas(); 225 226 for (int i = 0; i < versions.length; i++) { 227 this.knownSchemaVersions.add(Integer.parseInt(versions[i])); 228 } 229 230 LOGGER.info("This application is configured to use schema version " + mostRecentEnsemblVersion 231 + " of Ensembl, and knows about the schema versions: " + Arrays.toString(versions)); 232 233 // mybatis configProperties for databaseDAO 234 this.baseMybatis = this.schemaVersion.getBaseMybatis(); 235 try { 236 reader = new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream(baseMybatis)); 237 //this doesn't work once you bundle up 238 //reader = Resources.getResourceAsReader(baseMybatis); 239 } catch (Exception ex) { 240 throw new ConfigurationException("Fail to read Mybatis Configuration for initial Database connection", ex); 241 } 242 try { 243 sqlMapper = new SqlSessionFactoryBuilder().build(reader, "current", configProperties); 244 if (sqlMapper == null) { 245 throw new Exception("SqlSessionFactory is null"); 246 } 247 } catch (Exception e) { 248 throw new ConfigurationException("The DBRegistry is unable to make a SqlSessionFactory", e); 249 } 250 } 251 252 public void setConfiguration(DataSource type) throws ConfigurationException, DAOException { 253 this.setConfiguration(new RegistryConfiguration(type)); 254 } 255 256 private void parseWithoutInitializing() throws ConfigurationException, DAOException { 257 258 if (this.alreadyInitialized) { 259 throw new ConfigurationException("Tried to re-initialize a DBRegistry"); 260 } 261 262 LOGGER.info("Registry is reading information about databases available from this source (" 263 + this.dbConnection.getConfigurationProperties().getProperty("url") + "):"); 264 265 for (String s : this.getDatabases()) { 266 267 if (dbConnection.isDBNameValid(s)) { 268 try { 269 DBDatabase db = this.makeDatabase(s.trim()); 270 271 if (db.intSchemaVersion > Integer.parseInt(this.mostRecentEnsemblVersion)) { 272 LOGGER.warn("IGNORING Database " + s + " with higher schema number (" 273 + db.getSchemaVersion() + ") than application configuration (" + this.mostRecentEnsemblVersion + ")."); 274 tooNewDB.append("\t"+s + newline); 275 } else if (!this.knownSchemaVersions.contains(Integer.parseInt(db.getSchemaVersion()))) { 276 LOGGER.warn("IGNORING Database " + s + " with schema number (" 277 + db.getSchemaVersion() + ") not found in the application configuration."); 278 unknownDB.append("\t"+s + newline); 279 } else { 280 this.pseudoRegisterDatabase(db); 281 } 282 283 } catch (Exception e) { 284 LOGGER.warn("Unspecified error thrown trying to make a Database object for '" + s + "'", e); 285 } 286 } 287 } 288 this.alreadyParsed=true; 289 290 } 291 292 /** 293 * private initialisation method for the registry which connects to ensembl 294 * via a DBDatasourceDAO and retrieves details on all the current tables and 295 * creates database objects of the appropriate type for each. Also retrieves 296 * dbSpeciesName aliases for all the current release dbSpeciesName. And 297 * finds out what dbSpeciesName are present in multispecies databases. 298 * 299 * @param release 300 * @throws DAOException, ConfigurationException 301 * @throws uk.ac.roslin.ensembl.exception.ConfigurationException 302 */ 303 public void initialize(Integer release) throws DAOException, ConfigurationException { 304 305 if (this.alreadyInitialized) { 306 throw new ConfigurationException("Tried to re-initialize a DBRegistry"); 307 } 308 309 //populates the names of all the databases available, excluding marts, test, mysql and information_schema 310 LOGGER.info("Registry is loading and sorting all databases available from this source (" 311 + this.dbConnection.getConfigurationProperties().getProperty("url") + "):"); 312 313 if (release != null) { 314 LOGGER.info("Loading databases only for release " + release); 315 } 316 317 tooNewDB = new StringBuilder(); 318 unknownDB = new StringBuilder(); 319 320 if (release != null) { 321 this.highestReleaseVersion = release; 322 } 323 324 /* 325 * note by default translation by this default engine is configured to convert 326 * non-Methionine initiations to Methionine. This cannot be alterred so you need 327 * to make an appropriate different Engine to use in place of the default 328 * TranscriptionEngine.Builder b = new TranscriptionEngine.Builder(); 329 * b.table(1).initMet(false).trimStop(true); 330 * TranscriptionEngine alternativeDefault = b.build(); 331 * 332 * this could then be handed to (DATranlsation).getProteinSequence(TranscriptionEngine trancriptionEngine) 333 * or (DNASequence).getRNASequence().getProteinSequence(engine) 334 */ 335 transcriptionEngines.put(1, TranscriptionEngine.getDefault()); 336 337 for (String s : this.getDatabases()) { 338 339 if (dbConnection.isDBNameValid(s)) { 340// if (s.startsWith("bacillus_collection_core_") 341// || s.startsWith("bacteria_21_collection_core_") ) { 342// && (s.startsWith("escherichia_shigella_collection_core_16") 343// || s.startsWith("bacteria_22"))) { 344 try { 345 DBDatabase db = this.makeDatabase(s.trim()); 346 347 if (release != null && db.getIntDBVersion() != release) { 348 continue; 349 } 350 351 if (db.intSchemaVersion > Integer.parseInt(this.mostRecentEnsemblVersion)) { 352 LOGGER.warn("IGNORING Database " + s + " with higher schema number (" 353 + db.getSchemaVersion() + ") than application configuration (" + this.mostRecentEnsemblVersion + ")."); 354 tooNewDB.append("\t"+s + newline); 355 } // //might drop this test, we may drop 'mostRecentGenomes'.... 356 // else if ((this.datasourceType == DataSource.ENSEMBLGENOMES) 357 // && db.intDBVersion > Integer.parseInt(this.mostRecentGenomesVersion)) { 358 // LOGGER.warn("BEWARE: Adding Database " + s + " with higher release number (" 359 // + db.intDBVersion + ") than application configuration (" + this.mostRecentGenomesVersion + "). " 360 // + "This database can be accessed by schema or release number but will not be returned by default methods that return the current release version."); 361 // this.aboveCurrentReleaseDB.append(s + newline); 362 // this.registerDatabase(db); 363 //// LOGGER.warn("IGNORING Database " + s + " with higher release number (" 364 //// + db.intDBVersion + ") than application configuration (" + this.mostRecentGenomesVersion + ")."); 365 //// tooNewDB.append("\t"+s + newline); 366 // } 367 else if (!this.knownSchemaVersions.contains(Integer.parseInt(db.getSchemaVersion()))) { 368 LOGGER.warn("IGNORING Database " + s + " with schema number (" 369 + db.getSchemaVersion() + ") not found in the application configuration."); 370 unknownDB.append("\t"+s + newline); 371 } else { 372 this.registerDatabase(db); 373 } 374 375 } catch (Exception e) { 376 LOGGER.warn("Unspecified error thrown trying to make a Database object for '" + s + "'", e); 377 } 378 } 379 } 380 381 if (allDatabases.isEmpty()) { 382 throw new ConfigurationException("No Valid Databases Loaded"); 383 } 384 385 if (release != null) { 386 DBDatabase first = allDatabases.first(); 387 if (first != null) { 388 this.mostRecentEnsemblVersion = first.schemaVersion; 389 this.highestEnsemblSchemaVersion = first.intSchemaVersion; 390 this.highestReleaseVersion = release; 391 this.knownSchemaVersions.clear(); 392 this.knownSchemaVersions.add(release); 393 } 394 } 395 396 //change this to get the most recent release rather than the 'current release' 397 //this should sort the noted BUG below too 398 //get all the core databases for the current release 399 //so that we can create Species database objects for each of these 400 //filling in the aliases and some other properties 401 //we then put these in a hash against genus_species 402 //BUG: if the datasource doesnt have any current releases 403 //e.g. if configured to load ensembldb_archives 404 //we wont make any species - 405 //need a work around if this is the case - to use the highest release dbVersion found 406 TreeSet<DBSingleSpeciesDatabase> badDatabases = new TreeSet<DBSingleSpeciesDatabase>(); 407 408 // go through all the SSdatabases and select the current core releases 409 // and make a species object from these 410 for (DBSingleSpeciesDatabase d : this.singleSpeciesDatabases) { 411 412 //can't use this method anymore cos can throw a nonUniqueException 413// if (d.type == EnsemblDBType.core 414// && d.getDBVersion().equalsIgnoreCase(this.getMostRecentDBVersion(d.getdBClassifier()))) { 415 String mostRecentVersion = null; 416 417 if (this.databaseVersions.containsKey(d.getdBClassifier())) { 418 mostRecentVersion = this.databaseVersions.get(d.getdBClassifier()).last().toString(); 419 } else { 420 //if this isnt a db classifier as stored in the version hash - bail out 421 continue; 422 } 423 424 //if this is the most recent core database - we make the species 425 if (d.type == EnsemblDBType.core 426 && d.getDBVersion().equalsIgnoreCase(mostRecentVersion)) { 427 428 DBSpecies spData = null; 429 430 //this can fail if there is something wrong with the database! 431 try { 432 spData = this.createSpecies((DBSingleSpeciesCoreDatabase) d); 433 } catch (DAOException dAOE) { 434 LOGGER.info("Failed to create a species for current core database: " + d.getdBName() 435 + ": Removed this database!", dAOE); 436 } 437 if (spData != null && spData.getDatabaseStyleName() != null && !spData.getDatabaseStyleName().isEmpty()) { 438 d.setSpecies(spData); 439 speciesHash.put(spData.getDatabaseStyleName(), spData); 440 } else { 441 //store this as a bad (current) database 442 badDatabases.add(d); 443 } 444 } 445 446 } 447 448 //remove any databases that i cant make a species for! 449 this.singleSpeciesDatabases.removeAll(badDatabases); 450 this.allDatabases.removeAll(badDatabases); 451 452 // the collections will add members with time - so each collectionDB needs 453 //its own Collection 454 //get all the collection_core databases 455 //so that we can create Collection database objects for each of these 456 //and populate it with CollectionSpecies objects for this collection 457 // however we will reuse existing CollectionSpecies objects 458 // and only add new ones to the registry 459 TreeSet<DBCollectionDatabase> badCollectionDatabases = new TreeSet<DBCollectionDatabase>(); 460 461 for (DBCollectionDatabase d : this.collectionDatabases) { 462 if (d.type == EnsemblDBType.collection_core) { 463 464 // make a registrycollection for each collection core database 465 //as these may differ over time 466 DBCollection thisCollection = null; 467 try { 468 //creates (or reuses) all the species in the collection 469 thisCollection = this.getCollectionSpeciesProperties((CollectionCoreDatabase) d); 470 } catch (DAOException ex) { 471 LOGGER.info("Failed to create a collectionfor current core database: " + d.getdBName() 472 + ": Removed this database!", ex); 473 } 474 475 if (thisCollection != null) { 476 this.collections.add(thisCollection); 477 //d.setCollection(r); 478 if (this.collectionNameDBVersionHash.containsKey(thisCollection.getCollectionName())) { 479 this.collectionNameDBVersionHash.get(thisCollection.getCollectionName()).put(d.getDBVersion(), thisCollection); 480 } else { 481 TreeMap<String, DBCollection> map = new TreeMap<String, DBCollection>(); 482 map.put(d.getDBVersion(), thisCollection); 483 this.collectionNameDBVersionHash.put(thisCollection.getCollectionName(), map); 484 } 485 } else { 486 badCollectionDatabases.add(d); 487 } 488 } 489 } 490 491 this.collectionDatabases.removeAll(badCollectionDatabases); 492 this.allDatabases.removeAll(badCollectionDatabases); 493 494 // we need to set the collections on the other types of collection databases 495 for (DBCollectionDatabase d : this.collectionDatabases) { 496 if (d.type != EnsemblDBType.collection_core) { 497 String dbVersion = d.getDBVersion(); 498 String cName = d.getCollectionName(); 499 if (this.collectionNameDBVersionHash.containsKey(cName) 500 && this.collectionNameDBVersionHash.get(cName).containsKey(dbVersion)) { 501 d.setCollection(this.collectionNameDBVersionHash.get(cName).get(dbVersion)); 502 } 503 } 504 } 505 506 //add aliases for renamed databases 507 //only for single species databases 508 for (Map.Entry<String, String> entry : this.renamedDBs.entrySet()) { 509 if (speciesHash.containsKey(entry.getValue())) { 510 speciesHash.get(entry.getValue()).getAliases().add(entry.getKey()); 511 speciesHash.get(entry.getValue()).getAliases().add(entry.getKey().replace("_", " ")); 512 } 513 } 514 515 //we now populate the look up hash for aliases 516 //and at the same time add all the relevant databases to each DBSpecies 517 //and vice versa 518 for (DBSpecies sp : speciesHash.values()) { 519 for (String s : sp.getAliases()) { 520 if (this.ensemblNameForAliasHash.containsKey(s)) { 521 String n = this.ensemblNameForAliasHash.get(s).concat(AmbiguitySeparator + sp.getDatabaseStyleName()); 522 this.ensemblNameForAliasHash.put(s, n); 523 } else { 524 this.ensemblNameForAliasHash.put(s, sp.getDatabaseStyleName()); 525 } 526 } 527 for (DBSingleSpeciesDatabase d : this.singleSpeciesDatabases) { 528 if (d.getDbSpeciesName().equalsIgnoreCase(sp.getDatabaseStyleName())) { 529 sp.addDatabase(d); 530 if (d.getSpecies() == null) { 531 d.setSpecies(sp); 532 } 533 } 534 } 535 } 536 537 for (DBCollection coll : this.collections) { 538 //this sets all the species to have the most recent dbstyle name, and all the TreeSets 539 //in the collections to be remade 540 coll.resortSpecies(); 541 } 542 543 for (DBCollectionSpecies sp : oldstyle_collectionspeciesHashByDBName.values()) { 544 for (String s : sp.getAliases()) { 545 if (this.ensemblNameForAliasHash.containsKey(s)) { 546 String n = this.ensemblNameForAliasHash.get(s).concat(AmbiguitySeparator + sp.getDatabaseStyleName()); 547 this.ensemblNameForAliasHash.put(s, n); 548 } else { 549 this.ensemblNameForAliasHash.put(s, sp.getDatabaseStyleName()); 550 } 551 } 552 for (DBCollectionDatabase d : this.collectionDatabases) { 553 try { 554 if (EnsemblDBType.getCollectionDatabaseTypes().contains(d.getType()) 555 && d.getCollection().getSpecies().contains(sp)) { 556 sp.addDatabase(d); 557 } 558 } catch (Exception e) { 559 } 560 } 561 } 562 563 for (DBCollectionSpecies sp : collectionspeciesHashByGCAccession.values()) { 564 for (String s : sp.getAliases()) { 565 566 if (this.assemblyAccessionStemForAliasHash.containsKey(s)) { 567 String n = this.assemblyAccessionStemForAliasHash.get(s).concat(AmbiguitySeparator + sp.getAssemblyAccessionStem()); 568 this.assemblyAccessionStemForAliasHash.put(s, n); 569 } else { 570 this.assemblyAccessionStemForAliasHash.put(s, sp.getAssemblyAccessionStem()); 571 } 572 } 573 for (DBCollectionDatabase d : this.collectionDatabases) { 574 try { 575 576 if (EnsemblDBType.getCollectionDatabaseTypes().contains(d.getType()) 577 && d.getCollection().getSpecies().contains(sp)) { 578 sp.addDatabase(d); 579 } 580 } catch (Exception e) { 581 } 582 } 583 } 584 585 //add all the relevant databases to each DBCollection 586 for (Entry<String, TreeMap<String, DBCollection>> e : this.collectionNameDBVersionHash.entrySet()) { 587 String name = e.getKey(); 588 589 for (Entry<String, DBCollection> ei : e.getValue().entrySet()) { 590 String dbversion = ei.getKey(); 591 DBCollection collection = ei.getValue(); 592 TreeSet<CollectionDatabase> temp = new TreeSet<CollectionDatabase>(); 593 for (DBCollectionDatabase d : this.collectionDatabases) { 594 if (d.getCollectionName().equals(name) && d.getDBVersion().equalsIgnoreCase(dbversion)) { 595 temp.add(d); 596 } 597 } 598 collection.addDatabases(temp); 599 } 600 } 601 this.alreadyInitialized = true; 602 } 603 604 /** 605 * Reports the available database versions for this Registry. 606 * Will successfully report for either initialized or uninitialized Registries. 607 * since v74 608 * @return StringBuilder 609 */ 610 @Override 611 public StringBuilder getVersionReport() { 612 613 StringBuilder out = new StringBuilder(); 614 615 out.append(newline+"VERSION REPORT"+newline+"--------------"+newline+newline); 616 617 if (datasourceType == null) { 618 return out.append("\tWARNING: This Registry has invalid DataSource."); 619 } 620 if (dbConnection == null) { 621 return out.append("\tWARNING: This Registry has invalid Database Connection."); 622 } 623 624 if (!this.alreadyInitialized) { 625 out.append("\tWARNING: This registry is uninitialized."+newline); 626 } 627 628 out.append("Registry for: [" + this.datasourceType.toString() + "] " + this.dbConnection.getConfigurationProperties().getProperty("url") + newline); 629 630 out.append("Most recent configured Ensembl Release Schema: " + this.mostRecentEnsemblVersion + newline); 631 out.append("Most recent Schema Version available in Datasource: " + this.highestEnsemblSchemaVersion + newline); 632 if ( this.datasourceType==DataSource.ENSEMBLGENOMES) { 633 out.append("Most recent configured ensemblgenomes release available in Datasource: " + this.highestReleaseVersion + newline); 634 } 635 if ( this.datasourceType==DataSource.ENSEMBLBACTERIA) { 636 out.append("Most recent configured ensemblbacteria release available in Datasource: " + this.highestReleaseVersion + newline); 637 } 638 639 out.append("Known Schemas (Ensembl releases)" + newline + "\t"); 640 641 for (Integer i : this.getKnownSchemaVersions()) { 642 out.append(i + ", "); 643 }; 644 645 int end = out.lastIndexOf(","); 646 out.delete(end, end + 1); 647 out.append(newline); 648 649 out.append(dbConnection.report() + newline); 650 651 return out; 652 } 653 654 /** 655 * Reports the connection and available database versions etc. for an initialized Registry. 656 * Lists databases for which this API is not configured (including those too old or too recent). 657 * @return StringBuilder 658 */ 659 @Override 660 public StringBuilder getBriefRegistryReport() { 661 StringBuilder out = new StringBuilder(); 662 663 out.append(newline+"REGISTRY REPORT"+newline+"---------------"+newline+newline); 664 665 if (this.regConfig == null) { 666 out.append("\tWARNING: This Registry object has not been configured."+newline+newline); 667 return out; 668 } 669 670 out = this.getVersionReport(); 671 672 if (tooNewDB.toString() != null && !tooNewDB.toString().isEmpty()) { 673 out.append("WARNING: Some database releases were too recent for the current JEnsembl configuration (i.e. using unknown schema):" 674 + newline+ newline + tooNewDB.toString()); 675 } 676 677 if (unknownDB.toString() != null && !unknownDB.toString().isEmpty()) { 678 out.append("WARNING: Some database releases were not known in the current JEnsembl configuration:" 679 + newline + newline+ unknownDB.toString()); 680 } 681 682 if ((unknownDB.toString() == null || unknownDB.toString().isEmpty()) 683 && (tooNewDB.toString() == null || tooNewDB.toString().isEmpty())) { 684 out.append("No database releases were not known in the current JEnsembl configuration." 685 + newline); 686 } 687 688 if (!this.alreadyInitialized) { 689 out.append(newline+"\tWARNING: This configured Registry has not yet been initialized" + newline); 690 return out; 691 } 692 693 return out; 694 } 695 696 /** 697 * Reports the connection, available database versions, species and actual database names for an initialized Registry. 698 * Lists databases for which this API is not configured (including those too old or too recent). 699 * @return File 700 * @throws java.io.IOException 701 */ 702 @Override 703 public File getRegistryReport() throws IOException { 704 705 File file = File.createTempFile("RegistryReport", ".txt"); 706 FileWriter fileWriter = new FileWriter(file); 707 BufferedWriter out = new BufferedWriter(fileWriter, 8192); 708 709 710 if (this.regConfig == null) { 711 out.write(newline+"REGISTRY REPORT"+newline+"---------------"+newline+newline); 712 out.write("\tWARNING: This Registry object has not been configured."); 713 714 out.flush(); 715 out.close(); 716 return file; 717 } 718 719 out.write(this.getBriefRegistryReport().toString()); 720 721 if (!this.alreadyInitialized) { 722 723 out.flush(); 724 out.close(); 725 return file; 726 } 727 728 out.append(newline+"REGISTRY DETAILS"+newline+newline); 729 730 out.append("Species represented in Datasource (with aliases):" + newline); 731 out.append("SPECIES in unique databases" + newline ); 732 out.append("---------------------------" + newline + newline ); 733 734 for (DBSpecies s : this.speciesHash.values()) { 735 out.append(s.getSpeciesBinomial() + " [" + s.getDatabaseStyleName() + "] ("); 736 for (String st : s.getAliases()) { 737 out.append(st + ", "); 738 } 739 try { 740 out.append(")" + newline + "\tMost Recent Core Database: " + s.getMostRecentCoreDatabase().getdBName() + newline); 741 742 for (FeatureType key : ((DBSingleSpeciesCoreDatabase) s.getMostRecentCoreDatabase()).getBuildLevels().keySet()) { 743 out.append("\t\t"+key.toString() + " : " + ((DBSingleSpeciesCoreDatabase) s.getMostRecentCoreDatabase()).getBuildLevels().get(key) +newline); 744 } 745 746 } catch (Exception e) { 747 out.append(")" + newline + "\tNo Current Core Database" + newline); 748 } 749 } 750 751 out.append(newline + "Old Style SPECIES in collection databases" + newline ); 752 out.append("-------------------------------" + newline+newline ); 753 754 if (this.oldstyle_collectionspeciesHashByDBName.values().isEmpty()) { 755 out.append(newline + "NONE" + newline); 756 } 757 758 for (DBCollectionSpecies s : this.oldstyle_collectionspeciesHashByDBName.values()) { 759 out.append(s.getSpeciesBinomial() + " [" + s.getDatabaseStyleName() + "] ("); 760 for (String st : s.getAliases()) { 761 out.append(st + ", "); 762 } 763 try { 764 out.append(")" + newline + "\tCurrent Core Database: " + s.getMostRecentCoreDatabase().getdBName() + newline); 765 } catch (Exception e) { 766 out.append(")" + newline + "\tNo Current Core Database" + newline); 767 } 768 } 769 770 out.append(newline+"MULTI SPECIES DATABASES: "); 771 out.append(newline+"-----------------------"+newline); 772 773 for (EnsemblDBType t : EnsemblDBType.getSpeciesComparisonDatabaseTypes()) { 774 out.append(t.toString() + ":"+newline); 775 for (Database d : this.getDatabasesByType(t)) { 776 out.append("\t" + d.getdBName() + newline); 777 } 778 } 779 780 out.append(newline+"SINGLE SPECIES DATABASE DETAILS:"); 781 out.append(newline+"--------------------------------"+newline); 782 783 for (DBSpecies sp : this.speciesHash.values()) { 784 String id = sp.getSpeciesBinomial(); 785 out.append(id + newline+"--------------------"+newline); 786 787 out.append(newline+"\tMost Recent Versions:"+newline); 788 789// for (Database d : sp.getDatabasesByVersion(this.getMostRecentDBVersion())) { 790// out.append("\t\t" + d.getdBName()+newline); 791// } 792 for (Database d : sp.getDatabasesByVersion(sp.getMostRecentCoreDatabase().getDBVersion())) { 793 out.append("\t\t" + d.getdBName() +newline); 794 } 795 796 out.append("\tBy Type:"+newline); 797 for (EnsemblDBType t : EnsemblDBType.getSingleSpeciesDatabaseTypes()) { 798 799 out.append("\t" + t.toString()+newline); 800 if (sp.getDatabasesByType(t).isEmpty()) { 801 out.append("\t\tNONE"+newline); 802 } else { 803 for (Database d : sp.getDatabasesByType(t)) { 804 out.append("\t\t" + d.getdBName() +newline); 805 if (t == EnsemblDBType.core) { 806 out.append("\t\t\tAssembly: " + ((SingleSpeciesCoreDatabase) d).getAssemblyName() 807 + " [" + ((SingleSpeciesCoreDatabase) d).getAssemblyAccession() 808 + "]"+newline); 809 } 810 } 811 } 812 } 813 814 } 815 out.append(newline+"COLLECTION SPECIES DATABASE DETAILS:"+newline); 816 out.append("------------------------------------"+newline+newline); 817 818 out.append(newline+"NEW STYLE SPECIES ID (using GC Accession, typically post release 17):"+newline+newline); 819 820 if (this.collectionspeciesHashByGCAccession.values().isEmpty()) { 821 out.append("\tNONE"+newline); 822 } 823 824 for (DBCollectionSpecies sp : this.collectionspeciesHashByGCAccession.values()) { 825 String id = sp.getSpeciesBinomial(); 826 String acc = sp.getAssemblyAccessionStem()!=null && !sp.getAssemblyAccessionStem().isEmpty() ? sp.getAssemblyAccessionStem():" "; 827 828 out.append(id+" (assembly accession:"+acc+")" +newline+"---------------------"+newline); 829 830 out.append(newline+"\tCurrent Versions:"+newline); 831 for (Database d : sp.getDatabasesByVersion(sp.getMostRecentCoreDatabase().getDBVersion())) { 832 out.append("\t\t" + d.getdBName() +newline); 833 } 834 out.append("\tBy Type:"+newline); 835 for (EnsemblDBType t : EnsemblDBType.getCollectionDatabaseTypes()) { 836 837 out.append("\t" + t.toString() +newline); 838 if (sp.getDatabasesByType(t).isEmpty()) { 839 out.append("\t\tNONE"+newline); 840 } else { 841 for (Database d : sp.getDatabasesByType(t)) { 842 out.append("\t\t" + d.getdBName() +newline); 843 } 844 } 845 } 846 } 847 848 849 out.append(newline+"OLD STYLE SPECIES ID (using name matching, typically pre release 17):"+newline+newline); 850 851 if (this.oldstyle_collectionspeciesHashByDBName.values().isEmpty()) { 852 out.append("\tNONE"+newline); 853 } 854 855 for (DBCollectionSpecies sp : this.oldstyle_collectionspeciesHashByDBName.values()) { 856 String id = sp.getSpeciesBinomial(); 857 String acc = sp.getAssemblyAccessionStem()!=null && !sp.getAssemblyAccessionStem().isEmpty() ? sp.getAssemblyAccessionStem():" "; 858 859 out.append(id+" (assembly accession:"+acc+")" +newline+"---------------------"+newline); 860 861 out.append(newline+"\tCurrent Versions:"+newline); 862 for (Database d : sp.getDatabasesByVersion(sp.getMostRecentCoreDatabase().getDBVersion())) { 863 out.append("\t\t" + d.getdBName() +newline); 864 } 865 out.append("\tBy Type:"+newline); 866 for (EnsemblDBType t : EnsemblDBType.getCollectionDatabaseTypes()) { 867 868 out.append("\t" + t.toString() +newline); 869 if (sp.getDatabasesByType(t).isEmpty()) { 870 out.append("\t\tNONE"+newline); 871 } else { 872 for (Database d : sp.getDatabasesByType(t)) { 873 out.append("\t\t" + d.getdBName() +newline); 874 } 875 } 876 } 877 } 878 879 out.append(newline+"Core Collection Databases: Species and build information"+newline); 880 out.append("--------------------------------------------------------"+newline); 881 882 if (this.collectionDatabases.isEmpty()) { 883 out.append(newline+"NONE"+newline); 884 } 885 886 for (DBCollectionDatabase d : this.collectionDatabases) { 887 if (d.getType().equals(EnsemblDBType.collection_core)) { 888 889 out.append(newline+"CORECOLLECTION DB: " + d.dBName +newline); 890 891 for (DBCollectionSpecies species : d.getCollection().getSpecies()) { 892 893 DBCollectionCoreDatabase db = (DBCollectionCoreDatabase) d; 894 out.append("\tSPECIES: " + species.commonName +newline); 895 896 //add a safety check 897 if (db.getBuildLevels(species) != null) { 898 for (FeatureType key : db.getBuildLevels(species).keySet()) { 899 out.append("\t\t" + key.toString() + " : " + db.getBuildLevels(species).get(key) +newline); 900 } 901 } 902 903 } 904 } 905 } 906 907 out.flush(); 908 out.close(); 909 return file; 910 } 911 912 @Override 913 public Properties getConfigProperties() { 914 return configProperties; 915 } 916 917 @Override 918 public String getMostRecentEnsemblVersion() { 919 return mostRecentEnsemblVersion; 920 } 921 922 //@Override 923 public String getMostRecentDBVersion(String speciesOrCollection) throws NonUniqueException { 924 925 if (speciesOrCollection == null || speciesOrCollection.isEmpty()) { 926 return ""; 927 } 928 929 String dbName = speciesOrCollection; 930 931 //can throw nonuniqueexception 932 //it shouldnt do this if the 'speciesOrCollection' input is a db classifier 933 if (this.getEnsemblNameForAlias(speciesOrCollection) != null) { 934 dbName = this.getEnsemblNameForAlias(speciesOrCollection); 935 } 936 937 if (this.databaseVersions.containsKey(dbName)) { 938 return this.databaseVersions.get(dbName).last().toString(); 939 } else { 940 return ""; 941 } 942 943 } 944 945 public TreeSet<Integer> getKnownSchemaVersions() { 946 return knownSchemaVersions; 947 } 948 949 @Override 950 public int getHighestEnsemblSchemaVersion() throws DAOException, ConfigurationException { 951 if (this.alreadyInitialized || this.alreadyParsed) { 952 return highestEnsemblSchemaVersion; 953 } else { 954 if (this.datasourceType != null) { 955 this.parseWithoutInitializing(); 956 } else { 957 throw new ConfigurationException("Attempt to retrieve HighestEnsemblSchemaVersion from uninitialized database."); 958 } 959 return highestEnsemblSchemaVersion; 960 } 961 } 962 963 @Override 964 public int getHighestReleaseVersion() throws DAOException, ConfigurationException { 965 if (this.alreadyInitialized || this.alreadyParsed) { 966 return highestReleaseVersion; 967 } else { 968 if (this.datasourceType != null) { 969 this.parseWithoutInitializing(); 970 } else { 971 throw new ConfigurationException("Attempt to retrieve HighestReleaseVersion from uninitialized database."); 972 } 973 return highestReleaseVersion; 974 } 975 } 976 977 // Database look up methods 978 @Override 979 public DBDatabase getDatabase(String species_name_or_alias) throws NonUniqueException { 980 return getDatabase(species_name_or_alias, null, null); 981 } 982 983 @Override 984 public DBDatabase getDatabase(String species_name_or_alias, String db_version) throws NonUniqueException { 985 return getDatabase(species_name_or_alias, null, db_version); 986 } 987 988 @Override 989 public DBDatabase getDatabase(String speciesNameOrAlias_collectionName_comparaDivision, 990 DatabaseType database_type, String db_version) throws NonUniqueException { 991 992 String name = speciesNameOrAlias_collectionName_comparaDivision; 993 994 DBDatabase out = null; 995 996 //default to CORE type if not provided 997 EnsemblDBType type = (database_type != null 998 && database_type.toString() != null 999 && !database_type.toString().equals("")) 1000 ? (EnsemblDBType) database_type 1001 : EnsemblDBType.core; 1002 1003 if (name == null || name.isEmpty()) { 1004 if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(type)) { 1005 name = "multi"; 1006 } else { 1007 //deff out if no name 1008 return out; 1009 } 1010 } 1011 1012 //default to current dbVersion if not provided 1013 String version = (db_version != null 1014 && !db_version.isEmpty() 1015 && !db_version.equals("current")) 1016 ? db_version 1017 // : this.getMostRecentDBVersion(); 1018 : this.getMostRecentDBVersion(name); 1019 1020 String speciesOrGroup = ""; 1021 1022 if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(type)) { 1023 //no look up possible 1024 speciesOrGroup = name; 1025 } else if (EnsemblDBType.getSingleSpeciesDatabaseTypes().contains(type)) { 1026 //do a look up for aliases 1027 speciesOrGroup = this.getEnsemblNameForAlias(name); 1028 if (speciesOrGroup == null) { 1029 //deff out 1030 return out; 1031 } 1032 } else if (EnsemblDBType.getCollectionDatabaseTypes().contains(type)) { 1033 //no look up possible 1034 speciesOrGroup = name; 1035 } 1036 1037 if (EnsemblDBType.getSingleSpeciesDatabaseTypes().contains(type)) { 1038 for (DBSingleSpeciesDatabase d : this.singleSpeciesDatabases) { 1039 if (d.getDbSpeciesName().equals(speciesOrGroup) 1040 && d.getDBVersion().equals(version) 1041 && d.getType() == type) { 1042 return d; 1043 } 1044 } 1045 } else if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(type)) { 1046 1047 EnsemblComparaDivision gp = EnsemblComparaDivision.getEnsemblComparaDivision(speciesOrGroup); 1048 for (DBComparisonDatabase d : this.comparisonDatabases) { 1049 if (d.getComparisonDivision().equals(gp) 1050 && d.getDBVersion().equals(version) 1051 && d.getType() == type) { 1052 return d; 1053 } 1054 } 1055 } //this is only working on Groups not ind species 1056 else if (EnsemblDBType.getCollectionDatabaseTypes().contains(type)) { 1057 for (DBCollectionDatabase d : this.collectionDatabases) { 1058 if (d.getCollectionName().equals(speciesOrGroup) 1059 && d.getDBVersion().equals(version) 1060 && d.getType() == type) { 1061 return d; 1062 } 1063 } 1064 } 1065 1066 return out; 1067 } 1068 1069 @Override 1070 public ComparisonDatabase getComparaDatabase(EnsemblComparaDivision comparaDivision, String db_version) { 1071 1072 if (this.comparaDBByDivision.get(comparaDivision) != null) { 1073 return this.comparaDBByDivision.get(comparaDivision).get(db_version); 1074 } else { 1075 return null; 1076 } 1077 } 1078 1079 @Override 1080 public DBDatabase getDatabaseForFullName(String fullname) { 1081 1082 DBDatabase out = null; 1083 1084 if (fullname == null || fullname.isEmpty()) { 1085 return out; 1086 } 1087 1088 for (DBDatabase d : this.allDatabases) { 1089 if (d.getdBName().equals(fullname)) { 1090 out = d; 1091 break; 1092 } 1093 } 1094 return out; 1095 } 1096 1097 @Override 1098 public TreeSet<DBDatabase> getDatabasesByType(DatabaseType type) { 1099 TreeSet<DBDatabase> out = new TreeSet<DBDatabase>(); 1100 1101 for (DBDatabase d : this.allDatabases) { 1102 if (d.getType() == type) { 1103 out.add(d); 1104 } 1105 } 1106 return out; 1107 } 1108 1109 @Override 1110 public String findMybatisSchemaForSchemaVersion(DatabaseType type, String schema_version) { 1111 1112 String out = null; 1113 1114 try { 1115 out = schemaVersion.getMybatisSchemaPath(type.toString(), schema_version); 1116 } catch (Exception e) { 1117 } 1118 1119 // 1120 if (out == null) { 1121 try { 1122 out = schemaVersion.getMybatisSchemaPath(type.toString(), schemaVersion.getCurrentEnsemblVersion()); 1123 } catch (Exception e) { 1124 } 1125 } 1126 1127 return out; 1128 } 1129 1130 /** 1131 * Looks up the ensembl_genus_species_name for the given alias from the 1132 * ensemblNameForAliasHash hash of aliases in this Registry. 1133 * 1134 * @param alias String 1135 * @return String the ensembl_genus_species_name 1136 */ 1137 @Override 1138 public String getEnsemblNameForAlias(String alias) throws NonUniqueException { 1139 if (alias == null || alias.isEmpty()) { 1140 return null; 1141 } 1142 //if the entered 'alias' is a good ensembl name 1143 if (ensemblNameForAliasHash.containsValue(alias)) { 1144 return alias; 1145 } 1146 //else look for the lowercase version of 'alias' as an alias 1147 String get = ensemblNameForAliasHash.get(alias.toLowerCase()); 1148 if (get == null) { 1149 return null; 1150 } else if (get.contains(DBRegistry.AmbiguitySeparator)) { 1151 1152 Collection<String> allHits = new HashSet<String>(); 1153 String[] split = get.split("\\|OR\\|"); 1154 allHits.addAll(Arrays.asList(split)); 1155 throw new NonUniqueException("Alias: '" + alias + "' matches more than one Ensembl Name." 1156 + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { " 1157 + "System.out.println(((Species) o).getDatabaseStyleName()); }", allHits); 1158 } else { 1159 return get; 1160 } 1161 } 1162 1163/** 1164 * Looks up the GC_assembly_accession_stem for the given alias of a 1165 * collectionspecies. Post v16 this is the preferred identifier for a 1166 * CollectionSpecies. 1167 * 1168 * @param alias String 1169 * @return String the GC_assembly_accession_stem 1170 */ 1171 public String getGCAssemblyAccessionForAlias(String alias) throws NonUniqueException { 1172 if (alias == null || alias.isEmpty()) { 1173 return null; 1174 } 1175 1176 String get = assemblyAccessionStemForAliasHash.get(alias.toLowerCase()); 1177 if (get == null) { 1178 return null; 1179 } else if (get.contains(DBRegistry.AmbiguitySeparator)) { 1180 1181 Collection<String> allHits = new HashSet<String>(); 1182 String[] split = get.split("\\|OR\\|"); 1183 allHits.addAll(Arrays.asList(split)); 1184 throw new NonUniqueException("Alias matches more than one Ensembl Name." 1185 + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { " 1186 + "System.out.println(((Species) o).getDatabaseStyleName()); }", allHits); 1187 } else { 1188 return get; 1189 } 1190 1191 } 1192 1193 public Set<String> getEnsemblNamesForAliasBeginning(String alias) { 1194 if (alias == null || alias.isEmpty()) { 1195 return null; 1196 } 1197 1198 Set<String> out = new HashSet<String>(); 1199 1200 //look for the lowercase version of 'alias' as an alias 1201 String search = alias.toLowerCase(); 1202 1203 for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) { 1204 if (e.getKey().startsWith(search)) { 1205 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1206 String[] split = e.getValue().split("\\|OR\\|"); 1207 out.addAll(Arrays.asList(split)); 1208 } else { 1209 out.add(e.getValue()); 1210 } 1211 } 1212 } 1213 return out; 1214 } 1215 1216 public Set<String> getEnsemblNamesForAliasContaining(String alias) { 1217 if (alias == null || alias.isEmpty()) { 1218 return null; 1219 } 1220 1221 Set<String> out = new HashSet<String>(); 1222 1223 //look for the lowercase version of 'alias' as an alias 1224 String search = alias.toLowerCase(); 1225 1226 for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) { 1227 if (e.getKey().contains(search)) { 1228 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1229 String[] split = e.getValue().split("\\|OR\\|"); 1230 out.addAll(Arrays.asList(split)); 1231 } else { 1232 out.add(e.getValue()); 1233 } 1234 } 1235 } 1236 return out; 1237 } 1238 1239 public Set<String> getGCAssemblyAccessionsForAliasBeginning(String alias) { 1240 if (alias == null || alias.isEmpty()) { 1241 return null; 1242 } 1243 1244 Set<String> out = new HashSet<String>(); 1245 1246 //look for the lowercase version of 'alias' as an alias 1247 String search = alias.toLowerCase(); 1248 1249 for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) { 1250 if (e.getKey().startsWith(search)) { 1251 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1252 String[] split = e.getValue().split("\\|OR\\|"); 1253 out.addAll(Arrays.asList(split)); 1254 } else { 1255 out.add(e.getValue()); 1256 } 1257 } 1258 } 1259 return out; 1260 } 1261 1262 public Set<String> getGCAssemblyAccessionsForAliasContaining(String alias) { 1263 if (alias == null || alias.isEmpty()) { 1264 return null; 1265 } 1266 1267 Set<String> out = new HashSet<String>(); 1268 1269 //look for the lowercase version of 'alias' as an alias 1270 String search = alias.toLowerCase(); 1271 1272 for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) { 1273 if (e.getKey().contains(search)) { 1274 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1275 String[] split = e.getValue().split("\\|OR\\|"); 1276 out.addAll(Arrays.asList(split)); 1277 } else { 1278 out.add(e.getValue()); 1279 } 1280 } 1281 } 1282 return out; 1283 } 1284 1285 public Set<? extends Species> getSpeciesForAliasBeginning(String alias) { 1286 1287 if (this.getDatasourceType().equals(DataSource.ENSEMBLBACTERIA)) { 1288 return this.getBacterialSpeciesForAliasBeginning(alias); 1289 } 1290 1291 if (alias == null || alias.isEmpty()) { 1292 return null; 1293 } 1294 1295 Set<DBSpecies> out = new HashSet<DBSpecies>(); 1296 1297 //look for the lowercase version of 'alias' as an alias 1298 String search = alias.toLowerCase(); 1299 1300 for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) { 1301 if (e.getKey().startsWith(search)) { 1302 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1303 String[] split = e.getValue().split("\\|OR\\|"); 1304 for (String s:split) { 1305 out.add(this.speciesHash.get(s)); 1306 } 1307 } else { 1308 out.add(this.speciesHash.get(e.getValue())); 1309 } 1310 } 1311 } 1312 return out; 1313 } 1314 1315 public Set<? extends Species> getSpeciesForAliasContaining(String alias) { 1316 1317 if (this.getDatasourceType().equals(DataSource.ENSEMBLBACTERIA)) { 1318 return this.getBacterialSpeciesForAliasContaining(alias); 1319 } 1320 1321 if (alias == null || alias.isEmpty()) { 1322 return null; 1323 } 1324 1325 Set<DBSpecies> out = new HashSet<DBSpecies>(); 1326 1327 //look for the lowercase version of 'alias' as an alias 1328 String search = alias.toLowerCase(); 1329 1330 for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) { 1331 if (e.getKey().contains(search)) { 1332 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1333 String[] split = e.getValue().split("\\|OR\\|"); 1334 for (String s:split) { 1335 out.add(this.speciesHash.get(s)); 1336 } 1337 } else { 1338 out.add(this.speciesHash.get(e.getValue())); 1339 } 1340 } 1341 } 1342 return out; 1343 } 1344 1345 public Set<? extends CollectionSpecies> getBacterialSpeciesForAliasBeginning(String alias) { 1346 1347 if (this.highestReleaseVersion < 17) { 1348 return this.getOldStyleBacterialSpeciesForAliasBeginning(alias); 1349 } 1350 1351 if (alias == null || alias.isEmpty()) { 1352 return null; 1353 } 1354 1355 Set<DBCollectionSpecies> out = new HashSet<DBCollectionSpecies>(); 1356 1357 //look for the lowercase version of 'alias' as an alias 1358 String search = alias.toLowerCase(); 1359 1360 for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) { 1361 if (e.getKey().startsWith(search)) { 1362 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1363 String[] split = e.getValue().split("\\|OR\\|"); 1364 for (String s:split) { 1365 out.add(this.collectionspeciesHashByGCAccession.get(s)); 1366 } 1367 } else { 1368 out.add(this.collectionspeciesHashByGCAccession.get(e.getValue())); 1369 } 1370 } 1371 } 1372 return out; 1373 } 1374 1375 public Set<? extends CollectionSpecies> getBacterialSpeciesForAliasContaining(String alias) { 1376 1377 if (this.highestReleaseVersion < 17) { 1378 return this.getOldStyleBacterialSpeciesForAliasContaining(alias); 1379 } 1380 1381 if (alias == null || alias.isEmpty()) { 1382 return null; 1383 } 1384 1385 Set<DBCollectionSpecies> out = new HashSet<DBCollectionSpecies>(); 1386 1387 //look for the lowercase version of 'alias' as an alias 1388 String search = alias.toLowerCase(); 1389 1390 for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) { 1391 if (e.getKey().contains(search)) { 1392 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1393 String[] split = e.getValue().split("\\|OR\\|"); 1394 for (String s:split) { 1395 out.add(this.collectionspeciesHashByGCAccession.get(s)); 1396 } 1397 } else { 1398 out.add(this.collectionspeciesHashByGCAccession.get(e.getValue())); 1399 } 1400 } 1401 } 1402 return out; 1403 } 1404 1405 public Set<? extends CollectionSpecies> getOldStyleBacterialSpeciesForAliasBeginning(String alias) { 1406 if (alias == null || alias.isEmpty()) { 1407 return null; 1408 } 1409 1410 Set<CollectionSpecies> out = new HashSet<CollectionSpecies>(); 1411 1412 //look for the lowercase version of 'alias' as an alias 1413 String search = alias.toLowerCase(); 1414 1415 for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) { 1416 if (e.getKey().startsWith(search)) { 1417 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1418 String[] split = e.getValue().split("\\|OR\\|"); 1419 for (String s:split) { 1420 out.add(this.oldstyle_collectionspeciesHashByDBName.get(s) ); 1421 } 1422 } else { 1423 out.add(this.oldstyle_collectionspeciesHashByDBName.get(e.getValue())); 1424 } 1425 } 1426 } 1427 return out; 1428 } 1429 1430 public Set<? extends CollectionSpecies> getOldStyleBacterialSpeciesForAliasContaining(String alias) { 1431 if (alias == null || alias.isEmpty()) { 1432 return null; 1433 } 1434 1435 Set<CollectionSpecies> out = new HashSet<CollectionSpecies>(); 1436 1437 //look for the lowercase version of 'alias' as an alias 1438 String search = alias.toLowerCase(); 1439 1440 for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) { 1441 if (e.getKey().contains(search)) { 1442 if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) { 1443 String[] split = e.getValue().split("\\|OR\\|"); 1444 for (String s:split) { 1445 out.add(this.oldstyle_collectionspeciesHashByDBName.get(s) ); 1446 } 1447 } else { 1448 out.add(this.oldstyle_collectionspeciesHashByDBName.get(e.getValue())); 1449 } 1450 } 1451 } 1452 return out; 1453 } 1454 1455 /** 1456 * Method for an adhoc alias to be added by the user in this session. Adds 1457 * the alias to the aliases TreeSet of the cognate DBSpecies and to the 1458 * ensemblNameForAliasHash hash of aliases in this Registry 1459 * 1460 * @param ensembl_genus_species_name Ensmebl databse dbVersion of the name, 1461 * e.g. 'homo_sapiens' 1462 * @param alias any string the user wants to use an alias for this 1463 * @return boolean true if successful 1464 * @throws Exception if update fails, with meaning full message 1465 */ 1466 @Override 1467 public boolean addSessionAlias(String ensembl_genus_species_name, String alias) throws Exception { 1468 1469 if (ensembl_genus_species_name == null || alias == null || ensembl_genus_species_name.isEmpty() || alias.isEmpty()) { 1470 throw new Exception("Failed to add alias: " + alias + " for species: " + ensembl_genus_species_name); 1471 } 1472 Species spData = null; 1473 1474 try { 1475 spData = this.speciesHash.get(ensembl_genus_species_name); 1476 if (spData == null) { 1477 spData = this.getCSpeciesByGCAccessionStem(this.assemblyAccessionStemForAliasHash.get(ensembl_genus_species_name)); 1478 if (spData == null) { 1479 spData = this.oldstyle_collectionspeciesHashByDBName.get(ensembl_genus_species_name); 1480 } 1481 if (spData == null) { 1482 throw new Exception("No species in ensembl registry called: " + ensembl_genus_species_name); 1483 } 1484 } 1485 1486 } catch (Exception e) { 1487 throw new Exception("Failed to add alias: " + alias + " for species: " + ensembl_genus_species_name, e); 1488 } 1489 return this.addSessionAlias(spData, alias); 1490 } 1491 1492 @Override 1493 public boolean addSessionAlias(Species sp, String alias) throws Exception { 1494 if (sp == null || alias == null || alias.isEmpty()) { 1495 throw new Exception("Failed to add alias: " + alias + " for species: " + sp); 1496 } 1497 1498 if (this.getGCAccessionedCollectionSpecies().contains(sp)) { 1499 sp.getAliases().add(alias.toLowerCase()); 1500 this.assemblyAccessionStemForAliasHash.put(alias.toLowerCase(), sp.getAssemblyAccessionStem()); 1501 } else { 1502 sp.getAliases().add(alias.toLowerCase()); 1503 if (this.ensemblNameForAliasHash.containsKey(alias.toLowerCase())) { 1504 String n = this.ensemblNameForAliasHash.get(alias.toLowerCase()).concat(AmbiguitySeparator + sp.getDatabaseStyleName()); 1505 this.ensemblNameForAliasHash.put(alias.toLowerCase(), n); 1506 } else { 1507 this.ensemblNameForAliasHash.put(alias.toLowerCase(), sp.getDatabaseStyleName()); 1508 } 1509 } 1510 return true; 1511 } 1512 1513 /** 1514 * Returns a HashSet<DBSpecies> matching the given alias. If name is not a 1515 * valid String, returns null, if no species are found, returns empty Set. 1516 * If Name matches any DBSpecies, these are returned without searching for 1517 * DBCollectionSpecies. Note if we hit a CollectionSpecies - any more recent 1518 * Species with GCA (ENA) Identifiers will be returned before searching for 1519 * older Species (lacking GCA IDs) which match the alias. If you want to be 1520 * sure to search for all alias hits use getSetOfEverySpeciesByAlias(String 1521 * alias_or_name). 1522 * 1523 * @param alias_or_name 1524 * @return a Collection<? extends Species> of DBSpecies. 1525 */ 1526 @Override 1527 public Collection<? extends Species> getSetOfSpeciesByAlias(String alias_or_name) { 1528 HashSet<DBSpecies> ret = new HashSet<DBSpecies>(); 1529 1530 if (alias_or_name == null || alias_or_name.isEmpty()) { 1531 return null; 1532 } 1533 1534 //first look for DBSpecies 1535 DBSpecies out = null; 1536 String name = null; 1537 try { 1538 name = this.getEnsemblNameForAlias(alias_or_name); 1539 out = (name != null) ? this.speciesHash.get(name) : null; 1540 if (out != null) { 1541 ret.add(out); 1542 } 1543 } catch (NonUniqueException ex) { 1544 for (String acc : (Collection<String>) ex.getAllHits()) { 1545 if (this.speciesHash.get(acc) != null) { 1546 ret.add(this.speciesHash.get(acc)); 1547 } 1548 } 1549 } 1550 if (!ret.isEmpty()) { 1551 return ret; 1552 } 1553 1554 //if no DBSpecies hit 1555 //we next look for a recent type of CollectionSpecies 1556 String gcAccession = null; 1557 try { 1558 gcAccession = this.getGCAssemblyAccessionForAlias(alias_or_name); 1559 out = (gcAccession != null) ? this.collectionspeciesHashByGCAccession.get(gcAccession) 1560 : null; 1561 if (out != null) { 1562 ret.add(out); 1563 } else { 1564 //if the alias hasn't pulled out a recent CollectionSpecies = fall back to look for an older one 1565 name = this.getEnsemblNameForAlias(alias_or_name); 1566 out = (name != null) ? this.oldstyle_collectionspeciesHashByDBName.get(name) : null; 1567 if (out != null) { 1568 ret.add(out); 1569 } 1570 } 1571 } catch (NonUniqueException ex) { 1572 for (String acc : (Collection<String>) ex.getAllHits()) { 1573 if (collectionspeciesHashByGCAccession.get(acc) != null) { 1574 ret.add(this.collectionspeciesHashByGCAccession.get(acc)); 1575 } 1576 } 1577 if (ret.isEmpty()) { 1578 //if the alias doesn't pulled out recent CollectionSpecies = fall back to look for older ones 1579 for (String acc : (Collection<String>) ex.getAllHits()) { 1580 if (this.oldstyle_collectionspeciesHashByDBName.get(acc) != null) { 1581 ret.add(this.oldstyle_collectionspeciesHashByDBName.get(acc)); 1582 } 1583 } 1584 } 1585 } 1586 return ret; 1587 } 1588 1589 /** 1590 * Returns a HashSet<DBSpecies> of all DBSpecies and old and newstyle 1591 * CollectionSpecies matching the given alias. If name is not a valid 1592 * String, returns null, if no species are found, returns empty Set. 1593 * 1594 * @param alias_or_name 1595 * @return a Collection<? extends Species> of DBSpecies. 1596 */ 1597 @Override 1598 public Collection<? extends Species> getSetOfEverySpeciesByAlias(String alias_or_name) { 1599 HashSet<DBSpecies> ret = new HashSet<DBSpecies>(); 1600 1601 if (alias_or_name == null || alias_or_name.isEmpty()) { 1602 return null; 1603 } 1604 1605 DBSpecies out = null; 1606 1607 String name = null; 1608 try { 1609 name = this.getEnsemblNameForAlias(alias_or_name); 1610 out = (name != null) ? this.speciesHash.get(name) : null; 1611 if (out != null) { 1612 ret.add(out); 1613 } 1614 out = (name != null) ? this.oldstyle_collectionspeciesHashByDBName.get(name) 1615 : null; 1616 if (out != null) { 1617 ret.add(out); 1618 } 1619 } catch (NonUniqueException ex) { 1620 for (String acc : (Collection<String>) ex.getAllHits()) { 1621 if (this.speciesHash.get(acc) != null) { 1622 ret.add(this.speciesHash.get(acc)); 1623 } 1624 if (this.oldstyle_collectionspeciesHashByDBName.get(acc) != null) { 1625 ret.add(this.oldstyle_collectionspeciesHashByDBName.get(acc)); 1626 } 1627 } 1628 } 1629 1630 String gcAccession = null; 1631 try { 1632 gcAccession = this.getGCAssemblyAccessionForAlias(alias_or_name); 1633 out = (gcAccession != null) ? this.collectionspeciesHashByGCAccession.get(gcAccession) 1634 : null; 1635 if (out != null) { 1636 ret.add(out); 1637 } 1638 } catch (NonUniqueException ex) { 1639 for (String acc : (Collection<String>) ex.getAllHits()) { 1640 if (collectionspeciesHashByGCAccession.get(acc) != null) { 1641 ret.add(this.collectionspeciesHashByGCAccession.get(acc)); 1642 } 1643 } 1644 } 1645 return ret; 1646 } 1647 1648 /** 1649 * Wraps getSetOfSpeciesByAlias(String alias_or_name) to return a single 1650 * species if the returned Collection has only one member. If no species are 1651 * found, returns null. If more than one species is found, it throws a 1652 * NonUniqueException which holds the HashSet<DBSpecies> of results. Note if 1653 * we hit a CollectionSpecies - more recent Species with GCA (ENA) 1654 * Identifiers will be returned in preference to older Species lacking IDs. 1655 * 1656 * @param alias_or_name 1657 * @return 1658 * @throws NonUniqueException 1659 */ 1660 @Override 1661 public DBSpecies getSpeciesByAlias(String alias_or_name) throws NonUniqueException { 1662 HashSet<DBSpecies> ret = (HashSet<DBSpecies>) this.getSetOfSpeciesByAlias(alias_or_name); 1663 1664 if (ret == null || ret.isEmpty()) { 1665 return null; 1666 } 1667 1668 if (ret.size() == 1) { 1669 return ret.iterator().next(); 1670 } else { 1671 throw new NonUniqueException("More than one species with this alias is found. " 1672 + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { " 1673 + "System.out.println(((Species) o).getDatabaseStyleName()); }", ret); 1674 } 1675 1676 } 1677 1678 /** 1679 * Wraps getSetOfEverySpeciesByAlias(String alias_or_name) to return a 1680 * single species if the returned Collection has only one member with the 1681 * desired alias and version. If no species are found, returns null. If more 1682 * than one species is found for the datasource version, it throws a 1683 * NonUniqueException which holds the HashSet<DBSpecies> of results. 1684 * 1685 * @param alias_or_name 1686 * @return 1687 * @throws NonUniqueException 1688 */ 1689 @Override 1690 public DBSpecies getSpeciesByAlias(String alias_or_name, String version) throws NonUniqueException { 1691 HashSet<DBSpecies> ret = (HashSet<DBSpecies>) this.getSetOfEverySpeciesByAlias(alias_or_name); 1692 HashSet<DBSpecies> versions = new HashSet<DBSpecies>(); 1693 1694 if (ret == null || ret.isEmpty()) { 1695 return null; 1696 } else if (ret.size() == 1) { 1697 DBSpecies o = ret.iterator().next(); 1698 if (o.getDBVersions().contains(version)) { 1699 return o; 1700 } else { 1701 return null; 1702 } 1703 } else { 1704 for (DBSpecies sp : ret) { 1705 if (sp.getDBVersions().contains(version)) { 1706 versions.add(sp); 1707 } 1708 } 1709 if (versions.isEmpty()) { 1710 return null; 1711 } else if (versions.size() == 1) { 1712 return versions.iterator().next(); 1713 } else if (versions.size() > 1) { 1714 throw new NonUniqueException("More than one species with alias '" 1715 + alias_or_name + "' is found for version '" + version + "' of the datasource. " 1716 + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { " 1717 + "System.out.println(((Species) o).getDatabaseStyleName()); }", versions); 1718 } 1719 } 1720 return null; 1721 } 1722 1723 /** 1724 * Returns the most recent Species matching the alias. Return null if no 1725 * match, throws NonUniqueException which holds the HashSet<DBSpecies> of 1726 * results if more than one, equally recent species is found. 1727 * 1728 * @param alias_or_name 1729 * @return 1730 * @throws NonUniqueException 1731 */ 1732 @Override 1733 public DBSpecies getMostRecentSpeciesByAlias(String alias_or_name) throws NonUniqueException { 1734 1735 HashSet<DBSpecies> temp = (HashSet<DBSpecies>) this.getSetOfEverySpeciesByAlias(alias_or_name); 1736 TreeMap<Integer, HashSet<DBSpecies>> versionMap = new TreeMap<Integer, HashSet<DBSpecies>>(); 1737 1738 HashSet<DBSpecies> out = null; 1739 1740 if (temp == null || temp.isEmpty()) { 1741 return null; 1742 } 1743 1744 if (temp.size() == 1) { 1745 return temp.iterator().next(); 1746 } else { 1747 for (DBSpecies sp : temp) { 1748 1749 if (!versionMap.containsKey(sp.getHighestDBRelease())) { 1750 versionMap.put(sp.getHighestDBRelease(), new HashSet<DBSpecies>()); 1751 } 1752 versionMap.get(sp.getHighestDBRelease()).add(sp); 1753 } 1754 } 1755 1756 out = versionMap.lastEntry().getValue(); 1757 1758 if (out.size() == 1) { 1759 return out.iterator().next(); 1760 } else { 1761 throw new NonUniqueException("More than one species with this alias is found. " 1762 + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { " 1763 + "System.out.println(((Species) o).getDatabaseStyleName()); }", out); 1764 } 1765 1766 } 1767 1768 @Override 1769 public DBSpecies getSpeciesByEnsemblName(String ensemblName) { 1770 if (ensemblName == null || ensemblName.isEmpty()) { 1771 return null; 1772 } 1773 1774 DBSpecies out = null; 1775 out = this.speciesHash.get(ensemblName); 1776 if (out == null) { 1777 out = this.oldstyle_collectionspeciesHashByDBName.get(ensemblName); 1778 } 1779 1780 return out; 1781 } 1782 1783 public DBCollectionSpecies getCSpeciesByGCAccessionStem(String key) { 1784 if (key == null || key.isEmpty()) { 1785 return null; 1786 } 1787 return this.collectionspeciesHashByGCAccession.get(key); 1788 } 1789 1790 @Override 1791 public DBCollection getMostRecentCollection(String name) { 1792 return this.getCollection(name, null); 1793 } 1794 1795 @Override 1796 public DBCollection getCollection(String name, String dbVersion) { 1797 DBCollection out = null; 1798 1799 if (name == null || name.isEmpty()) { 1800 return out; 1801 } 1802 1803 if (dbVersion == null || dbVersion.isEmpty()) { 1804 if (this.databaseVersions.containsKey(name)) { 1805 dbVersion = this.databaseVersions.get(name).last().toString(); 1806 } 1807 } 1808 1809 if (dbVersion == null || dbVersion.isEmpty()) { 1810 return out; 1811 } 1812 1813 if (this.collectionNameDBVersionHash.containsKey(name)) { 1814 if (this.collectionNameDBVersionHash.get(name).containsKey(dbVersion)) { 1815 out = this.collectionNameDBVersionHash.get(name).get(dbVersion); 1816 } 1817 } 1818 return out; 1819 } 1820 1821 private void registerDatabase(DBDatabase db) { 1822 if (db instanceof DBCollectionDatabase) { 1823 this.collectionDatabases.add((DBCollectionDatabase) db); 1824 if (db.intSchemaVersion > this.highestEnsemblSchemaVersion) { 1825 this.highestEnsemblSchemaVersion = db.intSchemaVersion; 1826 } 1827 if (db.intDBVersion > this.highestReleaseVersion) { 1828 this.highestReleaseVersion = db.intDBVersion; 1829 } 1830 } else if (db instanceof DBSingleSpeciesDatabase) { 1831 this.singleSpeciesDatabases.add((DBSingleSpeciesDatabase) db); 1832 if (db.intSchemaVersion > this.highestEnsemblSchemaVersion) { 1833 this.highestEnsemblSchemaVersion = db.intSchemaVersion; 1834 } 1835 if (db.intDBVersion > this.highestReleaseVersion) { 1836 this.highestReleaseVersion = db.intDBVersion; 1837 } 1838 } else if (db instanceof DBComparisonDatabase) { 1839 this.comparisonDatabases.add((DBComparisonDatabase) db); 1840 1841 if (db.getType().equals(EnsemblDBType.compara)) { 1842 1843 if (!this.comparaDBByDivision.containsKey(((DBComparisonDatabase) db).getComparisonDivision())) { 1844 this.comparaDBByDivision.put(((DBComparisonDatabase) db).getComparisonDivision(), 1845 new HashMap<String, DBComparisonDatabase>()); 1846 } 1847 this.comparaDBByDivision.get(((DBComparisonDatabase) db).getComparisonDivision()).put( 1848 db.getDBVersion(), (DBComparisonDatabase) db); 1849 1850 } 1851 1852 } 1853 1854 //registering versions of this database 1855 if (!this.databaseVersions.containsKey(db.getdBClassifier())) { 1856 this.databaseVersions.put(db.getdBClassifier(), new TreeSet<Integer>()); 1857 } 1858 this.databaseVersions.get(db.getdBClassifier()).add(db.getIntDBVersion()); 1859 1860 this.allDatabases.add(db); 1861 } 1862 1863 private void pseudoRegisterDatabase(DBDatabase db) { 1864 if (db instanceof DBCollectionDatabase 1865 || db instanceof DBSingleSpeciesDatabase) { 1866 1867 if (db.intSchemaVersion > this.highestEnsemblSchemaVersion) { 1868 this.highestEnsemblSchemaVersion = db.intSchemaVersion; 1869 } 1870 if (db.intDBVersion > this.highestReleaseVersion) { 1871 this.highestReleaseVersion = db.intDBVersion; 1872 } 1873 } 1874 } 1875 1876 @Override 1877 public Collection<DBCollection> getCollectionRegistriesByName(String name) { 1878 if (name == null || name.isEmpty()) { 1879 return new ArrayList<DBCollection>(); 1880 } 1881 if (collectionNameDBVersionHash.containsKey(name)) { 1882 return new ArrayList<DBCollection>(collectionNameDBVersionHash.get(name).values()); 1883 } else { 1884 return new ArrayList<DBCollection>(); 1885 } 1886 } 1887 1888 @Override 1889 public Collection<DBCollection> getCollectionsByDBVersion(String dbVersion) { 1890 1891 Collection<DBCollection> out = new ArrayList<DBCollection>(); 1892 if (dbVersion == null || dbVersion.isEmpty()) { 1893 return out; 1894 } 1895 1896 for (String name : this.collectionNameDBVersionHash.keySet()) { 1897 1898 if (this.collectionNameDBVersionHash.get(name).containsKey(dbVersion)) { 1899 out.add(this.collectionNameDBVersionHash.get(name).get(dbVersion)); 1900 } 1901 } 1902 return out; 1903 } 1904 1905 @Override 1906 public List<DBCollection> getCollections() { 1907 return collections; 1908 } 1909 1910 /** 1911 * Fetches the list of all DBSpecies objects for this Registry. 1912 * 1913 * @return 1914 */ 1915 @Override 1916 public List<DBSpecies> getSpecies() { 1917 return new ArrayList<DBSpecies>(speciesHash.values()); 1918 } 1919 1920 /** 1921 * Returns the combined list of DBCollectionSpecies that are indexed by 1922 * dbstylename (old_style_ and those indexed by GCAssemblyAccession (new 1923 * style, post v16) 1924 * 1925 * @return 1926 */ 1927 @Override 1928 public List<DBCollectionSpecies> getCollectionSpecies() { 1929 1930 ArrayList<DBCollectionSpecies> out = new ArrayList<DBCollectionSpecies>(oldstyle_collectionspeciesHashByDBName.values()); 1931 out.addAll(collectionspeciesHashByGCAccession.values()); 1932 return out; 1933 } 1934 1935 /** 1936 * Utility method to return only those DBCOllectionSpecies that possess and 1937 * are indexed by a GCAssemblyAccession(Stem or Chain) 1938 * 1939 * @return 1940 */ 1941 public List<DBCollectionSpecies> getGCAccessionedCollectionSpecies() { 1942 return new ArrayList<DBCollectionSpecies>(collectionspeciesHashByGCAccession.values()); 1943 } 1944 1945 /** 1946 * Utility method to return only those DBCOllectionSpecies that do not 1947 * possess and are therefore not indexed by a GCAssemblyAccession but bey 1948 * dbstylename 1949 * 1950 * @return 1951 */ 1952 public List<DBCollectionSpecies> getNamedCollectionSpecies() { 1953 return new ArrayList<DBCollectionSpecies>(oldstyle_collectionspeciesHashByDBName.values()); 1954 } 1955 1956 @Override 1957 public DataSource getDatasourceType() { 1958 return datasourceType; 1959 } 1960 1961 @Override 1962 public DBDatabase makeDatabase(String db_name) throws ConfigurationException { 1963 1964 if (db_name == null || db_name.isEmpty()) { 1965 throw new ConfigurationException("Invalid database name: " + db_name); 1966 } 1967 1968 EnsemblDBType t = null; 1969 1970 for (EnsemblDBType et : EnsemblDBType.getAllDatabaseTypes()) { 1971 1972 if (et.toString().startsWith("collection") 1973 && db_name.contains("_collection_") 1974 && db_name.matches(new String(".+_" + et.toString() + "_\\d+.*"))) { 1975 t = et; 1976 break; 1977 } else if (db_name.matches(new String(".+_" + et.toString() + "_\\d+.*")) 1978 || (et.toString().startsWith("ensembl_") && db_name.contains(et.toString() + "_"))) { 1979 t = et; 1980 } 1981 } 1982 1983 if (t == null) { 1984 throw new ConfigurationException("Unrecognized database type for database name: " + db_name); 1985 } 1986 1987 try { 1988 if (EnsemblDBType.getSingleSpeciesDatabaseTypes().contains(t)) { 1989 if (t.equals(EnsemblDBType.core)) { 1990 return new DBSingleSpeciesCoreDatabase(db_name, t, this); 1991 } else if (t.equals(EnsemblDBType.variation)) { 1992 return new DBSingleSpeciesVariationDatabase(db_name, t, this); 1993 } else { 1994 //haven't implemented othersubtypes yet 1995 return new DBSingleSpeciesDatabase(db_name, t, this); 1996 } 1997 } else if (EnsemblDBType.getCollectionDatabaseTypes().contains(t)) { 1998 if (t.equals(EnsemblDBType.collection_core)) { 1999 return new DBCollectionCoreDatabase(db_name, t, this); 2000// to implement in the unlikely event we have collections with variation databases 2001// } else if (t.equals(EnsemblDBType.collection_variation)) { 2002// return new DBCollectionVariationDatabase(db_name, t, this); 2003 } else { 2004 //haven't implemented other subtypes yet 2005 return new DBCollectionDatabase(db_name, t, this); 2006 } 2007 } else if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(t)) { 2008 return new DBComparisonDatabase(db_name, t, this); 2009 } else { 2010 //shouldnt hit here 2011 throw new Exception("Unrecognized database type for database name: " + db_name); 2012 } 2013 } catch (Exception e) { 2014 throw new ConfigurationException(e.getMessage()); 2015 } 2016 2017 } 2018 2019 //DAO methods 2020 private List<String> getDatabases() throws DAOException { 2021 2022 List<String> outList = null; 2023 SqlSession session = null; 2024 2025 try { 2026 session = sqlMapper.openSession(); 2027 DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2028 outList = dm.getAllDatabaseNames(); 2029 } catch (Exception e) { 2030 throw new DAOException("Failed to interrogate all Database Names", e); 2031 } finally { 2032 if (session != null) { 2033 session.close(); 2034 } 2035 } 2036 2037 if (outList == null) { 2038 /* A null tempList is a perfectly valid outcome to return */ 2039 return new ArrayList<String>(); 2040 } 2041 2042 // if we want to test parsing of databse names we can add them here.. 2043 /* 2044 outList.add("ictidomys_tridecemlineatus_variation_68_2"); 2045 outList.add("spermophilus_tridecemlineatus_variation_67_2"); 2046 */ 2047 return outList; 2048 } 2049 2050 private DBCollection getCollectionSpeciesProperties(CollectionCoreDatabase database) throws DAOException { 2051 2052 if (database == null) { 2053 return null; 2054 } 2055 DBCollection collection = null; 2056 DBCollectionCoreDatabase myDatabase = (DBCollectionCoreDatabase) database; 2057 String dBName = myDatabase.getdBName(); 2058 String thisSchemaVersion = myDatabase.getSchemaVersion(); 2059 String dbVersion = myDatabase.getDBVersion(); 2060 TreeMap<Integer, DBCollectionSpecies> localSpp = new TreeMap<Integer, DBCollectionSpecies>(); 2061 collection = new DBCollection(myDatabase); 2062 2063 List<HashMap> results = null; 2064 SqlSession session = null; 2065 2066 try { 2067 session = sqlMapper.openSession(); 2068 DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2069 results = dm.getSpeciesFromCollection(dBName); 2070 } catch (Exception e) { 2071 throw new DAOException("Failed to interrogate species properties for " + dBName, e); 2072 } finally { 2073 if (session != null) { 2074 session.close(); 2075 } 2076 } 2077 2078 if ((results == null) || (results.isEmpty() == true)) { 2079 /* return empty collectiont*/ 2080 return collection; 2081 } 2082 2083 HashMap<Integer, HashMap<String, String>> buildLevelsHash = new HashMap<Integer, HashMap<String, String>>(); 2084 2085 for (Object o : results) { 2086 2087 Integer id = null; 2088 DBCollectionSpecies species = null; 2089 2090 HashMap m = (HashMap) o; 2091 m.put("schemaVersion", thisSchemaVersion); 2092 m.put("dbVersion", dbVersion); 2093 2094 id = (Integer) m.get("species_id"); 2095 2096 // if we've already made species with this id for this collection 2097 //use it 2098 if (localSpp.containsKey(id)) { 2099 species = localSpp.get(id); 2100 //and add this database 2101 //i think this is redundant - it should already be set... 2102 species.addDatabase(myDatabase); 2103 } //we haven't got a species with this id in the collection already 2104 //- so make one, add it, and use it 2105 else { 2106 try { 2107 species = new DBCollectionSpecies(myDatabase); 2108 } catch (ConfigurationException ex) { 2109 //what do we do here?? 2110 } 2111 if (species == null) { 2112 continue; 2113 } 2114 species.setIDForVersion(id, dbVersion); 2115 localSpp.put(id, species); 2116 } 2117 2118 if (!((String) m.get("meta_key")).endsWith("build.level")) { 2119 species.setProperty(m); 2120 } else { 2121 2122 if (buildLevelsHash.containsKey(id)) { 2123 buildLevelsHash.get(id).put((String) m.get("meta_key"), (String) m.get("meta_value")); 2124 } else { 2125 buildLevelsHash.put(id, new HashMap<String, String>()); 2126 buildLevelsHash.get(id).put((String) m.get("meta_key"), (String) m.get("meta_value")); 2127 } 2128 } 2129 } 2130 2131 //loop through all the species made for this collection database 2132 //this adds them to one of two maps in the registry - by GCAccession or by db_style_name (production_name) 2133 for (DBCollectionSpecies s : localSpp.values()) { 2134 2135 // there may be species without dbstyle names... 2136 if (s.getDatabaseStyleName() == null 2137 || s.getDatabaseStyleName().isEmpty()) { 2138 s.setDatabaseStyleSpeciesName(s.getSpeciesBinomial().toLowerCase().replace(' ', '_')); 2139 s.setDatabaseStyleSpeciesName(Integer.parseInt(dbVersion), s.getDatabaseStyleName()); 2140 } 2141 2142 //this cant be used cos we havent populated the alias look up yet! 2143 //if (this.registry.getSpeciesByAlias(s.getDatabaseStyleName()) == null) { 2144 //ensembl have changed the case of the first letter of the dbname between v58 & v59 2145 //therefore the code below fails to rationalize species between collections 58 and 59 2146 //i am just going to live with this for the time being bcause the dbname is also used as the comparaname 2147 //and is also different between 58 and 59 2148 //hopefully from 59 things will be stable 2149 //give this to the reg 2150 //currently we are hashing on the production name ( databasestylename) 2151 //but i may need to alter this if we want continuity after release 16 2152 // if this species has a GCAccession - use it for the hashing 2153 if (s.getAssemblyAccession(dbVersion) != null 2154 && !s.getAssemblyAccession(dbVersion).isEmpty()) { 2155 2156 String accession = s.getAssemblyAccession(dbVersion); 2157 String stem = accession.split("\\.")[0]; 2158 if (this.getCSpeciesByGCAccessionStem(stem) == null) { 2159 //we've made a new species so add it to the registry 2160 //but to the hash on GCAccessionStem 2161 this.collectionspeciesHashByGCAccession.put(stem, s); 2162 //and add it to this collection 2163 collection.addSpecies(s); 2164 } else { 2165 //we've already got this species in the registry - so modify it there 2166 DBCollectionSpecies existingSpecies = this.getCSpeciesByGCAccessionStem(stem); 2167 2168 Integer existingVersion = existingSpecies.getHighestDBRelease(); 2169 if (Integer.parseInt(dbVersion) > existingVersion) { 2170 //reset any values that should be global for the species 2171 existingSpecies.setCommonName(s.getCommonName()); 2172 existingSpecies.setShortName(s.getShortName()); 2173 existingSpecies.setSpeciesBinomial(s.getSpeciesBinomial()); 2174 existingSpecies.setUrlName(s.getUrlName()); 2175 existingSpecies.setComparaDivision(s.getComparaDivision()); 2176 existingSpecies.setEnsemblStablePrefix(s.getEnsemblStablePrefix()); 2177 existingSpecies.setTaxonomyID(s.getTaxonomyID()); 2178 2179 // the dbstylename is used for the TreeSet ordering - so we have a problem if it changes 2180 //so we dont change this here! - but add a finalize step to set the most recent version 2181 //existingSpecies.setDatabaseStyleSpeciesName(s.getDatabaseStyleName()); 2182 } 2183 2184 existingSpecies.getAliases().addAll(s.getAliases()); 2185 existingSpecies.setAssemblyAccession(dbVersion, accession); 2186 existingSpecies.setAssemblyName(dbVersion, s.getAssemblyName(dbVersion)); 2187 existingSpecies.setComparaName(dbVersion, s.getComparaName(dbVersion)); 2188 existingSpecies.setDatabaseStyleSpeciesName(Integer.parseInt(dbVersion), s.getComparaName(dbVersion)); 2189 existingSpecies.setIDForVersion(s.getDBSpeciesID(dbVersion), dbVersion); 2190 existingSpecies.addDatabase(myDatabase); 2191 collection.addSpecies(existingSpecies); 2192 } 2193 } //we dont have a GCAccession - so use the db style name.... 2194 else { 2195 if (this.getSpeciesByEnsemblName(s.getDatabaseStyleName()) == null) { 2196 //we've made a new species so add it to the registry 2197 //but to the hash on db-syle_name 2198 this.oldstyle_collectionspeciesHashByDBName.put(s.getDatabaseStyleName(), s); 2199 //and add it to this collection 2200 collection.addSpecies(s); 2201 } else { 2202 //we've already got this species in the registry - so modify it there 2203 DBCollectionSpecies existingSpecies = (DBCollectionSpecies) this.getSpeciesByEnsemblName(s.getDatabaseStyleName()); 2204 2205 Integer existingVersion = existingSpecies.getHighestDBRelease(); 2206 if (Integer.parseInt(dbVersion) > existingVersion) { 2207 //reset any values that should be global for the species 2208 existingSpecies.setCommonName(s.getCommonName()); 2209 existingSpecies.setShortName(s.getShortName()); 2210 existingSpecies.setSpeciesBinomial(s.getSpeciesBinomial()); 2211 existingSpecies.setUrlName(s.getUrlName()); 2212 existingSpecies.setComparaDivision(s.getComparaDivision()); 2213 existingSpecies.setEnsemblStablePrefix(s.getEnsemblStablePrefix()); 2214 existingSpecies.setTaxonomyID(s.getTaxonomyID()); 2215 // the dbstylename is used for the TreeSet ordering - so we have a problem if it changes 2216 //so we dont change this here! - but add a finalize step 2217 //existingSpecies.setDatabaseStyleSpeciesName(s.getDatabaseStyleName()); 2218 } 2219 2220 existingSpecies.getAliases().addAll(s.getAliases()); 2221 existingSpecies.setAssemblyName(dbVersion, s.getAssemblyName(dbVersion)); 2222 existingSpecies.setIDForVersion(s.getDBSpeciesID(dbVersion), dbVersion); 2223 existingSpecies.setComparaName(dbVersion, s.getComparaName(dbVersion)); 2224 existingSpecies.setDatabaseStyleSpeciesName(Integer.parseInt(dbVersion), s.getComparaName(dbVersion)); 2225 existingSpecies.addDatabase(myDatabase); 2226 //and add it to this collection 2227 collection.addSpecies(existingSpecies); 2228 } 2229 } 2230 } 2231 2232 myDatabase.setCollection(collection); 2233 2234 //loop though all the species of the collection 2235 //to copy the build levels from here to the database 2236 for (DBSpecies s : collection.getSpecies()) { 2237 DBCollectionSpecies sp = (DBCollectionSpecies) s; 2238 if (buildLevelsHash.containsKey(sp.getDBSpeciesID(dbVersion))) { 2239 myDatabase.setBuildLevels(sp, buildLevelsHash.get(sp.getDBSpeciesID(dbVersion))); 2240 } 2241 } 2242 2243 return collection; 2244 } 2245 2246// //not used yet 2247// private List<String> getAllCoreDatabasesForVersion(int dbVersion) throws DAOException { 2248 //<editor-fold defaultstate="collapsed" desc="comment"> 2249 // 2250 // int release = dbVersion; 2251 // List<String> outList = null; 2252 // SqlSession session = null; 2253 // 2254 // try { 2255 // session = sqlMapper.openSession(); 2256 // DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2257 // outList = dm.getAllCoreDatabaseNames(release); 2258 // } catch (Exception e) { 2259 // throw new DAOException("Failed to interrogate all core Database Names for release " + release, e); 2260 // } finally { 2261 // if (session != null) { 2262 // session.close(); 2263 // } 2264 // } 2265 // 2266 // if (outList == null || outList.isEmpty()) { 2267 // /* A null List is a perfectly valid outcome to return */ 2268 // return null; 2269 // } 2270 // 2271 // return outList; 2272 // } 2273 //</editor-fold> 2274 private DBSpecies createSpecies(SingleSpeciesCoreDatabase database) throws DAOException { 2275 2276 DBSpecies spData = new DBSpecies((DBSingleSpeciesCoreDatabase) database); 2277 Integer dbVersion = database.getIntDBVersion(); 2278 List<HashMap> tempList = null; 2279 2280 SqlSession session = null; 2281 2282 try { 2283 session = sqlMapper.openSession(); 2284 DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2285 tempList = dm.getSpeciesProperties(database.getdBName()); 2286 } catch (Exception e) { 2287 throw new DAOException("Failed to interrogate species properties for " + spData.getDatabaseStyleName(), e); 2288 } finally { 2289 if (session != null) { 2290 session.close(); 2291 } 2292 } 2293 2294 //for pre dbVersion 59 releases, the meta table is not guaranteed 2295 //to hold the species.production_name, species.short_name and 2296 //species.scientific_name meta_keys 2297 //pre schemaVersion 59 the compara name was always = Species Binomial 2298 //post schemaVersion 58 the compara name - production name 2299 //post schema 67 The following meta keys are retired 2300 //species.short_name 2301 //species.ensembl_common_name 2302 //species.ensembl_alias_name 2303 //The following meta keys will be added 2304 //species.url 2305 //species.display_name 2306 if (tempList != null) { 2307 for (HashMap row : tempList) { 2308 2309 String key = (String) row.get("key"); 2310 String value = (String) row.get("value"); 2311 2312 if (key == null || key.isEmpty() || value == null) { 2313 continue; 2314 } 2315 2316 //making aliases all lower case now 2317 if (key.startsWith("species.") 2318 && !key.equals("species.division") 2319 && !key.equals("species.stable_id_prefix")) { 2320 spData.getAliases().add(value.toLowerCase()); 2321 } 2322 2323 if (key.equals("species.stable_id_prefix")) { 2324 spData.setEnsemblStablePrefix(value); 2325 } else if (key.equals("species.production_name")) { 2326 spData.setDatabaseStyleSpeciesName(value); 2327 spData.setDatabaseStyleSpeciesName(dbVersion, value); 2328 } else if (key.equals("species.scientific_name")) { 2329 spData.setSpeciesBinomial(value); 2330 } else if (key.equals("species.common_name")) { 2331 spData.setCommonName(value); 2332 } else if (key.equals("species.taxonomy_id")) { 2333 spData.setTaxonomyID(value); 2334 } else if (key.equals("species.short_name") 2335 || key.equals("species.display_name")) { 2336 spData.setShortName(value); 2337 } else if (key.endsWith("build.level")) { 2338 database.setBuildLevel(key, value); 2339 } else if (key.equals("species.division")) { 2340 //this is only present in ensemblgenomes 2341 spData.setComparaDivision(value); 2342 } else if (key.equals("species.url")) { 2343 //the url name is set for the species from this most recent core db 2344 spData.setUrlName(value); 2345 } else if (key.equals("assembly.name")) { 2346 //assembly details are set in the database 2347 //as that is where lazy loading is controlled 2348 //(for releases other than highest - the values are lazy loaded not filled in here) 2349 database.setAssemblyName(value); 2350 } 2351 if (key.equals("assembly.accession")) { 2352 database.setAssemblyAccession(value); 2353 } 2354 } 2355 } 2356 2357 if (spData.getSpeciesBinomial().isEmpty()) { 2358 String s = database.getDbSpeciesName(); 2359 spData.setSpeciesBinomial(s.replaceFirst(s.substring(0, 1), (s.substring(0, 1)).toUpperCase()).replace("_", " ")); 2360 spData.getAliases().add(spData.getSpeciesBinomial().toLowerCase()); 2361 } 2362 2363 if (spData.getDatabaseStyleName().isEmpty()) { 2364 spData.setDatabaseStyleSpeciesName(database.getDbSpeciesName()); 2365 //actually not necessary if we are only running create species on the most 2366 //recent single species core databases 2367 spData.setDatabaseStyleSpeciesName(dbVersion, database.getDbSpeciesName()); 2368 spData.getAliases().add(spData.getDatabaseStyleName().toLowerCase()); 2369 } 2370 2371 if (Integer.parseInt(database.getSchemaVersion()) >= 59) { 2372 spData.setComparaName(database.getDBVersion(), spData.getDatabaseStyleName()); 2373 } else { 2374 spData.setComparaName(database.getDBVersion(), spData.getSpeciesBinomial()); 2375 } 2376 2377 return spData; 2378 } 2379 2380 /** 2381 * should only be called for non current singlespecies core databases 2382 * 2383 * @param db 2384 * @throws DAOException 2385 */ 2386 @Override 2387 public void setSpeciesMetadata(CoreDatabase db) throws DAOException { 2388 2389 if (db == null || db.getdBName() == null || db.getdBName().isEmpty()) { 2390 return; 2391 } 2392 2393 if (db instanceof CollectionDatabase) { 2394 2395 CollectionCoreDatabase database = (CollectionCoreDatabase) db; 2396 TreeSet<? extends CollectionSpecies> spp = database.getCollection().getSpecies(); 2397 HashMap<Integer, DBCollectionSpecies> localSpp = new HashMap<Integer, DBCollectionSpecies>(); 2398 2399 for (CollectionSpecies sp : spp) { 2400 localSpp.put(sp.getDBSpeciesID(database.getDBVersion()), (DBCollectionSpecies) sp); 2401 } 2402 2403 String dBName = database.getdBName(); 2404 List<HashMap> results = null; 2405 SqlSession session = null; 2406 2407 try { 2408 session = sqlMapper.openSession(); 2409 DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2410 results = dm.getSpeciesFromCollection(dBName); 2411 } catch (Exception e) { 2412 throw new DAOException("Failed to set species metadata for " + dBName, e); 2413 } finally { 2414 if (session != null) { 2415 session.close(); 2416 } 2417 } 2418 2419 if ((results == null) || (results.isEmpty() == true)) { 2420 return; 2421 } 2422 2423 for (Object o : results) { 2424 Integer id = null; 2425 DBCollectionSpecies species = null; 2426 HashMap m = (HashMap) o; 2427 id = (Integer) m.get("species_id"); 2428 2429 // if we've already made species with this id for this collection 2430 //use it 2431 if (localSpp.containsKey(id)) { 2432 species = localSpp.get(id); 2433 } else { 2434 continue; 2435 } 2436 2437// if (m.get("meta_key").equals("assembly.accession")) { 2438// database.setAssemblyAccession(species, (String) m.get("meta_value")); 2439// } 2440// if (m.get("meta_key").equals("assembly.name")) { 2441// database.setAssemblyName(species, (String) m.get("meta_value")); 2442// } 2443 } 2444 } else { 2445 SingleSpeciesCoreDatabase database = (SingleSpeciesCoreDatabase) db; 2446 String comparaName = ""; 2447 DBSpecies sp = (DBSpecies) database.getSpecies(); 2448 String dBName = database.getdBName(); 2449 List<HashMap> results = null; 2450 SqlSession session = null; 2451 2452 try { 2453 session = sqlMapper.openSession(); 2454 DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2455 results = dm.getSpeciesProperties(dBName); 2456 } catch (Exception e) { 2457 throw new DAOException("Failed to set species metadata for " + dBName, e); 2458 } finally { 2459 if (session != null) { 2460 session.close(); 2461 } 2462 } 2463 2464 if ((results == null) || (results.isEmpty() == true)) { 2465 return; 2466 } 2467 2468 for (HashMap row : results) { 2469 String key = (String) row.get("key"); 2470 String value = (String) row.get("value"); 2471 if (key == null || key.isEmpty() || value == null) { 2472 continue; 2473 } 2474 if (key.equals("species.production_name")) { 2475 comparaName = value; 2476 } else if (key.endsWith("build.level")) { 2477 database.setBuildLevel(key, value); 2478 } else if (key.equals("assembly.name")) { 2479 //assembly details are set in the database 2480 //as that is where lazy loading is controlled 2481 database.setAssemblyName(value); 2482 } else if (key.equals("assembly.accession")) { 2483 database.setAssemblyAccession(value); 2484 } 2485 } 2486 2487 //for pre dbVersion 59 releases, the meta table is not guaranteed 2488 //to hold the species.production_name, species.short_name and 2489 //species.scientific_name meta_keys 2490 //pre schemaVersion 59 the compara name was always = Species Binomial 2491 //post schemaVersion 58 the compara name - production name 2492 //post schema 67 The following meta keys are retired 2493 //species.short_name //species.ensembl_common_name //species.ensembl_alias_name 2494 //The following meta keys will be added 2495 //species.url //species.display_name 2496 if (Integer.parseInt(database.getSchemaVersion()) >= 59) { 2497 database.setComparaName(comparaName); 2498 } else { 2499 database.setComparaName(sp.getSpeciesBinomial()); 2500 } 2501 } 2502 } 2503 2504 /** 2505 * note by default translation by this default engine is configured to 2506 * convert non-Methionine initiations to Methionine 2507 * 2508 */ 2509 @Override 2510 public TranscriptionEngine getDefaultTranscriptionEngine() { 2511 return transcriptionEngines.get(1); 2512 } 2513 2514 @Override 2515 public TranscriptionEngine getTranscriptionEngine(Integer codonTable) { 2516 2517 /* valid codes are 2518 * 1 - UNIVERSAL 2519 * 2 - VERTEBRATE_MITOCHONDRIAL 2520 * 3 - YEAST_MITOCHONDRIAL 2521 * 4 - MOLD_MITOCHONDRIAL 2522 * 5 - INVERTEBRATE_MITOCHONDRIAL 2523 * 6 - CILIATE_NUCLEAR 2524 * 9 - ECHINODERM_MITOCHONDRIAL 2525 * 10 - EUPLOTID_NUCLEAR 2526 * 11 - BACTERIAL 2527 * 12 - ALTERNATIVE_YEAST_NUCLEAR 2528 * 13 - ASCIDIAN_MITOCHONDRIAL 2529 * 14 - FLATWORM_MITOCHONDRIAL 2530 * 15 - BLEPHARISMA_MACRONUCLEAR 2531 * 16 - 2CHLOROPHYCEAN_MITOCHONDRIAL 2532 * 21 - TREMATODE_MITOCHONDRIAL 2533 * 23 - SCENEDESMUS_MITOCHONDRIAL 2534 */ 2535 //using an invalid code will use the default universal table 2536 if (transcriptionEngines.containsKey(codonTable)) { 2537 return transcriptionEngines.get(codonTable); 2538 } 2539 TranscriptionEngine.Builder b = new TranscriptionEngine.Builder(); 2540 b.table(codonTable); 2541 TranscriptionEngine engine = b.build(); 2542 transcriptionEngines.put(codonTable, engine); 2543 return engine; 2544 2545 } 2546 2547 @Override 2548 public HashMap<String, String> getRenamedDBs() { 2549 return renamedDBs; 2550 } 2551 2552 HashMap<String, Set<DBSpecies>> panComparaSpecies = new HashMap<String, Set<DBSpecies>>(); 2553 2554 @Override 2555 public boolean isSpeciesInPanHomology(Species querySpecies, String dbVersion) { 2556 2557 if (querySpecies == null) { 2558 return false; 2559 } 2560 2561 if (dbVersion == null || dbVersion.isEmpty()) { 2562 if (querySpecies.getMostRecentCoreDatabase() != null) { 2563 dbVersion = querySpecies.getMostRecentCoreDatabase().getDBVersion(); 2564 } 2565 } 2566 2567 if (dbVersion == null || dbVersion.isEmpty() || dbVersion.equals("0")) { 2568 return false; 2569 } 2570 2571 if (!panComparaSpecies.containsKey(dbVersion)) { 2572 try { 2573 findPanHomologySpecies(dbVersion); 2574 } catch (DAOException ex) { 2575 return false; 2576 } 2577 } 2578 2579 return panComparaSpecies.get(dbVersion).contains(querySpecies); 2580 } 2581 2582 public Set<DBSpecies> getPanComparaSpecies(String dbVersion) { 2583 2584 if (dbVersion == null || dbVersion.isEmpty()) { 2585 dbVersion = "" + this.highestReleaseVersion; 2586 } 2587 if (dbVersion == null || dbVersion.isEmpty() || dbVersion.equals("0")) { 2588 return null; 2589 } 2590 2591 if (!panComparaSpecies.containsKey(dbVersion)) { 2592 try { 2593 findPanHomologySpecies(dbVersion); 2594 } catch (DAOException ex) { 2595 //dont fail here 2596 } 2597 } 2598 return panComparaSpecies.get(dbVersion); 2599 } 2600 2601 private void findPanHomologySpecies(String version) throws DAOException { 2602 2603 Database db = this.getComparaDatabase(EnsemblComparaDivision.PAN_HOMOLOGY, version); 2604 2605 if (db == null) { 2606 panComparaSpecies.put(version, new HashSet<DBSpecies>()); 2607 return; 2608 } 2609 2610 HashSet<DBSpecies> spp = new HashSet<DBSpecies>(); 2611 List<HashMap> results = null; 2612 SqlSession session = null; 2613 2614 try { 2615 session = sqlMapper.openSession(); 2616 DatabaseMapper dm = session.getMapper(DatabaseMapper.class); 2617 results = dm.getPanComparaSpecies(db.getdBName()); 2618 } catch (Exception e) { 2619 LOGGER.debug("Failed to interrogate species in Pan Compara DB for " + db); 2620 //throw new DAOException("Failed to interrogate species in Pan Compara DB for " + db, e); 2621 } finally { 2622 if (session != null) { 2623 session.close(); 2624 } 2625 } 2626 2627 if ((results == null) || (results.isEmpty() == true)) { 2628 panComparaSpecies.put(version, new HashSet<DBSpecies>()); 2629 return; 2630 } 2631 2632 for (HashMap h : results) { 2633 2634 String alias = (String) h.get("name"); 2635 //not sure i can use this yet 2636 String assembly = (String) h.get("assembly"); 2637 2638 DBSpecies sp = null; 2639 2640 try { 2641 sp = this.getSpeciesByAlias(alias, version); 2642 if (sp != null) { 2643 spp.add(sp); 2644 } 2645 } catch (NonUniqueException ex) { 2646 LOGGER.debug(ex.getMessage()); 2647 for (Object s : ex.getAllHits()) { 2648 LOGGER.debug(((DBSpecies) s).getDatabaseStyleName()); 2649 } 2650 } 2651 } 2652 panComparaSpecies.put(version, spp); 2653 } 2654 2655}