001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.dao.database;
023
024import java.io.BufferedWriter;
025import java.io.File;
026import java.io.FileWriter;
027import java.io.IOException;
028import java.io.InputStreamReader;
029import java.io.Reader;
030import java.util.*;
031import java.util.Map.Entry;
032import org.apache.ibatis.session.SqlSession;
033import org.apache.ibatis.session.SqlSessionFactory;
034import org.apache.ibatis.session.SqlSessionFactoryBuilder;
035import org.biojava3.core.sequence.transcription.TranscriptionEngine;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038import uk.ac.roslin.ensembl.config.*;
039import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
040import uk.ac.roslin.ensembl.exception.ConfigurationException;
041import uk.ac.roslin.ensembl.exception.DAOException;
042import uk.ac.roslin.ensembl.exception.NonUniqueException;
043import uk.ac.roslin.ensembl.mapper.DatabaseMapper;
044import uk.ac.roslin.ensembl.model.core.CollectionSpecies;
045import uk.ac.roslin.ensembl.model.core.Species;
046import uk.ac.roslin.ensembl.model.database.*;
047
048public class DBRegistry
049        implements Registry {
050
051    final static Logger LOGGER = LoggerFactory.getLogger(DBRegistry.class);
052    final static String AmbiguitySeparator = "|OR|";
053    protected SqlSessionFactory sqlMapper;
054    protected Properties configProperties = new Properties();
055    protected SchemaVersion schemaVersion = null;
056    protected DBConnection dbConnection = null;
057    private String mostRecentEnsemblVersion;
058//    private String mostRecentGenomesVersion;
059    //incase the datasource does not include the 'current' dbVersion
060    // the 'schema' Version
061    private int highestEnsemblSchemaVersion = 0;
062    //the DB dbVersion
063    private int highestReleaseVersion = 0;
064//    private int currentDatasourceVersion = 0;
065    //the versions of core databases for a species or collection name
066    private HashMap<String, TreeSet<Integer>> databaseVersions = new HashMap<String, TreeSet<Integer>>();
067    protected TreeSet<Integer> knownSchemaVersions = new TreeSet<Integer>();
068    //the mybatis configProperties for database connection
069    private String baseMybatis;
070    //to read the mybatis regConfig
071    protected Reader reader;
072
073    //these two hashes hold all the species....keyed on the meta.key = species.productionname
074    //(databasestylename)
075    protected TreeMap<String, DBSpecies> speciesHash = new TreeMap<String, DBSpecies>();
076    //for collection species without GCAccessions (mostly prior to the use of non-taxonomic collections)
077    protected TreeMap<String, DBCollectionSpecies> oldstyle_collectionspeciesHashByDBName = new TreeMap<String, DBCollectionSpecies>();
078//    //only put species made from new style collections ( or rather, any species with a GCAccession) in here
079
080    protected TreeMap<String, DBCollectionSpecies> collectionspeciesHashByGCAccession = new TreeMap<String, DBCollectionSpecies>();
081
082    protected List<DBCollection> collections = new ArrayList<DBCollection>();
083    //we need a separate one of these for each collection (dbVersion) as they will grow
084    protected TreeMap<String, TreeMap<String, DBCollection>> collectionNameDBVersionHash = new TreeMap<String, TreeMap<String, DBCollection>>();
085    //the aliases are now all added as lower case versions
086    protected TreeMap<String, String> ensemblNameForAliasHash = new TreeMap<String, String>();
087
088    //new style ( post v16) collection species should all be indexed by GCAccession
089    //hence we need a hash of alias to Accession
090    protected TreeMap<String, String> assemblyAccessionStemForAliasHash = new TreeMap<String, String>();
091
092    //the databases
093    protected TreeSet<DBDatabase> allDatabases = new TreeSet<DBDatabase>();
094    protected TreeSet<DBSingleSpeciesDatabase> singleSpeciesDatabases = new TreeSet<DBSingleSpeciesDatabase>();
095    protected TreeSet<DBComparisonDatabase> comparisonDatabases = new TreeSet<DBComparisonDatabase>();
096    //HashMap of comparison databases indexed on 'group' , then on 'dbVersion' (not schema dbVersion)
097    protected HashMap<EnsemblComparaDivision, HashMap<String, DBComparisonDatabase>> comparaDBByDivision
098            = new HashMap<EnsemblComparaDivision, HashMap<String, DBComparisonDatabase>>();
099    protected TreeSet<DBCollectionDatabase> collectionDatabases = new TreeSet<DBCollectionDatabase>();
100    protected String newline = (System.getProperty("line.separator") != null) ? System.getProperty("line.separator") : "\r\n";
101    protected StringBuilder tooNewDB = new StringBuilder();
102    protected StringBuilder unknownDB = new StringBuilder();
103    protected RegistryConfiguration regConfig = null;
104    DataSource datasourceType;
105    protected boolean alreadyInitialized = false;
106    protected boolean alreadyParsed = false;
107    protected HashMap<String, String> renamedDBs = new HashMap<String, String>();
108
109    private HashMap<Integer, TranscriptionEngine> transcriptionEngines = new HashMap<Integer, TranscriptionEngine>();
110
111    public static DBRegistry createEmptyRegistry() {
112        return new DBRegistry();
113    }
114
115    public static DBRegistry createRegistryForDataSource(DataSource type) throws ConfigurationException, DAOException {
116        if (type == null) {
117            throw new ConfigurationException("Invalid Datasource");
118        }
119        if (DataSource.ENSEMBLBACTERIA.equals(type)) {
120            throw new ConfigurationException("Ensembl Bacteria data sources are too large to be loaded en masse. "
121                    +"Use '.createRegistryForDataSourceCurrentRelease()' to load the current release; "
122                    +"or create an unitialized registry to interrogate available versions and then load a single release.");
123        }
124        DBRegistry dbRegistry = new DBRegistry();
125        dbRegistry.setConfiguration(type);
126        dbRegistry.initialize(null);
127        return dbRegistry;
128    }
129
130    public static DBRegistry createUninitializedRegistryForDataSource(DataSource type) throws ConfigurationException, DAOException {
131        if (type == null) {
132            throw new ConfigurationException("Invalid Datasource");
133        }
134        DBRegistry dbRegistry = new DBRegistry();
135        dbRegistry.setConfiguration(type);
136        dbRegistry.parseWithoutInitializing();
137        return dbRegistry;
138    }
139
140    public static DBRegistry createRegistryForConfiguration(RegistryConfiguration conf) throws ConfigurationException, DAOException {
141        if (conf == null) {
142            throw new ConfigurationException("Invalid Configuration");
143        }
144        DBRegistry dbRegistry = new DBRegistry();
145        dbRegistry.setConfiguration(conf);
146        dbRegistry.initialize(null);
147        return dbRegistry;
148    }
149
150    public static DBRegistry createUninitializedRegistryForConfiguration(RegistryConfiguration conf) throws ConfigurationException, DAOException {
151        if (conf == null) {
152            throw new ConfigurationException("Invalid Configuration");
153        }
154        DBRegistry dbRegistry = new DBRegistry();
155        dbRegistry.setConfiguration(conf);
156        dbRegistry.parseWithoutInitializing();
157        return dbRegistry;
158    }
159
160    public static DBRegistry createRegistryForDataSourceCurrentRelease(DataSource type) throws ConfigurationException, DAOException {
161        if (type == null) {
162            throw new ConfigurationException("Invalid Datasource");
163        }
164
165        // would be simpler just to do
166        //SchemaVersion versionProps = new SchemaVersion(); 
167        //versionProps.getCurrentGenomesVersion()
168        //versionProps.getCurrentEnsemblVersion()
169        DBRegistry dbRegistry = new DBRegistry();
170        dbRegistry.setConfiguration(type);
171        dbRegistry.parseWithoutInitializing();
172        dbRegistry.knownSchemaVersions.clear();
173        //allows backpedaling if actual sorce is older than config 
174        //(e.g. ensemblgenomes updates a month later than ensembl)
175        dbRegistry.knownSchemaVersions.add(dbRegistry.highestEnsemblSchemaVersion);
176        dbRegistry.initialize(dbRegistry.highestReleaseVersion);
177        return dbRegistry;
178    }
179
180    public static DBRegistry createRegistryForDataSourceAtReleaseVersion(DataSource type, Integer release) throws ConfigurationException, DAOException {
181        if (type == null) {
182            throw new ConfigurationException("Invalid Datasource");
183        }
184        if (release == null) {
185            throw new ConfigurationException("Invalid schema/release");
186        }
187        DBRegistry dbRegistry = new DBRegistry();
188        dbRegistry.setConfiguration(type);
189        dbRegistry.parseWithoutInitializing();
190        if (type.equals(DataSource.ENSEMBLDB)) {
191            if (!dbRegistry.knownSchemaVersions.contains(release)) {
192                throw new ConfigurationException(release + " not a known schema/release");
193            }
194        }
195        dbRegistry.initialize(release);
196        return dbRegistry;
197    }
198
/**
 * Private parameterless constructor; instances are obtained through the
 * static create... factory methods of this class.
 */
private DBRegistry() {
    super();
}
204
205    public void setConfiguration(RegistryConfiguration conf) throws ConfigurationException, DAOException {
206        this.regConfig = conf;
207        this.renamedDBs = this.regConfig.getDb().getRenamedDBs();
208        this.datasourceType = regConfig.getType();
209        this.dbConnection = regConfig.getDb();
210        this.configProperties.putAll(dbConnection.getConfigurationProperties());
211        this.schemaVersion = regConfig.getSchema();
212        this.configProperties.putAll(schemaVersion.getConfigurationProperties());
213        this.mostRecentEnsemblVersion = this.schemaVersion.getCurrentEnsemblVersion();
214//        this.mostRecentGenomesVersion = this.schemaVersion.getCurrentGenomesVersion();
215
216//        //for ensemble genomes we have a DBrelease_schema_build
217//        //but for ensembl we have DBrelease_build, where DBrelease = schema
218//        
219//        if (this.datasourceType==DataSource.ENSEMBLGENOMES) {
220//            this.currentDatasourceVersion = Integer.parseInt(this.mostRecentGenomesVersion);
221//        } else {
222//            this.currentDatasourceVersion = Integer.parseInt(this.mostRecentEnsemblVersion);
223//        }
224        String[] versions = schemaVersion.getRegisteredSchemas();
225
226        for (int i = 0; i < versions.length; i++) {
227            this.knownSchemaVersions.add(Integer.parseInt(versions[i]));
228        }
229
230        LOGGER.info("This application is configured to use schema version " + mostRecentEnsemblVersion
231                + " of Ensembl, and knows about the schema versions: " + Arrays.toString(versions));
232
233        // mybatis configProperties for databaseDAO
234        this.baseMybatis = this.schemaVersion.getBaseMybatis();
235        try {
236            reader = new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream(baseMybatis));
237            //this doesn't work once you bundle up
238            //reader = Resources.getResourceAsReader(baseMybatis);
239        } catch (Exception ex) {
240            throw new ConfigurationException("Fail to read Mybatis Configuration for initial Database connection", ex);
241        }
242        try {
243            sqlMapper = new SqlSessionFactoryBuilder().build(reader, "current", configProperties);
244            if (sqlMapper == null) {
245                throw new Exception("SqlSessionFactory is null");
246            }
247        } catch (Exception e) {
248            throw new ConfigurationException("The DBRegistry is unable to make a SqlSessionFactory", e);
249        }
250    }
251
252    public void setConfiguration(DataSource type) throws ConfigurationException, DAOException {
253        this.setConfiguration(new RegistryConfiguration(type));
254    }
255
256    private void parseWithoutInitializing() throws ConfigurationException, DAOException {
257
258        if (this.alreadyInitialized) {
259            throw new ConfigurationException("Tried to re-initialize a DBRegistry");
260        }
261
262        LOGGER.info("Registry is reading information about databases available from this source ("
263                + this.dbConnection.getConfigurationProperties().getProperty("url") + "):");
264
265        for (String s : this.getDatabases()) {
266
267            if (dbConnection.isDBNameValid(s)) {
268                try {
269                    DBDatabase db = this.makeDatabase(s.trim());
270
271                    if (db.intSchemaVersion > Integer.parseInt(this.mostRecentEnsemblVersion)) {
272                        LOGGER.warn("IGNORING Database " + s + " with higher schema number ("
273                                + db.getSchemaVersion() + ") than application configuration (" + this.mostRecentEnsemblVersion + ").");
274                        tooNewDB.append("\t"+s + newline);
275                    } else if (!this.knownSchemaVersions.contains(Integer.parseInt(db.getSchemaVersion()))) {
276                        LOGGER.warn("IGNORING Database " + s + " with schema number ("
277                                + db.getSchemaVersion() + ") not found in the application configuration.");
278                        unknownDB.append("\t"+s + newline);
279                    } else {
280                        this.pseudoRegisterDatabase(db);
281                    }
282
283                } catch (Exception e) {
284                    LOGGER.warn("Unspecified error thrown trying to make a Database object for '" + s + "'", e);
285                }
286            }
287        }
288        this.alreadyParsed=true;
289
290    }
291
292    /**
293     * private initialisation method for the registry which connects to ensembl
294     * via a DBDatasourceDAO and retrieves details on all the current tables and
295     * creates database objects of the appropriate type for each. Also retrieves
296     * dbSpeciesName aliases for all the current release dbSpeciesName. And
297     * finds out what dbSpeciesName are present in multispecies databases.
298     *
299     * @param release
300     * @throws DAOException, ConfigurationException
301     * @throws uk.ac.roslin.ensembl.exception.ConfigurationException
302     */
303    public void initialize(Integer release) throws DAOException, ConfigurationException {
304
305        if (this.alreadyInitialized) {
306            throw new ConfigurationException("Tried to re-initialize a DBRegistry");
307        }
308
309        //populates the names of all the databases available, excluding marts, test, mysql and information_schema
310        LOGGER.info("Registry is loading and sorting all databases available from this source ("
311                + this.dbConnection.getConfigurationProperties().getProperty("url") + "):");
312
313        if (release != null) {
314            LOGGER.info("Loading databases only for release " + release);
315        }
316
317        tooNewDB = new StringBuilder();
318        unknownDB = new StringBuilder();
319
320        if (release != null) {
321            this.highestReleaseVersion = release;
322        }
323
324        /*
325         * note by default translation by this default engine is configured to convert 
326         * non-Methionine initiations to Methionine. This cannot be alterred so you need 
327         * to make an appropriate different Engine to use in place of the default
328         * TranscriptionEngine.Builder b = new TranscriptionEngine.Builder();
329         * b.table(1).initMet(false).trimStop(true);
330         * TranscriptionEngine alternativeDefault = b.build();
331         * 
332         * this could then be handed to  (DATranlsation).getProteinSequence(TranscriptionEngine trancriptionEngine)
333         * or (DNASequence).getRNASequence().getProteinSequence(engine)
334         */
335        transcriptionEngines.put(1, TranscriptionEngine.getDefault());
336
337        for (String s : this.getDatabases()) {
338
339            if (dbConnection.isDBNameValid(s)) {
340//                 if (s.startsWith("bacillus_collection_core_")  
341//                      || s.startsWith("bacteria_21_collection_core_") ) { 
342//                    && (s.startsWith("escherichia_shigella_collection_core_16")
343//                        || s.startsWith("bacteria_22"))) {
344                try {
345                    DBDatabase db = this.makeDatabase(s.trim());
346
347                    if (release != null && db.getIntDBVersion() != release) {
348                        continue;
349                    }
350
351                    if (db.intSchemaVersion > Integer.parseInt(this.mostRecentEnsemblVersion)) {
352                        LOGGER.warn("IGNORING Database " + s + " with higher schema number ("
353                                + db.getSchemaVersion() + ") than application configuration (" + this.mostRecentEnsemblVersion + ").");
354                        tooNewDB.append("\t"+s + newline);
355                    } //                    //might drop this test, we may drop 'mostRecentGenomes'....
356                    //                    else if ((this.datasourceType == DataSource.ENSEMBLGENOMES)
357                    //                            && db.intDBVersion > Integer.parseInt(this.mostRecentGenomesVersion)) {
358                    //                        LOGGER.warn("BEWARE: Adding Database " + s + " with higher release number ("
359                    //                                + db.intDBVersion + ") than application configuration (" + this.mostRecentGenomesVersion + "). "
360                    //                                + "This database can be accessed by schema or release number but will not be returned by default methods that return the current release version.");
361                    //                        this.aboveCurrentReleaseDB.append(s + newline);
362                    //                        this.registerDatabase(db);
363                    ////                        LOGGER.warn("IGNORING Database " + s + " with higher release number ("
364                    ////                                + db.intDBVersion + ") than application configuration (" + this.mostRecentGenomesVersion + ").");
365                    ////                        tooNewDB.append("\t"+s + newline);
366                    //                    } 
367                    else if (!this.knownSchemaVersions.contains(Integer.parseInt(db.getSchemaVersion()))) {
368                        LOGGER.warn("IGNORING Database " + s + " with schema number ("
369                                + db.getSchemaVersion() + ") not found in the application configuration.");
370                        unknownDB.append("\t"+s + newline);
371                    } else {
372                        this.registerDatabase(db);
373                    }
374
375                } catch (Exception e) {
376                    LOGGER.warn("Unspecified error thrown trying to make a Database object for '" + s + "'", e);
377                }
378            }
379        }
380
381        if (allDatabases.isEmpty()) {
382            throw new ConfigurationException("No Valid Databases Loaded");
383        }
384
385        if (release != null) {
386            DBDatabase first = allDatabases.first();
387            if (first != null) {
388                this.mostRecentEnsemblVersion = first.schemaVersion;
389                this.highestEnsemblSchemaVersion = first.intSchemaVersion;
390                this.highestReleaseVersion = release;
391                this.knownSchemaVersions.clear();
392                this.knownSchemaVersions.add(release);
393            }
394        }
395
396        //change this to get the most recent release rather than the 'current release'
397        //this should sort the noted BUG below too
398        //get all the core databases for the current release
399        //so that we can create Species database objects for each of these
400        //filling in the aliases and some other properties
401        //we then put these in a hash against genus_species
402        //BUG: if the datasource doesnt have any current releases
403        //e.g. if configured to load ensembldb_archives
404        //we wont make any species -
405        //need a work around if this is the case - to use the highest release dbVersion found
406        TreeSet<DBSingleSpeciesDatabase> badDatabases = new TreeSet<DBSingleSpeciesDatabase>();
407
408        // go through all the SSdatabases and select the current core releases
409        // and make a species object from these
410        for (DBSingleSpeciesDatabase d : this.singleSpeciesDatabases) {
411
412            //can't use this method anymore cos can throw a nonUniqueException
413//            if (d.type == EnsemblDBType.core
414//                    && d.getDBVersion().equalsIgnoreCase(this.getMostRecentDBVersion(d.getdBClassifier()))) {     
415            String mostRecentVersion = null;
416
417            if (this.databaseVersions.containsKey(d.getdBClassifier())) {
418                mostRecentVersion = this.databaseVersions.get(d.getdBClassifier()).last().toString();
419            } else {
420                //if this isnt a db classifier as stored in the version hash - bail out
421                continue;
422            }
423
424            //if this is the most recent core database - we make the species
425            if (d.type == EnsemblDBType.core
426                    && d.getDBVersion().equalsIgnoreCase(mostRecentVersion)) {
427
428                DBSpecies spData = null;
429
430                //this can fail if there is something wrong with the database!
431                try {
432                    spData = this.createSpecies((DBSingleSpeciesCoreDatabase) d);
433                } catch (DAOException dAOE) {
434                    LOGGER.info("Failed to create a species for current core database: " + d.getdBName()
435                            + ": Removed this database!", dAOE);
436                }
437                if (spData != null && spData.getDatabaseStyleName() != null && !spData.getDatabaseStyleName().isEmpty()) {
438                    d.setSpecies(spData);
439                    speciesHash.put(spData.getDatabaseStyleName(), spData);
440                } else {
441                    //store this as a bad (current) database
442                    badDatabases.add(d);
443                }
444            }
445
446        }
447
448        //remove any databases that i cant make a species for!
449        this.singleSpeciesDatabases.removeAll(badDatabases);
450        this.allDatabases.removeAll(badDatabases);
451
452        // the collections will add members with time - so each collectionDB needs
453        //its own Collection
454        //get all the collection_core databases 
455        //so that we can create Collection database objects for each of these
456        //and populate it with CollectionSpecies objects for this collection
457        // however we will reuse existing CollectionSpecies objects
458        // and only add new ones to the registry
459        TreeSet<DBCollectionDatabase> badCollectionDatabases = new TreeSet<DBCollectionDatabase>();
460
461        for (DBCollectionDatabase d : this.collectionDatabases) {
462            if (d.type == EnsemblDBType.collection_core) {
463
464                // make a registrycollection for each collection core database
465                //as these may differ over time
466                DBCollection thisCollection = null;
467                try {
468                    //creates (or reuses) all the species in the collection
469                    thisCollection = this.getCollectionSpeciesProperties((CollectionCoreDatabase) d);
470                } catch (DAOException ex) {
471                    LOGGER.info("Failed to create a collectionfor current core database: " + d.getdBName()
472                            + ": Removed this database!", ex);
473                }
474
475                if (thisCollection != null) {
476                    this.collections.add(thisCollection);
477                    //d.setCollection(r);
478                    if (this.collectionNameDBVersionHash.containsKey(thisCollection.getCollectionName())) {
479                        this.collectionNameDBVersionHash.get(thisCollection.getCollectionName()).put(d.getDBVersion(), thisCollection);
480                    } else {
481                        TreeMap<String, DBCollection> map = new TreeMap<String, DBCollection>();
482                        map.put(d.getDBVersion(), thisCollection);
483                        this.collectionNameDBVersionHash.put(thisCollection.getCollectionName(), map);
484                    }
485                } else {
486                    badCollectionDatabases.add(d);
487                }
488            }
489        }
490
491        this.collectionDatabases.removeAll(badCollectionDatabases);
492        this.allDatabases.removeAll(badCollectionDatabases);
493
494        // we need to set the collections on the other types of collection databases
495        for (DBCollectionDatabase d : this.collectionDatabases) {
496            if (d.type != EnsemblDBType.collection_core) {
497                String dbVersion = d.getDBVersion();
498                String cName = d.getCollectionName();
499                if (this.collectionNameDBVersionHash.containsKey(cName)
500                        && this.collectionNameDBVersionHash.get(cName).containsKey(dbVersion)) {
501                    d.setCollection(this.collectionNameDBVersionHash.get(cName).get(dbVersion));
502                }
503            }
504        }
505
506        //add aliases for renamed databases
507        //only for single species databases
508        for (Map.Entry<String, String> entry : this.renamedDBs.entrySet()) {
509            if (speciesHash.containsKey(entry.getValue())) {
510                speciesHash.get(entry.getValue()).getAliases().add(entry.getKey());
511                speciesHash.get(entry.getValue()).getAliases().add(entry.getKey().replace("_", " "));
512            }
513        }
514
515        //we now populate the look up hash for aliases
516        //and at the same time add all the relevant databases to each DBSpecies
517        //and vice versa
518        for (DBSpecies sp : speciesHash.values()) {
519            for (String s : sp.getAliases()) {
520                if (this.ensemblNameForAliasHash.containsKey(s)) {
521                    String n = this.ensemblNameForAliasHash.get(s).concat(AmbiguitySeparator + sp.getDatabaseStyleName());
522                    this.ensemblNameForAliasHash.put(s, n);
523                } else {
524                    this.ensemblNameForAliasHash.put(s, sp.getDatabaseStyleName());
525                }
526            }
527            for (DBSingleSpeciesDatabase d : this.singleSpeciesDatabases) {
528                if (d.getDbSpeciesName().equalsIgnoreCase(sp.getDatabaseStyleName())) {
529                    sp.addDatabase(d);
530                    if (d.getSpecies() == null) {
531                        d.setSpecies(sp);
532                    }
533                }
534            }
535        }
536
537        for (DBCollection coll : this.collections) {
538            //this sets all the species to have the most recent dbstyle name, and all the TreeSets 
539            //in the collections to be remade
540            coll.resortSpecies();
541        }
542
543        for (DBCollectionSpecies sp : oldstyle_collectionspeciesHashByDBName.values()) {
544            for (String s : sp.getAliases()) {
545                if (this.ensemblNameForAliasHash.containsKey(s)) {
546                    String n = this.ensemblNameForAliasHash.get(s).concat(AmbiguitySeparator + sp.getDatabaseStyleName());
547                    this.ensemblNameForAliasHash.put(s, n);
548                } else {
549                    this.ensemblNameForAliasHash.put(s, sp.getDatabaseStyleName());
550                }
551            }
552            for (DBCollectionDatabase d : this.collectionDatabases) {
553                try {
554                    if (EnsemblDBType.getCollectionDatabaseTypes().contains(d.getType())
555                            && d.getCollection().getSpecies().contains(sp)) {
556                        sp.addDatabase(d);
557                    }
558                } catch (Exception e) {
559                }
560            }
561        }
562
563        for (DBCollectionSpecies sp : collectionspeciesHashByGCAccession.values()) {
564            for (String s : sp.getAliases()) {
565
566                if (this.assemblyAccessionStemForAliasHash.containsKey(s)) {
567                    String n = this.assemblyAccessionStemForAliasHash.get(s).concat(AmbiguitySeparator + sp.getAssemblyAccessionStem());
568                    this.assemblyAccessionStemForAliasHash.put(s, n);
569                } else {
570                    this.assemblyAccessionStemForAliasHash.put(s, sp.getAssemblyAccessionStem());
571                }
572            }
573            for (DBCollectionDatabase d : this.collectionDatabases) {
574                try {
575
576                    if (EnsemblDBType.getCollectionDatabaseTypes().contains(d.getType())
577                            && d.getCollection().getSpecies().contains(sp)) {
578                        sp.addDatabase(d);
579                    }
580                } catch (Exception e) {
581                }
582            }
583        }
584
585        //add all the relevant databases to each DBCollection
586        for (Entry<String, TreeMap<String, DBCollection>> e : this.collectionNameDBVersionHash.entrySet()) {
587            String name = e.getKey();
588
589            for (Entry<String, DBCollection> ei : e.getValue().entrySet()) {
590                String dbversion = ei.getKey();
591                DBCollection collection = ei.getValue();
592                TreeSet<CollectionDatabase> temp = new TreeSet<CollectionDatabase>();
593                for (DBCollectionDatabase d : this.collectionDatabases) {
594                    if (d.getCollectionName().equals(name) && d.getDBVersion().equalsIgnoreCase(dbversion)) {
595                        temp.add(d);
596                    }
597                }
598                collection.addDatabases(temp);
599            }
600        }
601        this.alreadyInitialized = true;
602    }
603
604    /**
605     * Reports the available database versions for this Registry.
606     * Will successfully report for either initialized or uninitialized Registries.
607     * since v74
608     * @return StringBuilder
609     */
610    @Override
611    public StringBuilder getVersionReport() {
612
613        StringBuilder out = new StringBuilder();
614        
615        out.append(newline+"VERSION REPORT"+newline+"--------------"+newline+newline);
616
617        if (datasourceType == null) {
618            return out.append("\tWARNING: This Registry has invalid DataSource.");
619        }
620        if (dbConnection == null) {
621            return out.append("\tWARNING: This Registry has invalid Database Connection.");
622        }
623       
624        if (!this.alreadyInitialized) {
625            out.append("\tWARNING: This registry is uninitialized."+newline);
626        }
627
628        out.append("Registry for: [" + this.datasourceType.toString() + "] " + this.dbConnection.getConfigurationProperties().getProperty("url") + newline);
629
630        out.append("Most recent configured Ensembl Release Schema: " + this.mostRecentEnsemblVersion + newline);
631        out.append("Most recent Schema Version available in Datasource: " + this.highestEnsemblSchemaVersion + newline);
632        if ( this.datasourceType==DataSource.ENSEMBLGENOMES) {
633            out.append("Most recent configured ensemblgenomes release available in Datasource: " + this.highestReleaseVersion + newline);
634        }
635        if ( this.datasourceType==DataSource.ENSEMBLBACTERIA) {
636            out.append("Most recent configured ensemblbacteria release available in Datasource: " + this.highestReleaseVersion + newline);
637        }
638
639        out.append("Known Schemas (Ensembl releases)" + newline + "\t");
640
641        for (Integer i : this.getKnownSchemaVersions()) {
642            out.append(i + ", ");
643        };
644
645        int end = out.lastIndexOf(",");
646        out.delete(end, end + 1);
647        out.append(newline);
648        
649        out.append(dbConnection.report() + newline);
650
651        return out;
652    } 
653    
654        /**
655     * Reports the connection and available database versions etc. for an initialized Registry.
656     * Lists databases for which this API is not configured (including those too old or too recent).
657     * @return StringBuilder
658     */
659    @Override
660    public StringBuilder getBriefRegistryReport() {
661        StringBuilder out = new StringBuilder();
662        
663        out.append(newline+"REGISTRY REPORT"+newline+"---------------"+newline+newline);        
664        
665        if (this.regConfig == null) {
666            out.append("\tWARNING: This Registry object has not been configured."+newline+newline);
667            return out;
668        }
669
670        out = this.getVersionReport();
671        
672        if (tooNewDB.toString() != null && !tooNewDB.toString().isEmpty()) {
673            out.append("WARNING: Some database releases were too recent for the current JEnsembl configuration (i.e. using unknown schema):"
674                    + newline+ newline + tooNewDB.toString());
675        }
676
677        if (unknownDB.toString() != null && !unknownDB.toString().isEmpty()) {
678            out.append("WARNING: Some database releases were not known in the current JEnsembl configuration:"
679                    + newline + newline+ unknownDB.toString());
680        }
681
682        if ((unknownDB.toString() == null || unknownDB.toString().isEmpty())
683                && (tooNewDB.toString() == null || tooNewDB.toString().isEmpty())) {
684            out.append("No database releases were not known in the current JEnsembl configuration."
685                    + newline);
686        }
687        
688       if (!this.alreadyInitialized) {
689            out.append(newline+"\tWARNING: This configured Registry has not yet been initialized" + newline);
690            return out;
691        }
692
693        return out;
694    }
695
696    /**
697     * Reports the connection, available database versions, species and actual database names for an initialized Registry.
698     * Lists databases for which this API is not configured (including those too old or too recent). 
699     * @return File
700     * @throws java.io.IOException
701     */
702    @Override
703    public File getRegistryReport() throws IOException {
704        
705        File file = File.createTempFile("RegistryReport", ".txt");
706        FileWriter fileWriter = new FileWriter(file);
707        BufferedWriter out = new BufferedWriter(fileWriter, 8192);
708        
709
710        if (this.regConfig == null) {
711            out.write(newline+"REGISTRY REPORT"+newline+"---------------"+newline+newline);                    
712            out.write("\tWARNING: This Registry object has not been configured.");
713            
714            out.flush();
715            out.close();
716            return file;
717        }
718
719        out.write(this.getBriefRegistryReport().toString());
720        
721        if (!this.alreadyInitialized) {
722                        
723            out.flush();
724            out.close();
725            return file;
726        }
727        
728        out.append(newline+"REGISTRY DETAILS"+newline+newline);        
729        
730        out.append("Species represented in Datasource (with aliases):" + newline);
731        out.append("SPECIES in unique databases" + newline );
732        out.append("---------------------------" + newline + newline );
733
734        for (DBSpecies s : this.speciesHash.values()) {
735            out.append(s.getSpeciesBinomial() + " [" + s.getDatabaseStyleName() + "] (");
736            for (String st : s.getAliases()) {
737                out.append(st + ", ");
738            }
739            try {
740                out.append(")" + newline + "\tMost Recent Core Database: " + s.getMostRecentCoreDatabase().getdBName() + newline);
741
742                for (FeatureType key : ((DBSingleSpeciesCoreDatabase) s.getMostRecentCoreDatabase()).getBuildLevels().keySet()) {
743                    out.append("\t\t"+key.toString() + " : " + ((DBSingleSpeciesCoreDatabase) s.getMostRecentCoreDatabase()).getBuildLevels().get(key) +newline);
744                }
745
746            } catch (Exception e) {
747                out.append(")" + newline + "\tNo Current Core Database" + newline);
748            }
749        }
750
751        out.append(newline + "Old Style SPECIES in collection databases" + newline );
752        out.append("-------------------------------" + newline+newline );
753
754        if (this.oldstyle_collectionspeciesHashByDBName.values().isEmpty()) {
755            out.append(newline + "NONE" + newline);
756        }
757
758        for (DBCollectionSpecies s : this.oldstyle_collectionspeciesHashByDBName.values()) {
759            out.append(s.getSpeciesBinomial() + " [" + s.getDatabaseStyleName() + "] (");
760            for (String st : s.getAliases()) {
761                out.append(st + ", ");
762            }
763            try {
764                out.append(")" + newline + "\tCurrent Core Database: " + s.getMostRecentCoreDatabase().getdBName() + newline);
765            } catch (Exception e) {
766                out.append(")" + newline + "\tNo Current Core Database" + newline);
767            }
768        }
769
770        out.append(newline+"MULTI SPECIES DATABASES: ");
771        out.append(newline+"-----------------------"+newline);
772
773        for (EnsemblDBType t : EnsemblDBType.getSpeciesComparisonDatabaseTypes()) {
774            out.append(t.toString() + ":"+newline);
775            for (Database d : this.getDatabasesByType(t)) {
776                out.append("\t" + d.getdBName() + newline);
777            }
778        }
779
780        out.append(newline+"SINGLE SPECIES DATABASE DETAILS:");
781        out.append(newline+"--------------------------------"+newline);
782
783        for (DBSpecies sp : this.speciesHash.values()) {
784            String id = sp.getSpeciesBinomial();
785            out.append(id + newline+"--------------------"+newline);
786
787            out.append(newline+"\tMost Recent Versions:"+newline);
788
789//            for (Database d : sp.getDatabasesByVersion(this.getMostRecentDBVersion())) {
790//                out.append("\t\t" + d.getdBName()+newline);
791//            }
792            for (Database d : sp.getDatabasesByVersion(sp.getMostRecentCoreDatabase().getDBVersion())) {
793                out.append("\t\t" + d.getdBName() +newline);
794            }
795
796            out.append("\tBy Type:"+newline);
797            for (EnsemblDBType t : EnsemblDBType.getSingleSpeciesDatabaseTypes()) {
798
799                out.append("\t" + t.toString()+newline);
800                if (sp.getDatabasesByType(t).isEmpty()) {
801                    out.append("\t\tNONE"+newline);
802                } else {
803                    for (Database d : sp.getDatabasesByType(t)) {
804                        out.append("\t\t" + d.getdBName() +newline);
805                        if (t == EnsemblDBType.core) {
806                            out.append("\t\t\tAssembly: " + ((SingleSpeciesCoreDatabase) d).getAssemblyName()
807                                    + " [" + ((SingleSpeciesCoreDatabase) d).getAssemblyAccession()
808                                    + "]"+newline);
809                        }
810                    }
811                }
812            }
813
814        }
815        out.append(newline+"COLLECTION SPECIES DATABASE DETAILS:"+newline);
816        out.append("------------------------------------"+newline+newline);
817
818        out.append(newline+"NEW STYLE SPECIES ID (using GC Accession, typically post release 17):"+newline+newline);
819        
820        if (this.collectionspeciesHashByGCAccession.values().isEmpty()) {
821            out.append("\tNONE"+newline);
822        }        
823        
824        for (DBCollectionSpecies sp : this.collectionspeciesHashByGCAccession.values()) {
825            String id = sp.getSpeciesBinomial();
826            String acc = sp.getAssemblyAccessionStem()!=null && !sp.getAssemblyAccessionStem().isEmpty() ? sp.getAssemblyAccessionStem():" ";
827            
828            out.append(id+" (assembly accession:"+acc+")"  +newline+"---------------------"+newline);
829
830            out.append(newline+"\tCurrent Versions:"+newline);
831            for (Database d : sp.getDatabasesByVersion(sp.getMostRecentCoreDatabase().getDBVersion())) {
832                out.append("\t\t" + d.getdBName() +newline);
833            }
834            out.append("\tBy Type:"+newline);
835            for (EnsemblDBType t : EnsemblDBType.getCollectionDatabaseTypes()) {
836
837                out.append("\t" + t.toString() +newline);
838                if (sp.getDatabasesByType(t).isEmpty()) {
839                    out.append("\t\tNONE"+newline);
840                } else {
841                    for (Database d : sp.getDatabasesByType(t)) {
842                        out.append("\t\t" + d.getdBName() +newline);
843                    }
844                }
845            }
846        }        
847        
848        
849        out.append(newline+"OLD STYLE SPECIES ID (using name matching, typically pre release 17):"+newline+newline);
850        
851        if (this.oldstyle_collectionspeciesHashByDBName.values().isEmpty()) {
852            out.append("\tNONE"+newline);
853        }
854
855        for (DBCollectionSpecies sp : this.oldstyle_collectionspeciesHashByDBName.values()) {
856            String id = sp.getSpeciesBinomial();
857            String acc = sp.getAssemblyAccessionStem()!=null && !sp.getAssemblyAccessionStem().isEmpty() ? sp.getAssemblyAccessionStem():" ";
858            
859            out.append(id+" (assembly accession:"+acc+")"  +newline+"---------------------"+newline);
860
861            out.append(newline+"\tCurrent Versions:"+newline);
862            for (Database d : sp.getDatabasesByVersion(sp.getMostRecentCoreDatabase().getDBVersion())) {
863                out.append("\t\t" + d.getdBName() +newline);
864            }
865            out.append("\tBy Type:"+newline);
866            for (EnsemblDBType t : EnsemblDBType.getCollectionDatabaseTypes()) {
867
868                out.append("\t" + t.toString() +newline);
869                if (sp.getDatabasesByType(t).isEmpty()) {
870                    out.append("\t\tNONE"+newline);
871                } else {
872                    for (Database d : sp.getDatabasesByType(t)) {
873                        out.append("\t\t" + d.getdBName() +newline);
874                    }
875                }
876            }
877        }
878
879        out.append(newline+"Core Collection Databases: Species and build information"+newline);
880        out.append("--------------------------------------------------------"+newline);
881
882        if (this.collectionDatabases.isEmpty()) {
883            out.append(newline+"NONE"+newline);
884        }
885
886        for (DBCollectionDatabase d : this.collectionDatabases) {
887            if (d.getType().equals(EnsemblDBType.collection_core)) {
888
889                out.append(newline+"CORECOLLECTION DB: " + d.dBName +newline);
890
891                for (DBCollectionSpecies species : d.getCollection().getSpecies()) {
892
893                    DBCollectionCoreDatabase db = (DBCollectionCoreDatabase) d;
894                    out.append("\tSPECIES: " + species.commonName +newline);
895
896                    //add a safety check
897                    if (db.getBuildLevels(species) != null) {
898                        for (FeatureType key : db.getBuildLevels(species).keySet()) {
899                            out.append("\t\t" + key.toString() + " : " + db.getBuildLevels(species).get(key) +newline);
900                        }
901                    }
902
903                }
904            }
905        }
906
907             out.flush();
908            out.close();
909            return file;
910    }    
911    
912    @Override
913    public Properties getConfigProperties() {
914        return configProperties;
915    }    
916    
917    @Override
918    public String getMostRecentEnsemblVersion() {
919        return mostRecentEnsemblVersion;
920    }
921    
922    //@Override
923    public String getMostRecentDBVersion(String speciesOrCollection) throws NonUniqueException {
924
925        if (speciesOrCollection == null || speciesOrCollection.isEmpty()) {
926            return "";
927        }
928
929        String dbName = speciesOrCollection;
930
931        //can throw nonuniqueexception
932        //it shouldnt do this if the 'speciesOrCollection' input is a db classifier
933        if (this.getEnsemblNameForAlias(speciesOrCollection) != null) {
934            dbName = this.getEnsemblNameForAlias(speciesOrCollection);
935        }
936
937        if (this.databaseVersions.containsKey(dbName)) {
938            return this.databaseVersions.get(dbName).last().toString();
939        } else {
940            return "";
941        }
942
943    }    
944
945    public TreeSet<Integer> getKnownSchemaVersions() {
946        return knownSchemaVersions;
947    }
948
949    @Override
950    public int getHighestEnsemblSchemaVersion() throws DAOException, ConfigurationException {
951        if (this.alreadyInitialized || this.alreadyParsed) {
952            return highestEnsemblSchemaVersion;
953        } else {
954            if (this.datasourceType != null) {
955                this.parseWithoutInitializing();
956            } else {
957                throw new ConfigurationException("Attempt to retrieve HighestEnsemblSchemaVersion from uninitialized database.");
958            }
959            return highestEnsemblSchemaVersion;
960        }
961    }
962
963    @Override
964    public int getHighestReleaseVersion() throws DAOException, ConfigurationException {
965        if (this.alreadyInitialized || this.alreadyParsed) {
966            return highestReleaseVersion;
967        } else {
968            if (this.datasourceType != null) {
969                this.parseWithoutInitializing();
970            } else {
971                throw new ConfigurationException("Attempt to retrieve HighestReleaseVersion from uninitialized database.");
972            }
973            return highestReleaseVersion;
974        }
975    }    
976    
977    // Database look up methods
978    @Override
979    public DBDatabase getDatabase(String species_name_or_alias) throws NonUniqueException {
980        return getDatabase(species_name_or_alias, null, null);
981    }
982
983    @Override
984    public DBDatabase getDatabase(String species_name_or_alias, String db_version) throws NonUniqueException {
985        return getDatabase(species_name_or_alias, null, db_version);
986    }
987
988    @Override
989    public DBDatabase getDatabase(String speciesNameOrAlias_collectionName_comparaDivision,
990            DatabaseType database_type, String db_version) throws NonUniqueException {
991
992        String name = speciesNameOrAlias_collectionName_comparaDivision;
993
994        DBDatabase out = null;
995
996        //default to CORE type if not provided
997        EnsemblDBType type = (database_type != null
998                && database_type.toString() != null
999                && !database_type.toString().equals(""))
1000                ? (EnsemblDBType) database_type
1001                : EnsemblDBType.core;
1002
1003        if (name == null || name.isEmpty()) {
1004            if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(type)) {
1005                name = "multi";
1006            } else {
1007                //deff out if no name
1008                return out;
1009            }
1010        }
1011
1012        //default to current dbVersion if not provided
1013        String version = (db_version != null
1014                && !db_version.isEmpty()
1015                && !db_version.equals("current"))
1016                ? db_version
1017                //                : this.getMostRecentDBVersion();
1018                : this.getMostRecentDBVersion(name);
1019
1020        String speciesOrGroup = "";
1021
1022        if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(type)) {
1023            //no look up possible
1024            speciesOrGroup = name;
1025        } else if (EnsemblDBType.getSingleSpeciesDatabaseTypes().contains(type)) {
1026            //do a look up for aliases
1027            speciesOrGroup = this.getEnsemblNameForAlias(name);
1028            if (speciesOrGroup == null) {
1029                //deff out
1030                return out;
1031            }
1032        } else if (EnsemblDBType.getCollectionDatabaseTypes().contains(type)) {
1033            //no look up possible
1034            speciesOrGroup = name;
1035        }
1036
1037        if (EnsemblDBType.getSingleSpeciesDatabaseTypes().contains(type)) {
1038            for (DBSingleSpeciesDatabase d : this.singleSpeciesDatabases) {
1039                if (d.getDbSpeciesName().equals(speciesOrGroup)
1040                        && d.getDBVersion().equals(version)
1041                        && d.getType() == type) {
1042                    return d;
1043                }
1044            }
1045        } else if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(type)) {
1046
1047            EnsemblComparaDivision gp = EnsemblComparaDivision.getEnsemblComparaDivision(speciesOrGroup);
1048            for (DBComparisonDatabase d : this.comparisonDatabases) {
1049                if (d.getComparisonDivision().equals(gp)
1050                        && d.getDBVersion().equals(version)
1051                        && d.getType() == type) {
1052                    return d;
1053                }
1054            }
1055        } //this is only working on Groups not ind species
1056        else if (EnsemblDBType.getCollectionDatabaseTypes().contains(type)) {
1057            for (DBCollectionDatabase d : this.collectionDatabases) {
1058                if (d.getCollectionName().equals(speciesOrGroup)
1059                        && d.getDBVersion().equals(version)
1060                        && d.getType() == type) {
1061                    return d;
1062                }
1063            }
1064        }
1065
1066        return out;
1067    }
1068
1069    @Override
1070    public ComparisonDatabase getComparaDatabase(EnsemblComparaDivision comparaDivision, String db_version) {
1071
1072        if (this.comparaDBByDivision.get(comparaDivision) != null) {
1073            return this.comparaDBByDivision.get(comparaDivision).get(db_version);
1074        } else {
1075            return null;
1076        }
1077    }
1078
1079    @Override
1080    public DBDatabase getDatabaseForFullName(String fullname) {
1081
1082        DBDatabase out = null;
1083
1084        if (fullname == null || fullname.isEmpty()) {
1085            return out;
1086        }
1087
1088        for (DBDatabase d : this.allDatabases) {
1089            if (d.getdBName().equals(fullname)) {
1090                out = d;
1091                break;
1092            }
1093        }
1094        return out;
1095    }
1096
1097    @Override
1098    public TreeSet<DBDatabase> getDatabasesByType(DatabaseType type) {
1099        TreeSet<DBDatabase> out = new TreeSet<DBDatabase>();
1100
1101        for (DBDatabase d : this.allDatabases) {
1102            if (d.getType() == type) {
1103                out.add(d);
1104            }
1105        }
1106        return out;
1107    }
1108
1109    @Override
1110    public String findMybatisSchemaForSchemaVersion(DatabaseType type, String schema_version) {
1111
1112        String out = null;
1113
1114        try {
1115            out = schemaVersion.getMybatisSchemaPath(type.toString(), schema_version);
1116        } catch (Exception e) {
1117        }
1118
1119        //
1120        if (out == null) {
1121            try {
1122                out = schemaVersion.getMybatisSchemaPath(type.toString(), schemaVersion.getCurrentEnsemblVersion());
1123            } catch (Exception e) {
1124            }
1125        }
1126
1127        return out;
1128    }
1129
1130    /**
1131     * Looks up the ensembl_genus_species_name for the given alias from the
1132     * ensemblNameForAliasHash hash of aliases in this Registry.
1133     *
1134     * @param alias String
1135     * @return String the ensembl_genus_species_name
1136     */
1137    @Override
1138    public String getEnsemblNameForAlias(String alias) throws NonUniqueException {
1139        if (alias == null || alias.isEmpty()) {
1140            return null;
1141        }
1142        //if the entered 'alias' is a good ensembl name
1143        if (ensemblNameForAliasHash.containsValue(alias)) {
1144            return alias;
1145        }
1146        //else look for the lowercase version of 'alias' as an alias
1147        String get = ensemblNameForAliasHash.get(alias.toLowerCase());
1148        if (get == null) {
1149            return null;
1150        } else if (get.contains(DBRegistry.AmbiguitySeparator)) {
1151
1152            Collection<String> allHits = new HashSet<String>();
1153            String[] split = get.split("\\|OR\\|");
1154            allHits.addAll(Arrays.asList(split));
1155            throw new NonUniqueException("Alias: '" + alias + "' matches more than one Ensembl Name."
1156                    + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { "
1157                    + "System.out.println(((Species) o).getDatabaseStyleName()); }", allHits);
1158        } else {
1159            return get;
1160        }
1161    }
1162
1163/**
1164     * Looks up the GC_assembly_accession_stem for the given alias of a
1165     * collectionspecies. Post v16 this is the preferred identifier for a
1166     * CollectionSpecies.
1167     *
1168     * @param alias String
1169     * @return String the GC_assembly_accession_stem
1170     */
1171    public String getGCAssemblyAccessionForAlias(String alias) throws NonUniqueException {
1172        if (alias == null || alias.isEmpty()) {
1173            return null;
1174        }
1175
1176        String get = assemblyAccessionStemForAliasHash.get(alias.toLowerCase());
1177        if (get == null) {
1178            return null;
1179        } else if (get.contains(DBRegistry.AmbiguitySeparator)) {
1180
1181            Collection<String> allHits = new HashSet<String>();
1182            String[] split = get.split("\\|OR\\|");
1183            allHits.addAll(Arrays.asList(split));
1184            throw new NonUniqueException("Alias matches more than one Ensembl Name."
1185                    + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { "
1186                    + "System.out.println(((Species) o).getDatabaseStyleName()); }", allHits);
1187        } else {
1188            return get;
1189        }
1190
1191    }    
1192    
1193    public Set<String> getEnsemblNamesForAliasBeginning(String alias) {
1194        if (alias == null || alias.isEmpty()) {
1195            return null;
1196        }
1197        
1198        Set<String> out = new HashSet<String>();
1199        
1200        //look for the lowercase version of 'alias' as an alias
1201        String search = alias.toLowerCase();
1202        
1203        for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) {
1204            if (e.getKey().startsWith(search)) {               
1205                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1206                    String[] split = e.getValue().split("\\|OR\\|");
1207                    out.addAll(Arrays.asList(split));
1208                } else {                
1209                    out.add(e.getValue());
1210                }
1211            }
1212        }
1213        return out;
1214    }
1215    
1216    public Set<String> getEnsemblNamesForAliasContaining(String alias) {
1217        if (alias == null || alias.isEmpty()) {
1218            return null;
1219        }
1220        
1221        Set<String> out = new HashSet<String>();
1222        
1223        //look for the lowercase version of 'alias' as an alias
1224        String search = alias.toLowerCase();
1225        
1226        for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) {
1227            if (e.getKey().contains(search)) {               
1228                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1229                    String[] split = e.getValue().split("\\|OR\\|");
1230                    out.addAll(Arrays.asList(split));
1231                } else {                
1232                    out.add(e.getValue());
1233                }
1234            }
1235        }
1236        return out;
1237    }
1238    
1239    public Set<String> getGCAssemblyAccessionsForAliasBeginning(String alias)  {
1240        if (alias == null || alias.isEmpty()) {
1241            return null;
1242        }
1243
1244        Set<String> out = new HashSet<String>();
1245        
1246        //look for the lowercase version of 'alias' as an alias
1247        String search = alias.toLowerCase();
1248        
1249        for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) {
1250            if (e.getKey().startsWith(search)) {               
1251                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1252                    String[] split = e.getValue().split("\\|OR\\|");
1253                    out.addAll(Arrays.asList(split));
1254                } else {                
1255                    out.add(e.getValue());
1256                }
1257            }
1258        }
1259        return out;        
1260    } 
1261    
1262    public Set<String> getGCAssemblyAccessionsForAliasContaining(String alias)  {
1263        if (alias == null || alias.isEmpty()) {
1264            return null;
1265        }
1266
1267        Set<String> out = new HashSet<String>();
1268        
1269        //look for the lowercase version of 'alias' as an alias
1270        String search = alias.toLowerCase();
1271        
1272        for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) {
1273            if (e.getKey().contains(search)) {               
1274                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1275                    String[] split = e.getValue().split("\\|OR\\|");
1276                    out.addAll(Arrays.asList(split));
1277                } else {                
1278                    out.add(e.getValue());
1279                }
1280            }
1281        }
1282        return out;        
1283    } 
1284    
1285    public Set<? extends Species> getSpeciesForAliasBeginning(String alias) {
1286        
1287        if (this.getDatasourceType().equals(DataSource.ENSEMBLBACTERIA)) {
1288            return this.getBacterialSpeciesForAliasBeginning(alias);
1289        }
1290        
1291        if (alias == null || alias.isEmpty()) {
1292            return null;
1293        }
1294        
1295        Set<DBSpecies> out = new HashSet<DBSpecies>();
1296        
1297        //look for the lowercase version of 'alias' as an alias
1298        String search = alias.toLowerCase();
1299        
1300        for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) {
1301            if (e.getKey().startsWith(search)) {               
1302                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1303                    String[] split = e.getValue().split("\\|OR\\|");
1304                    for (String s:split) {
1305                        out.add(this.speciesHash.get(s));
1306                    }
1307                } else {                
1308                    out.add(this.speciesHash.get(e.getValue()));
1309                }
1310            }
1311        }
1312        return out;
1313    }
1314    
1315    public Set<? extends Species> getSpeciesForAliasContaining(String alias) {
1316        
1317        if (this.getDatasourceType().equals(DataSource.ENSEMBLBACTERIA)) {
1318            return this.getBacterialSpeciesForAliasContaining(alias);
1319        }
1320        
1321        if (alias == null || alias.isEmpty()) {
1322            return null;
1323        }
1324        
1325        Set<DBSpecies> out = new HashSet<DBSpecies>();
1326        
1327        //look for the lowercase version of 'alias' as an alias
1328        String search = alias.toLowerCase();
1329        
1330        for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) {
1331            if (e.getKey().contains(search)) {               
1332                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1333                    String[] split = e.getValue().split("\\|OR\\|");
1334                    for (String s:split) {
1335                        out.add(this.speciesHash.get(s));
1336                    }
1337                } else {                
1338                    out.add(this.speciesHash.get(e.getValue()));
1339                }
1340            }
1341        }
1342        return out;
1343    }
1344    
1345    public Set<? extends CollectionSpecies> getBacterialSpeciesForAliasBeginning(String alias) {
1346        
1347        if (this.highestReleaseVersion < 17) {
1348            return this.getOldStyleBacterialSpeciesForAliasBeginning(alias);
1349        }
1350
1351        if (alias == null || alias.isEmpty()) {
1352            return null;
1353        }
1354        
1355        Set<DBCollectionSpecies> out = new HashSet<DBCollectionSpecies>();
1356        
1357        //look for the lowercase version of 'alias' as an alias
1358        String search = alias.toLowerCase();
1359        
1360        for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) {
1361            if (e.getKey().startsWith(search)) {               
1362                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1363                    String[] split = e.getValue().split("\\|OR\\|");
1364                    for (String s:split) {
1365                        out.add(this.collectionspeciesHashByGCAccession.get(s));
1366                    }
1367                } else {                
1368                    out.add(this.collectionspeciesHashByGCAccession.get(e.getValue()));
1369                }
1370            }
1371        }
1372        return out;
1373    }
1374    
1375    public Set<? extends CollectionSpecies> getBacterialSpeciesForAliasContaining(String alias) {
1376        
1377        if (this.highestReleaseVersion < 17) {
1378            return this.getOldStyleBacterialSpeciesForAliasContaining(alias);
1379        }
1380
1381        if (alias == null || alias.isEmpty()) {
1382            return null;
1383        }
1384        
1385        Set<DBCollectionSpecies> out = new HashSet<DBCollectionSpecies>();
1386        
1387        //look for the lowercase version of 'alias' as an alias
1388        String search = alias.toLowerCase();
1389        
1390        for (Entry<String,String> e:assemblyAccessionStemForAliasHash.entrySet() ) {
1391            if (e.getKey().contains(search)) {               
1392                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1393                    String[] split = e.getValue().split("\\|OR\\|");
1394                    for (String s:split) {
1395                        out.add(this.collectionspeciesHashByGCAccession.get(s));
1396                    }
1397                } else {                
1398                    out.add(this.collectionspeciesHashByGCAccession.get(e.getValue()));
1399                }
1400            }
1401        }
1402        return out;
1403    }
1404    
1405    public Set<? extends CollectionSpecies> getOldStyleBacterialSpeciesForAliasBeginning(String alias)  {
1406        if (alias == null || alias.isEmpty()) {
1407            return null;
1408        }
1409
1410        Set<CollectionSpecies> out = new HashSet<CollectionSpecies>();
1411        
1412        //look for the lowercase version of 'alias' as an alias
1413        String search = alias.toLowerCase();
1414        
1415        for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) {
1416            if (e.getKey().startsWith(search)) {               
1417                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1418                    String[] split = e.getValue().split("\\|OR\\|");                   
1419                    for (String s:split) {
1420                      out.add(this.oldstyle_collectionspeciesHashByDBName.get(s) );
1421                    }
1422                } else {                
1423                    out.add(this.oldstyle_collectionspeciesHashByDBName.get(e.getValue()));
1424                }
1425            }
1426        }
1427        return out;        
1428    }
1429    
1430    public Set<? extends CollectionSpecies> getOldStyleBacterialSpeciesForAliasContaining(String alias)  {
1431        if (alias == null || alias.isEmpty()) {
1432            return null;
1433        }
1434
1435        Set<CollectionSpecies> out = new HashSet<CollectionSpecies>();
1436        
1437        //look for the lowercase version of 'alias' as an alias
1438        String search = alias.toLowerCase();
1439        
1440        for (Entry<String,String> e:ensemblNameForAliasHash.entrySet() ) {
1441            if (e.getKey().contains(search)) {               
1442                if (e.getValue().contains(DBRegistry.AmbiguitySeparator)) {
1443                    String[] split = e.getValue().split("\\|OR\\|");                   
1444                    for (String s:split) {
1445                      out.add(this.oldstyle_collectionspeciesHashByDBName.get(s) );
1446                    }
1447                } else {                
1448                    out.add(this.oldstyle_collectionspeciesHashByDBName.get(e.getValue()));
1449                }
1450            }
1451        }
1452        return out;        
1453    }
1454
1455    /**
1456     * Method for an adhoc alias to be added by the user in this session. Adds
1457     * the alias to the aliases TreeSet of the cognate DBSpecies and to the
1458     * ensemblNameForAliasHash hash of aliases in this Registry
1459     *
1460     * @param ensembl_genus_species_name Ensmebl databse dbVersion of the name,
1461     * e.g. 'homo_sapiens'
1462     * @param alias any string the user wants to use an alias for this
1463     * @return boolean true if successful
1464     * @throws Exception if update fails, with meaning full message
1465     */
1466    @Override
1467    public boolean addSessionAlias(String ensembl_genus_species_name, String alias) throws Exception {
1468
1469        if (ensembl_genus_species_name == null || alias == null || ensembl_genus_species_name.isEmpty() || alias.isEmpty()) {
1470            throw new Exception("Failed to add alias: " + alias + " for species: " + ensembl_genus_species_name);
1471        }
1472        Species spData = null;
1473
1474        try {
1475            spData = this.speciesHash.get(ensembl_genus_species_name);
1476            if (spData == null) {
1477                spData = this.getCSpeciesByGCAccessionStem(this.assemblyAccessionStemForAliasHash.get(ensembl_genus_species_name));
1478                if (spData == null) {
1479                    spData = this.oldstyle_collectionspeciesHashByDBName.get(ensembl_genus_species_name);
1480                }
1481                if (spData == null) {
1482                    throw new Exception("No species in ensembl registry called: " + ensembl_genus_species_name);
1483                }
1484            }
1485
1486        } catch (Exception e) {
1487            throw new Exception("Failed to add alias: " + alias + " for species: " + ensembl_genus_species_name, e);
1488        }
1489        return this.addSessionAlias(spData, alias);
1490    }
1491
1492    @Override
1493    public boolean addSessionAlias(Species sp, String alias) throws Exception {
1494        if (sp == null || alias == null || alias.isEmpty()) {
1495            throw new Exception("Failed to add alias: " + alias + " for species: " + sp);
1496        }
1497
1498        if (this.getGCAccessionedCollectionSpecies().contains(sp)) {
1499            sp.getAliases().add(alias.toLowerCase());
1500            this.assemblyAccessionStemForAliasHash.put(alias.toLowerCase(), sp.getAssemblyAccessionStem());
1501        } else {
1502            sp.getAliases().add(alias.toLowerCase());
1503            if (this.ensemblNameForAliasHash.containsKey(alias.toLowerCase())) {
1504                String n = this.ensemblNameForAliasHash.get(alias.toLowerCase()).concat(AmbiguitySeparator + sp.getDatabaseStyleName());
1505                this.ensemblNameForAliasHash.put(alias.toLowerCase(), n);
1506            } else {
1507                this.ensemblNameForAliasHash.put(alias.toLowerCase(), sp.getDatabaseStyleName());
1508            }
1509        }
1510        return true;
1511    }
1512
1513    /**
1514     * Returns a HashSet<DBSpecies> matching the given alias. If name is not a
1515     * valid String, returns null, if no species are found, returns empty Set.
1516     * If Name matches any DBSpecies, these are returned without searching for
1517     * DBCollectionSpecies. Note if we hit a CollectionSpecies - any more recent
1518     * Species with GCA (ENA) Identifiers will be returned before searching for
1519     * older Species (lacking GCA IDs) which match the alias. If you want to be
1520     * sure to search for all alias hits use getSetOfEverySpeciesByAlias(String
1521     * alias_or_name).
1522     *
1523     * @param alias_or_name
1524     * @return a Collection<? extends Species> of DBSpecies.
1525     */
1526    @Override
1527    public Collection<? extends Species> getSetOfSpeciesByAlias(String alias_or_name) {
1528        HashSet<DBSpecies> ret = new HashSet<DBSpecies>();
1529
1530        if (alias_or_name == null || alias_or_name.isEmpty()) {
1531            return null;
1532        }
1533
1534        //first look for DBSpecies
1535        DBSpecies out = null;
1536        String name = null;
1537        try {
1538            name = this.getEnsemblNameForAlias(alias_or_name);
1539            out = (name != null) ? this.speciesHash.get(name) : null;
1540            if (out != null) {
1541                ret.add(out);
1542            }
1543        } catch (NonUniqueException ex) {
1544            for (String acc : (Collection<String>) ex.getAllHits()) {
1545                if (this.speciesHash.get(acc) != null) {
1546                    ret.add(this.speciesHash.get(acc));
1547                }
1548            }
1549        }
1550        if (!ret.isEmpty()) {
1551            return ret;
1552        }
1553
1554        //if no DBSpecies hit
1555        //we next look for a recent type of CollectionSpecies 
1556        String gcAccession = null;
1557        try {
1558            gcAccession = this.getGCAssemblyAccessionForAlias(alias_or_name);
1559            out = (gcAccession != null) ? this.collectionspeciesHashByGCAccession.get(gcAccession)
1560                    : null;
1561            if (out != null) {
1562                ret.add(out);
1563            } else {
1564                //if the alias hasn't pulled out a recent CollectionSpecies = fall back to look for an older one
1565                name = this.getEnsemblNameForAlias(alias_or_name);
1566                out = (name != null) ? this.oldstyle_collectionspeciesHashByDBName.get(name) : null;
1567                if (out != null) {
1568                    ret.add(out);
1569                }
1570            }
1571        } catch (NonUniqueException ex) {
1572            for (String acc : (Collection<String>) ex.getAllHits()) {
1573                if (collectionspeciesHashByGCAccession.get(acc) != null) {
1574                    ret.add(this.collectionspeciesHashByGCAccession.get(acc));
1575                }
1576            }
1577            if (ret.isEmpty()) {
1578                //if the alias doesn't pulled out recent CollectionSpecies = fall back to look for older ones
1579                for (String acc : (Collection<String>) ex.getAllHits()) {
1580                    if (this.oldstyle_collectionspeciesHashByDBName.get(acc) != null) {
1581                        ret.add(this.oldstyle_collectionspeciesHashByDBName.get(acc));
1582                    }
1583                }
1584            }
1585        }
1586        return ret;
1587    }
1588
1589    /**
1590     * Returns a HashSet<DBSpecies> of all DBSpecies and old and newstyle
1591     * CollectionSpecies matching the given alias. If name is not a valid
1592     * String, returns null, if no species are found, returns empty Set.
1593     *
1594     * @param alias_or_name
1595     * @return a Collection<? extends Species> of DBSpecies.
1596     */
1597    @Override
1598    public Collection<? extends Species> getSetOfEverySpeciesByAlias(String alias_or_name) {
1599        HashSet<DBSpecies> ret = new HashSet<DBSpecies>();
1600
1601        if (alias_or_name == null || alias_or_name.isEmpty()) {
1602            return null;
1603        }
1604
1605        DBSpecies out = null;
1606
1607        String name = null;
1608        try {
1609            name = this.getEnsemblNameForAlias(alias_or_name);
1610            out = (name != null) ? this.speciesHash.get(name) : null;
1611            if (out != null) {
1612                ret.add(out);
1613            }
1614            out = (name != null) ? this.oldstyle_collectionspeciesHashByDBName.get(name)
1615                    : null;
1616            if (out != null) {
1617                ret.add(out);
1618            }
1619        } catch (NonUniqueException ex) {
1620            for (String acc : (Collection<String>) ex.getAllHits()) {
1621                if (this.speciesHash.get(acc) != null) {
1622                    ret.add(this.speciesHash.get(acc));
1623                }
1624                if (this.oldstyle_collectionspeciesHashByDBName.get(acc) != null) {
1625                    ret.add(this.oldstyle_collectionspeciesHashByDBName.get(acc));
1626                }
1627            }
1628        }
1629
1630        String gcAccession = null;
1631        try {
1632            gcAccession = this.getGCAssemblyAccessionForAlias(alias_or_name);
1633            out = (gcAccession != null) ? this.collectionspeciesHashByGCAccession.get(gcAccession)
1634                    : null;
1635            if (out != null) {
1636                ret.add(out);
1637            }
1638        } catch (NonUniqueException ex) {
1639            for (String acc : (Collection<String>) ex.getAllHits()) {
1640                if (collectionspeciesHashByGCAccession.get(acc) != null) {
1641                    ret.add(this.collectionspeciesHashByGCAccession.get(acc));
1642                }
1643            }
1644        }
1645        return ret;
1646    }
1647
1648    /**
1649     * Wraps getSetOfSpeciesByAlias(String alias_or_name) to return a single
1650     * species if the returned Collection has only one member. If no species are
1651     * found, returns null. If more than one species is found, it throws a
1652     * NonUniqueException which holds the HashSet<DBSpecies> of results. Note if
1653     * we hit a CollectionSpecies - more recent Species with GCA (ENA)
1654     * Identifiers will be returned in preference to older Species lacking IDs.
1655     *
1656     * @param alias_or_name
1657     * @return
1658     * @throws NonUniqueException
1659     */
1660    @Override
1661    public DBSpecies getSpeciesByAlias(String alias_or_name) throws NonUniqueException {
1662        HashSet<DBSpecies> ret = (HashSet<DBSpecies>) this.getSetOfSpeciesByAlias(alias_or_name);
1663
1664        if (ret == null || ret.isEmpty()) {
1665            return null;
1666        }
1667
1668        if (ret.size() == 1) {
1669            return ret.iterator().next();
1670        } else {
1671            throw new NonUniqueException("More than one species with this alias is found. "
1672                    + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { "
1673                    + "System.out.println(((Species) o).getDatabaseStyleName()); }", ret);
1674        }
1675
1676    }
1677
1678    /**
1679     * Wraps getSetOfEverySpeciesByAlias(String alias_or_name) to return a
1680     * single species if the returned Collection has only one member with the
1681     * desired alias and version. If no species are found, returns null. If more
1682     * than one species is found for the datasource version, it throws a
1683     * NonUniqueException which holds the HashSet<DBSpecies> of results.
1684     *
1685     * @param alias_or_name
1686     * @return
1687     * @throws NonUniqueException
1688     */
1689    @Override
1690    public DBSpecies getSpeciesByAlias(String alias_or_name, String version) throws NonUniqueException {
1691        HashSet<DBSpecies> ret = (HashSet<DBSpecies>) this.getSetOfEverySpeciesByAlias(alias_or_name);
1692        HashSet<DBSpecies> versions = new HashSet<DBSpecies>();
1693
1694        if (ret == null || ret.isEmpty()) {
1695            return null;
1696        } else if (ret.size() == 1) {
1697            DBSpecies o = ret.iterator().next();
1698            if (o.getDBVersions().contains(version)) {
1699                return o;
1700            } else {
1701                return null;
1702            }
1703        } else {
1704            for (DBSpecies sp : ret) {
1705                if (sp.getDBVersions().contains(version)) {
1706                    versions.add(sp);
1707                }
1708            }
1709            if (versions.isEmpty()) {
1710                return null;
1711            } else if (versions.size() == 1) {
1712                return versions.iterator().next();
1713            } else if (versions.size() > 1) {
1714                throw new NonUniqueException("More than one species with alias '"
1715                        + alias_or_name + "' is found for version '" + version + "' of the datasource. "
1716                        + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { "
1717                        + "System.out.println(((Species) o).getDatabaseStyleName()); }", versions);
1718            }
1719        }
1720        return null;
1721    }
1722
1723    /**
1724     * Returns the most recent Species matching the alias. Return null if no
1725     * match, throws NonUniqueException which holds the HashSet<DBSpecies> of
1726     * results if more than one, equally recent species is found.
1727     *
1728     * @param alias_or_name
1729     * @return
1730     * @throws NonUniqueException
1731     */
1732    @Override
1733    public DBSpecies getMostRecentSpeciesByAlias(String alias_or_name) throws NonUniqueException {
1734
1735        HashSet<DBSpecies> temp = (HashSet<DBSpecies>) this.getSetOfEverySpeciesByAlias(alias_or_name);
1736        TreeMap<Integer, HashSet<DBSpecies>> versionMap = new TreeMap<Integer, HashSet<DBSpecies>>();
1737
1738        HashSet<DBSpecies> out = null;
1739
1740        if (temp == null || temp.isEmpty()) {
1741            return null;
1742        }
1743
1744        if (temp.size() == 1) {
1745            return temp.iterator().next();
1746        } else {
1747            for (DBSpecies sp : temp) {
1748
1749                if (!versionMap.containsKey(sp.getHighestDBRelease())) {
1750                    versionMap.put(sp.getHighestDBRelease(), new HashSet<DBSpecies>());
1751                }
1752                versionMap.get(sp.getHighestDBRelease()).add(sp);
1753            }
1754        }
1755
1756        out = versionMap.lastEntry().getValue();
1757
1758        if (out.size() == 1) {
1759            return out.iterator().next();
1760        } else {
1761            throw new NonUniqueException("More than one species with this alias is found. "
1762                    + "You can examine the list of species by printing for (Object o: NUN.getAllHits() { "
1763                    + "System.out.println(((Species) o).getDatabaseStyleName()); }", out);
1764        }
1765
1766    }
1767
1768    @Override
1769    public DBSpecies getSpeciesByEnsemblName(String ensemblName) {
1770        if (ensemblName == null || ensemblName.isEmpty()) {
1771            return null;
1772        }
1773
1774        DBSpecies out = null;
1775        out = this.speciesHash.get(ensemblName);
1776        if (out == null) {
1777            out = this.oldstyle_collectionspeciesHashByDBName.get(ensemblName);
1778        }
1779
1780        return out;
1781    }
1782
1783    public DBCollectionSpecies getCSpeciesByGCAccessionStem(String key) {
1784        if (key == null || key.isEmpty()) {
1785            return null;
1786        }
1787        return this.collectionspeciesHashByGCAccession.get(key);
1788    }
1789
1790    @Override
1791    public DBCollection getMostRecentCollection(String name) {
1792        return this.getCollection(name, null);
1793    }
1794
1795    @Override
1796    public DBCollection getCollection(String name, String dbVersion) {
1797        DBCollection out = null;
1798
1799        if (name == null || name.isEmpty()) {
1800            return out;
1801        }
1802
1803        if (dbVersion == null || dbVersion.isEmpty()) {
1804            if (this.databaseVersions.containsKey(name)) {
1805                dbVersion = this.databaseVersions.get(name).last().toString();
1806            }
1807        }
1808
1809        if (dbVersion == null || dbVersion.isEmpty()) {
1810            return out;
1811        }
1812
1813        if (this.collectionNameDBVersionHash.containsKey(name)) {
1814            if (this.collectionNameDBVersionHash.get(name).containsKey(dbVersion)) {
1815                out = this.collectionNameDBVersionHash.get(name).get(dbVersion);
1816            }
1817        }
1818        return out;
1819    }
1820
1821    private void registerDatabase(DBDatabase db) {
1822        if (db instanceof DBCollectionDatabase) {
1823            this.collectionDatabases.add((DBCollectionDatabase) db);
1824            if (db.intSchemaVersion > this.highestEnsemblSchemaVersion) {
1825                this.highestEnsemblSchemaVersion = db.intSchemaVersion;
1826            }
1827            if (db.intDBVersion > this.highestReleaseVersion) {
1828                this.highestReleaseVersion = db.intDBVersion;
1829            }
1830        } else if (db instanceof DBSingleSpeciesDatabase) {
1831            this.singleSpeciesDatabases.add((DBSingleSpeciesDatabase) db);
1832            if (db.intSchemaVersion > this.highestEnsemblSchemaVersion) {
1833                this.highestEnsemblSchemaVersion = db.intSchemaVersion;
1834            }
1835            if (db.intDBVersion > this.highestReleaseVersion) {
1836                this.highestReleaseVersion = db.intDBVersion;
1837            }
1838        } else if (db instanceof DBComparisonDatabase) {
1839            this.comparisonDatabases.add((DBComparisonDatabase) db);
1840
1841            if (db.getType().equals(EnsemblDBType.compara)) {
1842
1843                if (!this.comparaDBByDivision.containsKey(((DBComparisonDatabase) db).getComparisonDivision())) {
1844                    this.comparaDBByDivision.put(((DBComparisonDatabase) db).getComparisonDivision(),
1845                            new HashMap<String, DBComparisonDatabase>());
1846                }
1847                this.comparaDBByDivision.get(((DBComparisonDatabase) db).getComparisonDivision()).put(
1848                        db.getDBVersion(), (DBComparisonDatabase) db);
1849
1850            }
1851
1852        }
1853
1854        //registering versions of this database
1855        if (!this.databaseVersions.containsKey(db.getdBClassifier())) {
1856            this.databaseVersions.put(db.getdBClassifier(), new TreeSet<Integer>());
1857        }
1858        this.databaseVersions.get(db.getdBClassifier()).add(db.getIntDBVersion());
1859
1860        this.allDatabases.add(db);
1861    }
1862
1863    private void pseudoRegisterDatabase(DBDatabase db) {
1864        if (db instanceof DBCollectionDatabase
1865                || db instanceof DBSingleSpeciesDatabase) {
1866
1867            if (db.intSchemaVersion > this.highestEnsemblSchemaVersion) {
1868                this.highestEnsemblSchemaVersion = db.intSchemaVersion;
1869            }
1870            if (db.intDBVersion > this.highestReleaseVersion) {
1871                this.highestReleaseVersion = db.intDBVersion;
1872            }
1873        }
1874    }
1875
1876    @Override
1877    public Collection<DBCollection> getCollectionRegistriesByName(String name) {
1878        if (name == null || name.isEmpty()) {
1879            return new ArrayList<DBCollection>();
1880        }
1881        if (collectionNameDBVersionHash.containsKey(name)) {
1882            return new ArrayList<DBCollection>(collectionNameDBVersionHash.get(name).values());
1883        } else {
1884            return new ArrayList<DBCollection>();
1885        }
1886    }
1887
1888    @Override
1889    public Collection<DBCollection> getCollectionsByDBVersion(String dbVersion) {
1890
1891        Collection<DBCollection> out = new ArrayList<DBCollection>();
1892        if (dbVersion == null || dbVersion.isEmpty()) {
1893            return out;
1894        }
1895
1896        for (String name : this.collectionNameDBVersionHash.keySet()) {
1897
1898            if (this.collectionNameDBVersionHash.get(name).containsKey(dbVersion)) {
1899                out.add(this.collectionNameDBVersionHash.get(name).get(dbVersion));
1900            }
1901        }
1902        return out;
1903    }
1904
1905    @Override
1906    public List<DBCollection> getCollections() {
1907        return collections;
1908    }
1909
1910    /**
1911     * Fetches the list of all DBSpecies objects for this Registry.
1912     *
1913     * @return
1914     */
1915    @Override
1916    public List<DBSpecies> getSpecies() {
1917        return new ArrayList<DBSpecies>(speciesHash.values());
1918    }
1919
1920    /**
1921     * Returns the combined list of DBCollectionSpecies that are indexed by
1922     * dbstylename (old_style_ and those indexed by GCAssemblyAccession (new
1923     * style, post v16)
1924     *
1925     * @return
1926     */
1927    @Override
1928    public List<DBCollectionSpecies> getCollectionSpecies() {
1929
1930        ArrayList<DBCollectionSpecies> out = new ArrayList<DBCollectionSpecies>(oldstyle_collectionspeciesHashByDBName.values());
1931        out.addAll(collectionspeciesHashByGCAccession.values());
1932        return out;
1933    }
1934
1935    /**
1936     * Utility method to return only those DBCOllectionSpecies that possess and
1937     * are indexed by a GCAssemblyAccession(Stem or Chain)
1938     *
1939     * @return
1940     */
1941    public List<DBCollectionSpecies> getGCAccessionedCollectionSpecies() {
1942        return new ArrayList<DBCollectionSpecies>(collectionspeciesHashByGCAccession.values());
1943    }
1944
1945    /**
1946     * Utility method to return only those DBCOllectionSpecies that do not
1947     * possess and are therefore not indexed by a GCAssemblyAccession but bey
1948     * dbstylename
1949     *
1950     * @return
1951     */
1952    public List<DBCollectionSpecies> getNamedCollectionSpecies() {
1953        return new ArrayList<DBCollectionSpecies>(oldstyle_collectionspeciesHashByDBName.values());
1954    }
1955
1956    @Override
1957    public DataSource getDatasourceType() {
1958        return datasourceType;
1959    }
1960
1961    @Override
1962    public DBDatabase makeDatabase(String db_name) throws ConfigurationException {
1963
1964        if (db_name == null || db_name.isEmpty()) {
1965            throw new ConfigurationException("Invalid database name: " + db_name);
1966        }
1967
1968        EnsemblDBType t = null;
1969
1970        for (EnsemblDBType et : EnsemblDBType.getAllDatabaseTypes()) {
1971
1972            if (et.toString().startsWith("collection")
1973                    && db_name.contains("_collection_")
1974                    && db_name.matches(new String(".+_" + et.toString() + "_\\d+.*"))) {
1975                t = et;
1976                break;
1977            } else if (db_name.matches(new String(".+_" + et.toString() + "_\\d+.*"))
1978                    || (et.toString().startsWith("ensembl_") && db_name.contains(et.toString() + "_"))) {
1979                t = et;
1980            }
1981        }
1982
1983        if (t == null) {
1984            throw new ConfigurationException("Unrecognized database type for database name: " + db_name);
1985        }
1986
1987        try {
1988            if (EnsemblDBType.getSingleSpeciesDatabaseTypes().contains(t)) {
1989                if (t.equals(EnsemblDBType.core)) {
1990                    return new DBSingleSpeciesCoreDatabase(db_name, t, this);
1991                } else if (t.equals(EnsemblDBType.variation)) {
1992                    return new DBSingleSpeciesVariationDatabase(db_name, t, this);
1993                } else {
1994                    //haven't implemented othersubtypes yet
1995                    return new DBSingleSpeciesDatabase(db_name, t, this);
1996                }
1997            } else if (EnsemblDBType.getCollectionDatabaseTypes().contains(t)) {
1998                if (t.equals(EnsemblDBType.collection_core)) {
1999                    return new DBCollectionCoreDatabase(db_name, t, this);
2000// to implement in the unlikely event we have collections with variation databases                    
2001//                } else if (t.equals(EnsemblDBType.collection_variation)) {
2002//                    return new DBCollectionVariationDatabase(db_name, t, this);
2003                } else {
2004                    //haven't implemented other subtypes yet
2005                    return new DBCollectionDatabase(db_name, t, this);
2006                }
2007            } else if (EnsemblDBType.getSpeciesComparisonDatabaseTypes().contains(t)) {
2008                return new DBComparisonDatabase(db_name, t, this);
2009            } else {
2010                //shouldnt hit here
2011                throw new Exception("Unrecognized database type for database name: " + db_name);
2012            }
2013        } catch (Exception e) {
2014            throw new ConfigurationException(e.getMessage());
2015        }
2016
2017    }
2018
2019    //DAO methods
2020    private List<String> getDatabases() throws DAOException {
2021
2022        List<String> outList = null;
2023        SqlSession session = null;
2024
2025        try {
2026            session = sqlMapper.openSession();
2027            DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2028            outList = dm.getAllDatabaseNames();
2029        } catch (Exception e) {
2030            throw new DAOException("Failed to interrogate all Database Names", e);
2031        } finally {
2032            if (session != null) {
2033                session.close();
2034            }
2035        }
2036
2037        if (outList == null) {
2038            /* A null tempList is a perfectly valid outcome to return */
2039            return new ArrayList<String>();
2040        }
2041
2042        // if we want to test parsing of databse names we can add them here..
2043        /*
2044         outList.add("ictidomys_tridecemlineatus_variation_68_2");
2045         outList.add("spermophilus_tridecemlineatus_variation_67_2");
2046         */
2047        return outList;
2048    }
2049
2050    private DBCollection getCollectionSpeciesProperties(CollectionCoreDatabase database) throws DAOException {
2051
2052        if (database == null) {
2053            return null;
2054        }
2055        DBCollection collection = null;
2056        DBCollectionCoreDatabase myDatabase = (DBCollectionCoreDatabase) database;
2057        String dBName = myDatabase.getdBName();
2058        String thisSchemaVersion = myDatabase.getSchemaVersion();
2059        String dbVersion = myDatabase.getDBVersion();
2060        TreeMap<Integer, DBCollectionSpecies> localSpp = new TreeMap<Integer, DBCollectionSpecies>();
2061        collection = new DBCollection(myDatabase);
2062
2063        List<HashMap> results = null;
2064        SqlSession session = null;
2065
2066        try {
2067            session = sqlMapper.openSession();
2068            DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2069            results = dm.getSpeciesFromCollection(dBName);
2070        } catch (Exception e) {
2071            throw new DAOException("Failed to interrogate species properties for " + dBName, e);
2072        } finally {
2073            if (session != null) {
2074                session.close();
2075            }
2076        }
2077
2078        if ((results == null) || (results.isEmpty() == true)) {
2079            /*  return empty collectiont*/
2080            return collection;
2081        }
2082
2083        HashMap<Integer, HashMap<String, String>> buildLevelsHash = new HashMap<Integer, HashMap<String, String>>();
2084
2085        for (Object o : results) {
2086
2087            Integer id = null;
2088            DBCollectionSpecies species = null;
2089
2090            HashMap m = (HashMap) o;
2091            m.put("schemaVersion", thisSchemaVersion);
2092            m.put("dbVersion", dbVersion);
2093
2094            id = (Integer) m.get("species_id");
2095
2096            // if we've already made species with this id  for this collection
2097            //use it
2098            if (localSpp.containsKey(id)) {
2099                species = localSpp.get(id);
2100                //and add this database
2101                //i think this is redundant - it should already be set...
2102                species.addDatabase(myDatabase);
2103            } //we haven't got a species with this id in the collection already
2104            //- so make one, add it, and use it
2105            else {
2106                try {
2107                    species = new DBCollectionSpecies(myDatabase);
2108                } catch (ConfigurationException ex) {
2109                    //what do we do here??
2110                }
2111                if (species == null) {
2112                    continue;
2113                }
2114                species.setIDForVersion(id, dbVersion);
2115                localSpp.put(id, species);
2116            }
2117
2118            if (!((String) m.get("meta_key")).endsWith("build.level")) {
2119                species.setProperty(m);
2120            } else {
2121
2122                if (buildLevelsHash.containsKey(id)) {
2123                    buildLevelsHash.get(id).put((String) m.get("meta_key"), (String) m.get("meta_value"));
2124                } else {
2125                    buildLevelsHash.put(id, new HashMap<String, String>());
2126                    buildLevelsHash.get(id).put((String) m.get("meta_key"), (String) m.get("meta_value"));
2127                }
2128            }
2129        }
2130        
2131        //loop through all the species made for this collection database
2132        //this adds them to one of two maps in the registry - by GCAccession or by db_style_name (production_name)
2133        for (DBCollectionSpecies s : localSpp.values()) {
2134
2135            // there may be species without dbstyle names...
2136            if (s.getDatabaseStyleName() == null
2137                    || s.getDatabaseStyleName().isEmpty()) {
2138                s.setDatabaseStyleSpeciesName(s.getSpeciesBinomial().toLowerCase().replace(' ', '_'));
2139                s.setDatabaseStyleSpeciesName(Integer.parseInt(dbVersion), s.getDatabaseStyleName());
2140            }
2141
2142            //this cant be used cos we havent populated the alias look up yet!
2143            //if (this.registry.getSpeciesByAlias(s.getDatabaseStyleName()) == null) {
2144            //ensembl have changed the case of the first letter of the dbname between v58 & v59
2145            //therefore the code below fails to rationalize species between collections 58 and 59
2146            //i am just going to live with this for the time being bcause the dbname is also used as the comparaname
2147            //and is also different between 58 and 59
2148            //hopefully from 59 things will be stable
2149            //give this to the reg
2150            //currently we are hashing on the production name ( databasestylename)
2151            //but i may need to alter this if we want continuity after release 16
2152            // if this species has a GCAccession - use it for the hashing
2153            if (s.getAssemblyAccession(dbVersion) != null
2154                    && !s.getAssemblyAccession(dbVersion).isEmpty()) {
2155
2156                String accession = s.getAssemblyAccession(dbVersion);
2157                String stem = accession.split("\\.")[0];
2158                if (this.getCSpeciesByGCAccessionStem(stem) == null) {
2159                    //we've made a new species so add it to the registry
2160                    //but to the hash on GCAccessionStem
2161                    this.collectionspeciesHashByGCAccession.put(stem, s);
2162                    //and add it to this collection
2163                    collection.addSpecies(s);
2164                } else {
2165                    //we've already got this species in the registry - so modify it there 
2166                    DBCollectionSpecies existingSpecies = this.getCSpeciesByGCAccessionStem(stem);
2167
2168                    Integer existingVersion = existingSpecies.getHighestDBRelease();
2169                    if (Integer.parseInt(dbVersion) > existingVersion) {
2170                        //reset any values that should be global for the species
2171                        existingSpecies.setCommonName(s.getCommonName());
2172                        existingSpecies.setShortName(s.getShortName());
2173                        existingSpecies.setSpeciesBinomial(s.getSpeciesBinomial());
2174                        existingSpecies.setUrlName(s.getUrlName());
2175                        existingSpecies.setComparaDivision(s.getComparaDivision());
2176                        existingSpecies.setEnsemblStablePrefix(s.getEnsemblStablePrefix());
2177                        existingSpecies.setTaxonomyID(s.getTaxonomyID());
2178
2179                        // the dbstylename is used for the TreeSet ordering - so we have a problem if it changes
2180                        //so we dont change this here! - but add a finalize step to set the most recent version
2181                        //existingSpecies.setDatabaseStyleSpeciesName(s.getDatabaseStyleName());
2182                    }
2183
2184                    existingSpecies.getAliases().addAll(s.getAliases());
2185                    existingSpecies.setAssemblyAccession(dbVersion, accession);
2186                    existingSpecies.setAssemblyName(dbVersion, s.getAssemblyName(dbVersion));
2187                    existingSpecies.setComparaName(dbVersion, s.getComparaName(dbVersion));
2188                    existingSpecies.setDatabaseStyleSpeciesName(Integer.parseInt(dbVersion), s.getComparaName(dbVersion));
2189                    existingSpecies.setIDForVersion(s.getDBSpeciesID(dbVersion), dbVersion);
2190                    existingSpecies.addDatabase(myDatabase);
2191                    collection.addSpecies(existingSpecies);
2192                }
2193            } //we dont have a GCAccession - so use the db style name....
2194            else {
2195                if (this.getSpeciesByEnsemblName(s.getDatabaseStyleName()) == null) {
2196                    //we've made a new species so add it to the registry
2197                    //but to the hash on db-syle_name
2198                    this.oldstyle_collectionspeciesHashByDBName.put(s.getDatabaseStyleName(), s);
2199                    //and add it to this collection
2200                    collection.addSpecies(s);
2201                } else {
2202                    //we've already got this species in the registry - so modify it there
2203                    DBCollectionSpecies existingSpecies = (DBCollectionSpecies) this.getSpeciesByEnsemblName(s.getDatabaseStyleName());
2204
2205                    Integer existingVersion = existingSpecies.getHighestDBRelease();
2206                    if (Integer.parseInt(dbVersion) > existingVersion) {
2207                        //reset any values that should be global for the species
2208                        existingSpecies.setCommonName(s.getCommonName());
2209                        existingSpecies.setShortName(s.getShortName());
2210                        existingSpecies.setSpeciesBinomial(s.getSpeciesBinomial());
2211                        existingSpecies.setUrlName(s.getUrlName());
2212                        existingSpecies.setComparaDivision(s.getComparaDivision());
2213                        existingSpecies.setEnsemblStablePrefix(s.getEnsemblStablePrefix());
2214                        existingSpecies.setTaxonomyID(s.getTaxonomyID());
2215                        // the dbstylename is used for the TreeSet ordering - so we have a problem if it changes
2216                        //so we dont change this here! - but add a finalize step
2217                        //existingSpecies.setDatabaseStyleSpeciesName(s.getDatabaseStyleName());
2218                    }
2219
2220                    existingSpecies.getAliases().addAll(s.getAliases());
2221                    existingSpecies.setAssemblyName(dbVersion, s.getAssemblyName(dbVersion));
2222                    existingSpecies.setIDForVersion(s.getDBSpeciesID(dbVersion), dbVersion);
2223                    existingSpecies.setComparaName(dbVersion, s.getComparaName(dbVersion));
2224                    existingSpecies.setDatabaseStyleSpeciesName(Integer.parseInt(dbVersion), s.getComparaName(dbVersion));
2225                    existingSpecies.addDatabase(myDatabase);
2226                    //and add it to this collection
2227                    collection.addSpecies(existingSpecies);
2228                }
2229            }
2230        }
2231
2232        myDatabase.setCollection(collection);
2233
2234        //loop though all the species of the collection
2235        //to copy the build levels from here to the database
2236        for (DBSpecies s : collection.getSpecies()) {
2237            DBCollectionSpecies sp = (DBCollectionSpecies) s;
2238            if (buildLevelsHash.containsKey(sp.getDBSpeciesID(dbVersion))) {
2239                myDatabase.setBuildLevels(sp, buildLevelsHash.get(sp.getDBSpeciesID(dbVersion)));
2240            }
2241        }
2242
2243        return collection;
2244    }
2245
2246//    //not used yet
2247//    private List<String> getAllCoreDatabasesForVersion(int dbVersion) throws DAOException {
2248    //<editor-fold defaultstate="collapsed" desc="comment">
2249    //
2250    //        int release = dbVersion;
2251    //        List<String> outList = null;
2252    //        SqlSession session = null;
2253    //
2254    //        try {
2255    //            session = sqlMapper.openSession();
2256    //            DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2257    //            outList = dm.getAllCoreDatabaseNames(release);
2258    //        } catch (Exception e) {
2259    //            throw new DAOException("Failed to interrogate all core Database Names for release " + release, e);
2260    //        } finally {
2261    //            if (session != null) {
2262    //                session.close();
2263    //            }
2264    //        }
2265    //
2266    //        if (outList == null || outList.isEmpty()) {
2267    //            /* A null List is a perfectly valid outcome to return */
2268    //            return null;
2269    //        }
2270    //
2271    //        return outList;
2272    //    }
2273    //</editor-fold>
2274    private DBSpecies createSpecies(SingleSpeciesCoreDatabase database) throws DAOException {
2275
2276        DBSpecies spData = new DBSpecies((DBSingleSpeciesCoreDatabase) database);
2277        Integer dbVersion = database.getIntDBVersion();
2278        List<HashMap> tempList = null;
2279
2280        SqlSession session = null;
2281
2282        try {
2283            session = sqlMapper.openSession();
2284            DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2285            tempList = dm.getSpeciesProperties(database.getdBName());
2286        } catch (Exception e) {
2287            throw new DAOException("Failed to interrogate species properties for " + spData.getDatabaseStyleName(), e);
2288        } finally {
2289            if (session != null) {
2290                session.close();
2291            }
2292        }
2293
2294        //for pre dbVersion 59 releases, the meta table is not guaranteed
2295        //to hold the species.production_name, species.short_name and
2296        //species.scientific_name meta_keys
2297        //pre schemaVersion 59 the compara name was always = Species Binomial
2298        //post schemaVersion 58 the compara name - production name
2299        //post schema 67  The following meta keys are retired
2300        //species.short_name
2301        //species.ensembl_common_name
2302        //species.ensembl_alias_name
2303        //The following meta keys will be added
2304        //species.url
2305        //species.display_name
2306        if (tempList != null) {
2307            for (HashMap row : tempList) {
2308
2309                String key = (String) row.get("key");
2310                String value = (String) row.get("value");
2311
2312                if (key == null || key.isEmpty() || value == null) {
2313                    continue;
2314                }
2315
2316                //making aliases all lower case now
2317                if (key.startsWith("species.")
2318                        && !key.equals("species.division")
2319                        && !key.equals("species.stable_id_prefix")) {
2320                    spData.getAliases().add(value.toLowerCase());
2321                }
2322
2323                if (key.equals("species.stable_id_prefix")) {
2324                    spData.setEnsemblStablePrefix(value);
2325                } else if (key.equals("species.production_name")) {
2326                    spData.setDatabaseStyleSpeciesName(value);
2327                    spData.setDatabaseStyleSpeciesName(dbVersion, value);
2328                } else if (key.equals("species.scientific_name")) {
2329                    spData.setSpeciesBinomial(value);
2330                } else if (key.equals("species.common_name")) {
2331                    spData.setCommonName(value);
2332                } else if (key.equals("species.taxonomy_id")) {
2333                    spData.setTaxonomyID(value);
2334                } else if (key.equals("species.short_name")
2335                        || key.equals("species.display_name")) {
2336                    spData.setShortName(value);
2337                } else if (key.endsWith("build.level")) {
2338                    database.setBuildLevel(key, value);
2339                } else if (key.equals("species.division")) {
2340                    //this is only present in ensemblgenomes
2341                    spData.setComparaDivision(value);
2342                } else if (key.equals("species.url")) {
2343                    //the url name is set for the species from this most recent core db
2344                    spData.setUrlName(value);
2345                } else if (key.equals("assembly.name")) {
2346                    //assembly details are set in the database
2347                    //as that is where lazy loading is controlled
2348                    //(for releases other than highest - the values are lazy loaded not filled in here)
2349                    database.setAssemblyName(value);
2350                }
2351                if (key.equals("assembly.accession")) {
2352                    database.setAssemblyAccession(value);
2353                }
2354            }
2355        }
2356
2357        if (spData.getSpeciesBinomial().isEmpty()) {
2358            String s = database.getDbSpeciesName();
2359            spData.setSpeciesBinomial(s.replaceFirst(s.substring(0, 1), (s.substring(0, 1)).toUpperCase()).replace("_", " "));
2360            spData.getAliases().add(spData.getSpeciesBinomial().toLowerCase());
2361        }
2362
2363        if (spData.getDatabaseStyleName().isEmpty()) {
2364            spData.setDatabaseStyleSpeciesName(database.getDbSpeciesName());
2365            //actually not necessary if we are only running create species on the most 
2366            //recent single species core databases
2367            spData.setDatabaseStyleSpeciesName(dbVersion, database.getDbSpeciesName());
2368            spData.getAliases().add(spData.getDatabaseStyleName().toLowerCase());
2369        }
2370
2371        if (Integer.parseInt(database.getSchemaVersion()) >= 59) {
2372            spData.setComparaName(database.getDBVersion(), spData.getDatabaseStyleName());
2373        } else {
2374            spData.setComparaName(database.getDBVersion(), spData.getSpeciesBinomial());
2375        }
2376
2377        return spData;
2378    }
2379
2380    /**
2381     * should only be called for non current singlespecies core databases
2382     *
2383     * @param db
2384     * @throws DAOException
2385     */
2386    @Override
2387    public void setSpeciesMetadata(CoreDatabase db) throws DAOException {
2388
2389        if (db == null || db.getdBName() == null || db.getdBName().isEmpty()) {
2390            return;
2391        }
2392
2393        if (db instanceof CollectionDatabase) {
2394
2395            CollectionCoreDatabase database = (CollectionCoreDatabase) db;
2396            TreeSet<? extends CollectionSpecies> spp = database.getCollection().getSpecies();
2397            HashMap<Integer, DBCollectionSpecies> localSpp = new HashMap<Integer, DBCollectionSpecies>();
2398
2399            for (CollectionSpecies sp : spp) {
2400                localSpp.put(sp.getDBSpeciesID(database.getDBVersion()), (DBCollectionSpecies) sp);
2401            }
2402
2403            String dBName = database.getdBName();
2404            List<HashMap> results = null;
2405            SqlSession session = null;
2406
2407            try {
2408                session = sqlMapper.openSession();
2409                DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2410                results = dm.getSpeciesFromCollection(dBName);
2411            } catch (Exception e) {
2412                throw new DAOException("Failed to set species metadata  for " + dBName, e);
2413            } finally {
2414                if (session != null) {
2415                    session.close();
2416                }
2417            }
2418
2419            if ((results == null) || (results.isEmpty() == true)) {
2420                return;
2421            }
2422
2423            for (Object o : results) {
2424                Integer id = null;
2425                DBCollectionSpecies species = null;
2426                HashMap m = (HashMap) o;
2427                id = (Integer) m.get("species_id");
2428
2429                // if we've already made species with this id  for this collection
2430                //use it
2431                if (localSpp.containsKey(id)) {
2432                    species = localSpp.get(id);
2433                } else {
2434                    continue;
2435                }
2436
2437//                if (m.get("meta_key").equals("assembly.accession")) {
2438//                    database.setAssemblyAccession(species, (String) m.get("meta_value"));
2439//                }
2440//                if (m.get("meta_key").equals("assembly.name")) {
2441//                    database.setAssemblyName(species, (String) m.get("meta_value"));
2442//                }
2443            }
2444        } else {
2445            SingleSpeciesCoreDatabase database = (SingleSpeciesCoreDatabase) db;
2446            String comparaName = "";
2447            DBSpecies sp = (DBSpecies) database.getSpecies();
2448            String dBName = database.getdBName();
2449            List<HashMap> results = null;
2450            SqlSession session = null;
2451
2452            try {
2453                session = sqlMapper.openSession();
2454                DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2455                results = dm.getSpeciesProperties(dBName);
2456            } catch (Exception e) {
2457                throw new DAOException("Failed to set species metadata for " + dBName, e);
2458            } finally {
2459                if (session != null) {
2460                    session.close();
2461                }
2462            }
2463
2464            if ((results == null) || (results.isEmpty() == true)) {
2465                return;
2466            }
2467
2468            for (HashMap row : results) {
2469                String key = (String) row.get("key");
2470                String value = (String) row.get("value");
2471                if (key == null || key.isEmpty() || value == null) {
2472                    continue;
2473                }
2474                if (key.equals("species.production_name")) {
2475                    comparaName = value;
2476                } else if (key.endsWith("build.level")) {
2477                    database.setBuildLevel(key, value);
2478                } else if (key.equals("assembly.name")) {
2479                    //assembly details are set in the database
2480                    //as that is where lazy loading is controlled
2481                    database.setAssemblyName(value);
2482                } else if (key.equals("assembly.accession")) {
2483                    database.setAssemblyAccession(value);
2484                }
2485            }
2486
2487            //for pre dbVersion 59 releases, the meta table is not guaranteed
2488            //to hold the species.production_name, species.short_name and
2489            //species.scientific_name meta_keys
2490            //pre schemaVersion 59 the compara name was always = Species Binomial
2491            //post schemaVersion 58 the compara name - production name
2492            //post schema 67  The following meta keys are retired
2493            //species.short_name //species.ensembl_common_name //species.ensembl_alias_name
2494            //The following meta keys will be added
2495            //species.url //species.display_name
2496            if (Integer.parseInt(database.getSchemaVersion()) >= 59) {
2497                database.setComparaName(comparaName);
2498            } else {
2499                database.setComparaName(sp.getSpeciesBinomial());
2500            }
2501        }
2502    }
2503
2504    /**
2505     * note by default translation by this default engine is configured to
2506     * convert non-Methionine initiations to Methionine
2507     *
2508     */
2509    @Override
2510    public TranscriptionEngine getDefaultTranscriptionEngine() {
2511        return transcriptionEngines.get(1);
2512    }
2513
2514    @Override
2515    public TranscriptionEngine getTranscriptionEngine(Integer codonTable) {
2516
2517        /* valid codes are
2518         * 1 - UNIVERSAL
2519         * 2 - VERTEBRATE_MITOCHONDRIAL
2520         * 3 - YEAST_MITOCHONDRIAL
2521         * 4 - MOLD_MITOCHONDRIAL
2522         * 5 - INVERTEBRATE_MITOCHONDRIAL
2523         * 6 - CILIATE_NUCLEAR
2524         * 9 - ECHINODERM_MITOCHONDRIAL
2525         * 10 - EUPLOTID_NUCLEAR
2526         * 11 - BACTERIAL
2527         * 12 - ALTERNATIVE_YEAST_NUCLEAR
2528         * 13 - ASCIDIAN_MITOCHONDRIAL
2529         * 14 - FLATWORM_MITOCHONDRIAL
2530         * 15 - BLEPHARISMA_MACRONUCLEAR
2531         * 16 - 2CHLOROPHYCEAN_MITOCHONDRIAL
2532         * 21 - TREMATODE_MITOCHONDRIAL
2533         * 23 - SCENEDESMUS_MITOCHONDRIAL 
2534         */
2535        //using an invalid code will use the default universal table
2536        if (transcriptionEngines.containsKey(codonTable)) {
2537            return transcriptionEngines.get(codonTable);
2538        }
2539        TranscriptionEngine.Builder b = new TranscriptionEngine.Builder();
2540        b.table(codonTable);
2541        TranscriptionEngine engine = b.build();
2542        transcriptionEngines.put(codonTable, engine);
2543        return engine;
2544
2545    }
2546
2547    @Override
2548    public HashMap<String, String> getRenamedDBs() {
2549        return renamedDBs;
2550    }
2551
    // Cache of pan-compara species sets keyed by database version string;
    // entries are added by findPanHomologySpecies and read by
    // isSpeciesInPanHomology and getPanComparaSpecies.
    HashMap<String, Set<DBSpecies>> panComparaSpecies = new HashMap<String, Set<DBSpecies>>();
2553
2554    @Override
2555    public boolean isSpeciesInPanHomology(Species querySpecies, String dbVersion) {
2556
2557        if (querySpecies == null) {
2558            return false;
2559        }
2560
2561        if (dbVersion == null || dbVersion.isEmpty()) {
2562            if (querySpecies.getMostRecentCoreDatabase() != null) {
2563                dbVersion = querySpecies.getMostRecentCoreDatabase().getDBVersion();
2564            }
2565        }
2566
2567        if (dbVersion == null || dbVersion.isEmpty() || dbVersion.equals("0")) {
2568            return false;
2569        }
2570
2571        if (!panComparaSpecies.containsKey(dbVersion)) {
2572            try {
2573                findPanHomologySpecies(dbVersion);
2574            } catch (DAOException ex) {
2575                return false;
2576            }
2577        }
2578
2579        return panComparaSpecies.get(dbVersion).contains(querySpecies);
2580    }
2581
2582    public Set<DBSpecies> getPanComparaSpecies(String dbVersion) {
2583
2584        if (dbVersion == null || dbVersion.isEmpty()) {
2585            dbVersion = "" + this.highestReleaseVersion;
2586        }
2587        if (dbVersion == null || dbVersion.isEmpty() || dbVersion.equals("0")) {
2588            return null;
2589        }
2590
2591        if (!panComparaSpecies.containsKey(dbVersion)) {
2592            try {
2593                findPanHomologySpecies(dbVersion);
2594            } catch (DAOException ex) {
2595                //dont fail here
2596            }
2597        }
2598        return panComparaSpecies.get(dbVersion);
2599    }
2600
2601    private void findPanHomologySpecies(String version) throws DAOException {
2602
2603        Database db = this.getComparaDatabase(EnsemblComparaDivision.PAN_HOMOLOGY, version);
2604
2605        if (db == null) {
2606            panComparaSpecies.put(version, new HashSet<DBSpecies>());
2607            return;
2608        }
2609
2610        HashSet<DBSpecies> spp = new HashSet<DBSpecies>();
2611        List<HashMap> results = null;
2612        SqlSession session = null;
2613
2614        try {
2615            session = sqlMapper.openSession();
2616            DatabaseMapper dm = session.getMapper(DatabaseMapper.class);
2617            results = dm.getPanComparaSpecies(db.getdBName());
2618        } catch (Exception e) {
2619            LOGGER.debug("Failed to interrogate species in Pan Compara DB for " + db);
2620            //throw new DAOException("Failed to interrogate species in Pan Compara DB for " + db, e);
2621        } finally {
2622            if (session != null) {
2623                session.close();
2624            }
2625        }
2626
2627        if ((results == null) || (results.isEmpty() == true)) {
2628            panComparaSpecies.put(version, new HashSet<DBSpecies>());
2629            return;
2630        }
2631
2632        for (HashMap h : results) {
2633
2634            String alias = (String) h.get("name");
2635            //not sure i can use this yet
2636            String assembly = (String) h.get("assembly");
2637
2638            DBSpecies sp = null;
2639
2640            try {
2641                sp = this.getSpeciesByAlias(alias, version);
2642                if (sp != null) {
2643                    spp.add(sp);
2644                }
2645            } catch (NonUniqueException ex) {
2646                LOGGER.debug(ex.getMessage());
2647                for (Object s : ex.getAllHits()) {
2648                    LOGGER.debug(((DBSpecies) s).getDatabaseStyleName());
2649                }
2650            }
2651        }
2652        panComparaSpecies.put(version, spp);
2653    }
2654
2655}