001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.config; 023 024import java.util.Collection; 025import java.util.HashMap; 026import java.util.Locale; 027import uk.ac.roslin.ensembl.model.database.DatabaseType; 028 029/** 030 * In 'ensembl.org' there is a single 'ensembl_compara' database which contains 031 * the pairwise comparison data on all the species in 'ensembl.org. However, in 032 * 'ensemblgenomes.org', there are separate 'ensembl_compara' databases for the 033 * different taxonomic groupings: 034 * <ul><li>ensembl_compara_plants</li> 035 * <li>ensembl_compara_bacteria</li> 036 * <li>ensembl_compara_fungi</li> 037 * <li>ensembl_compara_metazoa</li> 038 * <li>ensembl_compara_protozoa</li> 039 * <li>ensembl_compara_pan_homology</li></ul> 040 * In ensembl.org all species are compared in the ensembl_compara database, and 041 * belong to the compara group 'multi'. In each core database (species) in 042 * ensemblgenomes.org the 'species.division' key in table 'meta' specifies the 043 * group: 044 * <ul><li>EnsemblPlants</li> 045 * <li>EnsemblBacteria</li> 046 * <li>EnsemblFungi</li> 047 * <li>EnsemblMetazoa</li> 048 * <li>EnsemblProtozoa</li></ul> 049 * Orthology / paralogy (peptide) predictions are done between all species of a database, 050 * but genomic alignments only between certain combinations of species. Bacteria 051 * is a slightly special case in that we compare members of each collection to 052 * each other, but not between the collections (its actually 10 comparas merged 053 * into one). 054 * <p>ensembl_compara_pan_homology is a peptide compara database produced from 055 * a set of selected species that are taken from all EnsemblGenomes divisions and 056 * from Ensembl (but doesn't include all species from all divisions). See list at 057 * <a href="http://metazoa.ensembl.org/info/docs/compara/homology_method.html"> 058 * http://metazoa.ensembl.org/info/docs/compara/homology_method.html</a>. 059 * One thing to note here is that pan also contains protein family data as well: 060 * <a href="http://metazoa.ensembl.org/info/docs/compara/family.html"> 061 * http://metazoa.ensembl.org/info/docs/compara/family.html</a>. This means that 062 * the genome_db table ends up with more entries than are used in the peptide 063 * homology comparisons alone and cannot be used to determine membership, so you 064 * need to use the species_set/method_link tables as well rather than just relying 065 * on genome_db. 066 */ 067public class EnsemblComparaDivision extends EnsemblType implements DatabaseType, Comparable<EnsemblComparaDivision> { 068 069 public static EnsemblComparaDivision MULTI; 070 public static EnsemblComparaDivision PLANTS; 071 public static EnsemblComparaDivision PROTISTS; 072 public static EnsemblComparaDivision BACTERIA; 073 public static EnsemblComparaDivision FUNGI; 074 public static EnsemblComparaDivision METAZOA; 075 public static EnsemblComparaDivision PAN_HOMOLOGY; 076 public static EnsemblComparaDivision INCOMPLETE_SEARCH; 077 078 private EnsemblComparaDivision(String value) { 079 this.label = value; 080 } 081 082 public static String getDBName(String label) { 083 String out = "ensembl_compara"; 084 if (label.equalsIgnoreCase("multi")) { 085 return out; 086 } else { 087 return out+"_"+label; 088 } 089 } 090 091 private static HashMap<String, EnsemblComparaDivision> typeListHash = EnsemblComparaDivision.initialize(); 092 093 public static Collection<EnsemblComparaDivision> getAllTypes() { 094 return typeListHash.values(); 095 } 096 097 private static HashMap<String, EnsemblComparaDivision> initialize() { 098 099 HashMap<String, EnsemblComparaDivision> out = new HashMap<String, EnsemblComparaDivision>(); 100 101 MULTI = new EnsemblComparaDivision("multi"); 102 PLANTS = new EnsemblComparaDivision("plants"); 103 PROTISTS = new EnsemblComparaDivision("protists"); 104 BACTERIA = new EnsemblComparaDivision("bacteria"); 105 FUNGI = new EnsemblComparaDivision("fungi"); 106 METAZOA = new EnsemblComparaDivision("metazoa"); 107 PAN_HOMOLOGY = new EnsemblComparaDivision("pan_homology"); 108 INCOMPLETE_SEARCH = new EnsemblComparaDivision("incomplete_search"); 109 110 111 out.put(MULTI.toString(),MULTI ); 112 out.put(PLANTS.toString(), PLANTS); 113 out.put(PROTISTS.toString(),PROTISTS ); 114 out.put(BACTERIA.toString(), BACTERIA ); 115 out.put(FUNGI.toString(),FUNGI ); 116 out.put(METAZOA.toString(),METAZOA ); 117 out.put(PAN_HOMOLOGY.toString(), PAN_HOMOLOGY ); 118 out.put(INCOMPLETE_SEARCH.toString(), INCOMPLETE_SEARCH ); 119 120 return out; 121 } 122 123 public static EnsemblComparaDivision getEnsemblComparaDivision(String value) { 124 return typeListHash.get(value); 125 } 126 127 public static EnsemblComparaDivision getEnsemblComparaDivisionByMetaValue(String value) { 128 String v = null; 129 if (value != null) { 130 v = value.replace("Ensembl","").toLowerCase(); 131 } 132 return typeListHash.get(v); 133 } 134 135 136 137 public int compareTo(EnsemblComparaDivision other) { 138 if (other == null || other.toString() == null) { 139 return -1; 140 } 141 142 return this.toString().compareTo(other.toString()); 143 } 144}