001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024import java.util.ArrayList; 025import java.util.Arrays; 026import java.util.List; 027import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 028import uk.ac.roslin.ensembl.config.EnsemblDBType; 029import uk.ac.roslin.ensembl.dao.database.DBRegistry; 030import uk.ac.roslin.ensembl.dao.database.DBSingleSpeciesCoreDatabase; 031import uk.ac.roslin.ensembl.dao.database.DBSingleSpeciesVariationDatabase; 032import uk.ac.roslin.ensembl.dao.database.DBSpecies; 033import uk.ac.roslin.ensembl.dao.factory.DAOVariationFactory; 034import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome; 035import uk.ac.roslin.ensembl.datasourceaware.core.DADNASequence; 036import uk.ac.roslin.ensembl.datasourceaware.variation.DAVariation; 037import uk.ac.roslin.ensembl.model.Coordinate; 038import uk.ac.roslin.ensembl.model.Mapping; 039import uk.ac.roslin.ensembl.model.variation.Variation; 040 041/** 042 * 043 * @author tpaterso 044 */ 045public class Variations { 046 047 /*getting variations mapped to a chromosome 048 *looking at whether these are unique or not 049 */ 050 public static void main(String[] args) throws Exception { 051 052 053 054 DBRegistry ensembldbRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 055 056 057 // show that we can get a Variation Database 058 DBSingleSpeciesVariationDatabase d = (DBSingleSpeciesVariationDatabase) ensembldbRegistry.getDatabase("cow", EnsemblDBType.variation, "78"); 059 DBSingleSpeciesCoreDatabase cdb2 =d.getCoreDB(); 060 DBSingleSpeciesCoreDatabase cdb = (DBSingleSpeciesCoreDatabase) d.getSpecies().getDatabaseByTypeAndVersion(EnsemblDBType.core, d.getDBVersion()); 061 062 if (cdb!=cdb2) { 063 throw new Exception("Woof"); 064 } 065 066 067 DBSpecies human = ensembldbRegistry.getSpeciesByAlias("human"); 068 069 DAChromosome chr1 = human.getChromosomeByName("1"); 070 for (Mapping v: chr1.getVariationMappingsOnRegion(1000000,1001000)) { 071 System.out.print("source: "+v.getSource().getHashID() ); 072 System.out.print(" target: "+v.getTarget().getHashID() ); 073 System.out.println(" "+v.getSourceCoordinates() ); 074 075 } 076 077 for (Variation v : chr1.getVariationsOnRegion(1000000, 1001000)) { 078 int c = 1; 079 for (Mapping m: v.getChromosomeMappings()) { 080 System.out.print(c++ +": source: "+m.getSource().getHashID() ); 081 System.out.print(" "+m.getSourceCoordinates() ); 082 System.out.print(" target: "+m.getTarget().getHashID() ); 083 System.out.println(" "+m.getTargetCoordinates() ); 084 } 085 } 086 087 DBSpecies sp = ensembldbRegistry.getSpeciesByAlias("cow"); 088 DADNASequence seq = sp.getChromosomeByName("17", "74"); 089 090 DAOVariationFactory vf = seq.getDaoFactory().getVariationFactory(); 091 092 List<? extends Variation> out = vf.getVariationDAO().getVariationsOnRegion(seq, new Coordinate(1, 10000000)); 093 094 System.out.println("size of variation list: "+out.size()); 095 096 int hidden = 0; 097 098 for (Variation v: out) { 099 100 if (v.getId()==1196571) { 101 System.out.println("1196571 has count of true mappings: "+v.getLoadedMappings().size()); 102 } 103 104 for (Mapping m: v.getLoadedMappings()) { 105 //System.out.println(((VariationMapping)m).getId()+","+v.getId()); 106 } 107 108 if (v.getLoadedMappings().size()>1) { 109 hidden += v.getLoadedMappings().size()-1; 110 System.out.println("more than one mapping for "+v.getId()+" .... "+v.getLoadedMappings().size()); 111 } 112 113 } 114 115 System.out.println("total of "+hidden+" hidden mappings"); 116 System.out.println("real total of mappings = "+(hidden+out.size())); 117 118 119 System.out.println(""); 120 121 122 123 Variation var = vf.getVariationDAO().getUniquelyMappedVariation("rs41255190"); 124 125 System.out.print("Variation: "+var.getName()); 126 System.out.println("\t("+((DADNASequence) var.getLoadedMappings().first().getTarget()).getName() 127 +": "+var.getLoadedMappings().first().getTargetCoordinates().toShortString()+")"); 128 129 List<String> so = new ArrayList<String>(); 130 so.add("rs110752831"); 131 so.add("ss102661474"); 132 133 134 String [] a = {"rs43206054", "rs43206055", "rs43206056", "rs43206057", "rs43206058", "rs133870768", 135 "rs137508837", "rs132751093", "rs136947979", "rs135071426", "rs132801320", "rs137264187", 136 "rs136535420", "rs135487984", "rs136626062", "rs135911839", "rs137363972", "rs134404315", "rs132750081"}; 137 138 List<String> ll = Arrays.asList(a); 139 140 141 142 List<? extends Variation> vars = vf.getVariationDAO().getUniquelyMappedVariations(ll); 143 144 for (Variation v: vars) { 145 System.out.print("Variation: "+v.getName()); 146 if (v.getSynonym()!=null) { 147 System.out.print(" ("+v.getSynonym()+")"); 148 } 149 System.out.println("\t"+((DADNASequence) v.getLoadedMappings().first().getTarget()).getName() 150 +": "+v.getLoadedMappings().first().getTargetCoordinates().toShortString()); 151 } 152 153 154 seq = sp.getChromosomeByName("17"); 155 156 vf = seq.getDaoFactory().getVariationFactory(); 157 158 out = vf.getVariationDAO().getVariationsOnRegion(seq, new Coordinate(1, 10000)); 159 160 System.out.println("size of variation list - current schema: "+out.size()); 161 162 163 } 164 165}