001/** 002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * Project hosted at: http://jensembl.sourceforge.net 009 * 010 * This is free software: you can redistribute it and/or modify 011 * it under the terms of the GNU General Public License (version 3) as published by 012 * the Free Software Foundation. 013 * 014 * This software is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html 021 */ 022package uk.ac.roslin.ensembl.demo; 023 024import java.util.ArrayList; 025import java.util.Date; 026import java.util.List; 027import java.util.TreeSet; 028import uk.ac.roslin.ensembl.config.DBConnection.DataSource; 029import uk.ac.roslin.ensembl.config.FeatureType; 030import uk.ac.roslin.ensembl.dao.database.DBRegistry; 031import uk.ac.roslin.ensembl.dao.database.DBSpecies; 032import uk.ac.roslin.ensembl.dao.factory.DAOVariationFactory; 033import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome; 034import uk.ac.roslin.ensembl.datasourceaware.variation.VariationMapping; 035import uk.ac.roslin.ensembl.model.Coordinate; 036import uk.ac.roslin.ensembl.model.Mapping; 037import uk.ac.roslin.ensembl.model.MappingSet; 038 039/** 040 * 041 * @author tpaterso 042 */ 043public class VariationScript { 044 045 //demonstration script for getting variations near genes 046 047 public static void main(String[] args) throws Exception { 048 049 Coordinate testCoordinate;; 050 051 Integer pad5 ; 052 Integer pad3 ; 053 String spName ; 054 String chrName ; 055 056 057 //if we want a command line version... 058 059// if (args==null || args.length<4 || args.length>6 ) { 060// System.out.print("Usage: \njava -jar -Xmx3g -Xmx3g JEnsemblTest.jar"); 061// System.out.println(" species chromosome 5'pad 3'pad [start stop]" ); 062// return; 063// } 064// 065// pad5 = Integer.parseInt(args[2]); 066// pad3 = Integer.parseInt(args[3]); 067// 068// spName = args[0]; 069// chrName = args[1]; 070// 071// if (args.length>4) { 072// Integer start = Integer.parseInt(args[4]); 073// Integer stop = Integer.parseInt(args[5]); 074// testCoordinate = new Coordinate(start, stop); 075// } 076 077 078 pad5 = 5000; 079 pad3 = 1000; 080 spName = "human"; 081 chrName = "17"; 082 testCoordinate = new Coordinate(1 , 1000000); 083 084 085 086 DBRegistry ensembldbRegistry = DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB); 087 DBSpecies sp = ensembldbRegistry.getSpeciesByAlias(spName); 088 DAChromosome chr = sp.getChromosomeByName(chrName); 089 090 DAOVariationFactory vf = chr.getDaoFactory().getVariationFactory(); 091 092 if (testCoordinate == null) { 093 testCoordinate = new Coordinate(1,chr.getDBSeqLength()); 094 } 095 096 Date start= new Date(); 097 chr.getGenesOnRegion(testCoordinate); 098 Date stop= new Date(); 099 100 101 MappingSet geneMappings = chr.getLoadedMappings(FeatureType.gene); 102 103 104 System.out.print("In "+sp.getCommonName()+" chromosome "+chr.getChromosomeName()); 105 System.out.print(" coordinates "+testCoordinate); 106 System.out.println(" there are "+geneMappings.size()+" genes."); 107 System.out.println(" [Query took "+((double) stop.getTime() - (double) start.getTime()) / 1000 + " seconds]"); 108 109// System.out.println("The gene coordinates are:"); 110// for (Mapping m: geneMappings) { 111// System.out.println("\t"+((DAGene) m.getTarget()).getStableID()+ " "+m.getSourceCoordinates()); 112// } 113 114 115 116 start= new Date(); 117 118 ArrayList<VariationMapping> varMappings = (ArrayList<VariationMapping> ) chr.getVariationMappingsOnRegion(testCoordinate); 119 //ArrayList<VariationMapping> varMappings = (ArrayList<VariationMapping> ) vf.getVariationDAO().getVariationMappingsOnRegion(chr, testCoordinate); 120 stop= new Date(); 121 System.out.print("In "+sp.getCommonName()+" chromosome "+chr.getChromosomeName()); 122 System.out.print(" coordinates "+testCoordinate); 123 System.out.println(" there are "+varMappings.size()+" sequence variations."); 124 System.out.println(" [Query took "+((double) stop.getTime() - (double) start.getTime()) / 1000 + " seconds]"); 125 126 127 start= new Date(); 128 TreeSet<VariationMapping> varSet = new TreeSet<VariationMapping>(Mapping.mappingOnSourceComparator); 129 varSet.addAll(varMappings); 130 131 TreeSet<VariationMapping> resultSet = new TreeSet<VariationMapping>(Mapping.mappingOnSourceComparator); 132 133 for (Mapping m : geneMappings) { 134 135 Coordinate gCoord = m.getSourceCoordinates(); 136 Coordinate gCoordPad = new Coordinate(gCoord.getStart()-pad5, gCoord.getEnd()+pad3, gCoord.getStrand()); 137 138 List<VariationMapping> remove = new ArrayList<VariationMapping>(); 139 140 for (VariationMapping vm : varSet) { 141 142 Coordinate vmCoord = vm.getSourceCoordinates(); 143 144 145 if (vmCoord.getEnd() < gCoordPad.getStart() ) { 146 remove.add(vm); 147 continue; 148 } 149 150 if (vmCoord.overlaps(gCoordPad)) { 151 resultSet.add(vm); 152 continue; 153 } 154 155 if (vmCoord.getStart() > gCoordPad.getEnd()) { 156 break; 157 } 158 159 } 160 161 varSet.removeAll(remove); 162 163 } 164 stop= new Date(); 165 166 System.out.print("There are "+resultSet.size()+" variants within "); 167 System.out.println(pad5+" bases 5' and "+pad3+" bases 3' of the genes"); 168 System.out.println(" [Processing took "+((double) stop.getTime() - (double) start.getTime()) / 1000 + " seconds]"); 169 170 171 172 173 174 175 176 177 } 178 179}