001/**
002 * Copyright (C) 2010-2015 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003 *
004 * This file is part of JEnsembl: a Java API to Ensembl data sources developed by the
005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006 * Veterinary Studies, University of Edinburgh.
007 *
008 * Project hosted at: http://jensembl.sourceforge.net
009 *
010 * This is free software: you can redistribute it and/or modify
011 * it under the terms of the GNU General Public License (version 3) as published by
012 * the Free Software Foundation.
013 *
014 * This software is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 *
019 * You should have received a copy of the GNU General Public License
020 * in this software distribution. If not, see: http://opensource.org/licenses/gpl-3.0.html
021 */
022package uk.ac.roslin.ensembl.demo;
023
024import java.util.ArrayList;
025import java.util.Date;
026import java.util.List;
027import java.util.TreeSet;
028import uk.ac.roslin.ensembl.config.DBConnection.DataSource;
029import uk.ac.roslin.ensembl.config.FeatureType;
030import uk.ac.roslin.ensembl.dao.database.DBRegistry;
031import uk.ac.roslin.ensembl.dao.database.DBSpecies;
032import uk.ac.roslin.ensembl.dao.factory.DAOVariationFactory;
033import uk.ac.roslin.ensembl.datasourceaware.core.DAChromosome;
034import uk.ac.roslin.ensembl.datasourceaware.variation.VariationMapping;
035import uk.ac.roslin.ensembl.model.Coordinate;
036import uk.ac.roslin.ensembl.model.Mapping;
037import uk.ac.roslin.ensembl.model.MappingSet;
038
039/**
040 *
041 * @author tpaterso
042 */
043public class VariationScript {
044    
045    //demonstration script for getting variations near genes
046    
047     public  static void main(String[] args) throws Exception {
048         
049         Coordinate testCoordinate;;
050         
051         Integer pad5 ;
052         Integer pad3 ;
053         String spName ;
054         String chrName ;
055         
056         
057         //if we want a command line version...
058         
059//         if (args==null || args.length<4 || args.length>6 ) {
060//             System.out.print("Usage: \njava -jar -Xmx3g -Xmx3g JEnsemblTest.jar");
061//             System.out.println(" species chromosome 5'pad 3'pad [start stop]" );
062//             return;
063//         }
064//         
065//         pad5 = Integer.parseInt(args[2]);
066//         pad3 = Integer.parseInt(args[3]);
067//        
068//          spName = args[0];
069//          chrName = args[1];
070//         
071//         if (args.length>4) {
072//              Integer start = Integer.parseInt(args[4]);
073//              Integer stop = Integer.parseInt(args[5]); 
074//              testCoordinate = new Coordinate(start, stop);        
075//     }
076                 
077         
078          pad5 = 5000;
079          pad3 = 1000;
080          spName = "human";
081          chrName = "17";
082          testCoordinate = new Coordinate(1 , 1000000); 
083
084         
085
086          DBRegistry ensembldbRegistry =  DBRegistry.createRegistryForDataSource(DataSource.ENSEMBLDB);
087          DBSpecies sp = ensembldbRegistry.getSpeciesByAlias(spName);         
088          DAChromosome chr = sp.getChromosomeByName(chrName);
089          
090          DAOVariationFactory vf = chr.getDaoFactory().getVariationFactory();
091
092          if (testCoordinate == null) {
093                testCoordinate = new Coordinate(1,chr.getDBSeqLength());
094          }
095          
096          Date start= new Date();
097          chr.getGenesOnRegion(testCoordinate);   
098          Date stop= new Date();
099
100                       
101          MappingSet geneMappings =  chr.getLoadedMappings(FeatureType.gene);
102          
103          
104          System.out.print("In "+sp.getCommonName()+" chromosome "+chr.getChromosomeName());
105          System.out.print(" coordinates "+testCoordinate);
106          System.out.println(" there are "+geneMappings.size()+" genes.");    
107          System.out.println(" [Query took "+((double) stop.getTime() - (double) start.getTime()) / 1000 + " seconds]");
108          
109//          System.out.println("The gene coordinates are:");
110//          for (Mapping m: geneMappings) {
111//              System.out.println("\t"+((DAGene) m.getTarget()).getStableID()+ " "+m.getSourceCoordinates());
112//          }
113          
114
115          
116          start= new Date();
117          
118          ArrayList<VariationMapping> varMappings = (ArrayList<VariationMapping> ) chr.getVariationMappingsOnRegion(testCoordinate);
119          //ArrayList<VariationMapping> varMappings = (ArrayList<VariationMapping> ) vf.getVariationDAO().getVariationMappingsOnRegion(chr, testCoordinate);
120          stop= new Date();
121          System.out.print("In "+sp.getCommonName()+" chromosome "+chr.getChromosomeName());
122          System.out.print(" coordinates "+testCoordinate);
123          System.out.println(" there are "+varMappings.size()+" sequence variations.");
124          System.out.println(" [Query took "+((double) stop.getTime() - (double) start.getTime()) / 1000 + " seconds]");
125         
126          
127          start= new Date();
128          TreeSet<VariationMapping> varSet = new TreeSet<VariationMapping>(Mapping.mappingOnSourceComparator);
129          varSet.addAll(varMappings);
130          
131          TreeSet<VariationMapping> resultSet = new TreeSet<VariationMapping>(Mapping.mappingOnSourceComparator);
132          
133          for (Mapping m : geneMappings) {
134              
135                Coordinate gCoord = m.getSourceCoordinates();
136                Coordinate gCoordPad = new Coordinate(gCoord.getStart()-pad5, gCoord.getEnd()+pad3, gCoord.getStrand());
137                
138                List<VariationMapping> remove = new ArrayList<VariationMapping>();
139                
140                for (VariationMapping vm : varSet) {
141                    
142                    Coordinate vmCoord = vm.getSourceCoordinates();
143
144                    
145                    if (vmCoord.getEnd() < gCoordPad.getStart() ) {
146                        remove.add(vm);
147                        continue;
148                    }
149                    
150                    if (vmCoord.overlaps(gCoordPad)) {
151                        resultSet.add(vm);
152                        continue;
153                    }
154                    
155                    if (vmCoord.getStart() > gCoordPad.getEnd()) {
156                        break;
157                    }
158
159                }
160                
161                varSet.removeAll(remove);
162                
163            }
164          stop= new Date();
165          
166          System.out.print("There are "+resultSet.size()+" variants within ");
167          System.out.println(pad5+" bases 5' and "+pad3+" bases 3' of the genes");
168          System.out.println(" [Processing took "+((double) stop.getTime() - (double) start.getTime()) / 1000 + " seconds]");
169         
170
171          
172          
173
174            
175
176            
177     }
178
179}