# Time-stamp: <2012-02-29 15:09:27 Tao Liu> """Module Description Copyright (c) 2012 Tao Liu This code is free software; you can redistribute it and/or modify it under the terms of the BSD License (see the file COPYING included with the distribution). @status: experimental @version: $Revision$ @author: Tao Liu @contact: taoliu@jimmy.harvard.edu """ # ------------------------------------ # python modules # ------------------------------------ from array import array as pyarray from MACS2.Constants import * from random import gammavariate as rgamma from random import seed as rseed from math import log import pymc from pymc import deterministic # ------------------------------------ # constants # ------------------------------------ import numpy.random as numpyrand LOG2E = log(2.718281828459045,2) # for converting natural log to log2 gfold_dict = {} # temporarily save all precomputed gfold # ------------------------------------ # Misc functions # ------------------------------------ # BUGFIX FOR PYMC ARGUMENT CHANGE from inspect import getargspec PROGRESS_BAR_ENABLED = 'progress_bar' in getargspec(pymc.MCMC.sample)[0] def MCMCPoissonPosteriorRatio (sample_number, burn, count1, count2): """MCMC method to calculate ratio distribution of two Posterior Poisson distributions. sample_number: number of sampling. It must be greater than burn, however there is no check. burn: number of samples being burned. count1: observed counts of condition 1 count2: observed counts of condition 2 return: list of log2-ratios """ lam1 = pymc.Uniform('U1',0,10000) # prior of lambda is uniform distribution lam2 = pymc.Uniform('U2',0,10000) # prior of lambda is uniform distribution poi1 = pymc.Poisson('P1',lam1,value=count1,observed=True) # Poisson with observed value count1 poi2 = pymc.Poisson('P2',lam2,value=count2,observed=True) # Poisson with observed value count2 @deterministic def ratio (l1=lam1,l2=lam2): return log(l1,2) - log(l2,2) mcmcmodel = pymc.MCMC([ratio,lam1,poi1,lam2,poi2]) mcmcmodel.use_step_method(pymc.AdaptiveMetropolis,[ratio,lam1,lam2,poi1,poi2], delay=20000) if PROGRESS_BAR_ENABLED: mcmcmodel.sample(iter=sample_number, progress_bar=False, burn=burn) else: mcmcmodel.sample(iter=sample_number, burn=burn) return ratio.trace() def MLEPoissonPosteriorRatio (sample_number, burn, count1, count2): """MLE method to calculate ratio distribution of two Posterior Poisson distributions. MLE of Posterior Poisson is Gamma(k+1,1) if there is only one observation k. sample_number: number of sampling. It must be greater than burn, however there is no check. burn: number of samples being burned. count1: observed counts of condition 1 count2: observed counts of condition 2 return: list of log2-ratios """ rseed(1) ratios = pyarray('f',[]) ra = ratios.append for i in xrange(sample_number): x1 = rgamma(count1+1,1) x2 = rgamma(count2+1,1) ra( log(x1,2) - log(x2,2) ) return ratios[int(burn):] def get_gfold ( v1, v2, precompiled_get=None, cutoff=0.01, sample_number=10000, burn=500, offset=0, mcmc=False): # try cached gfold in this module first if gfold_dict.has_key((v1,v2)): return gfold_dict[(v1,v2)] # calculate ratio+offset # first, get the value from precompiled table try: V = precompiled_get( v1, v2 ) if v1 > v2: # X >= 0 ret = max(0,V+offset) elif v1 < v2: # X < 0 ret = min(0,V+offset) else: ret = 0.0 except IndexError: if mcmc: numpyrand.seed([10]) P_X = MCMCPoissonPosteriorRatio(sample_number,burn,v1,v2) i = int( (sample_number-burn) * cutoff) else: P_X = MLEPoissonPosteriorRatio(sample_number,0,v1,v2) i = int(sample_number * cutoff) P_X = map(lambda x:x+offset,sorted(P_X)) P_X_mean = float(sum(P_X))/len(P_X) if P_X_mean >= 0: # X >= 0 ret = max(0,P_X[i]) elif P_X_mean < 0: # X < 0 ret = min(0,P_X[-1*i]) #print v1,v2,P_X_mean,'-',offset,ret,i,P_X[i],P_X[-i] gfold_dict[(v1,v2)] = ret return ret #def convert_gfold ( v, cutoff = 0.01, precompiled_gfold=None, mcmc=False ): def convert_gfold ( v, precompiled_gfold, sample_number=15000, burn=5000, offset=0, cutoff=0.01, mcmc=False): """Take (name, count1, count2), try to extract precompiled gfold from precompiled_gfold.get; if failed, calculate the gfold using MCMC if mcmc is True, or simple MLE solution if mcmc is False. """ ret = [] retadd = ret.append get_func = precompiled_gfold.get for i in xrange(len(v[0])): rid= v[0][i] v1 = int(v[1][i]) v2 = int(v[2][i]) # calculate gfold from precompiled table, MCMC or MLE gf = get_gfold(v1,v2,precompiled_get=get_func,cutoff=cutoff,sample_number=sample_number,burn=burn,offset=offset,mcmc=mcmc) retadd([rid,gf]) return ret # ------------------------------------ # Classes # ------------------------------------