import numpy as np
import scipy as sp
import scipy.stats    # needed so that sp.stats.gaussian_kde is available below
from numpy import pi
from pymc import Normal, Uniform, Gamma, deterministic, MCMC, MAP, Metropolis
from matplotlib import pyplot as plt


# Define our Linear model

def makeLinearModel(x, observations):

    # Specify the priors of our fit parameters.
    # Our fitting model is y = a0 + a1*x.
    # For the intercept a0 we'll assume a normal prior.
    A0 = Normal('a0', mu=0.0, tau=1./1.0**2)

    # For the gradient a1 we first define a1 = tan(alpha), and assume a uniform prior
    # for the angle 'alpha' in the interval [0, pi/2] (see the note on the implied
    # slope prior below the model definitions).
    Alpha = Uniform('alpha', lower=0, upper=pi/2.0)

    @deterministic(name='a1')
    def A1(alpha=Alpha):
        return np.tan(alpha)

    # Define our fitting model, given the priors defined above.
    @deterministic
    def linearModel(x=x, a0=A0, a1=A1):
        return a0 + a1*x

    # We'll assume a Gaussian distribution for our observations, but we don't know its
    # true variance. We'll assume a Gamma prior for its precision (= 1/variance).
    Precision = Gamma('precision', alpha=1.0, beta=1.0)

    # Specify that the distribution of our observations is Gaussian.
    # Note that the Normal() class works with the precision (= 1/variance) rather than
    # with the variance itself.
    Observations = Normal('observations', mu=linearModel, tau=Precision,
                          value=observations, observed=True)

    return locals()


# Define our Quadratic model

def makeQuadraticModel(x, observations):

    # Specify the priors of our fit parameters.
    # Our fitting model is y = b0 + b1*x + b2*x*x.
    # For the intercept b0 we'll assume a normal prior.
    B0 = Normal('b0', mu=0.0, tau=1./1.0**2)

    # For the gradient b1 we first define b1 = tan(beta), and assume a uniform prior
    # for the angle 'beta' in the interval [0, pi/2].
    Beta = Uniform('beta', lower=0, upper=pi/2.0)

    @deterministic(name='b1')
    def B1(beta=Beta):
        return np.tan(beta)

    # For the quadratic coefficient b2 we assume a normal prior.
    B2 = Normal('b2', mu=1.0, tau=1./1.0**2)

    # Define our fitting model, given the priors defined above.
    @deterministic
    def quadraticModel(x=x, b0=B0, b1=B1, b2=B2):
        return b0 + b1*x + b2*x*x

    # We'll assume a Gaussian distribution for our observations, but we don't know its
    # true variance. We'll assume a Gamma prior for its precision (= 1/variance).
    Precision = Gamma('precision2', alpha=1.0, beta=1.0)

    # Specify that the distribution of our observations is Gaussian.
    # Note that the Normal() class works with the precision (= 1/variance) rather than
    # with the variance itself.
    Observations = Normal('observations', mu=quadraticModel, tau=Precision,
                          value=observations, observed=True)

    return locals()
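
# A note on the slope priors used above: drawing the angle alpha (or beta) uniformly
# from [0, pi/2] and setting the slope to tan(alpha) does NOT give a flat prior on the
# slope itself. By the change-of-variables rule, the implied density of a slope
# s = tan(alpha) >= 0 is p(s) = (2/pi) * 1/(1 + s**2), i.e. a half-Cauchy distribution:
# every direction of the line gets equal prior weight, which down-weights extremely
# steep slopes compared to a flat prior on s.
#
# A purely illustrative numerical check (optional, not needed for the fit):
#   angleSamples = np.random.uniform(0.0, pi/2.0, size=100000)
#   slopeSamples = np.tan(angleSamples)   # histogram these to see the 1/(1+s^2) shape
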
if __name__ == "__main__":

    # Make some fake observational data
    x = np.array([0.4202962, 0.4470388, 1.0754123, 2.0000490, 4.3765986,
                  5.1316189, 6.4634362, 6.9991279, 7.0872710, 7.6706599])
    noise = np.array([-0.2778645, 0.3608284, -0.5909124, 0.9755906, -1.4457499,
                      0.2952068, 0.5547522, -0.4986355, 0.1957338, -0.4555405])
    trueIntercept = 0.0
    trueGradient = 1.0
    trueQuadraticCoef = 0.0
    observations = trueIntercept + trueGradient * x + trueQuadraticCoef * x*x + noise

    print("")
    print("Created fake observational data")
    print("True value of the intercept: {}".format(trueIntercept))
    print("True value of the gradient: {}".format(trueGradient))
    print("True value of the quadratic coefficient: {}".format(trueQuadraticCoef))
    print("")

    # Set up the MCMC sampler for the linear model, and let it sample.
    # Specifying the step method is optional. If absent, PyMC will make its own guess.
    myLinearModel = makeLinearModel(x, observations)
    mcLinear = MCMC(myLinearModel)
    mcLinear.use_step_method(Metropolis, mcLinear.A0, proposal_sd=0.10, proposal_distribution='Normal')
    mcLinear.use_step_method(Metropolis, mcLinear.Alpha, proposal_sd=0.02, proposal_distribution='Normal')

    print("MC sampling the linear model")
    mcLinear.sample(iter=30000, burn=1000, thin=10)

    interceptValues = mcLinear.trace("a0")[:]
    gradientValues = mcLinear.trace("a1")[:]

    print("")
    print("Outcome for the linear model:")
    print("a0 = {0} +/- {1}".format(interceptValues.mean(), interceptValues.std()))
    print("a1 = {0} +/- {1}".format(gradientValues.mean(), gradientValues.std()))

    # The following illustrates the use of MAP (Maximum A Posteriori).
    # We use it to get the most probable fit parameters for the linear model,
    # from which we compute the best fit.
    myMAP = MAP(myLinearModel)
    myMAP.fit()
    mapValueIntercept = myMAP.A0.value
    mapValueGradient = myMAP.A1.value
    bestLinearFit = mapValueIntercept + mapValueGradient * x

    print("MAP value of the intercept a0: {}".format(mapValueIntercept))
    print("MAP value of the gradient a1: {}".format(mapValueGradient))
    print("")

    # Set up a similar MCMC sampler for the quadratic model, and let it sample
    myQuadraticModel = makeQuadraticModel(x, observations)
    mcQuadratic = MCMC(myQuadraticModel)

    print("MC sampling the quadratic model")
    mcQuadratic.sample(iter=30000, burn=1000, thin=10)

    interceptValuesForQuadraticModel = mcQuadratic.trace("b0")[:]
    gradientValuesForQuadraticModel = mcQuadratic.trace("b1")[:]
    quadraticCoefValues = mcQuadratic.trace("b2")[:]

    print("")
    print("Outcome for the quadratic model:")
    print("b0 = {0} +/- {1}".format(interceptValuesForQuadraticModel.mean(),
                                    interceptValuesForQuadraticModel.std()))
    print("b1 = {0} +/- {1}".format(gradientValuesForQuadraticModel.mean(),
                                    gradientValuesForQuadraticModel.std()))
    print("b2 = {0} +/- {1}".format(quadraticCoefValues.mean(), quadraticCoefValues.std()))

    # To compare the two models, we compute the Bayesian evidence of each:
    # first for the linear model, then for the quadratic model.
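    # The evidence Z = integral of p(D | theta) * p(theta) d(theta) is estimated here by
    # simple Monte Carlo over the prior: draw theta_i from the prior and average the
    # likelihood, Z ~= (1/N) * sum_i p(D | theta_i). That is exactly what the loops below
    # do, using draw_from_prior() to resample the priors and Observations.logp for the
    # log-likelihood at each draw. This estimator is easy to implement but can converge
    # slowly when the likelihood is sharply peaked relative to the prior, which is why
    # Nsum is taken fairly large.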
print print("Computing Bayesian evidences...") sumOfLikelihoodValues = 0.0 Nsum = 100000 for n in range(Nsum): mcLinear.draw_from_prior() sumOfLikelihoodValues += np.exp(mcLinear.Observations.logp) BayesianEvidenceForLinearModel = sumOfLikelihoodValues / Nsum print("Bayesian evidence Z1 for the linear model = {}".format(BayesianEvidenceForLinearModel)) sumOfLikelihoodValues = 0.0 for n in range(Nsum): mcQuadratic.draw_from_prior() sumOfLikelihoodValues += np.exp(mcQuadratic.Observations.logp) BayesianEvidenceForQuadraticModel = sumOfLikelihoodValues / Nsum print("Bayesian evidence Z2 for the quadratic model = {}".format(BayesianEvidenceForQuadraticModel)) print("Bayes factor Z1 / Z2 = {}".format(BayesianEvidenceForLinearModel / BayesianEvidenceForQuadraticModel)) print # Plot the observed dataset, together with the Linear MAP fit plt.figure(1) plt.clf() plt.subplot(231) plt.errorbar(x, observations, yerr=noise, color="blue", fmt="o") plt.plot(x, bestLinearFit, color="red") plt.xlabel("x") plt.ylabel("observations") # Plot a 2D histogram of the intercept and the gradient of the linear model plt.subplot(232) plt.hexbin(interceptValues, gradientValues, gridsize = 50) plt.xlabel("intercept a0") plt.ylabel("gradient a1") # Plot 1D densities of the intercept and gradient values of the linear model plt.subplot(234) interceptDensity = sp.stats.gaussian_kde(interceptValues) xValues = np.linspace(interceptValues.min(), interceptValues.max(), 100) plt.plot(xValues, interceptDensity(xValues), color="black") plt.axvline(trueIntercept, linewidth=2, color="red") plt.xlabel("intercept a0") plt.ylabel("density") plt.subplot(235) gradientDensity = sp.stats.gaussian_kde(gradientValues) xValues = np.linspace(gradientValues.min(), gradientValues.max(), 100) plt.plot(xValues, gradientDensity(xValues), color = "black") plt.axvline(trueGradient, linewidth=2, color="red") plt.xlabel("gradient a1") plt.ylabel("density") # Plot the MC samples of the fit parameters of the linear model plt.subplot(233) plt.plot(interceptValues) plt.xlabel("iteration") plt.ylabel("intercept a0 samples") plt.subplot(236) plt.plot(gradientValues) plt.xlabel("iteration") plt.ylabel("gradient a1 samples") plt.savefig('linearmodel01.png') plt.show()