"""
Possible solutions to the first set of exercises from Mike Irwin.

These only give a possible solution. I tried to vary the methods for
calculations a bit to show different approaches. These are
not necessarily the most efficient or most flexible solutions.

You should be able to run this script in a terminal via:

    $:> python exercises.py
"""

# Import necessary modules: first the system modules
import itertools
import urllib
import os
# Then 3rd party modules
import numpy as np
from numpy import exp, sqrt, pi
import matplotlib.pyplot as plt
from scipy.misc import factorial
# Then your own modules (but we don't have any)


def download_data():
    """
    Download the data and the PDF with the exercises to the current folder.
    
    The download will only occur if a file does not exists, otherwise it will be
    skipped. A message is print to the screen.
    """
    # Define the locations of the files
    uri1 = 'http://apm3.ast.cam.ac.uk/~mike/astrostats/cetus_fors.dat'
    uri2 = 'http://apm3.ast.cam.ac.uk/~mike/astrostats/scl_flames.dat'
    uri3 = 'http://apm3.ast.cam.ac.uk/~mike/astrostats/astrostats_E12.pdf'
    
    # Start an URLopener to download the files
    url = urllib.URLopener() 
    
    # Cycle through the file locations and check if that file already exists
    # in the current working directory. If not, download; else, don't bother.
    for uri in [uri1, uri2, uri3]:
        
        local_file = os.path.basename(uri)
    
        if not os.path.isfile(local_file):
            print("Downloading {} to {}".format(local_file, os.getcwd()))
            filen, msg = url.retrieve(uri, filename=local_file)
        else:
            print("File {} already exists in {}".format(local_file, os.getcwd()))
    
    # Be sure to close the stream!
    url.close()

    
def gambler(a=1, b=3, c_d=20):
    """
    Solve the gambler's problem.
    """
    
    c = np.arange(c_d+1)
    d = c_d - c
    
    # Simple - but wrong - approach
    P_c_simple =  factorial(c_d) / (factorial(c) * factorial(d)) *\
                  a**c * b**d / (a+b)**c_d
    
    # Bayesian approach
    num = factorial(a+c) * factorial(b+d) * factorial(c+d) * factorial(a+b+1)
    denom = factorial(a) * factorial(b) * factorial(c) * factorial(d) * \
            factorial(a+b+c+d+1)
    P_c_bayes = num / denom
    
    # For Fisher's approach, we need to deal with the large powers. Therefor,
    # we take the log of the probability function. At c==0 and c==c_d, the
    # probability drops to zero, giving us division and invalid errors from
    # numpy. We could treat these cases separately, avoiding zero-divisions
    # and such, but we choose to just ignore the warnings raised by numpy:
    previous_settings = np.seterr(divide='ignore', invalid='ignore')
    
    num = (a+c)*np.log(a+c) + (b+d)*np.log(b+d) + (c+d)*np.log(c+d) +\
          (a+b)*np.log(a+b)
    denom = a*np.log(a) + b*np.log(b) + c*np.log(c) + d*np.log(d) +\
            (a+b+c+d)*np.log(a+b+c+d)
    P_c_fisher = np.exp(num - denom)
    P_c_fisher[0] = 0.
    P_c_fisher[-1] = 0.
    P_c_fisher = P_c_fisher / np.sum(P_c_fisher)

    # But restore the error-reporting settings from numpy to their previous
    # values, perhaps this is necessary for other bits of the code
    np.seterr(**previous_settings)
    
    # Make a plot of the probabilities
    plt.figure()
    plt.title("Binomial prediction a={} b={} c+d={}".format(a, b, c_d))
    plt.step(c, P_c_simple, where='mid', color='b', label='Simple')
    plt.step(c, P_c_bayes, where='mid', color='g', label='Bayes')
    plt.step(c, P_c_fisher, where='mid', color='r', label='Fisher')
    plt.legend()
    plt.xlabel("c")
    plt.ylabel("P(c)")
    plt.savefig('gambler')
    
    print("Gambler's problem: done")
    

def velocity_dispersions(scale_error=1.0):
    """
    Analyze the velocity dispersions in star clusters/dwarf galaxies.
    """
    # Read in the data and extract the velocity and sigma columns
    data = np.loadtxt('cetus_fors.dat', skiprows=2)
    
    v_hel = data[:,3]
    verr = data[:,4] * scale_error

    # Construct a grid in esimated mean velocity and error
    vmean_1D = np.linspace(-90, -40, 120)
    sigma_1D = np.linspace(0, 35, 100)
    vmean, sigma = np.meshgrid(vmean_1D, sigma_1D)
    
    # Compute the log-likelihood
    sigma_total2 = verr[:,None,None]**2 + sigma[None,:,:]**2
    term1 = np.log(sigma_total2)
    term2 = 0.5 * (v_hel[:,None,None] - vmean[None,:,:])**2 / sigma_total2
    lnL = -0.5 * np.sum(term1, axis=0) - np.sum(term2, axis=0)
    
    # Make a figure of the log-likelihood map and plot some contours
    
    # First we want to know the best values
    lnL_max = lnL.max()
    lnL_argmax = lnL.argmax()
    v_best, sig_best = np.ravel(vmean)[lnL_argmax], np.ravel(sigma)[lnL_argmax]
    
    plt.figure(figsize=(10, 6))
    plt.subplots_adjust(wspace=0.4)
    
    # Plot the map
    plt.subplot(121)
    plt.title("2D ML distribution")
    plt.imshow(lnL, extent=[vmean.min(), vmean.max(), sigma.min(), sigma.max()],
               origin='image', aspect='auto', cmap=plt.cm.spectral,
               vmin=lnL_max-50, vmax=lnL_max)
    
    # Give some info on the best values
    text = r"$\hat{{v}}={:.2f}$ km s$^{{-1}}$"+'\n'+"$\hat{{\sigma}}={:.2f}$ km s$^{{-1}}$"
    text = text.format(v_best, sig_best)
    plt.annotate(text, (0.9,0.9), xycoords='axes fraction', 
                 ha='right', va='top', fontsize='x-large')
    
    # Set the colorbar and give it a label
    cbar = plt.colorbar()
    cbar.set_label(r'$\ln L$', fontsize='x-large')

    # Plot the contours
    levels = [lnL_max, lnL_max-0.5, lnL_max-2, lnL_max-4.5]
    plt.contour(vmean, sigma, lnL, levels=levels, colors='k')

    # Set some labels
    plt.xlabel(r'$v_\mathrm{hel}$ km s$^{-1}$', fontsize='x-large')
    plt.ylabel(r'$\sigma_v$ km s$^{-1}$', fontsize='x-large')
    
    # Plot the marginalised distribution
    plt.subplot(122)
    plt.title("Marginalised distribution")
    lnL = lnL - lnL_max
    marginalised = np.exp(lnL).sum(axis=1)
    marginalised = marginalised / marginalised.max()
    plt.plot(sigma_1D, marginalised, 'k-')
    plt.gca().set_yscale('log')
    plt.ylim(2e-4, 1)
    plt.xlabel(r'$\sigma_v$ km s$^{-1}$', fontsize='x-large')
    plt.ylabel("Relative likelihood")
    plt.savefig("velocity_dispersion_{:.0f}".format(scale_error*100))
    
    print("Velocity dispersion: done")
    # Return the best values, perhaps they come in handy at some point
    return v_best, sig_best


def template_spectrum(wave, sigma, velo, locs=(8498.02, 8542.09, 8662.14),
                                         amps=(150, 250, 200.)):
    """
    Build a template spectrum using a given wavelength range.
    
    sigma denotes the width of the specral lines.
    velo denotes the velocity
    """
    # Shift the location according to the velocity
    cc = 299792.458
    locs = np.array(locs) * (1.0 + velo/cc)
    
    # Make the template spectrum as a sum of Gaussians
    line = 0.0
    for loc, amp in zip(locs, amps):
        line += -amp * np.exp( - 0.5*(wave-loc)**2 / (sigma**2))
    
    return line
    
    
def cross_correlation():
    """
    Compute the cross correlation.
    """
    
    # Read the data, extract the columns, and renormalize the flux
    data = np.loadtxt('scl_flames.dat', skiprows=2)
    wave, flux, error, continuum, template = data.T
    flux = flux / continuum -1
    
    # Define the grid in sigma and velocity
    sigma = np.linspace(0.25,2.25,100)
    velo = np.linspace(-500, 500, 100)
    
    # Prepare an array for the values of the cross-correlation profile
    phis = np.zeros( len(sigma)*len(velo) )
    
    # Run over each combination of sigma and velocity in the grid
    for i, (sig, vel) in enumerate(itertools.product(sigma, velo)):
        
        # Get the template spectrum and normalise it
        T = template_spectrum(wave, sig, vel)
        norm = np.sqrt(np.sum(flux**2) * np.sum(T**2))
        
        # Then get the value of the cross correlation profile
        phis[i] = np.sum(flux * T) / norm
        
    # Reshape the results in a nice grid
    phis = phis.reshape((100,100))
    
    # Make a grid of the CCP, but summed over all sigmas
    plt.figure(figsize=(10,6))
    plt.plot(velo, phis.mean(axis=0))
    plt.axhline(0, linestyle='--', color='k')
    plt.xlim(-500,500)
    plt.xlabel(r"Velocity [km s$^{-1}$]")
    plt.ylabel("Cross-correlation")
    plt.grid()
    plt.savefig('cross_correlation_01')
    
    # Now try the same using the likelihood function
    
    # We'll take a slightly different grid
    sigma = np.linspace(0.25,2.25,100)
    velo = np.linspace(85, 115, 100)
    
    # But effectively do the same, i.e. compute the likelihood for every point
    # in our grid
    lnL = np.zeros( len(sigma)*len(velo) )
    for i, (sig, vel) in enumerate(itertools.product(sigma, velo)):
        T = template_spectrum(wave, sig, vel)
        k_numerator = flux * T / error**2
        k_denominator = T**2 / error**2
        k = np.sum(k_numerator) / np.sum(k_denominator)
        
        lnL[i] = - np.sum( (flux - k*T)**2 / (2.0*error**2))
    
    # Renormalize the loglikelihood
    lnL = lnL - lnL.max()
    
    # And make a figure
    plt.figure()
    
    lnL = lnL.reshape((100,100))
    plt.imshow(lnL, extent=[velo[0],velo[-1], sigma[0],sigma[-1]],
               origin='image', aspect='auto', cmap=plt.cm.spectral,
               vmin=-0.003)
    plt.xlabel(r"$v_\mathrm{hel}$ km s$^{-1}$", fontsize='x-large')
    plt.ylabel(r"$\sigma_T$ [$\AA$]", fontsize='x-large')
    
    cbar = plt.colorbar()
    cbar.set_label("log Likelihood", fontsize='x-large')
    
    velo, sigma = np.meshgrid(velo, sigma)
    plt.contour(velo, sigma, lnL, colors='k', levels=np.linspace(-5e-4,0,10),
                linestyles='solid')
    plt.savefig('cross_correlation_02')
        
    
if __name__ == "__main__":
    
    download_data()
    
    gambler()
    
    velocity_dispersions(scale_error=1.0)
    velocity_dispersions(scale_error=2.0)
    
    cross_correlation()
    
    plt.show()