#  LM7.py   
#
#  M.Lampton Jan 2016;  mlampton@SSL.berkeley.edu
#
#  Demonstrate fitting a 2D Gaussian PSF to a bunch of data.
#  Includes a plot of variance vs best fit model value.
#
#  Back Story: Move a photometry fiber to a succession of sky locations
#  in the neighborhood of a known clean (isolated) star.
#  Measure sky and dark at each location (assumes a shutter for the darks).
#
#    Step 1: Use the darks to estimate MEANDARK AND RMSDARK per fiber.
#    Step 1a, optional: use darks & flats to perform variance absolute calibration.
#    Step 2: from the known sky locations and the sky-minus-MEANDARK differences,
#      perform a fit of a Gaussian 2D PSF, having five parameters. 
#
#  Method: Levenberg Marquardt nonlinear least squares fitter for parms
#  PSF parms are: Xcenter, Ycenter, FWHM, Signal, Background/fiber.
#  Units for x, y, fwhm: microns
#  Units for signal: total electrons from star in exposure
#  Units for background: electrons per exposure per fiber.
#  Fit is linear in signal and bkg, nonlinear in x, y, fwhm. 
#
#  Also evaluates the uncertainties on these five final best fit parms.
#
#  This demo uses fake data based on "true" parmeters TRUEP[].
#  But real data of course have an unknown TRUEP[],
#  and the rms error or hNoise[] of each datum is therefore unknown.
#  These rms errors have to be guessed, to assign weights to the
#  deviations between data and model.
#  Here, I do two data analysis runs:
#      First run, using hNoise[] from initial parameter guess hParms[];
#      Second run, using best fit parameters from first run.
# 
#  It is no good to have hNoise[] vary **during** a fitting process.  There will
#  be brief stretches when background has to go negative, and variance goes crazy.
#  Best to hold hNoise[] constant during a fit, then update it for a second fit. 
# 
#  Hints on choosing a good starting point:
#   1. Initial parameters closer to TRUEP are always better.
#   2. Big initial signal helps pinpoint the true peak (tiny signal would get lost in noise).
#   3. Big initial background helps weight all data equally (good for initial peak finding).


# import print_options
import numpy as np
from scipy import linalg
from pylab import *

#  Declare all constants and constant arrays.
#  These appear only on RHSides and are therefore automatically global.
#  I use all UPPERCASE for these global constants.
 
RFIBER  = 55.0;     # fiber radius microns
RMSDARK = 100.0     # rms electrons per fiber
NPARMS  = 5
DELTAP  = 1.0       # parameter step for Jacobian

#  List eachfiber center {x,y} coordinates in microns
#  This is a tidy 5x5 array but could be ratty and irregular
PIXXY = np.array(
        [[-200.0,  -200.0],
         [-100.0,  -200.0],
         [   0.0,  -200.0],
         [ 100.0,  -200.0],
         [ 200.0,  -200.0],
         [-200.0,  -100.0],
         [-100.0,  -100.0],
         [   0.0,  -100.0],
         [ 100.0,  -100.0],
         [ 200.0,  -100.0],
         [-200.0,    -0.0],
         [-100.0,    -0.0],
         [   0.0,    -0.0],
         [ 100.0,    -0.0],
         [ 200.0,    -0.0],
         [-200.0,  +100.0],
         [-100.0,  +100.0],
         [   0.0,  +100.0],
         [ 100.0,  +100.0],
         [ 200.0,  +100.0],
         [-200.0,  +200.0],
         [-100.0,  +200.0],
         [   0.0,  +200.0],
         [ 100.0,  +200.0],
         [ 200.0,  +200.0]])
NPTS = len(PIXXY)

#  List the sample averaging points with one fiber.
#  I chose 19 {x,y} points within unit radius circle.
#  Remember to multiply these by RFIBER to get microns.
AVEXY = np.array(
        [[ 0.000000,  0.000000],  # origin
         [ 0.346410,  0.200000],  # first circle of six; r=0.4
         [ 0.000000,  0.400000],
         [-0.346410,  0.200000],
         [-0.346410, -0.200000],
         [-0.000000, -0.400000],
         [ 0.346410, -0.200000],
         [ 0.800000,  0.000000],  # second circle of twelve; r=0.8
         [ 0.692820,  0.400000],
         [ 0.400000,  0.692820],
         [ 0.000000,  0.800000],
         [-0.400000,  0.692820],
         [-0.692820,  0.400000],
         [-0.800000,  0.000000],
         [-0.692820, -0.400000],
         [-0.400000, -0.692820],
         [-0.000000, -0.800000],
         [ 0.400000, -0.692820],
         [ 0.692820, -0.400000]])
NAVE = len(AVEXY)


#  Set up the global host parameters and arrays.
#  Parms are....... Xc,um  Yc,um  FWHM,um  Signal   Bkg/fiber
TRUEP   = np.array([125.0, -95.0,  150.0,  30000.0,  1000.0])  # true parms
hParms  = np.array([ 0.0,   0.0,   100.0,  50000.0,  3000.0])  # initial guess
hData   = np.zeros(NPTS)           # fake data
hNoise  = np.zeros(NPTS)           # predicted from TRUEP
hJacob  = np.zeros((NPTS,NPARMS))  # rows,cols of jacobian matrix
hSigma  = np.zeros(NPARMS)         # derived parameter rms errors


#----Freestanding functions not bound to global host arrays---------

def psf(r, fwhm):
# Here I model our PSF as a 2D Gaussian with given FWHM in microns.
# "r" is the radial distance from the center of this Gaussian.
# Returns the density of electrons per square micron, per event.
    s = fwhm/2.35
    if s <= 0:
      return 0.
    arg = 0.5*(r/s)**2
    if arg > 15:
      return 0.
    return math.exp(-arg)/(2.*math.pi*s**2)
    
def getNoiselessData(parms):
# Here I integrate the PSF falling into each fiber.
# Best fit case gives roughly half the light going in.
    clean = np.zeros(NPTS)
    dArea = (math.pi * RFIBER**2)/NAVE;   # square microns
    for i in range(0, NPTS):
        sum = parms[4]
        for j in range(0, NAVE):
            dx = PIXXY[i][0] - parms[0] + RFIBER*AVEXY[j][0]
            dy = PIXXY[i][1] - parms[1] + RFIBER*AVEXY[j][1]
            r = math.sqrt(dx**2 + dy**2)
            sum += parms[3] * dArea * psf(r, parms[2])
        clean[i] = sum
    return clean
    
def chisq(parms, data, noise):
# Here I evaluate resid and chisquared for any given parameter vector.
    model = getNoiselessData(parms)
    resid = np.zeros(NPTS)
    sumsq = 0.0
    for i in range(0, NPTS):
        resid[i] = (model[i] - data[i])/noise[i]
        sumsq += resid[i]**2
    return resid, sumsq
    
def func(which, bestfit, dx):
# Here I create a 1D explorer of the multidimensional chisq function.
# "Which" specifies which parameter to deviate by "dx" from bestfit parms[].
# Called only by getSigmas().
    trial = np.zeros(NPARMS)
    for i in range(0, NPARMS):
        trial[i] = bestfit[i]
    trial[which] += dx
    dummy, sostrial = chisq(trial, hData, hNoise)
    return sostrial
    
def getSigmas(bestfit, sig):
# Here I evaluate the parameter sigmas using curvature along each axis.
    delta = 1.0
    for i in range(0, NPARMS):
        fa = func(i, bestfit, -delta)
        fb = func(i, bestfit, 0.)
        fc = func(i, bestfit, delta)
        denom = math.sqrt(0.5*fa + 0.5*fc - fb)
        sig[i] = delta/denom
    return sig
    
    
#-----Bound functions that read/write host arrays-----------

def setData():
# Sets host array hData[] including true noise.
    global hData
    hData = getNoiselessData(TRUEP)                      # always positive
    # noise = np.zeros(NPTS)
    for i in range(0, NPTS):
        read = np.random.normal() * RMSDARK              # Gaussian term
        shot = np.random.normal() * math.sqrt(hData[i])  # Poisson term
        hData[i] += read + shot                          # can now go negative
    
def setNoise():
# Sets hNoise using initial hParms model.
# A second run should give improved hParms and weights!
    model = getNoiselessData(hParms)                     # always positive
    for i in range(0, NPTS):
        hNoise[i] = math.sqrt(RMSDARK**2 + model[i])     # always positive

def doJacob():
# Builds new host Jacobian at current hParms, once per iteration.
# Writes to host hParms[] and hJacob[][] by element so no global declaration needed.
    for j in range(0, NPARMS):
        hParms[j] += DELTAP
        resid, sos = chisq(hParms, hData, hNoise)
        for i in range(0, NPTS):
            hJacob[i][j] = resid[i]
        hParms[j] -= 2*DELTAP
        resid, sos = chisq(hParms, hData, hNoise)
        for i in range(0, NPTS):
            hJacob[i][j] -= resid[i]
        hParms[j] += DELTAP
        for i in range(0, NPTS):
            hJacob[i][j] *= 0.5/DELTAP

def showParms(i, sos, loglam):
    print '%4i'%i, '%10.2f'%sos, '%6.2f'%loglam, '%8.2f'%hParms[0], '%8.2f'%hParms[1], '%8.2f'%hParms[2], '%8.2f'%hParms[3], '%8.2f'%hParms[4]


#-----Define the LM host and iteration methods-----------
#-----These have their own working constants and arrays-----
#----These appear only on RHSides and are therefore global------
#----In contrast, items that appear on LHSides are assumed local-------

LMITER      = 100
LMBOOST     = 1.5
LMSHRINK    = 0.1
LAMINIT     = 0.01
LAMMAX      = 1E3
LMTOL       = 1E-6

#---globals for iteration; useful for second run picking up from first run----
niter = 0
damping = LAMINIT


#---LM scalar "hDamping" is created and owned by LMhost; 
#---it is passed to LMiter() via LMhost's scope;
#---which then returns it to LMhost within a tuple.  

def LMiter(myDamping):          
# Does one LM iteration; uses scipy linalg.inv(A) matrix inverter.
# This is called by LMhost() which is the host of the process. 
# These four arrays are strictly local i.e. internal to this iteration.
    delta   = np.zeros(NPARMS)
    beta    = np.zeros(NPARMS)
    alpha   = np.zeros((NPARMS,NPARMS)) 
    amatrix = np.zeros((NPARMS,NPARMS))     
    resid, sos = chisq(hParms, hData, hNoise)

    showParms(niter, sos, math.log10(myDamping))
    sosprev = sos
    
    doJacob()                               # once per iteration

    for k in range(0, NPARMS):              # get the downhill gradient
        beta[k] = 0.
        for i in range(0, NPTS):
            beta[k] -= resid[i]*hJacob[i][k]

    for k in range(0, NPARMS):               # get the curvature matrix
        for j in range(0, NPARMS):
            alpha[j][k] = 0.
            for i in range(0, NPTS):
                alpha[j][k] += hJacob[i][j]*hJacob[i][k]
                
    while True:                              # repeat for any uphill steps
        for k in range(0, NPARMS):
            for j in range(0, NPARMS):
                amatrix[j][k] = alpha[j][k]  # copy alpha
                if j == k:
                    amatrix += myDamping     # apply some damping
                    
        amatrix = linalg.inv(amatrix)        # invert it
        
        for k in range(0, NPARMS):           # get a trial step
            delta[k] = 0.
            for j in range(0, NPARMS):
                delta[k] += amatrix[j][k]*beta[j]
        trial = hParms + delta
        
        resid, sos = chisq(trial, hData, hNoise)
        
        relativeRise = (sos - sosprev)/(1+sos)
        
        if relativeRise > LMTOL:             # stay in this loop; more damping.
            myDamping *= LMBOOST     
            print "more myDamping = ", myDamping
            if myDamping > LAMMAX:    
                print "Bailing out!"
                done = True  
                break                      
        elif relativeRise > -LMTOL:          # level exit; all done
            # print "Level iter; exitting"
            done = True
            if relativeRise < 0:
                for k in range(0, NPARMS):
                    hParms[k] = trial[k]
            break
        else:                                # exit this loop but iterate
            # print "nice drop; shrink damping and iterate"
            myDamping *= LMSHRINK
            done = False
            for k in range(0, NPARMS):
                hParms[k] = trial[k]
            break
        
    return done, myDamping
    

def LMhost():   
# Here is the host for the LM iterations.
# damping is created here and used to/from LMiter().
    global niter, damping
    while True:
        niter += 1
        done, damping = LMiter(damping)
        if done: 
           break
    print "Total iterations = %d " % niter
    return


def showResults():
    print "Results:  true parms, best fit, parmError:"
    print '   Xcenter {:8.2f}  {:8.2f}  {:8.2f}'.format(TRUEP[0], hParms[0], hSigma[0])
    print '   Ycenter {:8.2f}  {:8.2f}  {:8.2f}'.format(TRUEP[1], hParms[1], hSigma[1])
    print '      FWHM {:8.2f}  {:8.2f}  {:8.2f}'.format(TRUEP[2], hParms[2], hSigma[2])
    print '    Signal {:8.2f}  {:8.2f}  {:8.2f}'.format(TRUEP[3], hParms[3], hSigma[3])
    print '       Bkg {:8.2f}  {:8.2f}  {:8.2f}'.format(TRUEP[4], hParms[4], hSigma[4])


#-------------DONE WITH DEFINITIONS---------------------
#------------START THE PROGRAM--------------------------

print "\nCreate and show noisy data[] in 5x5 image map format..."
print "Idea is to show what a robotically-stepped fiber image looks like."
print "Expect to get roughly 1/3 of the total starlight into best location"

# run 1
setData()                         # Uses TRUEP.
setNoise()                        # Uses initial guess hParms to set hNoise.
temp = hData.reshape((5,5))       # Reshape to make a 5x5 image.
np.set_printoptions(precision=1)  # Drops trailing zeros, uses tabs to set columns.
print temp
LMhost()                          # Initial fit of the parameters.
getSigmas(hParms, hSigma)         # Evaluate the parameter errors.
showResults()                     # run 1.

# run 2
setNoise()                        # Now using refined hParms to set hNoise.
LMhost()                          # Fit again with improved data weighting.
getSigmas(hParms, hSigma)         # Evaluate the parameter errors.
showResults()                     # run 2.


#----------CREATE A PLOT: MODEL VARIANCE VS MODEL MEAN---------
#-------here the model is for the best fit parameter set-------
#--extrapolation to zero should give dark read noise variance--
#-----unit slope reveals Poisson arrival influence on data-----

# modelmean = getNoiselessData(hParms)
# setNoise()  # use second pass hNoise; probably unnecessary.
# modelvar = np.zeros(NPTS)
# for i in range(0, NPTS):
#     modelvar[i] = hNoise[i]**2
# plot(modelmean, modelvar, marker='o', linestyle='--', color='r')
# xlabel('Model Prediction, electrons')
# ylabel('Model Variance, electrons squared')
# grid(True)
# savefig('VarVsMean')
# show()