Skip to main content

Non Parametric Regression

<font color = red> Parametric and non-parametric regression </font>

In this example, I will show the difference between using a parametric and non-parametric model to fit a set of data

In [1]:
## importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pyfits
from scipy.optimize import curve_fit
In [2]:
## loading data (again a brightness image and a mganetic field map)
dir = '/home/fatima/Desktop/project_1/sunrise_1/'
B = pyfits.getdata(dir+'B_los.fits',ignore_missing_end =True)
C = pyfits.getdata(dir+'line_core.fits', ignore_missing_end =True)
In [3]:
## Averaging the data points
b = np.abs(B)
s = C

b = b.ravel()
s = s.ravel()

points = zip(b,s)


points.sort()


bs = [p[0] for p in points]
ss = [p[1] for  p in points]


points_bin = 200 #number of data points per bin
bins =(len(b)/points_bin)

Bin = np.zeros((bins,points_bin))
Sin = np.zeros((bins,points_bin))

h=n=0
while(h<bins):

    Bin[h] = bs[n:n+points_bin]
    Sin[h] = ss[n:n+points_bin]
    n = n+points_bin
    h = h+1

meanB = np.zeros(bins)
meanS = np.zeros(bins)
stdS = np.zeros(bins)

#computing the average of X and Y and standard deviation of Y in each bin 
for i in range(bins):
 meanB[i] = (Bin[i]).mean()
 meanS[i] = (Sin[i]).mean()
 stdS[i] = (Sin[i]).std()
In [10]:
## plotting data points
csfont = {'fontname':'Helvetica'}
fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(111)
ax.scatter(b,s,color='black',marker='.',s=0.5,alpha=0.6)
ax.axhline(y=1,linestyle='dashed',color='r',linewidth=2)
ax.plot(meanB,meanS,color='r')
ax.set_xlabel('LOS Magnetic Field (G)',fontsize=18,**csfont)
ax.set_ylabel('Stokes I continuum contrast',fontsize=18,**csfont)
ax.set_xlim(0,2000)
ax.set_ylim(0.4,2.4) #for continuum contrast

major_ticks = np.arange(0, 2000, 200)
minor_ticks = np.arange(0, 2000, 50)

major_ticks_y = np.arange(0.4,2.4, 0.2) #continuum 


minor_ticks_y = np.arange(0.4, 2.4, 0.05) 

  
ax.set_xticks(major_ticks)
ax.set_xticks(minor_ticks, minor = True)
ax.set_yticks(major_ticks_y)
ax.set_yticks(minor_ticks_y, minor = True)
ax.tick_params(axis = 'both', which = 'major', length=6, width=1,labelsize = 11)
ax.tick_params(axis = 'both', which = 'minor', length=3, width=1)

Now I will try to model the averaged data points shown in the red curve using two functions, a logarithmic and a power law. I will be using the library scipy.curve_fit

In [5]:
from scipy.optimize import curve_fit

First we have to define the functions

In [4]:
## power law function
def power(x,a,b,c):
 y = a*(x**b) +c
 return y

## Logarithmic function
def log(x,alpha,beta):
 y = alpha*np.log10(x)+beta
 return y
In [5]:
## Let's redefine the X and Y values of the averaged red curve:
X = meanB
Y = meanS

## Let's take the data points corresponding to X > 50
ind = np.where(X > 50)
X = X[ind]
Y = Y[ind]
In [6]:
popt_pl, pcov_pl = curve_fit(power,X,Y)
popt_log, pcov_log = curve_fit(log,X,Y)

Let's plot now the fitted curves

In [7]:
xnew = np.linspace(X.min(),b.max(),1000)
ynew_pl = power(xnew, *popt_pl) #modelled data with a power law function
ynew_log = log(xnew, *popt_log) #modelled data with a logarithmic function
In [24]:
csfont = {'fontname':'Helvetica'}

fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(111)
ax.scatter(b,s,color='black',marker='.',s=0.5,alpha=0.6)
ax.axhline(y=1,linestyle='dashed',color='r',linewidth=2)
ax.plot(meanB,meanS,color='r')
ax.plot(xnew, ynew_pl, label='power law fit')
ax.plot(xnew,ynew_log, label='logarithmic fit')
ax.set_xlabel('LOS Magnetic Field (G)',fontsize=18,**csfont)
ax.set_ylabel('Stokes I continuum contrast',fontsize=18,**csfont)
ax.set_xlim(0,2000)
ax.set_ylim(0.4,2.4) #for continuum contrast

major_ticks = np.arange(0, 2000, 200)
minor_ticks = np.arange(0, 2000, 50)

major_ticks_y = np.arange(0.4,2.4, 0.2) #continuum 


minor_ticks_y = np.arange(0.4, 2.4, 0.05) 

  
ax.set_xticks(major_ticks)
ax.set_xticks(minor_ticks, minor = True)
ax.set_yticks(major_ticks_y)
ax.set_yticks(minor_ticks_y, minor = True)
ax.tick_params(axis = 'both', which = 'major', length=6, width=1,labelsize = 11)
ax.tick_params(axis = 'both', which = 'minor', length=3, width=1)
plt.legend(loc='lower right')
Out[24]:
<matplotlib.legend.Legend at 0x7fe86d190410>

So far we have seen that data points can be modelled with parametric functions, meaning that we assume the data to follow a certain trend corresponding to a certain mathmatical function. Another form of regression is the non-parametric regression (NPR), where we assume nothing about the behaviour of data points.

<font color = red> For this purpose, I will make use of the python tool box pyqt-fit </font>

In [1]:
import pyqt_fit
from pyqt_fit import npr_methods
import pyqt_fit.nonparam_regression as smooth

In non-parametric regression, one has to define which method to use to compute the modelled value within a certain range of x, the bandwidth.

<font color = red> Methods can be called from Methods </font>

In [9]:
h = 100
Method =npr_methods.LocalPolynomialKernel1D(q=3)
k = smooth.NonParamRegression(b,s, method=Method,bandwidth=h)
k.fit()
xnew = np.linspace(b.min(),b.max(),1000)
In [ ]:
csfont = {'fontname':'Helvetica'}

fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(111)
ax.scatter(b,s,color='black',marker='.',s=0.5,alpha=0.6)
ax.axhline(y=1,linestyle='dashed',color='r',linewidth=2)
ax.plot(meanB,meanS,c='r',linewidth=4,label='Binning/500 points')
ax.plot(xnew,k(xnew),'g',lw=2,label='NPR/Cubic local-Polynomial')
ax.set_xlabel('LOS Magnetic Field (G)',fontsize=18,**csfont)
ax.set_ylabel('Stokes I continuum contrast',fontsize=18,**csfont)
ax.set_xlim(0,2000)
ax.set_ylim(0.4,2.4) #for continuum contrast

major_ticks = np.arange(0, 2000, 200)
minor_ticks = np.arange(0, 2000, 50)

major_ticks_y = np.arange(0.4,2.4, 0.2) #continuum 


minor_ticks_y = np.arange(0.4, 2.4, 0.05) 

  
ax.set_xticks(major_ticks)
ax.set_xticks(minor_ticks, minor = True)
ax.set_yticks(major_ticks_y)
ax.set_yticks(minor_ticks_y, minor = True)
ax.tick_params(axis = 'both', which = 'major', length=6, width=1,labelsize = 11)
ax.tick_params(axis = 'both', which = 'minor', length=3, width=1)
plt.legend(loc='lower right')
In [ ]: