Non parametric Regression functions applied on the scatterplots of brightness vs. Field strength
This code contains all the python functions one could use to perform Non parametric regression on scattered data points:
import numpy as np import matplotlib.pyplot as plt import pyqt_fit import pyqt_fit.nonparam_regression as smooth from pyqt_fit import npr_methods import statsmodels import statsmodels.api as sm import pyfits from scipy.optimize import curve_fit lowess = sm.nonparametric.lowess from pyqt_fit import kernels im = pyfits.open('B_inv.fits') B = im[0].data x=abs(B) x = np.ravel(x) im = pyfits.open('Cont_sl0.fits') y = im[0].data y = np.ravel(y) #f = np.loadtxt('sub_data') #x = f[:,0] #y = f[:,1] #f = np.loadtxt('data') #x = f[:,0] #y = f[:,1] def lin_log(x,a,b): y = a*np.log10(x)+b return y ################################### my old way of binning def Binning(x,y,bandwidth): bins = np.arange(x.min(),x.max(),bandwidth) idx = np.digitize(x,bins) mean = [np.mean(y[idx==k]) for k in range(len(bins))] std = [y[idx==k].std() for k in range(len(bins))] f1 = np.loadtxt('bins_b_c_inv') a = f1[:,0] b = f1[:,1] plt.plot(a,b,label='500 pts') plt.plot(bins-bandwidth/2,mean,'r--',lw=4,alpha=.8,label='bandwidth') plt.show() ########################################################## def Method(i): if i==1: method = npr_methods.SpatialAverage() if i==2: method = npr_methods.LocalPolynomialKernel(q=1) if i==3: method = npr_methods.LocalPolynomialKernel(q=2) if i==4: method = npr_methods.LocalPolynomialKernel(q=3) return method ########################################################## LOWESS (statsmodels) def LOWESS(x,y,f): ind = np.where((x>50) &(x<2000)) x = x[ind] y = y[ind] #d = len(x)*0.01 z = lowess(y,x,f) #k0 = smooth.NonParamRegression(x,y,method=npr_methods.SpatialAverage()) #k0.fit() #k1 = smooth.NonParamRegression(x, y, method=npr_methods.LocalPolynomialKernel()) #k1.fit() #k2 = smooth.NonParamRegression(x, y, method=npr_methods.LocalPolynomialKernel1D(q=2)) #k2.fit() #xnew = np.linspace(x.min(),x.max(),1000) plt.scatter(x,y,s=0.1) #plt.plot(xnew,k0(xnew),c='red',lw=1.5,label='spatial averaging') #plt.plot(xnew,k1(xnew),c='yellow',lw=1.5,label='Local Poly(q=1)') #plt.plot(xnew,k2(xnew),c='gray',lw=1.5,label='Local Poly(q=2)') f1 = np.loadtxt('bins') a = f1[:,0] b = f1[:,1] ind2 = np.where(a>90) xnew2 = np.linspace(a[ind2].min(),2000,1000) popt, pcov = curve_fit(lin_log,a[ind2],b[ind2]) ynew = lin_log(xnew2,*popt) plt.plot(a,b,'r',lw=2,label='binning/500 pts') plt.plot(xnew2,ynew,'--b',label='logarithmic fit') plt.plot(z[:,0],z[:,1],'g',lw=1.5,label='LOWESS') plt.xlabel('B_los',fontsize=16) plt.ylabel('Contrast',fontsize=16) plt.xlim(x.min(),2000) plt.ylim(0.6,1.4) plt.legend(loc='lower right') plt.show() ############################ nonparam regression using pyqt_fit def NonParamReg(x,y,f): ind = np.where((x>20)&(x<2000)) x = x[ind] y = y[ind] z = lowess(y,x,f) k0 = smooth.NonParamRegression(x,y,method=npr_methods.SpatialAverage()) k0.fit() k1 = smooth.NonParamRegression(x, y, method=npr_methods.LocalPolynomialKernel()) k1.fit() k2 = smooth.NonParamRegression(x, y, method=npr_methods.LocalPolynomialKernel1D(q=2)) k2.fit() k3 = smooth.NonParamRegression(x, y, method=npr_methods.LocalPolynomialKernel1D(q=3)) k3.fit() plt.scatter(x,y,s=0.1) plt.xlabel('B_los',fontsize=16) plt.ylabel('Line core Contrast',fontsize=16) plt.xlim(x.min(),2000) plt.ylim(0.5,2.5)#(0.6,1.4) xnew = np.linspace(x.min(),x.max(),1000) f = np.loadtxt('bins') a = f[:,0] b = f[:,1] ind2 = np.where(a>100) popt, pcov = curve_fit(lin_log,a[ind2],b[ind2]) ynew = lin_log(xnew,*popt) plt.plot(xnew,k0(xnew),lw=1.5,label='spatial averaging') plt.plot(xnew,k1(xnew),lw=1.5,label='Local Poly(q=1)') plt.plot(xnew,k2(xnew),lw=1.5,label='Local Poly(q=2)') plt.plot(xnew,ynew,lw=1.5,label='Logarithmic fit') #plt.plot(xnew,k3(xnew),c='gray',lw=1.5,label='Local Poly(q=3)') plt.plot(a,b,'--',lw=1.5,label='binning') plt.plot(z[:,0],z[:,1],lw=1.5,label='LOWESS') plt.legend(loc='lower right') #f = open('spatial_av','w') #for i in range(k.fitted_ydata.shape[0]): # f.write(str(xnew[i])+' '+str(k(xnew)[i])+'\n') #d ddf.close() plt.savefig('LC_allfits') NonParamReg(x,y,method) plt.clf() #method = npr_methods.SpatialAverage() #method=npr_methods.LocalPolynomialKernel1D(q) ############### comments #smooth.NonParamRegression class takes as parameters the bandwidth, the covariance, the kernel function, the regression function #LOWESS function is different ############################################# best span=0.75 def LOWESS2(x,y): ind = np.where((x>20)&(x<2000)) x = x[ind] y = y[ind] z1 = lowess(y,x,0.1) z2 = lowess(y,x,0.5) z3 = lowess(y,x,0.8) xnew = np.linspace(x.min(),x.max(),1000) plt.scatter(x,y,s=0.1) plt.plot(z1[:,0],z1[:,1],'b',lw=1.5,label='LOWESS/0.1') plt.plot(z2[:,0],z2[:,1],'r',lw=1.5,label='LOWESS/0.5') plt.plot(z3[:,0],z3[:,1],'g',lw=1.5,label='LOWESS/0.8') f1 = np.loadtxt('bins') a = f1[:,0] b = f1[:,1] plt.plot(a,b,'--g',lw=2,label='binning/500 pts') plt.xlabel('B_los',fontsize=16) plt.ylabel('Contrast',fontsize=16) plt.xlim(x.min(),2000) plt.ylim(0.6,1.4) plt.legend(loc='lower right') plt.show() ###################################### same regression method, same kernel, different bandwidths Method = npr_methods.LocalPolynomialKernel1D(q=3) Method =npr_methods.SpatialAverage() def nonparamreg(x,y,h,Method): ind = np.where((x>30)&(x<2000)) x = x[ind] y = y[ind] xnew = np.linspace(x.min(),x.max(),1000) plt.scatter(x,y,s=0.1) plt.title('Regression method ='+str(Method)) plt.xlim(0,2000) plt.ylim(0.6,1.4) f1 = np.loadtxt('bins_b_c_inv') a = f1[:,0] b = f1[:,1] plt.plot(a,b,'--',lw=2,label='binning/500 pts') #z = lowess(y,x,0.75) #plt.plot(z[:,0],z[:,1],lw=1.5,label='LOWESS/0.75') k = smooth.NonParamRegression(x, y, method=Method) k.fit() plt.plot(xnew,k(xnew),lw=1.5,label='default=%.1f'%k.bandwidth+'G') for i in range(10,h,4): k = smooth.NonParamRegression(x, y, method=Method,bandwidth=i) k.fit() plt.plot(xnew,k(xnew),lw=1.5,label='bandwidth=%.1f'%k.bandwidth+'G') plt.legend() plt.show() ########################### same regression method, different kernels def nonparamreg(x,y,Method): #Method = npr_methods.LocalPolynomialKernel1D(q=2) ind = np.where((x>80)&(x<2000)) x = x[ind] y = y[ind] xnew = np.linspace(x.min(),x.max(),1000) plt.scatter(x,y,s=0.1) plt.title('Regression method = Spatial Averaging') plt.xlim(0,2000) plt.ylim(0.6,1.4) f1 = np.loadtxt('bins') a = f1[:,0] b = f1[:,1] k1 = smooth.NonParamRegression(x, y, method=Method) #Gaussian k2 = smooth.NonParamRegression(x, y, method=Method,kernel=kernels.tricube()) k3 = smooth.NonParamRegression(x, y, method=Method,kernel=kernels.Epanechnikov()) k1.fit(); k2.fit(); k3.fit() plt.plot(xnew,k1(xnew),lw=1.5,label='Gaussian Kernel') plt.plot(xnew,k2(xnew),lw=1.5,label='Tricube Kernel') plt.plot(xnew,k3(xnew),lw=1.5,label='Epanechnikov Kernel') plt.legend() plt.show() ################################################ different regression methods, same kernel, same bandwidth #### spline smoothing statsmodels.quantreg model = QuantReg(response, X) fitted = model.fit(q=0.1) print(fitted.summary()) #############################################################