# Linear-regression script: fits y against x for one depth level, prints the
# fit statistics and saves a scatter plot with the regression line overlaid.
#
# Relies on names defined outside this section: `df` (a pandas DataFrame),
# `c` (an object providing corr_x, corr_y, depth, xlabel, ylabel, title and
# plttitle), plus `np`, `scipy.stats` and `plt`.

#%% Prepare data
# Filter by depth BEFORE narrowing the columns: the `surface_bottom` column is
# needed for this comparison and would no longer exist once only the two
# correlated columns are kept.
df = df[df["surface_bottom"] == c.depth]

# Create data frame only containing necessary values
# The x value will always be in the first column
# The y value will always be in the second column
df = df[[c.corr_x, c.corr_y]]
df = df.dropna()  # Drop rows that contain any NA values (in x or y only)

x = np.array(df[c.corr_x])  # Set x array from location
y = np.array(df[c.corr_y])  # Set y array from location

# Fail with a clear message instead of the opaque error np.polyfit raises on
# empty input.
if x.size == 0:
    raise ValueError(
        "No rows left after filtering by depth and dropping NA values; "
        "nothing to fit."
    )

#%% Run statistics
p1 = np.polyfit(x, y, 1)  # will return (slope, intercept) to the first degree
yfit = p1[0] * x + p1[1]  # linear fit based on actual values
print(yfit)

# R squared computed by hand: 1 - SS_residual / SS_total
yresid = y - yfit
ssresid = np.sum(yresid ** 2)
sstotal = len(y) * np.var(y)  # n * population variance == sum((y - mean)^2)
rsq = 1 - ssresid / sstotal
print(rsq)  # find r squared value

# Cross-check with scipy, which also supplies the p-value
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)
print(r_value ** 2)  # same value as previous rsq
print(p_value)

#%% Plot
# run all together to overlay regression slope on top of data points
plt.plot(x, y, '.')  # data points only
plt.plot(x, yfit, 'r-')  # linear regression only (same values as polyval(p1, x))
plt.xlabel(c.xlabel)
plt.ylabel(c.ylabel)
plt.title(c.title)
# NOTE(review): these positions are in data coordinates (matplotlib's default
# for annotate), so the labels may fall outside the visible axes for data sets
# with a different range -- confirm they suit the data being plotted.
plt.annotate('R2: ' + str(rsq), (0, 27))
plt.annotate('P-Value: ' + str(p_value), (0, 25))
plt.savefig(c.plttitle)
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter