# to save
#
# PHOTO EMBED
#
# Wed Jul 08 2020 22:49:17 GMT+0000 (Coordinated Universal Time)
#
# Saved by @Isoplexic

# Prepare the regression inputs.
#
# Filter by depth FIRST: the column selection below discards every column
# except the two being correlated, so filtering on surface_bottom afterwards
# raises AttributeError (unless it happens to be one of those two columns).
df = df[df["surface_bottom"] == c.depth]

# Create data frame only containing necessary values
# The x value will always be in the first column
# The y value will always be in the second column
df = df[[c.corr_x, c.corr_y]]
df = df.dropna()  # Drop rows where either x or y is NA

x = np.array(df[c.corr_x])  # Set x array from location
y = np.array(df[c.corr_y])  # Set y array from location

#%% Run statistics
# Fits a first-degree least-squares line to (x, y), prints the fitted values,
# the coefficient of determination computed by hand, the same quantity from
# scipy's linregress as a cross-check, and the regression p-value.

# A line needs at least two points; fail early with a clear message instead
# of an obscure error (or NaNs) from polyfit / linregress.
if len(x) < 2:
    raise ValueError(
        "Need at least 2 data points for a linear regression, got "
        + str(len(x))
    )

p1 = np.polyfit(x, y, 1)  # returns (slope, intercept) for a degree-1 fit

yfit = p1[0] * x + p1[1]  # linear fit evaluated at the actual x values
print(yfit)

yresid = y - yfit
# np.sum is vectorised; the built-in sum() would loop over the array in Python.
ssresid = np.sum(yresid ** 2)      # residual sum of squares
sstotal = len(y) * np.var(y)       # total sum of squares (population variance * n)
rsq = 1 - ssresid / sstotal
print(rsq)  # find r squared value

slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)
print(r_value ** 2)  # same value as previous rsq

print(p_value)

#%% Plot
# run all together to overlay regression slope on top of data points
plt.plot(x, y, '.')                      # data points only
plt.plot(x, np.polyval(p1, x), 'r-')     # linear regression only
plt.xlabel(c.xlabel)
plt.ylabel(c.ylabel)
plt.title(c.title)

# Anchor the statistics in axes-fraction coordinates (0-1 across the axes)
# rather than fixed data coordinates, so the labels stay inside the plot
# whatever the range of the configured x / y columns.
plt.annotate('R2: ' + str(rsq), (0.05, 0.95),
             xycoords='axes fraction', va='top')
plt.annotate('P-Value: ' + str(p_value), (0.05, 0.88),
             xycoords='axes fraction', va='top')

plt.savefig(c.plttitle)
# content_copyCOPY