# Snippet saved Wed Jul 08 2020 22:49:17 GMT+0000 (Coordinated Universal Time)
# Saved by @Isoplexic
# Linear regression of c.corr_y against c.corr_x for rows at depth c.depth:
# fit a first-degree polynomial, report R^2 and the p-value, and save a scatter
# plot with the fitted line overlaid.

# Filter on depth BEFORE narrowing to the two regression columns. Once only
# c.corr_x / c.corr_y are kept, the `surface_bottom` column is gone and can no
# longer be used as a filter.
df = df[df["surface_bottom"] == c.depth]
df = df[[c.corr_x, c.corr_y]]  # Keep only the necessary values
# The x value will always be in the first column
# The y value will always be in the second column
df = df.dropna()  # Drop rows where either remaining column is NA
x = np.array(df[c.corr_x])  # Set x array from location
y = np.array(df[c.corr_y])  # Set y array from location
#%% Run statistics
p1 = np.polyfit(x, y, 1)  # first-degree fit; p1 is (slope, intercept)
yfit = p1[0] * x + p1[1]  # fitted y at each observed x
print(yfit)
yresid = y - yfit
ssresid = np.sum(yresid ** 2)  # residual sum of squares
sstotal = len(y) * np.var(y)  # total sum of squares about the mean of y
rsq = 1 - ssresid / sstotal
print(rsq)  # r squared value from the polynomial fit
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)
print(pow(r_value, 2))  # same value as previous rsq
print(p_value)
#%% Plot
# run all together to overlay regression slope on top of data points
plt.plot(x, y, '.')  # data points only
plt.plot(x, yfit, 'r-')  # linear regression only (same values as polyval(p1, x))
plt.xlabel(c.xlabel)
plt.ylabel(c.ylabel)
plt.title(c.title)
# NOTE(review): the annotation positions below are hard-coded (0, 27) and
# (0, 25); confirm they fall inside the plotted range for the chosen columns.
plt.annotate('R2: ' + str(rsq), (0, 27))
plt.annotate('P-Value: ' + str(p_value), (0, 25))
plt.savefig(c.plttitle)
# content_copyCOPY
# Comments