PhysBac Correlations
Fri Jul 10 2020 13:14:48 GMT+0000 (Coordinated Universal Time)
Saved by
@Isoplexic
#%% Imports
print("PhysBacCorrelations-1: Starting ...")
import numpy as np
import pandas as pd
import scipy
from scipy.stats import linregress
import matplotlib.pyplot as plt
from copy import deepcopy as cpy
from munch import Munch as mch
import numpy as np
# Load the pickled table; the directory and file name are read from the
# configuration object `c`, which is not defined in this part of the file.
df = pd.read_pickle(c.indir + c.infil)
#%% Create dataframe and plot
# Keep only the two columns being correlated and discard every row that has
# an NA in either of them.  After the selection the x column is always the
# first column and the y column is always the second.
df = df[[c.corr_x, c.corr_y]].dropna()
# Pull both columns out as NumPy arrays: x first, then y.
x, y = (np.array(df[column]) for column in (c.corr_x, c.corr_y))
#%% Run statistics
# Fail early with a clear message: np.polyfit would otherwise reject an empty
# sample with a much less helpful error.
if len(x) == 0:
    raise ValueError("x and y are empty; cannot fit a linear regression")

# First-degree least-squares fit; np.polyfit returns the coefficients with the
# highest power first, i.e. (slope, intercept).
p1 = np.polyfit(x, y, 1)
yfit = p1[0] * x + p1[1]  # fitted y value at every observed x
print(yfit)

# Residual and total sums of squares.  They are not used further in this part
# of the script; 1 - ssresid / sstotal is the R-squared of the fit and can be
# used to cross-check r_value**2 below.
yresid = y - yfit
ssresid = np.sum(yresid ** 2)
sstotal = len(y) * np.var(y)  # n * population variance == sum((y - mean(y))**2)

# linregress (imported directly at the top of the file) supplies the
# correlation coefficient and the p-value for the same straight-line fit.
slope, intercept, r_value, p_value, std_err = linregress(x, y)
rsq = "R-squared: %f" % r_value**2  # annotation text for the plot
print(p_value)
#%% Plot
# Run this cell as a whole so the regression line is drawn over the scatter.
plt.plot(x, y, '.')  # raw observations as dots
fitted_line = np.polyval(p1, x)  # fitted line evaluated at the observed x
plt.plot(x, fitted_line, 'r-')  # regression line in solid red

# Axis labels and title come from the configuration object.
plt.xlabel(c.xlabel)
plt.ylabel(c.ylabel)
plt.title(c.title)

# Each annotation is a (text, position) pair; positions are read from the
# configuration object.
annotations = (
    (c.annotation, c.descriptionplacement),
    (rsq, c.rsqplacement),
    ('R: ' + str(r_value), c.rvalueplacement),
    ('P-Value: ' + str(p_value), c.pvalueplacement),
)
for note_text, note_xy in annotations:
    plt.annotate(note_text, note_xy)

plt.savefig(c.plttitle)
print("PhysBacCorrelations-1: ... done.")
content_copyCOPY
Comments