PhysBac Correlations

PHOTO EMBED

Fri Jul 10 2020 13:14:48 GMT+0000 (Coordinated Universal Time)

Saved by @Isoplexic

#%% Imports
print("PhysBacCorrelations-1: Starting ...")

import numpy as np
import pandas as pd
import scipy
from scipy.stats import linregress
import matplotlib.pyplot as plt
from copy import deepcopy as cpy
from munch import Munch as mch
import numpy as np

# Load the pickled table whose location is given by the config object `c`.
# NOTE: unpickling can execute arbitrary code, so only point this at trusted files.
df = pd.read_pickle(c.indir + c.infil)

#%% Select the two columns to correlate

# Column order is fixed: x first, y second.
columns = [c.corr_x, c.corr_y]

# Keep only those two columns and discard every row where either one is NA.
df = df[columns].dropna()

# Plain NumPy copies of the two columns, in the same (x, y) order.
x, y = (np.array(df[name]) for name in columns)

#%% Run statistics

# Both fits below need at least two (x, y) pairs; fail early with a clear
# message instead of an opaque error raised from inside NumPy/SciPy.
if len(x) < 2:
    raise ValueError(
        "PhysBacCorrelations-1: need at least 2 (x, y) pairs for a linear "
        "regression, got %d" % len(x)
    )

# Degree-1 least-squares fit: p1[0] is the slope, p1[1] is the intercept.
p1 = np.polyfit(x, y, 1)

yfit = p1[0] * x + p1[1] # fitted y at every observed x
print(yfit)

yresid = y - yfit
ssresid = np.sum(yresid ** 2) # residual sum of squares (vectorized, no Python-level loop)
sstotal = len(y) * np.var(y) # total sum of squares: n times the population variance

# Use the directly imported linregress rather than relying on `scipy.stats`
# happening to be loaded as an attribute of the `scipy` package.
slope, intercept, r_value, p_value, std_err = linregress(x, y)

# For a simple linear fit with intercept, r_value**2 equals
# 1 - ssresid / sstotal up to floating-point rounding.
rsq = ("R-squared: %f" % r_value**2)
print(p_value)

#%% Plot
# Execute this cell as a whole so the fitted line is drawn over the scatter.

# Raw observations as point markers.
plt.plot(x, y, '.')

# Fitted straight line, evaluated at the observed x values, in red.
fit_line = np.polyval(p1, x)
plt.plot(x, fit_line, 'r-')

# Axis labels and title come from the config object.
plt.xlabel(c.xlabel)
plt.ylabel(c.ylabel)
plt.title(c.title)

# Each entry pairs an annotation text with the position taken from the config.
notes = [
    (c.annotation, c.descriptionplacement),
    (rsq, c.rsqplacement),
    ('R: ' + str(r_value), c.rvalueplacement),
    ('P-Value: ' + str(p_value), c.pvalueplacement),
]
for note_text, note_xy in notes:
    plt.annotate(note_text, note_xy)

# c.plttitle is what gets passed to savefig, i.e. the output target of the figure.
plt.savefig(c.plttitle)

print("PhysBacCorrelations-1: ... done.")
content_copyCOPY