#%% Imports
# Correlation script: loads a pickled DataFrame, fits a straight line to two
# configured columns, and saves an annotated scatter plot with the fit.
print("PhysBacCorrelations-1: Starting ...")

from copy import deepcopy as cpy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from munch import Munch as mch
from scipy.stats import linregress

# NOTE(review): `c` is not defined in the visible part of this file;
# presumably a configuration object supplied by the calling environment
# before this cell runs -- confirm.
# NOTE: pd.read_pickle unpickles arbitrary objects and can execute code while
# doing so; only point c.indir / c.infil at trusted files.
df = pd.read_pickle(c.indir + c.infil)

#%% Create dataframe and plot
# Keep only the two columns being correlated.
# The x value will always be in the first column.
# The y value will always be in the second column.
df = df[[c.corr_x, c.corr_y]]
df = df.dropna()  # Drop rows that contain any NA values

x = np.asarray(df[c.corr_x])  # x values as an array
y = np.asarray(df[c.corr_y])  # y values as an array

# A straight-line fit needs at least two points; fail early with a clear
# message instead of an obscure error from polyfit / linregress.
if len(x) < 2:
    raise ValueError(
        "PhysBacCorrelations-1: need at least two complete (x, y) rows to "
        "fit a regression line, got %d" % len(x)
    )

#%% Run statistics
p1 = np.polyfit(x, y, 1)  # degree-1 fit; returns [slope, intercept]
yfit = p1[0] * x + p1[1]  # fitted y at each observed x
print(yfit)

yresid = y - yfit
ssresid = np.sum(yresid ** 2)  # residual sum of squares
sstotal = len(y) * np.var(y)  # total sum of squares (np.var uses ddof=0)

slope, intercept, r_value, p_value, std_err = linregress(x, y)
# For a least-squares line with intercept this equals 1 - ssresid / sstotal.
rsq = ("R-squared: %f" % r_value**2)
print(p_value)

#%% Plot
# Run this cell as a whole so the regression line overlays the data points.
plt.plot(x, y, '.')  # data points only
plt.plot(x, np.polyval(p1, x), 'r-')  # linear regression only
plt.xlabel(c.xlabel)
plt.ylabel(c.ylabel)
plt.title(c.title)
plt.annotate((c.annotation), (c.descriptionplacement))
plt.annotate((rsq), (c.rsqplacement))
plt.annotate('R: ' + str(r_value), (c.rvalueplacement))
plt.annotate('P-Value: ' + str(p_value), (c.pvalueplacement))
plt.savefig(c.plttitle)

print("PhysBacCorrelations-1: ... done.")