# --- Quick inspection -------------------------------------------------
# Summary statistics of the frame.
df.describe()
# Concise overview of the frame.
df.info()
# dtype of every column.
df.dtypes
# Underlying data as a NumPy array.
df.values
# Column labels.
df.columns
# Row labels (the index).
df.index

# --- Setting / resetting the index -------------------------------------
# Use a column as the index; pass a list (['col1', 'col2']) for several columns.
df.set_index(keys='col1')
# Restore the default index; pass drop=True to discard the old index
# instead of keeping it as a column.
df.reset_index(drop=False)
# Sort a MultiIndex frame by its levels: 'col1' descending, then 'col2' ascending.
# NOTE(review): the sorted result is not assigned here; label slicing below
# presumably needs it assigned back (df = df.sort_index(...)) — confirm.
df.sort_index(level=['col1', 'col2'], ascending=[False, True])
# Once sorted, rows can be sliced by index labels.
# Slice from the first tuple to the second tuple, where the first element
# ('Pakistan') is the outer index level and the second ('Lahore') the inner one.
df.loc[('Pakistan', 'Lahore'):('Russia', 'Moscow')]
# Rows and columns can be sliced in the same call: row range first, column range second.
df.loc[("a", "b"):("c", "d"), "e":"f"]
# Last 10 rows of the data frame, selected by position.
df.iloc[-10:]
# Overview of the frame, used here to spot missing values.
df.info()
# Number of rows and columns as a (n_rows, n_columns) tuple.
# `shape` is an attribute, not a method: `df.shape()` raises
# "TypeError: 'tuple' object is not callable".
df.shape
# Order rows by col1 (ascending), breaking ties by col2 (descending).
df.sort_values(by=['col1', 'col2'], ascending=[True, False])
# Summary statistic of a single column. The same pattern works with
# .median(), .min(), .max(), .std(), .var() and .quantile();
# .min() also works on date columns.
df['col'].mean()
# Running total: each row holds the sum of all rows up to and including it.
# Related: .cummax(), .cumprod().
df['col1'].cumsum()
# Several aggregations over several columns at once:
# df[["col1", "col2", "col3"]].agg([function1, function2])
# --- Duplicates and unique values --------------------------------------
# Drop rows that repeat an earlier (col1, col2) combination.
df.drop_duplicates(subset=['col1', 'col2'])
# Relative frequency of each distinct value in col1, sorted by frequency.
df['col1'].value_counts(normalize=True, sort=True)
# Distinct values present in col1.
df['col1'].unique()

# --- Top-N rows ---------------------------------------------------------
# The 10 rows with the largest values in column "col".
df.nlargest(n=10, columns="col")