# pandas DataFrame cheat sheet: inspecting, indexing, sorting, summarising and
# de-duplicating an existing DataFrame named `df`.
# Column names ('col', 'col1', ...) and index labels are placeholders.

# --- Inspect ---
df.describe()  # get statistics of df
df.info()
df.dtypes
df.values  # get values of df as a NumPy array
df.columns  # get all columns
df.index  # get index of df

# --- Index handling ---
df.set_index('col1')  # can also be applied on multiple columns (use ['col1', 'col2'])
df.reset_index()  # option: drop=True
df.sort_index(level=['col1', 'col2'], ascending=[False, True])  # for MultiIndex

# Then we can slice by indices.
# NOTE: sort_index() returns a new DataFrame (it does not sort in place), and
# label slicing on a MultiIndex expects a sorted index.
# Slice from the first tuple to the second tuple, where 'Pakistan' is the
# first index level and 'Lahore' the second.
df.loc[('Pakistan', 'Lahore'):('Russia', 'Moscow')]
df.loc[("a", "b"):("c", "d"), "e":"f"]  # can also slice rows and columns at once

# Get the last 10 rows of the data frame.
df.iloc[-10:]

df.info()  # get info about missing values (non-null count per column)
df.shape  # get nbr of rows and columns; an attribute, so no parentheses

# --- Sorting ---
# Sort values by col1 (ascending) and col2 (descending).
df.sort_values(['col1', 'col2'], ascending=[True, False])

# --- Summary statistics ---
# Also .median(), .min(), .max(), .std(), .var(), .quantile();
# min() also works for dates.
df['col'].mean()
# Running total: each row plus all rows before it; also .cummax(), .cumprod().
df['col1'].cumsum()
# Apply several aggregation functions to several columns:
# df[["col1", "col2", "col3"]].agg([function1, function2])

# --- Unique counts ---
df.drop_duplicates(subset=['col1', 'col2'])
df['col1'].value_counts(sort=True, normalize=True)
df['col1'].unique()

# Get the rows holding the 10 largest values of column "col".
df.nlargest(10, "col")
Preview:
Download PNG
Download JPEG
Download SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter