Snippets Collections
df['column'].unique()  # Array of the distinct values in the column
df['column'].nunique() # Number of distinct values (NaN is not counted by default)
df['column'].value_counts() # Each distinct value together with how often it occurs
# Collect the per-URL frames in a list and concatenate once at the end.
# Calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration, which is quadratic in the number of URLs.
frames = []
for urls in urls_list:
    # NOTE(review): df_new is expected to be built from `urls` at this point;
    # the snippet does not show how.
    frames.append(df_new)
# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()
# Cross join: give both frames the same constant 'key' column, then join on
# it so every row of df1 is paired with every row of df2.
df1['key'] = df2['key'] = 0

df1.merge(df2, how='outer', on='key')
def load_table_to_df(fp, **kwargs):
    """Load a tabular file into a DataFrame, choosing the reader by extension.

    The extension check is case-insensitive, and ``fp`` may be a ``str`` or
    any ``os.PathLike`` object (for example ``pathlib.Path``).

    Args:
        fp: Path of the file to read. Supported extensions are ``.xls``,
            ``.xlsx``, ``.csv``, ``.txt`` and ``.parquet``.
        **kwargs: Forwarded unchanged to the underlying pandas reader.

    Returns:
        The loaded DataFrame, or the string ``"Wrong file extension"`` when
        the extension is not supported. The string return is kept so that
        existing callers checking for it keep working.
    """
    import os

    # Normalise once so that 'DATA.CSV' and Path objects are handled too.
    name = os.fspath(fp).lower()

    if name.endswith('.xls'):
        result_df = pd.read_excel(fp, **kwargs)

    elif name.endswith('.xlsx'):
        result_df = pd.read_excel(fp, engine='openpyxl', **kwargs)

    elif name.endswith(('.csv', '.txt')):
        result_df = pd.read_csv(fp, **kwargs)

    elif name.endswith('.parquet'):
        result_df = pd.read_parquet(fp, **kwargs)

    else:
        return "Wrong file extension"

    print('     -->  Succesfully loaded {}'.format(fp))
    return result_df
# Wrap `lines` in a Series named 'img_path' so it becomes a column of that name.
lins = pd.Series(lines, name='img_path')
# axis=1 places the Series next to the existing columns; rows are matched on the index.
train_df = pd.concat([train_df, lins], axis=1)
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# NOTE(review): the positional selections below assume the columns are ordered
# cars_per_cap (0), country (1), drives_right (2) -- confirm against cars.csv.
# The original mixed position 1 and position 2 for drives_right.

# Print out drives_right column as Series (by label, then by position)
print(cars.loc[:, 'drives_right'])
print(cars.iloc[:, 2])

# Print out drives_right column as DataFrame
print(cars.loc[:, ['drives_right']])
print(cars.iloc[:, [2]])  # the inner [] is what makes the result a DataFrame

# Print out cars_per_cap and drives_right as DataFrame
print(cars.loc[:, ['cars_per_cap', 'drives_right']])
print(cars.iloc[:, [0, 2]])  # a list selector again yields a DataFrame

# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Print out drives_right value of Morocco.
# loc takes (row label, column label); wrapping both in one list would instead
# look up two ROW labels and raise KeyError for 'drives_right'.
print(cars.loc['MOR', 'drives_right'])

# Print sub-DataFrame: rows at positions 4 and 5, columns at positions 1 and 2
print(cars.iloc[[4, 5], [1, 2]])
# Load the cars table, using the first CSV column as the row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# Observation for Japan: once by label, once by position
print(cars.loc['JPN'], cars.iloc[2], sep='\n')

# Observations for Australia and Egypt: once by label, once by position
print(cars.loc[['AUS', 'EG']], cars.iloc[[1, 6]], sep='\n')
# Load the cars table, using the first CSV column as the row labels
import pandas as pd
cars = pd.read_csv('cars.csv', index_col=0)

# In order: country as a Series (single label), country as a one-column
# DataFrame (list with one label), then country and drives_right as a
# DataFrame (list with two labels).
print(
    cars['country'],
    cars[['country']],
    cars[['country', 'drives_right']],
    sep='\n',
)
# Import pandas as pd
import pandas as pd

# Fix import by including index_col
# index_col = 0 uses the first CSV column as the row index instead of data.
cars = pd.read_csv('cars.csv', index_col = 0)

# Print out cars
print(cars)
import pandas as pd

data = {
    'Product': ['Desktop Computer', 'Tablet', 'Printer', 'Laptop'],
    'Price': [850, 200, 150, 1300],
}

# Build the frame from the dict, then pin the column order explicitly.
df = pd.DataFrame(data)[['Product', 'Price']]

# The path below is a placeholder; the row index is written as the first column.
df.to_csv(r'Path where you want to store the exported CSV file\File Name.csv')
# df.to_csv('file_name.csv', encoding='utf-8', index=False)
print(df)

# Template: write only the listed columns to a CSV file.
# NOTE(review): `...` is a placeholder for further column names and must be
# replaced before running. `data` has to be a DataFrame here -- the dict named
# `data` defined earlier in this file cannot be indexed with a list.
data[['column1','column2','column3',...]].to_csv('fileNameWhereYouwantToWrite.csv')
     
# Accumulate rows in a plain list and build the DataFrame once at the end.
# The original called a misspelled `df.appen`; DataFrame.append never worked
# in place and was removed in pandas 2.0.
rows = []
n_rows = 0  # TODO: set to the number of items to collect (range() needs an argument)
for i in range(n_rows):
    #....
    rows.append(text)
df = pd.DataFrame(rows)
# best way
# Join the listed columns row by row, separated by single spaces.
# NOTE: ' '.join raises TypeError if any cell is not a str (for example NaN).
data['resume'] = data[['Resume_title', 'City', 'State', 'Description', 'work_experiences', 'Educations', 'Skills', 'Certificates', 'Additional Information']].agg(' '.join, axis=1)


# other way
# NOTE(review): the recipes below are independent alternatives; they mix the
# spellings 'quarter'/'Quarter' and 'period'/'Period', and column names are
# case-sensitive -- use whichever matches your frame.
# Plain + concatenates when both columns already hold strings.
df["period"] = df["Year"] + df["quarter"]
# Same, with a space between the two parts.
df['Period'] = df['Year'] + ' ' + df['Quarter']
df["period"] = df["Year"].astype(str) + df["quarter"] #If one (or both) of the columns are not string typed
#Beware of NaNs when doing this!
# NOTE(review): `...` below is a placeholder for further column names and must be replaced before running.
df['period'] = df[['Year', 'quarter', ...]].agg('-'.join, axis=1) #for multiple string columns
# Row-wise join without a separator.
df['period'] = df[['Year', 'quarter']].apply(lambda x: ''.join(x), axis=1)
#method cat() of the .str accessor 
df['Period'] = df.Year.str.cat(df.Quarter)
# Convert both sides to str first and put 'q' between them.
df['Period'] = df.Year.astype(str).str.cat(df.Quarter.astype(str), sep='q')
df['AllTogether'] = df['Country'].str.cat(df[['State', 'City']], sep=' - ') #add parameter na_rep to replace the NaN values with a string if have nan
# Convert an arbitrary list of columns to str and add them up row by row.
columns = ['whatever', 'columns', 'you', 'choose']
df['period'] = df[columns].astype(str).sum(axis=1)

#a function
def str_join(df, sep, *cols):
    """Concatenate several columns of ``df`` row by row into one string Series.

    Every column is converted to ``str`` before joining, so numeric columns
    are accepted, and a single column also comes back as strings.

    Args:
        df: DataFrame holding the columns.
        sep: Separator placed between the values of adjacent columns.
        *cols: Labels of the columns to join, in output order.

    Returns:
        A Series of the joined strings, indexed like ``df``.

    Raises:
        TypeError: If no column label is given (``reduce`` of an empty
            iterable).
    """
    from functools import reduce

    as_text = (df[col].astype(str) for col in cols)
    return reduce(lambda left, right: left.str.cat(right, sep=sep), as_text)

# Example: join columns c0..c3 with '-' into a new 'cat' column.
# (The IPython "In [4]:" prompt was removed; it is not part of the statement.)
df['cat'] = str_join(df, '-', 'c0', 'c1', 'c2', 'c3')
# Remove every run of characters that is neither a word character nor
# whitespace (i.e. punctuation) from each column.
for c in df_drop.columns:
    # regex=True is required: since pandas 2.0, str.replace treats the pattern
    # as a literal string by default, which would make this a no-op. The raw
    # string avoids the invalid-escape warning for '\w' and '\s'.
    df_drop[c] = df_drop[c].str.replace(r'[^\w\s]+', '', regex=True)
df_drop = df_drop.astype(str)
df_drop.head()
# Select every row and the columns from 'c1' through 'c4'; label slices in .loc include both ends.
rmsval = df.loc[:, 'c1':'c4']
def getrms(row):
    """Return the root mean square of the values in ``row``.

    The divisor is the actual number of values rather than a hard-coded 4,
    so the result is correct for any row length and unchanged for rows of
    length 4.

    Args:
        row: One-dimensional numeric sequence supporting ``** 2`` and
            ``len`` (for example a pandas Series or a NumPy array).

    Returns:
        The RMS as a NumPy float; ``0.0`` for an empty row.
    """
    import numpy as np  # local import: the file never imports numpy itself

    count = len(row)
    if count == 0:
        # An empty sum is 0, so report 0.0 instead of dividing by zero.
        return np.float64(0.0)
    return np.sqrt(sum(row ** 2) / count)
# Apply getrms to the c1..c4 selection held in `rmsval` rather than to the
# whole frame, so unrelated columns do not enter the RMS.
df['rms'] = rmsval.apply(getrms, axis=1)
df.head()
import pandas as pd

data = {
    'Product': ['Desktop Computer', 'Tablet', 'Printer', 'Laptop'],
    'Price': [850, 200, 150, 1300],
}

# Pick the columns from the dict in the order they should appear.
df = pd.DataFrame({label: data[label] for label in ('Product', 'Price')})

# The path below is a placeholder; the row index is written as the first column.
df.to_csv(r'Path where you want to store the exported CSV file\File Name.csv')

print(df)
import re

text = 'this is a text'

# re.search returns None when nothing matches, so test the match object
# explicitly instead of catching the AttributeError from None.group().
match = re.search('is(.+?)text', text)
if match is not None:
    found = match.group(1)
else:
    # Pattern not found in the original string
    found = '0 wtitle' # apply your error handling
found

# => ' is a '
# The leftmost 'is' is the one inside 'this', so the lazy group captures
# everything between it and 'text', including the surrounding spaces.
import re

import pandas as pd

junk = """Shot - Wounded/Injured, Shot - Dead (murder, accidental, suicide), Suicide - Attempt, Murder/Suicide, Attempted Murder/Suicide (one variable unsuccessful), Institution/Group/Business, Mass Murder (4+ deceased victims excluding the subject/suspect/perpetrator , one location), Mass Shooting (4+ victims injured or killed excluding the subject/suspect"""

# Split on a complete parenthesised group, or on a comma followed by whitespace.
rx = re.compile(r'\([^()]+\)|,(\s+)')

# Because of the capturing group, rx.split also yields the captured
# whitespace (or None when the parenthesis branch matched). Dropping falsy
# pieces, stripping, and dropping the blanks again leaves only the labels.
data = list(
    filter(None, (piece.strip() for piece in filter(None, rx.split(junk))))
)

df = pd.DataFrame({'incident_characteristics': data})
print(df)
# Count the distinct values of the stg_nexus_member_cd column of trx_1 and print the result.
# NOTE(review): `f` is presumably pyspark.sql.functions and trx_1 a Spark DataFrame -- neither is defined in this snippet.
trx_1.select(f.countDistinct("stg_nexus_member_cd")).show()
star

Tue Jun 06 2023 07:39:55 GMT+0000 (Coordinated Universal Time)

#pandas #python #unique #dataframe
star

Thu Apr 27 2023 08:39:30 GMT+0000 (Coordinated Universal Time)

#pandas #dataframe
star

Mon Aug 29 2022 12:59:46 GMT+0000 (Coordinated Universal Time)

#python #pandas #dataframe #crossjoin
star

Thu Jan 06 2022 20:16:40 GMT+0000 (Coordinated Universal Time)

#python #pandas #dataframe
star

Fri Dec 24 2021 17:02:40 GMT+0000 (Coordinated Universal Time)

#python #pandas #dataframe
star

Tue Nov 23 2021 12:46:33 GMT+0000 (Coordinated Universal Time)

#dictionary #pandas #defining_data_frame #csv #dataframe
star

Tue Jun 29 2021 19:43:47 GMT+0000 (Coordinated Universal Time)

#py #dataframe #pandas #combine #column
star

Tue Jun 29 2021 19:02:21 GMT+0000 (Coordinated Universal Time)

#py #dataframe #pandas #replace
star

Tue Jun 29 2021 16:32:19 GMT+0000 (Coordinated Universal Time)

#py #dataframe #pandas
star

Tue Jun 29 2021 16:21:45 GMT+0000 (Coordinated Universal Time)

#py #dataframe #pandas
star

Tue Jun 29 2021 16:20:27 GMT+0000 (Coordinated Universal Time)

#py #dataframe #pandas #text #exception
star

Mon Jun 28 2021 17:29:44 GMT+0000 (Coordinated Universal Time)

#py #dataframe #pandas
star

Wed May 26 2021 08:31:02 GMT+0000 (Coordinated Universal Time)

#pyspark #unique #count #dataframe #column

Save snippets that work with our extensions

Available in the Chrome Web Store Get Firefox Add-on Get VS Code extension