Snippets Collections
https://vimsky.com/zh-tw/examples/detail/python-method-regex.sub.html
import glob

all_filenames = glob.glob("/home/lynaza/Desktop/Quinn/lda/檢察官起訴書/*.txt")

# return only the filenames (the listing may include files with other extensions, not just .txt)
import os
arr = os.listdir("/home/lynaza/Desktop/Quinn/lda/檢察官起訴書")
print(arr)
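If only one extension is wanted, a minimal sketch (same folder as above) filters the listing and strips the extension with os.path.splitext:

import os

folder = "/home/lynaza/Desktop/Quinn/lda/檢察官起訴書"
txt_names = [os.path.splitext(f)[0] for f in os.listdir(folder) if f.endswith(".txt")]
print(txt_names)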



import cv2
import os
import glob

def load_images_name(path):
    
    list_1 = glob.glob(path+'/*.tif') # depth of 1 folder
    
    list_2 = glob.glob(path+'/*/*.tif') # depth of 2 folder
    
    list_3 = glob.glob(path+'/*/*/*.tif')  # depth of 3 folder
    
    list_4 = glob.glob(path+'/*/*/*/*.tif')  # depth of 4 folder
    
    images_path = list_1 +list_2 +list_3 + list_4

    return images_path

images = load_images_name("/home/lynaza/Desktop/traindata/test")
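An alternative sketch: instead of enumerating each folder depth by hand, a recursive glob ('**' with recursive=True, Python 3.5+) matches .tif files at any depth:

import glob
import os

def load_images_name_recursive(path):
    # '**' matches any number of nested folders when recursive=True
    return glob.glob(os.path.join(path, '**', '*.tif'), recursive=True)

images = load_images_name_recursive("/home/lynaza/Desktop/traindata/test")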
import numpy as np

rmsval = df.loc[:, 'c1':'c4']
def getrms(row):
    # root mean square of the four values c1..c4
    return np.sqrt(sum(row**2) / 4)
df['rms'] = rmsval.apply(getrms, axis=1)
df.head()
df_drop = df_drop.astype(str)
for c in df_drop.columns:
    # strip everything that is not a word character or whitespace
    df_drop[c] = df_drop[c].str.replace(r'[^\w\s]+', '', regex=True)
df_drop.head()
import pandas as pd

data = {'Product': ['Desktop Computer','Tablet','Printer','Laptop'],
        'Price': [850,200,150,1300]
        }

df = pd.DataFrame(data, columns= ['Product', 'Price'])

df.to_csv(r'Path where you want to store the exported CSV file\File Name.csv')

print (df)
# best way
data['resume'] = data[['Resume_title', 'City', 'State', 'Description', 'work_experiences', 'Educations', 'Skills', 'Certificates', 'Additional Information']].agg(' '.join, axis=1)


# other way
df["period"] = df["Year"] + df["quarter"]
df['Period'] = df['Year'] + ' ' + df['Quarter']
df["period"] = df["Year"].astype(str) + df["quarter"] #If one (or both) of the columns are not string typed
#Beware of NaNs when doing this!
df['period'] = df[['Year', 'quarter', ...]].agg('-'.join, axis=1) #for multiple string columns
df['period'] = df[['Year', 'quarter']].apply(lambda x: ''.join(x), axis=1)
# the cat() method of the .str accessor
df['Period'] = df.Year.str.cat(df.Quarter)
df['Period'] = df.Year.astype(str).str.cat(df.Quarter.astype(str), sep='q')
df['AllTogether'] = df['Country'].str.cat(df[['State', 'City']], sep=' - ')  # add na_rep to replace NaN values with a string if the data contains NaN
columns = ['whatever', 'columns', 'you', 'choose']
df['period'] = df[columns].astype(str).sum(axis=1)

# a helper function
def str_join(df, sep, *cols):
    from functools import reduce
    return reduce(lambda x, y: x.astype(str).str.cat(y.astype(str), sep=sep),
                  [df[col] for col in cols])

df['cat'] = str_join(df, '-', 'c0', 'c1', 'c2', 'c3')
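A minimal self-contained check of the approaches above, on a made-up two-column frame (values chosen purely for illustration; reuses the str_join helper defined just above):

import pandas as pd

df = pd.DataFrame({'Year': [2019, 2020], 'quarter': ['q1', 'q2']})
df['period'] = df['Year'].astype(str) + df['quarter']                       # plain + concatenation
df['period2'] = df[['Year', 'quarter']].astype(str).agg('-'.join, axis=1)   # agg join
df['cat'] = str_join(df, '-', 'Year', 'quarter')                            # the helper above
print(df)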
import re

text = 'this is a text'

try:
    found = re.search('is(.+?)text', text).group(1)
except AttributeError:
    # the pattern was not found in the string
    found = '0 wtitle'  # apply your error handling
print(found)  # => ' is a '
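The link at the top of this collection covers re.sub; a minimal sketch of the same idea (pattern and replacement chosen purely for illustration):

import re

cleaned = re.sub(r'\s+', ' ', 'this   is\ta   text')  # collapse runs of whitespace into single spaces
print(cleaned)  # => 'this is a text'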
import pandas as pd, re

junk = """Shot - Wounded/Injured, Shot - Dead (murder, accidental, suicide), Suicide - Attempt, Murder/Suicide, Attempted Murder/Suicide (one variable unsuccessful), Institution/Group/Business, Mass Murder (4+ deceased victims excluding the subject/suspect/perpetrator , one location), Mass Shooting (4+ victims injured or killed excluding the subject/suspect"""

rx = re.compile(r'\([^()]+\)|,(\s+)')

data = [x 
        for nugget in rx.split(junk) if nugget
        for x in [nugget.strip()] if x]

df = pd.DataFrame({'incident_characteristics': data})
print(df)
def clean(txt):
    # strip simple HTML remnants and entities from a string Series
    txt = txt.str.replace(r'(<br/>)', '', regex=True)
    txt = txt.str.replace(r'(<a).*(>).*(</a>)', '', regex=True)
    txt = txt.str.replace(r'(&amp)', '', regex=True)
    txt = txt.str.replace(r'(&gt)', '', regex=True)
    txt = txt.str.replace(r'(&lt)', '', regex=True)
    txt = txt.str.replace('\xa0', ' ', regex=False)
    return txt
df['xxx column'] = clean(df['xxx column'])
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# mask the upper triangle so each correlation appears only once
mask = np.triu(np.ones_like(corr, dtype=bool))

f, ax = plt.subplots(figsize=(11, 9))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={'shrink': .5})
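The corr matrix above is assumed to exist already; a minimal sketch for building it from a numeric DataFrame (df is hypothetical here):

corr = df.select_dtypes('number').corr()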
# annotate each bar with its height
for p in ax.patches:
    values = '{:.0f}'.format(p.get_height())
    x = p.get_x() + p.get_width()/2
    y = p.get_height()
    ax.annotate(values, (x, y), ha='center', va='bottom', fontsize=11)
# g = sns.<any seaborn plot that returns an Axes>(...)
ax = g
for p in ax.patches:
    ax.text(p.get_x() + p.get_width()/2., p.get_height(), '{0:.2f}'.format(p.get_height()),
            fontsize=12, color='black', ha='center', va='bottom')
django-admin startproject mysite
 
python manage.py startapp myapp
import pandas as pd
import matplotlib.pyplot as plt

from pandas_profiling import ProfileReport
profile = ProfileReport(gabijos, title='Gabijos g.', html={'style':{'full_width':True}})
profile.to_file("gabijos.html")

df_query = df_query.assign(comments='NoComment')
# rows of dff that do not appear in df (rows compared as string tuples)
qq = dff[~dff.astype(str).apply(tuple, 1).isin(df.astype(str).apply(tuple, 1))]
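An equivalent sketch using merge with an indicator column (assuming df and dff share the same columns):

merged = dff.merge(df.drop_duplicates(), how='left', indicator=True)
qq = merged[merged['_merge'] == 'left_only'].drop(columns='_merge')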
for p in ax.patches:
    values = '{:.0f}'.format(p.get_height())
    x = p.get_x() + p.get_width()/2
    y = p.get_height()
    ax.annotate(values, (x, y), ha='center', va='bottom', fontsize=10)
# split a comma-separated string column (assumes the original normalized full-width '，' and '、' to ',')
.apply(lambda x: x.replace('，', ',').replace('、', ',').split(','))
# importing libraries 
from sklearn.ensemble import VotingClassifier ,BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import accuracy_score 
from numpy import mean,std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score,RepeatedStratifiedKFold,train_test_split
from sklearn.linear_model import LogisticRegression,RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from matplotlib import pyplot
from sklearn.datasets import load_wine,load_iris
from matplotlib.pyplot import figure
figure(num=2, figsize=(16, 12), dpi=80, facecolor='w', edgecolor='k')
import xgboost as xgb
from sklearn.feature_selection import SelectKBest,f_regression
from sklearn.linear_model import LinearRegression,BayesianRidge,ElasticNet,Lasso,SGDRegressor,Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,RobustScaler,StandardScaler
from sklearn.pipeline import make_pipeline,Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA,KernelPCA
from sklearn.ensemble import ExtraTreesRegressor,GradientBoostingRegressor,RandomForestRegressor,VotingClassifier
from sklearn.model_selection import cross_val_score,KFold,GridSearchCV,RandomizedSearchCV,StratifiedKFold,train_test_split
from sklearn.base import BaseEstimator,clone,TransformerMixin,RegressorMixin
from sklearn.svm import LinearSVR,SVR
#import xgboost 
from xgboost import XGBRegressor
#Import Pandas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from scipy.stats import skew
from scipy.stats import pearsonr
%matplotlib inline
seed = 1075
np.random.seed(seed)
# keep only the rows whose country appears in the c3 list
c3 = pd.Series(['China', 'US'])
df[df['countries'].isin(c3)]
# applying filter function 
df.filter(["Name", "College", "Salary"]) 

# importing pandas as pd 
import pandas as pd 
  
# Creating the dataframe  
df = pd.read_csv("nba.csv") 
  
# Using regular expression to extract all 
# columns which has letter 'a' or 'A' in its name. 
df.filter(regex='[aA]')
(df.groupby('name')['ext price']
 .agg(['mean', 'sum'])
 .style.format('${0:,.2f}'))
'${:,.2f}'.format(dfCombined['Amount'].sum())
df['column_name'] = pd.to_datetime(df['column_name'])
# new version
df.groupby(pd.Grouper(key='column_name', freq="M")).mean().plot()
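An equivalent sketch using resample instead of Grouper (assuming 'column_name' is the datetime column):

df.set_index('column_name').resample('M').mean().plot()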
def ffill_cols(df, cols_to_fill_name='Unn'):
    """
    Forward fills column names. Propagate last valid column name forward to next invalid column. Works similarly to pandas
    ffill().
    
    :param df: pandas Dataframe; Dataframe
    :param cols_to_fill_name: str; The name of the columns you would like forward filled. Default is 'Unn' as
    the default name pandas gives unnamed columns is 'Unnamed'
    
    :returns: list; List of new column names
    """
    cols = df.columns.to_list()
    for i, j in enumerate(cols):
        if j.startswith(cols_to_fill_name):
            cols[i] = cols[i-1]
    return cols
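A usage sketch (df is hypothetical here): assign the returned list back to the frame's columns.

df.columns = ffill_cols(df)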