Convert Traditional Chinese to Simplified Chinese

PHOTO EMBED

Thu Aug 12 2021 07:11:15 GMT+0000 (Coordinated Universal Time)

Saved by @QuinnFox12 #python #pandas #column #nlp #chinese #trasim

sudo pip install opencc
# If that does not work, clone the project first.

import pandas as pd
import numpy as np
# -*- coding: utf-8 -*-
import opencc
from opencc import OpenCC

# Read the training data from disk.
df = pd.read_csv('training.csv')
# Cast every column to str so each cell can be handed to the text converter.
df = df.astype(str)

# Lazily-built OpenCC converter shared by every tra_sim() call.
_TW2S_CONVERTER = None


def tra_sim(text):
    """Convert ``text`` using OpenCC's ``'tw2s'`` configuration.

    The converter is created on the first call and reused afterwards, so
    applying this function to every cell of a column does not construct a
    new ``OpenCC`` object per cell.

    Args:
        text: The string to convert.

    Returns:
        The value returned by the converter's ``convert(text)``.
    """
    global _TW2S_CONVERTER
    if _TW2S_CONVERTER is None:
        # Building the converter is the expensive part; do it only once.
        _TW2S_CONVERTER = OpenCC('tw2s')
    return _TW2S_CONVERTER.convert(text)
# Add a converted copy of each text column under a new "sim_"-prefixed name.
for target_col, source_col in (
    ('sim_label', 'label'),
    ('sim_detail_label', 'detail_label'),
    ('sim_text', 'text'),
):
    df[target_col] = df[source_col].apply(tra_sim)
content_copyCOPY