Python - character encoding
Tue Jul 25 2023 07:21:42 GMT+0000 (Coordinated Universal Time)
Saved by
@yusufalao
#python
#Python
from pathlib import Path
import shutil
import chardet
# region - FUN - detect_encoding
# ----------------------------------------------------------------------------------------------
def detect_encoding(dfn):
    """Guess the character encoding of the file at *dfn* using chardet.

    The whole file is read as raw bytes and handed to ``chardet.detect``.
    The reported confidence is written to the Watch log; only the encoding
    name is returned.

    Args:
        dfn: Path of the file to inspect.

    Returns:
        The value of the ``'encoding'`` key in chardet's result. Callers
        should be prepared for ``None`` (chardet reports no encoding when it
        cannot decide, e.g. for empty input -- verify against the installed
        chardet version).
    """
    with open(dfn, 'rb') as f_csv:
        chardet_result = chardet.detect(f_csv.read())
    chardet_encoding = chardet_result.get('encoding')
    chardet_confidence = chardet_result.get('confidence')
    # NOTE(review): second argument is presumably the Watch log level -- confirm.
    Watch.Log(chardet_confidence, 2)
    return chardet_encoding
# ----------------------------------------------------------------------------------------------
# endregion - FUN - detect_encoding
def _decode_with_fallback(raw, encoding):
    """Decode *raw* bytes with *encoding*, falling back to lossy UTF-8.

    A missing (``None``), unknown, or wrong encoding must not abort the
    script, so any decode failure is retried as UTF-8 with undecodable bytes
    replaced by U+FFFD.
    """
    try:
        return raw.decode(encoding or "utf-8")
    except (UnicodeDecodeError, LookupError):
        return raw.decode("utf-8", errors="replace")


# Resolve the job file and the temporary output path through Watch.
# NOTE(review): "%F" / "%t" are presumably the job-file path and the temp
# folder in the host's variable syntax -- confirm against the Watch docs.
job_file = Watch.ExpandString("%F")
encoding = detect_encoding(job_file)
temp = Watch.ExpandString("%ttemp.csv")

# Read raw bytes so that no implicit decoding happens before detection.
with open(job_file, 'rb') as temp_csv:
    content = temp_csv.readlines()

# Per-line diagnostics: log the raw bytes, a UTF-8 view, chardet's guess for
# the line, and the line decoded with that guess.
for line in content:
    Watch.Log(f"{'*'*100}\n{line}", 2)
    # errors="replace" keeps the diagnostic loop alive on non-UTF-8 lines.
    Watch.Log(f'utf-8...{line.decode("utf-8", errors="replace")}', 2)
    chardet_result = chardet.detect(line)
    chardet_encoding = chardet_result.get('encoding')
    chardet_confidence = chardet_result.get('confidence')
    Watch.Log(f"Enc...{chardet_encoding}", 2)
    Watch.Log(f"Con...{chardet_confidence}", 2)
    Watch.Log(f"New...{_decode_with_fallback(line, chardet_encoding)}\n", 2)

# Write the converted copy as UTF-8. The whole-file detection result is used
# for decoding because it is based on more data than any single line.
# newline="" stops text mode from translating the line endings that are
# already present in the decoded content.
with open(temp, 'w', encoding="utf-8", newline="") as output:
    output.write(_decode_with_fallback(b"".join(content), encoding))

# Replacing the job file with the converted copy is still disabled, as in the
# original script; enable once the conversion output has been validated.
#Path(job_file).unlink()
#shutil.move(temp, job_file)
content_copyCOPY
Comments