Python - character encoding

PHOTO EMBED

Tue Jul 25 2023 07:21:42 GMT+0000 (Coordinated Universal Time)

Saved by @yusufalao #python

#Python

from pathlib import Path
import shutil
import chardet


# region - FUN - detect_encoding

# ----------------------------------------------------------------------------------------------

def detect_encoding(dfn):
    """Guess the character encoding of the file at *dfn* with chardet.

    The file is read in full as raw bytes and handed to ``chardet.detect``.
    The confidence value chardet reports is passed to ``Watch.Log`` and the
    reported encoding name is returned.

    Args:
        dfn: Path of the file to inspect (anything ``open`` accepts).

    Returns:
        The value stored under ``'encoding'`` in chardet's result, or
        ``None`` if that key is absent.
    """
    with open(dfn, 'rb') as raw_file:
        raw_bytes = raw_file.read()

    guess = chardet.detect(raw_bytes)

    # Log how sure chardet is before handing the encoding name back.
    Watch.Log(guess.get('confidence'), 2)

    return guess.get('encoding')

# ----------------------------------------------------------------------------------------------

# endregion - FUN - detect_encoding

# Convert the current job file to UTF-8 and write the result to a temp CSV.
#
# Steps:
#   1. Resolve the job file path and detect its encoding for the whole file.
#   2. Log per-line diagnostics (raw bytes, UTF-8 view, chardet's per-line guess).
#   3. Decode the whole file with the detected encoding and write it as UTF-8.
#
# NOTE(review): "%F" presumably expands to the job file's full path and "%t"
# to the host's temp folder -- confirm against the Watch object documentation.
job_file = Watch.ExpandString("%F")
encoding = detect_encoding(job_file)

# chardet reports no encoding when it cannot make a guess (e.g. an empty
# file); fall back to UTF-8 so the decode calls below always get a codec name.
source_encoding = encoding or "utf-8"

temp = Watch.ExpandString("%ttemp.csv")

# Read raw bytes: the encoding is not trusted yet, so no text-mode decoding.
with open(job_file, 'rb') as temp_csv:
    content = temp_csv.readlines()

for line in content:
    Watch.Log(f"{'*'*100}\n{line}", 2)
    # Diagnostics must never abort the job, so undecodable bytes are replaced
    # instead of raising UnicodeDecodeError on non-UTF-8 input.
    Watch.Log(f'utf-8...{line.decode("utf-8", errors="replace")}', 2)

    # Per-line detection is logged for comparison only; short lines give
    # chardet little to work with, so the file-level result drives conversion.
    line_guess = chardet.detect(line)

    Watch.Log(f"Enc...{line_guess.get('encoding')}", 2)
    Watch.Log(f"Con...{line_guess.get('confidence')}", 2)
    # Show the line as it will appear after conversion (decoded text, not the
    # bytes repr that str(line) would produce).
    Watch.Log(f"New...{line.decode(source_encoding, errors='replace')}\n", 2)

# Decode strictly here: a wrong detection should fail loudly rather than
# silently write replacement characters into the data file.
converted = b"".join(content).decode(source_encoding)

# encoding="utf-8" makes the output independent of the platform default, and
# newline="" keeps the original line endings (no "\r\n" -> "\r\r\n" on Windows).
with open(temp, 'w', encoding="utf-8", newline="") as output:
    output.write(converted)


#Path(job_file).unlink()
#shutil.move(temp, job_file)
content_copyCOPY