Python 3.6: Speed up regex replace in Pandas dataframe - Stack Overflow

PHOTO EMBED

Fri Aug 25 2023 14:36:09 GMT+0000 (Coordinated Universal Time)

Saved by @yfontes #python

import csv
import sqlite3

chr_ranges = (0x00, 0x09), (0x0B, 0x20), (0x7F, 0xA0)  # 'special' character ranges
trans_table = {x: " " for r in chr_ranges for x in range(*r)} # 'special'->space trans. table

def trans_field(value):
    try:
        return value.translate(trans_table)  # try to translate and return
    except AttributeError:  # if there's no translate method on the passed value...
        return value  # return the original value

connection = sqlite3.connect("your_db")  # connect to the database
cursor = connection.cursor()  # grab a database cursor
results = cursor.execute("select * from your_table")  # execute the select query
header = [c[0] for c in cursor.description]  # get the column names for our CSV header

with open("output.csv", "w", newline="") as f:  # open("output.csv", "wb") on Python 2.x
    writer = csv.writer(f, delimiter="\t")  # create a CSV writer with \t as a delimiter
    writer.writerow(header)  # write the header (column names)
    for result in results:  # iterate over the returned results
        writer.writerow(map(trans_field, result))  # process result fields and write the row
content_copyCOPY

https://stackoverflow.com/questions/50376282/python-3-6-speed-up-regex-replace-in-pandas-dataframe