speech to text v1.0.1

PHOTO

Wed Oct 20 2021 21:55:27 GMT+0000 (Coordinated Universal Time)

Saved by @etanderson #python

'''

Ethan Anderson
Balance of Nature
VtT v1.0.1
Oct. 20 2021

'''

# importing libraries 
import speech_recognition as sr 
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence


# Shared speech-recognition object; get_large_audio_transcription() uses it
# to record and transcribe every audio chunk.
r = sr.Recognizer()
# Folder that main() scans for .wav files -- change this to the folder that
# holds your wav files.
VM_directory = r'vm temp wav'

def send_to_rr():
    """Announce a hand-off to R&R (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to R&R..."
    print(status)
def send_to_customer_care():
    """Announce a hand-off to CC (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to CC..."
    print(status)
def send_to_new_sales():
    """Announce a hand-off to NS (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to NS..."
    print(status)
def send_to_customer_sales():
    """Announce a hand-off to CS (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to CS..."
    print(status)
def send_to_returns():
    """Announce a hand-off to Returns (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to Returns..."
    print(status)
def send_to_declines():
    """Announce a hand-off to Declines (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to Declines..."
    print(status)
def send_to_scheduling():
    """Announce a hand-off to Scheduling (placeholder until a real transfer exists)."""
    # TODO: put the actual transfer process here
    status = "Sending to Scheduling..."
    print(status)
def no_matches():
    """Report that no department keyword was found (placeholder handler)."""
    # TODO: put the actual transfer process here
    status = "No Matches"
    print(status)

# Routing table. Each entry maps a list of trigger phrases ('words') to the
# handler to call ('target') and the status line to print ('message').
#
# ORDER MATTERS: main() walks this list from top to bottom and stops at the
# first entry that has a phrase occurring in the transcript, so earlier
# entries take priority. That is why the broad single-word triggers
# ("return", "cancel", "ship") sit below the more specific phrases.
all_departments_words = [
    {
        # "not receive" replaces the misspelled "not recieve", which could
        # never occur in a correctly spelled transcript.
        'words' : ["tracking", "shipment", "pending", "shipped", "shipping", "where my order is", "where is my order", "delay", "delayed", "not received", "not receive"],
        'target' : send_to_rr,
        'message' : "Sent to Reception and Routing"
    },

    {
        'words' : ["stop", "discontinue", "backlog", "surplus", "postpone", "no shipments"],
        'target' : send_to_customer_care,
        'message' : "Sent to Customer Care"
    },

    {
        'words' : ["fox","new customer", "cost", "discount code", "commercial", "do not have a computer", "don't have a computer", "35%", "35 percent", "Fox"],
        'target' : send_to_new_sales,
        'message' : "Sent to New Sales"
    },

    {
        'words' : ["reinstate", "old account", "ordered before", "place an order", "place another order", "renew", "change in my order", "reactivate", "reorder", "make an order", "apple", "preferred customer"],
        'target' : send_to_customer_sales,
        'message' : "Sent to Customer Sales"
    },

    {
        'words' : ["refund", "money back", "charged", "RMA", "R M A", "refunded", "refunding", "refused", "refuse"],
        'target' : send_to_returns,
        'message' : "Sent to Returns"
    },

    {
        'words' : ["fraud", "update", "declined", "decline", "account is disabled", "account has been disabled", "declines", "updated"],
        'target' : send_to_declines,
        'message' : "Sent to Declines"
    },

    {
        'words' : ["appointment", "scheduled", "schedule", "reschedule", "coach"],
        'target' : send_to_scheduling,
        'message' : "Sent to Scheduling"
    },

    # Lower-priority, broad single-word triggers.
    {
        'words' : ["return", "returns", "returning"],
        'target' : send_to_returns,
        'message' : "Sent to Returns"
    },

    {
        'words' : ["cancel", "cancelled", "cancelling", "canceling"],
        'target' : send_to_customer_care,
        'message' : "Sent to Customer Care"
    },

    {
        'words' : ["ship"],
        'target' : send_to_rr,
        'message' : "Sent to Reception and Routing"
    },

    # Catch-all: every transcribed chunk ends with ". ", so ' ' is present in
    # any non-empty transcript that reaches this entry.
    {
        'words' : [' ', '-'],
        'target' : no_matches,
        'message' : "No Matches"
    }

]



####################################### FUNCTIONS ##########################################

# a function that splits the audio file into chunks
# and applies speech recognition - edited to make it one giant chunk instead of multiple small ones (would want to be split into small ones for conversations)
def get_large_audio_transcription(path, *, min_silence_len=1500,
                                  silence_margin_db=14, keep_silence=500,
                                  chunk_dir="audio-chunks"):
    """
    Split a WAV file on silence and run speech recognition on each chunk.

    Each chunk is exported to ``chunk_dir`` as ``chunk<i>.wav`` (the files are
    left on disk), transcribed with the module-level recognizer ``r`` via
    ``recognize_google``, and the recognized sentences are concatenated.

    Parameters:
        path: WAV file to transcribe, passed to ``AudioSegment.from_wav``.
        min_silence_len: Silence length in milliseconds that counts as a
            break between chunks. The default of 1500 keeps most recordings
            as one large chunk; lower it for conversations.
        silence_margin_db: How far below the recording's average loudness
            (``sound.dBFS``) audio must fall to be treated as silence.
        keep_silence: Milliseconds of silence kept at the chunk edges.
        chunk_dir: Directory the chunk files are written to; it is created
            when missing.

    Returns:
        The transcription as one string. Every recognized chunk contributes
        its text with only the first letter upper-cased, followed by ". ".
        Chunks that cannot be understood are skipped, so the result may be
        an empty string.

    Note: only ``sr.UnknownValueError`` is handled here; any other error
    raised by the recognizer propagates to the caller.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split the audio wherever the silence lasts min_silence_len ms or more
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        silence_thresh=sound.dBFS - silence_margin_db,
        keep_silence=keep_silence,
    )
    # create the directory for the audio chunks; exist_ok avoids the
    # check-then-create race of a separate isdir() test
    os.makedirs(chunk_dir, exist_ok=True)
    sentences = []
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the chunk so the recognizer can read it back as a file
        chunk_filename = os.path.join(chunk_dir, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
        # try converting the recorded chunk to text
        try:
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError as e:
            # unintelligible chunk: report it and move on to the next one
            print("Error:", str(e))
            continue
        # capitalize() also lower-cases everything after the first letter
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        sentences.append(text)
    # return the text for all chunks detected
    return "".join(sentences)


def _find_department(transcript):
    """Return the first routing entry with a keyword in *transcript*, or None."""
    # Transcripts are capitalized sentence by sentence, so compare in lower
    # case; otherwise keywords such as "RMA", or any keyword that starts a
    # sentence, would never match.
    haystack = transcript.lower()
    for department in all_departments_words:
        if any(word.lower() in haystack for word in department['words']):
            return department
    return None


def main():
    """
    Transcribe every .wav file in VM_directory and route it to a department.

    For each file the transcript is checked against all_departments_words in
    list order. The first entry with a matching keyword (plain substring
    match, case-insensitive) has its 'target' called and its 'message'
    printed. When nothing matches, no_matches() is called instead.
    """
    for entry in os.scandir(VM_directory):
        # only running through .wav files
        if not entry.path.endswith(".wav"):
            continue
        # hand over the path string rather than the DirEntry object
        vm_text_string = get_large_audio_transcription(entry.path)
        department = _find_department(vm_text_string)
        if department is None:
            # no keyword was detected in this recording
            no_matches()
            continue
        # calls the function that will actually send the vm
        department['target']()
        # prints the "sent to ..." from dict
        print(department['message'])

# Script entry point: the routing run starts as soon as this line executes.
main()

Changelog: fixed elif chains; added a list queue.

Save snippets that work from anywhere online with our extensions

Available in the Chrome Web Store

Get Firefox Add-on

Get VS Code extension

Comments

More like this

Python

@etanderson

speech to text v1.0.1

Importing images from a directory (Python) to list or dictionary

from PIL import Image
import glob
# Open every matching file under 'yourpath/' and keep the image objects.
image_list = [
    Image.open(image_path)
    for image_path in glob.glob('yourpath/*.gif')  # assuming gif
]

python - Find out the percentage of missing values in each column in the given dataset - Stack Overflow

# Number of null cells in each column of df.
null_counts = df.isnull().sum()
# Share of null cells per column, as a percentage of the row count.
percent_missing = null_counts * 100 / len(df)
# Summary table: one row per column with its name and missing percentage.
missing_value_df = pd.DataFrame(
    {
        'column_name': df.columns,
        'percent_missing': percent_missing,
    }
)

#python #python #loops #whileloop

Print the name of 7 days in a week - by using while loop

# Print the name of each of the 7 days of the week using a while loop.
days = 0  # index of the next day to print
week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
while days < len(week):
    # trailing space keeps "is" and the day name apart
    print("Today is " + week[days])
    days += 1

Getting the index of an item in a list containing it in Python

>>> ["foo", "bar", "baz"].index("bar")
1

#javascript #python #search #historicalcode #google #algorithms

Google’s PageRank Algorithm from 1996 - the origin of internet search

import numpy as np

def pagerank(M, num_iterations=100, d=0.85):
    """
    Iterate the PageRank update ``v = d * M @ v + (1 - d) / N``.

    M: 2-D array; N is taken from its number of columns.
       (Presumably a column-stochastic link matrix -- confirm with the caller.)
    num_iterations: how many times the update is applied.
    d: damping factor used in the update.

    The start vector is random (np.random.rand) and scaled so its entries sum
    to 1. Returns the final (N, 1) vector.
    """
    node_count = M.shape[1]
    rank = np.random.rand(node_count, 1)
    rank /= np.linalg.norm(rank, 1)
    step = 0
    while step < num_iterations:
        rank = d * (M @ rank) + (1 - d) / node_count
        step += 1
    return rank

#python #python #strings #vowels #function

Get vowels in strings

This method gets vowels (‘a’, ‘e’, ‘i’, ‘o’, ‘u’) found in a string.
   
#make a function:
def get_vowels(string):
    """Return a list of the items of *string* that occur in 'aeiou', in order."""
    found = []
    for char in string:
        if char in 'aeiou':
            found.append(char)
    return found


# Example calls; the expected result is shown next to each one.
get_vowels('foobar')  # ['o', 'o', 'a']
get_vowels('gym')  # []

Could not build wheels for tokenizers which use PEP 517 and cannot be installed directly

https://github.com/pydata/bottleneck/issues/281

How To Bypass Cloudflare Bot Protection In Selenium - CodingTutz

# Build the Chrome options first, then start the driver with them.
options = webdriver.ChromeOptions()
# Experimental options applied in this order: drop the "enable-automation"
# switch, then turn the automation extension off.
experimental_options = {
    "excludeSwitches": ["enable-automation"],
    'useAutomationExtension': False,
}
for option_name, option_value in experimental_options.items():
    options.add_experimental_option(option_name, option_value)
# Disable the Blink feature named AutomationControlled.
options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(options=options)

Python Loop through Excel sheets, place into one df - Stack Overflow

import pandas as pd

# Read every sheet of the workbook. sheet_name=None makes read_excel return
# a dict of {sheet name: DataFrame}; the older `sheetname` spelling is no
# longer accepted by current pandas.
sheets_dict = pd.read_excel('Book1.xlsx', sheet_name=None)

frames = []
for name, sheet in sheets_dict.items():
    # remember which sheet each row came from
    sheet['sheet'] = name
    # keep only the last line of multi-line column headers
    sheet = sheet.rename(columns=lambda x: x.split('\n')[-1])
    frames.append(sheet)

# DataFrame.append was removed in pandas 2.0. One concat with
# ignore_index=True gives the same stacked table with a fresh 0..n-1 index.
full_table = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

print(full_table)

#python #dates #functions #python3.8

How to parse a String into Datetime in Python

from datetime import datetime

# Text to parse and the strptime format that describes it.
date_text = 'Jun 1 2005  1:33PM'
date_format = '%b %d %Y %I:%M%p'
datetime_object = datetime.strptime(date_text, date_format)

python - Way to change Google Chrome user agent in Selenium? - Stack Overflow

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from fake_useragent import UserAgent

# Pick a random user-agent string and pass it to Chrome as an argument.
options = Options()
ua = UserAgent()
userAgent = ua.random
print(userAgent)
options.add_argument(f'user-agent={userAgent}')

# Selenium 4 takes the driver path through a Service object and the options
# through `options=`; the older `executable_path=` / `chrome_options=`
# keywords are no longer accepted.
service = Service(executable_path=r'C:\WebDrivers\ChromeDriver\chromedriver_win32\chromedriver.exe')
driver = webdriver.Chrome(service=service, options=options)
try:
    driver.get("https://www.google.co.in")
finally:
    # always shut the browser down, even when the page load raises
    driver.quit()

python - How to see the progress bar of read_csv - Stack Overflow

def read_csv_pgbar(csv_path, chunksize, usecols, dtype=object):
    """
    Read a CSV file in chunks while showing a tqdm progress bar.

    csv_path: path of the CSV file.
    chunksize: rows per chunk, passed to pd.read_csv.
    usecols: columns to load, passed to pd.read_csv.
    dtype: dtype passed to pd.read_csv (defaults to object).

    Returns one DataFrame holding all chunks concatenated along axis 0.
    """
    # Count the lines first so the progress bar has a total. The `with` block
    # closes the file handle again. The total is the physical line count
    # minus one, so it is only an estimate if fields contain line breaks.
    with open(csv_path, 'r') as csv_file:
        rows = sum(1 for _ in csv_file) - 1  # minus the header

    chunk_list = []

    with tqdm(total=rows, desc='Rows read: ') as bar:
        for chunk in pd.read_csv(csv_path, chunksize=chunksize, usecols=usecols, dtype=dtype):
            chunk_list.append(chunk)
            bar.update(len(chunk))

    df = pd.concat(chunk_list, axis=0)
    print('Finish reading csv file')

    return df

#python #python #lists #dictionary

Convert two lists into a dictionary

# make a function: def is the keyword for the function:
def to_dictionary(keys, values):
    """Pair each key with the value at the same position and return the dict."""
    # return is the keyword that tells the program that the function has to
    # return a value
    return dict(zip(keys, values))


# keys and values are the lists:
keys = ["a", "b", "c"]
values = [2, 3, 4]

print(to_dictionary(keys, values))  # {'a': 2, 'b': 3, 'c': 4}

#python #interesting #arrays #sorting #interviewquestions

Sorting an array without changing position of negative numbers

# Python3 implementation of the approach 

# Function to sort the array such that 
# negative values do not get affected 
def sortArray(a, n):
    """
    Sort the non-negative values among the first n items of `a` in place.

    Negative values keep their positions. The first n items are then printed
    on one line, each followed by a space. Returns None.
    """
    # Positions that hold non-negative values
    slots = [idx for idx in range(n) if a[idx] >= 0]

    # Write the sorted non-negative values back into those same positions
    for idx, value in zip(slots, sorted(a[idx] for idx in slots)):
        a[idx] = value

    # Print the sorted array
    for idx in range(n):
        print(a[idx], end=" ")


# Driver code
arr = [2, -6, -3, 8, 4, 1]
n = len(arr)
sortArray(arr, n)

#python ##python #strings #comments

Create simple string along with variables

# assign a value to a variable:
types_of_people = 10
# make a string using the variable name; the f prefix must sit directly
# before a straight quote:
X = f"there are {types_of_people} types of people."

Output:
there are 10 types of people.

Browse more snippets >>