Categorizing survey responses into broader categories (most important part of the job); 10% keyword match
Mon Sep 04 2023 14:33:24 GMT+0000 (Coordinated Universal Time)
Saved by @dr_dziedzorm
# Categorize free-text survey responses into predefined categories by keyword
# matching, then write the result (original data plus a `Category` column) to
# a new Excel file.

# Load necessary libraries
library(dplyr)
library(stringr)
library(readxl)  # For reading .xlsx files
library(writexl) # For writing .xlsx files

# Input/output locations and the name of the column holding the responses
input_path <- "/Users/felixdavis/Desktop/civicpulse_important part of job.xlsx"
output_path <- "/Users/felixdavis/Desktop/categorized_civicpulse_important part of job.xlsx"
response_col <- "What do you think is the single most important part of your job as a public official?"

# Read the data
data <- read_excel(input_path)

# Print out column names for debugging
print(colnames(data))

# Fail early with a clear message instead of producing a broken Category
# column when the expected question column is absent.
if (!response_col %in% colnames(data)) {
  stop(
    "Column not found in input file: '", response_col, "'",
    call. = FALSE
  )
}

# Define the categories and keywords
categories <- list(
  "Communication and Information Dissemination" = c(
    "Clear and accurate information", "Debunking", "misinformation",
    "Open and honest communication"
  ),
  "Public Safety and Health" = c(
    "Safety measures", "Health guidelines", "Emergency services"
  ),
  "Leadership and Decision-Making" = c(
    "Policy-making", "Oversight and guidance", "Long-term planning"
  ),
  "Emotional and Psychological Support" = c(
    "Calm and reassurance", "Hope and positivity", "Emotional well-being"
  ),
  "Resource Management" = c(
    "Budget management", "Resource allocation", "Efficient service delivery"
  ),
  "Public Engagement" = c(
    "Listening to constituents", "Public engagement", "Transparency"
  ),
  "Crisis Management" = c(
    "Crisis preparedness", "Emergency response", "PPE provision"
  ),
  "Legal and Ethical Responsibility" = c(
    "Following state directives", "Ethical considerations", "Legal obligations"
  ),
  "Future Planning" = c(
    "Financial planning", "Future vision", "Sustainability"
  ),
  "Community Building" = c(
    "Quality of life", "Community needs", "Social connectivity"
  )
)

# Categorize a single response.
#
# Args:
#   row: A single response (length-1 value); NA is allowed.
#   category_keywords: Named list mapping a category name to a character
#     vector of keyword phrases. Defaults to the script-level `categories`.
#   threshold: A phrase matches when the fraction of its content words found
#     in `row` is strictly greater than this value.
#   stop_words: Lower-case words dropped from keyword phrases before matching.
#     Without this, filler words such as "and" satisfy the threshold on their
#     own and push almost every response into the first category.
#
# Returns:
#   The name of the first category (in list order) with a matching phrase,
#   "Uncategorized" when nothing matches, or "Uncategorized - NA" for NA input.
categorize_row <- function(row,
                           category_keywords = categories,
                           threshold = 0.1,
                           stop_words = c(
                             "a", "an", "and", "the", "of", "to",
                             "in", "on", "for", "or"
                           )) {
  stopifnot(length(row) == 1)

  # Check for NA values first
  if (is.na(row)) {
    return("Uncategorized - NA")
  }
  row <- as.character(row)

  for (cat in names(category_keywords)) {
    for (keyword in category_keywords[[cat]]) {
      # Split the phrase into words and keep only the content words
      keyword_words <- str_split(keyword, " ")[[1]]
      keep <- nzchar(keyword_words) & !(tolower(keyword_words) %in% stop_words)
      keyword_words <- keyword_words[keep]
      if (length(keyword_words) == 0) {
        next # nothing left to match; also avoids dividing by zero
      }

      # Count content words present in the response. fixed() matches the word
      # literally, so regex metacharacters in a keyword cannot change meaning.
      matching_words <- sum(vapply(
        keyword_words,
        function(word) str_detect(row, fixed(word, ignore_case = TRUE)),
        logical(1)
      ))

      if (matching_words / length(keyword_words) > threshold) {
        return(cat)
      }
    }
  }

  "Uncategorized"
}

# Apply the function to categorize each row. vapply() guarantees a character
# vector even when the input has zero rows.
data$Category <- vapply(
  data[[response_col]],
  categorize_row,
  character(1),
  USE.NAMES = FALSE
)

# Write the categorized data back to a new Excel file
write_xlsx(data, output_path)
Comments