# Load the necessary packages
library(openxlsx)
library(stringr)
library(readxl) # Added this package for read_excel function
# Category -> keyword lookup table.
# Each element name is the category label written to the output; each value is
# the character vector of phrases counted as evidence for that category.
categories <- list(
  "Focus on Issues" = c(
    "shift from issues",
    "social issues",
    "non-policy matters"
  ),
  "Financial Influence" = c(
    "role of money",
    "fundraising",
    "corporate influence"
  ),
  "Media & Tech" = c(
    "Social Media",
    "Electronic Communication",
    "Internet"
  ),
  "Negativity & Opponent Attacks" = c(
    "mud-slinging",
    "opponent",
    "negative campaign messages",
    "division"
  ),
  "Public Engagement" = c(
    "door-knocking",
    "yard signs",
    "face-to-face interactions"
  ),
  "Partisanship" = c(
    "Increasing Polarization"
  ),
  "Accountability" = c(
    "misinformation",
    "lack of accountability",
    "honesty"
  )
)
# Categorize a single free-text response by keyword share.
#
# For every category, the number of (case-insensitive, literal) keyword
# occurrences in the response is divided by the number of words in the
# response; the category is assigned when that share reaches `min_share`.
#
# Arguments:
#   response    A single value (length-1 vector) holding the response text.
#   keyword_map Named list mapping category label -> character vector of
#               keywords. Defaults to the script-level `categories` list
#               (the default is evaluated lazily, at call time).
#   min_share   Minimum keyword-occurrences / word-count ratio required for a
#               category to be assigned. Defaults to 0.10 (10%).
#
# Returns:
#   "Not Available" for NA or blank (empty / whitespace-only) responses,
#   "None" when no category reaches the threshold, otherwise the matching
#   category labels joined by ", " in the order they appear in `keyword_map`.
categorize_responses <- function(response,
                                 keyword_map = categories,
                                 min_share = 0.10) {
  if (is.na(response)) {
    return("Not Available")
  }

  # Lower-case once; keywords are lower-cased too, so matching ignores case.
  text <- tolower(as.character(response))

  # Split on runs of whitespace and drop empty tokens so that repeated,
  # leading or trailing whitespace does not inflate the word count.
  words <- strsplit(text, "[[:space:]]+")[[1]]
  word_count <- sum(nzchar(words))
  if (word_count == 0) {
    # Empty or whitespace-only response: nothing to categorize.
    return("Not Available")
  }

  # Count non-overlapping literal occurrences of one keyword in the response.
  # fixed = TRUE keeps regex metacharacters in keywords from being interpreted.
  count_hits <- function(keyword) {
    hits <- gregexpr(tolower(keyword), text, fixed = TRUE)[[1]]
    sum(hits > 0) # gregexpr() reports -1 when there is no match
  }

  reaches_threshold <- vapply(
    keyword_map,
    function(keywords) {
      keyword_count <- sum(vapply(keywords, count_hits, integer(1)))
      (keyword_count / word_count) >= min_share
    },
    logical(1)
  )

  category_matches <- names(keyword_map)[reaches_threshold]
  if (length(category_matches) == 0) {
    return("None")
  }
  paste(category_matches, collapse = ", ")
}
# Read the survey export (first sheet of the workbook).
df <- read_excel("/Users/felixdavis/Desktop/civicpulse_how have campaigns changed.xlsx")

# Name of the free-text question column to categorize.
question_col <- "In your view, how have political campaigns changed over time, if at all?"
if (!question_col %in% names(df)) {
  stop("Column not found in the workbook: ", question_col, call. = FALSE)
}

# Categorize every response. vapply() guarantees a plain character vector with
# one label string per row (even for zero rows), and USE.NAMES = FALSE keeps
# the full response texts from being attached as element names.
categories_result <- vapply(
  df[[question_col]],
  categorize_responses,
  FUN.VALUE = character(1),
  USE.NAMES = FALSE
)

# Add the new column to the data frame.
df$Categories <- categories_result

# Save the updated data frame to a new Excel file next to the original.
write.xlsx(df, "/Users/felixdavis/Desktop/Categorized_Responses.xlsx")
# Comments