# Categorize free-text survey responses by keyword density.
#
# Reads the survey workbook, tags each response with every category whose
# keywords make up at least 10% of the response's tokens, and writes the
# result (original columns plus `Categories`) to a new workbook.

# Load the necessary packages ----
library(openxlsx) # write.xlsx()
library(stringr)  # str_count(), str_split(), fixed()
library(readxl)   # read_excel()

# Input / output locations ----
input_path <-
  "/Users/felixdavis/Desktop/civicpulse_how have campaigns changed.xlsx"
# Saved to the same directory as the original file
output_path <- "/Users/felixdavis/Desktop/Categorized_Responses.xlsx"
response_column <-
  "In your view, how have political campaigns changed over time, if at all?"

# Define categories and associated keywords ----
categories <- list(
  "Focus on Issues" = c(
    "shift from issues", "social issues", "non-policy matters"
  ),
  "Financial Influence" = c(
    "role of money", "fundraising", "corporate influence"
  ),
  "Media & Tech" = c(
    "Social Media", "Electronic Communication", "Internet"
  ),
  "Negativity & Opponent Attacks" = c(
    "mud-slinging", "opponent", "negative campaign messages", "division"
  ),
  "Public Engagement" = c(
    "door-knocking", "yard signs", "face-to-face interactions"
  ),
  "Partisanship" = c("Increasing Polarization"),
  "Accountability" = c(
    "misinformation", "lack of accountability", "honesty"
  )
)

# Function to categorize responses ----

#' Categorize a single free-text response
#'
#' A category matches when the number of (case-insensitive, literal) keyword
#' occurrences divided by the number of whitespace-separated tokens in the
#' response is at least `threshold`.
#'
#' @param response A single response (length-1 character, or `NA`).
#' @param category_keywords Named list mapping category name to a character
#'   vector of keyword phrases. Defaults to the script-level `categories`.
#' @param threshold Minimum keyword-occurrences-per-token ratio required for
#'   a category to match. Defaults to 0.10 (10%).
#' @return A length-1 character: `"Not Available"` for `NA`/empty input,
#'   `"None"` when no category matches, otherwise the matching category names
#'   joined by `", "` in the order they appear in `category_keywords`.
categorize_responses <- function(response,
                                 category_keywords = categories,
                                 threshold = 0.10) {
  if (is.na(response) || response == "") {
    return("Not Available")
  }

  # Both values are the same for every category, so compute them once.
  response_lower <- tolower(response)
  token_count <- length(unlist(str_split(response_lower, "\\s")))
  if (is.na(token_count) || token_count == 0) {
    return("None")
  }

  # Total keyword occurrences per category. Keywords are literal phrases, so
  # match them with fixed() rather than interpreting them as regexes.
  keyword_hits <- vapply(
    category_keywords,
    function(keywords) {
      sum(stringr::str_count(response_lower, stringr::fixed(tolower(keywords))))
    },
    numeric(1)
  )

  is_match <- !is.na(keyword_hits) &
    (keyword_hits / token_count) >= threshold
  matched <- names(category_keywords)[is_match]

  if (length(matched) == 0) {
    return("None")
  }
  paste(matched, collapse = ", ")
}

# Read the dataset ----
df <- read_excel(input_path)

# Fail early with a clear message if the survey question column is missing.
if (!response_column %in% names(df)) {
  stop(
    "Column not found in ", input_path, ": ", response_column,
    call. = FALSE
  )
}

# Apply the function to categorize responses ----
# vapply() guarantees a character vector (even for zero rows) and
# USE.NAMES = FALSE avoids attaching each full response text as a name.
categories_result <- vapply(
  df[[response_column]],
  categorize_responses,
  character(1),
  USE.NAMES = FALSE
)

# Add the new column to the dataframe
df$Categories <- categories_result

# Save the updated dataframe to a new Excel file ----
write.xlsx(df, output_path)