Simple_Text_Analysis: Categorization (How Campaigns Have Changed Over Time)

PHOTO EMBED

Mon Sep 04 2023 18:45:55 GMT+0000 (Coordinated Universal Time)

Saved by @dr_dziedzorm

# Load the necessary packages
library(openxlsx)
library(stringr)
library(readxl)  # Added this package for read_excel function

# Keyword lexicon: each category name maps to the phrases that signal it.
# Phrase casing is irrelevant to matching because the matcher lower-cases
# both the phrase and the response before comparing.
categories <- list(
  "Focus on Issues" = c(
    "shift from issues",
    "social issues",
    "non-policy matters"
  ),
  "Financial Influence" = c(
    "role of money",
    "fundraising",
    "corporate influence"
  ),
  "Media & Tech" = c(
    "Social Media",
    "Electronic Communication",
    "Internet"
  ),
  "Negativity & Opponent Attacks" = c(
    "mud-slinging",
    "opponent",
    "negative campaign messages",
    "division"
  ),
  "Public Engagement" = c(
    "door-knocking",
    "yard signs",
    "face-to-face interactions"
  ),
  "Partisanship" = c(
    "Increasing Polarization"
  ),
  "Accountability" = c(
    "misinformation",
    "lack of accountability",
    "honesty"
  )
)

# Categorize a single free-text response by keyword density.
#
# For every category, the number of (case-insensitive, literal) occurrences of
# its keyword phrases in the response is divided by the number of
# whitespace-separated words in the response. The category is assigned when
# that ratio reaches `threshold`.
#
# Arguments:
#   response          A single response (length-1 vector); may be NA.
#   category_keywords Named list mapping a category name to a character vector
#                     of keyword phrases. Defaults to the script-level
#                     `categories` lexicon.
#   threshold         Minimum keyword-occurrences / word-count ratio required
#                     for a category to be assigned (default 0.10, i.e. 10%).
#
# Returns:
#   A single string: "Not Available" for a missing, empty, or whitespace-only
#   response; "None" when no category reaches the threshold; otherwise the
#   matching category names joined by ", " in lexicon order.
categorize_responses <- function(response,
                                 category_keywords = categories,
                                 threshold = 0.10) {
  # The function classifies one answer at a time; fail loudly on a vector
  # instead of letting `||` / `if` choke on a length > 1 condition.
  stopifnot(length(response) <= 1L)

  if (length(response) == 0L || is.na(response)) {
    return("Not Available")
  }

  # Normalise once, outside the per-category work. Trimming makes a response
  # consisting only of whitespace count as empty rather than as "None".
  text <- tolower(trimws(as.character(response)))
  if (text == "") {
    return("Not Available")
  }

  # Split on *runs* of whitespace so that repeated spaces, tabs, or newlines
  # do not produce empty tokens that inflate the word count and thereby
  # deflate the keyword density.
  total_words <- length(strsplit(text, "\\s+")[[1]])

  # Count non-overlapping literal occurrences of one keyword phrase.
  # `fixed = TRUE` keeps regex metacharacters in a phrase from being
  # interpreted as a pattern; gregexpr() reports -1 when there is no match.
  count_occurrences <- function(keyword) {
    positions <- gregexpr(tolower(keyword), text, fixed = TRUE)[[1]]
    sum(positions > 0L)
  }

  reaches_threshold <- vapply(
    category_keywords,
    function(keywords) {
      keyword_count <- sum(vapply(keywords, count_occurrences, integer(1)))
      (keyword_count / total_words) >= threshold
    },
    logical(1)
  )

  category_matches <- names(category_keywords)[reaches_threshold]

  if (length(category_matches) == 0L) {
    return("None")
  }

  paste(category_matches, collapse = ", ")
}

# Locations of the survey export and of the categorized copy, which is saved
# to the same directory as the original file.
input_path <- "/Users/felixdavis/Desktop/civicpulse_how have campaigns changed.xlsx"
output_path <- "/Users/felixdavis/Desktop/Categorized_Responses.xlsx"

# Header of the column holding the free-text answers to categorize.
response_col <- "In your view, how have political campaigns changed over time, if at all?"

# Read the dataset
df <- read_excel(input_path)

# Fail with a clear message if the expected question column is absent,
# rather than categorizing NULL and attaching a malformed column.
if (!response_col %in% names(df)) {
  stop("Column not found in ", input_path, ": ", response_col, call. = FALSE)
}

# Apply the function to categorize responses. vapply() guarantees one string
# per row (even for an empty sheet), and USE.NAMES = FALSE keeps the result
# from being named by the full text of every response.
categories_result <- vapply(
  df[[response_col]],
  categorize_responses,
  character(1),
  USE.NAMES = FALSE
)

# Add the new column to the dataframe
df$Categories <- categories_result

# Save the updated dataframe to a new Excel file
write.xlsx(df, output_path)
content_copyCOPY