# USCLAP/final_lda_Qda.R
# (file-viewer metadata: 319 lines, 10 KiB, R)

library(olsrr)
library(MASS)
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)
library(rpart)
library(randomForest)
library(pheatmap)
library(viridis)
# Data loading and preparation ----
data1 <- read.csv("deck_data.csv")
# Finishing position as a percentile of the tournament field (0-100).
data1$y <- (data1$Rank / data1$TournamentSize) * 100
# Dichotomize the percentile at its median into a two-class outcome.
# NOTE(review): decks at or below the median percentile get the label
# "Low" — confirm this orientation is intended (a low Rank/TournamentSize
# percentile usually means a *better* finish).
median_y <- median(data1$y)
data1$Performance <- as.factor(ifelse(data1$y <= median_y, "Low", "High"))
# 70/30 train/test split (seq_len instead of 1:nrow; identical RNG stream).
set.seed(123) # For reproducibility
train_indices <- sample(seq_len(nrow(data1)), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
# Recommend cards to add to / remove from one decklist by fitting
# per-commander classifiers (decision tree, LDA, QDA, random forest)
# that predict the two-class Performance label from card-presence columns.
#
# Args:
#   decklist_row: a single-row data frame (one deck) with the same columns
#     as train_data/test_data, including Commander and Partner.
#   train_data, test_data: the global train/test splits of the dataset.
#
# Side effects: prints confusion matrices for every model that could be
# fit, and plots the fitted decision tree.
#
# Returns: NULL (with a warning) when fewer than 10 training decks share
# the commander; otherwise a list with the decision-tree prediction for
# the deck, up to 5 cards to add, up to 5 cards to consider removing, and
# the training-set confusion matrix.
recommend_cards <- function(decklist_row, train_data, test_data) {
# Non-card columns: deck/tournament metadata and derived outcome columns.
# Every column NOT listed here is treated as a card-presence predictor.
non_card_columns <- c(
"Rank", "TournamentSize", "Commander", "Partner",
"Num_Artifacts", "Num_Creatures", "Num_Enchantments",
"Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
"Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
"Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
"Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
"Num_Boardwipe", "y", "Performance"
)
commander_input <- as.character(decklist_row$Commander)
# Treat a missing Partner as "no partner" so the subsetting below matches
# solo-commander decks.
partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))
# Restrict both splits to decks with the same commander (and the same
# partner, when one is present).
if (partner_input == "") {
subset_train_data <- subset(train_data, Commander == commander_input)
subset_test_data <- subset(test_data, Commander == commander_input)
} else {
subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
}
# Too few decks for this commander to fit anything reliable; bail out.
if (nrow(subset_train_data) < 10) {
warning("Not enough data to build a reliable model.")
return(NULL)
}
# Card columns = everything that is not metadata.
card_columns <- setdiff(names(train_data), non_card_columns)
predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
response_train <- subset_train_data$Performance
# Remove near-zero-variance predictors (caret::nearZeroVar) before
# LDA/QDA: near-constant columns make the covariance estimates singular.
nzv <- nearZeroVar(predictors_train_full)
if (length(nzv) > 0) {
predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
} else {
predictors_train <- predictors_train_full
}
# Record the surviving predictors so test/decklist data use the same set.
filtered_card_columns <- colnames(predictors_train)
# LDA/QDA require at least two classes in the training response.
run_lda_qda <- length(unique(response_train)) > 1
# Build Decision Tree (always attempted; used later for recommendations).
dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")
# Try LDA; failures (e.g. collinear predictors) become warnings, not errors.
lda_model <- NULL
if (run_lda_qda) {
lda_model <- tryCatch(
{
lda(response_train ~ ., data = data.frame(response_train, predictors_train))
},
error = function(e) {
warning("LDA failed: ", e$message)
NULL
}
)
} else {
warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
}
# Try QDA under the same error policy as LDA.
qda_model <- NULL
if (run_lda_qda) {
qda_model <- tryCatch(
{
qda(response_train ~ ., data = data.frame(response_train, predictors_train))
},
error = function(e) {
warning("QDA failed: ", e$message)
NULL
}
)
}
# Build Random Forest (default randomForest settings).
rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))
# Evaluate every fitted model on the held-out decks, if any exist.
if (nrow(subset_test_data) > 0) {
# Subset the test data to the same filtered predictors used in training.
predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
response_test <- subset_test_data$Performance
# Predictions (Decision Tree always available)
dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
cat("\nDecision Tree Confusion Matrix:\n")
print(confusionMatrix(dt_predictions, response_test))
if (!is.null(lda_model)) {
lda_predictions <- predict(lda_model, newdata = predictors_test)$class
cat("\nLDA Confusion Matrix:\n")
print(confusionMatrix(lda_predictions, response_test))
}
if (!is.null(qda_model)) {
qda_predictions <- predict(qda_model, newdata = predictors_test)$class
cat("\nQDA Confusion Matrix:\n")
print(confusionMatrix(qda_predictions, response_test))
}
rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
cat("\nRandom Forest Confusion Matrix:\n")
print(confusionMatrix(rf_predictions, response_test))
} else {
warning("No test data available for this commander.")
}
# Plot the decision tree.
rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
main = paste("Decision Tree for Commander:", commander_input))
# Predict the given decklist's class with the decision tree, applying the
# same predictor filtering used during training.
predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")
cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
print(predicted_performance_dt)
# Derive add/remove suggestions from the tree's variable importance,
# when rpart produced any.
if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
importance <- data.frame(
card = names(dt_model$variable.importance),
importance = dt_model$variable.importance,
row.names = NULL
)
# Most important cards first.
importance <- importance[order(-importance$importance), ]
# Cards present in this deck; assumes card columns are 0/1 presence
# flags — TODO confirm against the dataset's encoding.
card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
indices <- which(card_values == 1)
deck_cards <- names(card_values)[indices]
# Suggest the 5 most important cards the deck does NOT run.
cards_to_add <- setdiff(importance$card, deck_cards)
top_cards_to_add <- head(cards_to_add, 5)
# Cards in the deck that carry no importance in the tree.
non_important_cards <- setdiff(deck_cards, importance$card)
# NOTE(review): by construction none of non_important_cards appear in
# importance$card, so this ifelse always yields 0 and the subsequent
# ordering among them is arbitrary — verify this is the intent.
non_important_with_scores <- data.frame(
card = non_important_cards,
importance = ifelse(non_important_cards %in% importance$card,
importance$importance[match(non_important_cards, importance$card)], 0)
)
non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
top_cards_to_remove <- head(non_important_with_scores$card, 5)
} else {
warning("No variable importance available from the Decision Tree model.")
top_cards_to_add <- character(0)
top_cards_to_remove <- character(0)
}
# Resubstitution (training-set) confusion matrix for the decision tree.
predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
print(cm_train_dt)
return(list(
predicted_standing_dt = predicted_performance_dt,
cards_to_add = top_cards_to_add,
cards_to_consider_removing = top_cards_to_remove,
cm_train_dt = cm_train_dt
))
}
# Example usage ----
# Score a single held-out deck and report its recommendations.
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)
cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)
cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)
cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)
# Extract the training confusion matrix from the model output.
# (Fix: cm_train_dt was previously printed BEFORE this assignment, which
# errors on a fresh session or shows a stale object from an earlier run.)
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)
# High-contrast viridis palette for the heatmap.
my_palette <- viridis(100)
# Plot the confusion matrix as an annotated heatmap; clustering is off so
# the class order on both axes is preserved.
pheatmap(conf_matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = my_palette,
  fontsize_row = 8,
  fontsize_col = 8,
  angle_col = 45,
  display_numbers = TRUE,
  number_format = "%.0f",
  main = "Confusion Matrix Heatmap")
# Batch run ----
# Identify the 5 most frequently played commanders in the full dataset.
# (count() sorts by the grouping variable just like group_by + summarize,
# so ties in freq break identically.)
top_commanders <- data1 %>%
  count(Commander, name = "freq") %>%
  arrange(desc(freq)) %>%
  slice_head(n = 5) %>%
  pull(Commander)
# Collect the recommender's output for each of those commanders.
model_results_list <- list()
for (comm in top_commanders) {
  # Look for held-out decks led by this commander.
  candidate_decks <- subset(test_data, Commander == comm)
  if (nrow(candidate_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }
  # Use the first held-out deck as the demonstration input.
  test_deck <- candidate_decks[1, ]
  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)
  # recommend_cards() returns NULL when the commander has too little
  # training data; only keep real results.
  if (!is.null(recommendations)) {
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}
# After the loop, model_results_list holds the output for every commander
# that had both enough training data and at least one test deck.
# (Fix: header previously hard-coded "Top 10"; report the actual count.)
cat("\nSummary of Model Results for Top", length(model_results_list), "Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  # predict(..., type = "class") returns a factor; cat() on a factor prints
  # the underlying integer level code, so convert to character first.
  cat("Predicted Standing (Decision Tree):", as.character(res$predicted_standing_dt), "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}
# Example usage from your code snippet (already integrated in the loop,
# but you could still do individually if desired):
# test_deck <- test_data[3, ]
# print(test_deck$Commander)
# recommendations <- recommend_cards(test_deck, train_data, test_data)
#
# cat("\nCards to Consider Adding:\n")
# print(recommendations$cards_to_add)
#
# cat("\nCards to Consider Removing:\n")
# print(recommendations$cards_to_consider_removing)
#
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
# print(recommendations$predicted_standing_dt)