# USCLAP/final_lda_Qda.R
# (file-viewer metadata: 319 lines, 10 KiB, R)

library(olsrr)
library(MASS)
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)
library(rpart)
library(randomForest)
library(pheatmap)
library(viridis)
# Data loading and preparation ----
data1 <- read.csv("deck_data.csv")
# Finishing position as a percentile of the tournament field (0-100).
data1$y <- (data1$Rank / data1$TournamentSize) * 100
# Dichotomize the percentile at its median into a two-class outcome.
# NOTE(review): decks at or below the median percentile get the label
# "Low" — confirm this orientation is intended (a low Rank/TournamentSize
# percentile usually means a *better* finish).
median_y <- median(data1$y)
data1$Performance <- as.factor(ifelse(data1$y <= median_y, "Low", "High"))
# 70/30 train/test split (seq_len instead of 1:nrow; identical RNG stream).
set.seed(123) # For reproducibility
train_indices <- sample(seq_len(nrow(data1)), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
# Recommend cards to add to / remove from one decklist by fitting
# per-commander classifiers (decision tree, LDA, QDA, random forest)
# that predict the two-class Performance label from card-presence columns.
#
# Args:
#   decklist_row: a single-row data frame (one deck) with the same columns
#     as train_data/test_data, including Commander and Partner.
#   train_data, test_data: the global train/test splits of the dataset.
#
# Side effects: prints confusion matrices for every model that could be
# fit, and plots the fitted decision tree.
#
# Returns: NULL (with a warning) when fewer than 10 training decks share
# the commander; otherwise a list with the decision-tree prediction for
# the deck, up to 5 cards to add, up to 5 cards to consider removing, and
# the training-set confusion matrix.
recommend_cards <- function(decklist_row, train_data, test_data) {
# Non-card columns: deck/tournament metadata and derived outcome columns.
# Every column NOT listed here is treated as a card-presence predictor.
non_card_columns <- c(
"Rank", "TournamentSize", "Commander", "Partner",
"Num_Artifacts", "Num_Creatures", "Num_Enchantments",
"Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
"Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
"Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
"Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
"Num_Boardwipe", "y", "Performance"
)
commander_input <- as.character(decklist_row$Commander)
# Treat a missing Partner as "no partner" so the subsetting below matches
# solo-commander decks.
partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))
# Restrict both splits to decks with the same commander (and the same
# partner, when one is present).
if (partner_input == "") {
subset_train_data <- subset(train_data, Commander == commander_input)
subset_test_data <- subset(test_data, Commander == commander_input)
} else {
subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
}
# Too few decks for this commander to fit anything reliable; bail out.
if (nrow(subset_train_data) < 10) {
warning("Not enough data to build a reliable model.")
return(NULL)
}
# Card columns = everything that is not metadata.
card_columns <- setdiff(names(train_data), non_card_columns)
predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
response_train <- subset_train_data$Performance
# Remove near-zero-variance predictors (caret::nearZeroVar) before
# LDA/QDA: near-constant columns make the covariance estimates singular.
nzv <- nearZeroVar(predictors_train_full)
if (length(nzv) > 0) {
predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
} else {
predictors_train <- predictors_train_full
}
# Record the surviving predictors so test/decklist data use the same set.
filtered_card_columns <- colnames(predictors_train)
# LDA/QDA require at least two classes in the training response.
run_lda_qda <- length(unique(response_train)) > 1
# Build Decision Tree (always attempted; used later for recommendations).
dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")
# Try LDA; failures (e.g. collinear predictors) become warnings, not errors.
lda_model <- NULL
if (run_lda_qda) {
lda_model <- tryCatch(
{
lda(response_train ~ ., data = data.frame(response_train, predictors_train))
},
error = function(e) {
warning("LDA failed: ", e$message)
NULL
}
)
} else {
warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
}
# Try QDA under the same error policy as LDA.
qda_model <- NULL
if (run_lda_qda) {
qda_model <- tryCatch(
{
qda(response_train ~ ., data = data.frame(response_train, predictors_train))
},
error = function(e) {
warning("QDA failed: ", e$message)
NULL
}
)
}
# Build Random Forest (default randomForest settings).
rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))
# Evaluate every fitted model on the held-out decks, if any exist.
if (nrow(subset_test_data) > 0) {
# Subset the test data to the same filtered predictors used in training.
predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
response_test <- subset_test_data$Performance
# Predictions (Decision Tree always available)
dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
cat("\nDecision Tree Confusion Matrix:\n")
print(confusionMatrix(dt_predictions, response_test))
if (!is.null(lda_model)) {
lda_predictions <- predict(lda_model, newdata = predictors_test)$class
cat("\nLDA Confusion Matrix:\n")
print(confusionMatrix(lda_predictions, response_test))
}
if (!is.null(qda_model)) {
qda_predictions <- predict(qda_model, newdata = predictors_test)$class
cat("\nQDA Confusion Matrix:\n")
print(confusionMatrix(qda_predictions, response_test))
}
rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
cat("\nRandom Forest Confusion Matrix:\n")
print(confusionMatrix(rf_predictions, response_test))
} else {
warning("No test data available for this commander.")
}
# Plot the decision tree.
rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
main = paste("Decision Tree for Commander:", commander_input))
# Predict the given decklist's class with the decision tree, applying the
# same predictor filtering used during training.
predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")
cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
print(predicted_performance_dt)
# Derive add/remove suggestions from the tree's variable importance,
# when rpart produced any.
if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
importance <- data.frame(
card = names(dt_model$variable.importance),
importance = dt_model$variable.importance,
row.names = NULL
)
# Most important cards first.
importance <- importance[order(-importance$importance), ]
# Cards present in this deck; assumes card columns are 0/1 presence
# flags — TODO confirm against the dataset's encoding.
card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
indices <- which(card_values == 1)
deck_cards <- names(card_values)[indices]
# Suggest the 5 most important cards the deck does NOT run.
cards_to_add <- setdiff(importance$card, deck_cards)
top_cards_to_add <- head(cards_to_add, 5)
# Cards in the deck that carry no importance in the tree.
non_important_cards <- setdiff(deck_cards, importance$card)
# NOTE(review): by construction none of non_important_cards appear in
# importance$card, so this ifelse always yields 0 and the subsequent
# ordering among them is arbitrary — verify this is the intent.
non_important_with_scores <- data.frame(
card = non_important_cards,
importance = ifelse(non_important_cards %in% importance$card,
importance$importance[match(non_important_cards, importance$card)], 0)
)
non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
top_cards_to_remove <- head(non_important_with_scores$card, 5)
} else {
warning("No variable importance available from the Decision Tree model.")
top_cards_to_add <- character(0)
top_cards_to_remove <- character(0)
}
# Resubstitution (training-set) confusion matrix for the decision tree.
predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
print(cm_train_dt)
return(list(
predicted_standing_dt = predicted_performance_dt,
cards_to_add = top_cards_to_add,
cards_to_consider_removing = top_cards_to_remove,
cm_train_dt = cm_train_dt
))
}
# Example usage ----
# Score a single held-out deck and report its recommendations.
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)
cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)
cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)
cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)
# Extract the training confusion matrix from the model output.
# (Fix: cm_train_dt was previously printed BEFORE this assignment, which
# errors on a fresh session or shows a stale object from an earlier run.)
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)
# High-contrast viridis palette for the heatmap.
my_palette <- viridis(100)
# Plot the confusion matrix as an annotated heatmap; clustering is off so
# the class order on both axes is preserved.
pheatmap(conf_matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = my_palette,
  fontsize_row = 8,
  fontsize_col = 8,
  angle_col = 45,
  display_numbers = TRUE,
  number_format = "%.0f",
  main = "Confusion Matrix Heatmap")
# Batch run ----
# Identify the 5 most frequently played commanders in the full dataset.
# (count() sorts by the grouping variable just like group_by + summarize,
# so ties in freq break identically.)
top_commanders <- data1 %>%
  count(Commander, name = "freq") %>%
  arrange(desc(freq)) %>%
  slice_head(n = 5) %>%
  pull(Commander)
# Collect the recommender's output for each of those commanders.
model_results_list <- list()
for (comm in top_commanders) {
  # Look for held-out decks led by this commander.
  candidate_decks <- subset(test_data, Commander == comm)
  if (nrow(candidate_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }
  # Use the first held-out deck as the demonstration input.
  test_deck <- candidate_decks[1, ]
  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)
  # recommend_cards() returns NULL when the commander has too little
  # training data; only keep real results.
  if (!is.null(recommendations)) {
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}
# After the loop, model_results_list holds the output for every commander
# that had both enough training data and at least one test deck.
# (Fix: header previously hard-coded "Top 10"; report the actual count.)
cat("\nSummary of Model Results for Top", length(model_results_list), "Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  # predict(..., type = "class") returns a factor; cat() on a factor prints
  # the underlying integer level code, so convert to character first.
  cat("Predicted Standing (Decision Tree):", as.character(res$predicted_standing_dt), "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}
# Example usage from your code snippet (already integrated in the loop,
# but you could still do individually if desired):
# test_deck <- test_data[3, ]
# print(test_deck$Commander)
# recommendations <- recommend_cards(test_deck, train_data, test_data)
#
# cat("\nCards to Consider Adding:\n")
# print(recommendations$cards_to_add)
#
# cat("\nCards to Consider Removing:\n")
# print(recommendations$cards_to_consider_removing)
#
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
# print(recommendations$predicted_standing_dt)