319 lines
10 KiB
R
319 lines
10 KiB
R
library(olsrr)
|
|
library(MASS)
|
|
library(rpart.plot)
|
|
library(ggfortify)
|
|
library(ggplot2)
|
|
library(tidyverse)
|
|
library(car)
|
|
library(Rcpp)
|
|
library(GGally)
|
|
library(leaps)
|
|
library(dplyr)
|
|
library(caret)
|
|
library(rpart)
|
|
library(randomForest)
|
|
|
|
library(pheatmap)
|
|
library(viridis)
|
|
# Load the dataset
|
|
data1 <- read.csv("deck_data.csv")
|
|
data1$y <- (data1$Rank / data1$TournamentSize) * 100
|
|
|
|
# Convert y to a categorical Performance variable
|
|
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
|
|
data1$Performance <- as.factor(data1$Performance)
|
|
|
|
# Split the data into training and test sets
|
|
set.seed(123) # For reproducibility
|
|
train_indices <- sample(1:nrow(data1), size = 0.7 * nrow(data1))
|
|
train_data <- data1[train_indices, ]
|
|
test_data <- data1[-train_indices, ]
|
|
|
|
recommend_cards <- function(decklist_row, train_data, test_data) {
|
|
# Non-card columns
|
|
non_card_columns <- c(
|
|
"Rank", "TournamentSize", "Commander", "Partner",
|
|
"Num_Artifacts", "Num_Creatures", "Num_Enchantments",
|
|
"Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
|
|
"Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
|
|
"Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
|
|
"Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
|
|
"Num_Boardwipe", "y", "Performance"
|
|
)
|
|
|
|
commander_input <- as.character(decklist_row$Commander)
|
|
partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))
|
|
|
|
if (partner_input == "") {
|
|
subset_train_data <- subset(train_data, Commander == commander_input)
|
|
subset_test_data <- subset(test_data, Commander == commander_input)
|
|
} else {
|
|
subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
|
|
subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
|
|
}
|
|
|
|
if (nrow(subset_train_data) < 10) {
|
|
warning("Not enough data to build a reliable model.")
|
|
return(NULL)
|
|
}
|
|
|
|
# Get card columns
|
|
card_columns <- setdiff(names(train_data), non_card_columns)
|
|
|
|
predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
|
|
response_train <- subset_train_data$Performance
|
|
|
|
# Remove near zero variance predictors before LDA/QDA
|
|
nzv <- nearZeroVar(predictors_train_full)
|
|
if (length(nzv) > 0) {
|
|
predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
|
|
} else {
|
|
predictors_train <- predictors_train_full
|
|
}
|
|
|
|
# Update card_columns to reflect the removed predictors
|
|
filtered_card_columns <- colnames(predictors_train)
|
|
|
|
# Check if we have at least two classes in response_train (for LDA/QDA)
|
|
run_lda_qda <- length(unique(response_train)) > 1
|
|
|
|
# Build Decision Tree
|
|
dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")
|
|
|
|
# Try LDA
|
|
lda_model <- NULL
|
|
if (run_lda_qda) {
|
|
lda_model <- tryCatch(
|
|
{
|
|
lda(response_train ~ ., data = data.frame(response_train, predictors_train))
|
|
},
|
|
error = function(e) {
|
|
warning("LDA failed: ", e$message)
|
|
NULL
|
|
}
|
|
)
|
|
} else {
|
|
warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
|
|
}
|
|
|
|
# Try QDA
|
|
qda_model <- NULL
|
|
if (run_lda_qda) {
|
|
qda_model <- tryCatch(
|
|
{
|
|
qda(response_train ~ ., data = data.frame(response_train, predictors_train))
|
|
},
|
|
error = function(e) {
|
|
warning("QDA failed: ", e$message)
|
|
NULL
|
|
}
|
|
)
|
|
}
|
|
|
|
# Build Random Forest
|
|
rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))
|
|
|
|
# Evaluate models on test data if available
|
|
if (nrow(subset_test_data) > 0) {
|
|
# Subset the test data to the same filtered predictors
|
|
predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
|
|
response_test <- subset_test_data$Performance
|
|
|
|
# Predictions (Decision Tree always available)
|
|
dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
|
|
cat("\nDecision Tree Confusion Matrix:\n")
|
|
print(confusionMatrix(dt_predictions, response_test))
|
|
|
|
if (!is.null(lda_model)) {
|
|
lda_predictions <- predict(lda_model, newdata = predictors_test)$class
|
|
cat("\nLDA Confusion Matrix:\n")
|
|
print(confusionMatrix(lda_predictions, response_test))
|
|
}
|
|
|
|
if (!is.null(qda_model)) {
|
|
qda_predictions <- predict(qda_model, newdata = predictors_test)$class
|
|
cat("\nQDA Confusion Matrix:\n")
|
|
print(confusionMatrix(qda_predictions, response_test))
|
|
}
|
|
|
|
rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
|
|
cat("\nRandom Forest Confusion Matrix:\n")
|
|
print(confusionMatrix(rf_predictions, response_test))
|
|
|
|
} else {
|
|
warning("No test data available for this commander.")
|
|
}
|
|
|
|
# Plot the decision tree
|
|
rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
|
|
main = paste("Decision Tree for Commander:", commander_input))
|
|
|
|
# Predict performance for the given decklist using the decision tree (example)
|
|
# We must also apply the same filtered predictors to the decklist
|
|
predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
|
|
predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")
|
|
|
|
cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
|
|
print(predicted_performance_dt)
|
|
|
|
# If variable importance is available from decision tree
|
|
if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
|
|
importance <- data.frame(
|
|
card = names(dt_model$variable.importance),
|
|
importance = dt_model$variable.importance,
|
|
row.names = NULL
|
|
)
|
|
|
|
importance <- importance[order(-importance$importance), ]
|
|
|
|
card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
|
|
indices <- which(card_values == 1)
|
|
deck_cards <- names(card_values)[indices]
|
|
|
|
cards_to_add <- setdiff(importance$card, deck_cards)
|
|
top_cards_to_add <- head(cards_to_add, 5)
|
|
|
|
non_important_cards <- setdiff(deck_cards, importance$card)
|
|
non_important_with_scores <- data.frame(
|
|
card = non_important_cards,
|
|
importance = ifelse(non_important_cards %in% importance$card,
|
|
importance$importance[match(non_important_cards, importance$card)], 0)
|
|
)
|
|
non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
|
|
top_cards_to_remove <- head(non_important_with_scores$card, 5)
|
|
} else {
|
|
warning("No variable importance available from the Decision Tree model.")
|
|
top_cards_to_add <- character(0)
|
|
top_cards_to_remove <- character(0)
|
|
}
|
|
|
|
# Confusion matrix for training data (decision tree)
|
|
predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
|
|
cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
|
|
cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
|
|
print(cm_train_dt)
|
|
|
|
return(list(
|
|
predicted_standing_dt = predicted_performance_dt,
|
|
cards_to_add = top_cards_to_add,
|
|
cards_to_consider_removing = top_cards_to_remove,
|
|
cm_train_dt = cm_train_dt
|
|
))
|
|
}
|
|
|
|
# Example usage
|
|
test_deck <- test_data[3, ]
|
|
print(test_deck$Commander)
|
|
recommendations <- recommend_cards(test_deck, train_data, test_data)
|
|
|
|
cat("\nCards to Consider Adding:\n")
|
|
print(recommendations$cards_to_add)
|
|
|
|
cat("\nCards to Consider Removing:\n")
|
|
print(recommendations$cards_to_consider_removing)
|
|
|
|
cat("\nPredicted Deck's Standing (Decision Tree):\n")
|
|
print(recommendations$predicted_standing_dt)
|
|
print(cm_train_dt)
|
|
# Extract the confusion matrix from your model
|
|
cm_train_dt <- recommendations$cm_train_dt
|
|
conf_matrix <- as.matrix(cm_train_dt$table)
|
|
|
|
# Create a high-contrast color palette using viridis
|
|
my_palette <- viridis(100)
|
|
|
|
# Plot the heatmap
|
|
pheatmap(conf_matrix,
|
|
cluster_rows = FALSE,
|
|
cluster_cols = FALSE,
|
|
color = my_palette,
|
|
fontsize_row = 8,
|
|
fontsize_col = 8,
|
|
angle_col = 45,
|
|
display_numbers = TRUE,
|
|
number_format = "%.0f",
|
|
main = "Confusion Matrix Heatmap")
|
|
|
|
|
|
# Loop
|
|
|
|
# Identify the 10 most popular commanders
|
|
top_commanders <- data1 %>%
|
|
group_by(Commander) %>%
|
|
summarize(freq = n()) %>%
|
|
arrange(desc(freq)) %>%
|
|
slice(1:5) %>%
|
|
pull(Commander)
|
|
|
|
# Initialize a list to store results
|
|
model_results_list <- list()
|
|
|
|
# Loop over the top 10 commanders
|
|
for (comm in top_commanders) {
|
|
# Find a test deck for this commander (if available)
|
|
subset_test_decks <- subset(test_data, Commander == comm)
|
|
|
|
if (nrow(subset_test_decks) == 0) {
|
|
cat("\nNo test deck available for commander:", comm, "\n")
|
|
next
|
|
}
|
|
|
|
# Just pick the first test deck for demonstration
|
|
test_deck <- subset_test_decks[1, ]
|
|
|
|
cat("\nRunning models for commander:", comm, "\n")
|
|
recommendations <- recommend_cards(test_deck, train_data, test_data)
|
|
|
|
if (!is.null(recommendations)) {
|
|
# Store relevant parts of the output
|
|
model_results_list[[comm]] <- list(
|
|
commander = comm,
|
|
predicted_standing_dt = recommendations$predicted_standing_dt,
|
|
cards_to_add = recommendations$cards_to_add,
|
|
cards_to_consider_removing = recommendations$cards_to_consider_removing
|
|
)
|
|
}
|
|
}
|
|
|
|
# After the loop, model_results_list contains the outputs for each of the top 10 commanders
|
|
# Print summary of results
|
|
cat("\nSummary of Model Results for Top 10 Commanders:\n")
|
|
for (comm in names(model_results_list)) {
|
|
res <- model_results_list[[comm]]
|
|
cat("\nCommander:", res$commander, "\n")
|
|
cat("Predicted Standing (Decision Tree):", res$predicted_standing_dt, "\n")
|
|
cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
|
|
cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
|
|
}
|
|
|
|
# Example usage from your code snippet (already integrated in the loop,
|
|
# but you could still do individually if desired):
|
|
# test_deck <- test_data[3, ]
|
|
# print(test_deck$Commander)
|
|
# recommendations <- recommend_cards(test_deck, train_data, test_data)
|
|
#
|
|
# cat("\nCards to Consider Adding:\n")
|
|
# print(recommendations$cards_to_add)
|
|
#
|
|
# cat("\nCards to Consider Removing:\n")
|
|
# print(recommendations$cards_to_consider_removing)
|
|
#
|
|
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
|
|
# print(recommendations$predicted_standing_dt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|