# Libraries ----
library(olsrr)
library(MASS)
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)
library(rpart)
library(randomForest)
library(pheatmap)
library(viridis)

# Data preparation ----
data1 <- read.csv("deck_data.csv")

# Normalized tournament standing as a percentage of field size
# (lower = better finish).
data1$y <- (data1$Rank / data1$TournamentSize) * 100

# Convert y to a categorical Performance variable, split at the median.
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
data1$Performance <- as.factor(data1$Performance)

# 70/30 train/test split
set.seed(123) # for reproducibility
train_indices <- sample(seq_len(nrow(data1)), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]

# Recommend cards to add/remove for a single decklist.
#
# Fits classification models (decision tree, LDA, QDA, random forest) on the
# subset of training decks that share the decklist's commander (and partner,
# when present), prints confusion matrices for each model on the matching
# test decks, then uses the decision tree's variable importance to suggest
# up to 5 cards to add and up to 5 cards to consider removing.
#
# Arguments:
#   decklist_row - one row of the deck data (metadata + 0/1 card indicators)
#   train_data   - training split of the full dataset
#   test_data    - test split of the full dataset
#
# Returns NULL (with a warning) when fewer than 10 training decks match;
# otherwise a list with elements predicted_standing_dt, cards_to_add,
# cards_to_consider_removing, and cm_train_dt.
recommend_cards <- function(decklist_row, train_data, test_data) {
  # Columns that describe the deck but are not individual card indicators.
  non_card_columns <- c(
    "Rank", "TournamentSize", "Commander", "Partner",
    "Num_Artifacts", "Num_Creatures", "Num_Enchantments", "Num_Instants",
    "Num_Sorceries", "Num_Planeswalkers", "Num_Lands",
    "Average_Mana_Value", "Average_Mana_Value_Excl", "Num_Free_Spells",
    "Num_Ramp", "Num_Draw", "Num_Tutor", "Num_Counterspell", "Num_Removal",
    "Num_Stax", "Num_Protection", "Num_Boardwipe",
    "y", "Performance"
  )

  commander_input <- as.character(decklist_row$Commander)
  # Scalar condition: plain if/else instead of ifelse().
  partner_input <- if (is.na(decklist_row$Partner)) {
    ""
  } else {
    as.character(decklist_row$Partner)
  }

  # Restrict both splits to decks with the same commander (and partner).
  if (partner_input == "") {
    subset_train_data <- subset(train_data, Commander == commander_input)
    subset_test_data <- subset(test_data, Commander == commander_input)
  } else {
    subset_train_data <- subset(
      train_data,
      Commander == commander_input & Partner == partner_input
    )
    subset_test_data <- subset(
      test_data,
      Commander == commander_input & Partner == partner_input
    )
  }

  if (nrow(subset_train_data) < 10) {
    warning("Not enough data to build a reliable model.")
    return(NULL)
  }

  # Card indicator columns = everything that is not deck metadata.
  card_columns <- setdiff(names(train_data), non_card_columns)
  predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
  response_train <- subset_train_data$Performance

  # Remove near-zero-variance predictors before LDA/QDA (constant columns
  # make their covariance estimates singular).
  nzv <- nearZeroVar(predictors_train_full)
  if (length(nzv) > 0) {
    predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
  } else {
    predictors_train <- predictors_train_full
  }
  # Update card columns to reflect the removed predictors.
  filtered_card_columns <- colnames(predictors_train)

  # LDA/QDA require at least two classes in the response.
  run_lda_qda <- length(unique(response_train)) > 1

  # Decision tree (always fit).
  dt_model <- rpart(
    response_train ~ .,
    data = data.frame(response_train, predictors_train),
    method = "class"
  )

  # LDA (may fail, e.g. on collinear predictors) -- keep NULL on failure.
  lda_model <- NULL
  if (run_lda_qda) {
    lda_model <- tryCatch(
      {
        lda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("LDA failed: ", e$message)
        NULL
      }
    )
  } else {
    warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
  }

  # QDA (needs more observations per class than LDA; often fails on small
  # subsets) -- keep NULL on failure.
  qda_model <- NULL
  if (run_lda_qda) {
    qda_model <- tryCatch(
      {
        qda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("QDA failed: ", e$message)
        NULL
      }
    )
  }

  # Random forest.
  rf_model <- randomForest(
    response_train ~ .,
    data = data.frame(response_train, predictors_train)
  )

  # Evaluate models on test data, if any decks for this commander landed
  # in the test split.
  if (nrow(subset_test_data) > 0) {
    # Subset the test data to the same filtered predictors.
    predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
    response_test <- subset_test_data$Performance

    # Decision tree predictions (model is always available).
    dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
    cat("\nDecision Tree Confusion Matrix:\n")
    print(confusionMatrix(dt_predictions, response_test))

    if (!is.null(lda_model)) {
      lda_predictions <- predict(lda_model, newdata = predictors_test)$class
      cat("\nLDA Confusion Matrix:\n")
      print(confusionMatrix(lda_predictions, response_test))
    }

    if (!is.null(qda_model)) {
      qda_predictions <- predict(qda_model, newdata = predictors_test)$class
      cat("\nQDA Confusion Matrix:\n")
      print(confusionMatrix(qda_predictions, response_test))
    }

    rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
    cat("\nRandom Forest Confusion Matrix:\n")
    print(confusionMatrix(rf_predictions, response_test))
  } else {
    warning("No test data available for this commander.")
  }

  # Plot the decision tree.
  rpart.plot(dt_model,
    type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
    main = paste("Decision Tree for Commander:", commander_input)
  )

  # Predict performance for the given decklist using the decision tree,
  # applying the same filtered predictors.
  predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
  predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")
  cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
  print(predicted_performance_dt)

  # Card recommendations from decision-tree variable importance.
  if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
    importance <- data.frame(
      card = names(dt_model$variable.importance),
      importance = dt_model$variable.importance,
      row.names = NULL
    )
    importance <- importance[order(-importance$importance), ]

    # Cards currently in the deck (indicator column == 1).
    card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
    deck_cards <- names(card_values)[which(card_values == 1)]

    # Highest-importance cards the deck does not yet run.
    cards_to_add <- setdiff(importance$card, deck_cards)
    top_cards_to_add <- head(cards_to_add, 5)

    # Deck cards absent from the importance table. Their importance is 0 by
    # construction (setdiff guarantees none appear in importance$card), so
    # assign 0 directly instead of re-checking membership.
    non_important_cards <- setdiff(deck_cards, importance$card)
    non_important_with_scores <- data.frame(
      card = non_important_cards,
      importance = rep(0, length(non_important_cards))
    )
    non_important_with_scores <-
      non_important_with_scores[order(non_important_with_scores$importance), ]
    top_cards_to_remove <- head(non_important_with_scores$card, 5)
  } else {
    warning("No variable importance available from the Decision Tree model.")
    top_cards_to_add <- character(0)
    top_cards_to_remove <- character(0)
  }

  # Confusion matrix on the training data (decision tree) -- an optimistic,
  # in-sample estimate; test-set matrices above are the honest ones.
  predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
  cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
  cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
  print(cm_train_dt)

  list(
    predicted_standing_dt = predicted_performance_dt,
    cards_to_add = top_cards_to_add,
    cards_to_consider_removing = top_cards_to_remove,
    cm_train_dt = cm_train_dt
  )
}

# Example usage ----
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)

# recommend_cards() returns NULL when too few training decks match, so guard
# before touching its components (otherwise the heatmap code below errors).
if (!is.null(recommendations)) {
  cat("\nCards to Consider Adding:\n")
  print(recommendations$cards_to_add)

  cat("\nCards to Consider Removing:\n")
  print(recommendations$cards_to_consider_removing)

  cat("\nPredicted Deck's Standing (Decision Tree):\n")
  print(recommendations$predicted_standing_dt)

  # Extract the training confusion matrix from the model output.
  # (Must be assigned before it is printed/plotted -- the original printed
  # cm_train_dt before defining it.)
  cm_train_dt <- recommendations$cm_train_dt
  print(cm_train_dt)
  conf_matrix <- as.matrix(cm_train_dt$table)

  # High-contrast color palette via viridis.
  my_palette <- viridis(100)

  # Plot the confusion-matrix heatmap.
  pheatmap(conf_matrix,
    cluster_rows = FALSE,
    cluster_cols = FALSE,
    color = my_palette,
    fontsize_row = 8,
    fontsize_col = 8,
    angle_col = 45,
    display_numbers = TRUE,
    number_format = "%.0f",
    main = "Confusion Matrix Heatmap"
  )
}

# Loop ----
# Identify the 5 most popular commanders.
top_commanders <- data1 %>%
  group_by(Commander) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice(1:5) %>%
  pull(Commander)

# Collect results per commander.
model_results_list <- list()

# Loop over the top 5 commanders.
for (comm in top_commanders) {
  # Find a test deck for this commander (if available).
  subset_test_decks <- subset(test_data, Commander == comm)
  if (nrow(subset_test_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }

  # Just pick the first test deck for demonstration.
  test_deck <- subset_test_decks[1, ]
  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)

  if (!is.null(recommendations)) {
    # Store the relevant parts of the output.
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}

# After the loop, model_results_list contains the outputs for each of the
# top 5 commanders. Print a summary of the results.
cat("\nSummary of Model Results for Top 5 Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  # as.character(): cat() on a factor would print the integer level code,
  # not the "High"/"Low" label.
  cat("Predicted Standing (Decision Tree):", as.character(res$predicted_standing_dt), "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}

# Individual usage (already integrated in the loop above, kept for reference):
# test_deck <- test_data[3, ]
# print(test_deck$Commander)
# recommendations <- recommend_cards(test_deck, train_data, test_data)
#
# cat("\nCards to Consider Adding:\n")
# print(recommendations$cards_to_add)
#
# cat("\nCards to Consider Removing:\n")
# print(recommendations$cards_to_consider_removing)
#
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
# print(recommendations$predicted_standing_dt)