Initial commit: Organized project code and documentation for public release.
This commit is contained in:
commit
e673f32d31
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
.Rproj.user
|
||||||
|
.Rhistory
|
||||||
|
.RData
|
||||||
|
.Ruserdata
|
||||||
13
USCLAP.Rproj
Normal file
13
USCLAP.Rproj
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Version: 1.0
|
||||||
|
|
||||||
|
RestoreWorkspace: Default
|
||||||
|
SaveWorkspace: Default
|
||||||
|
AlwaysSaveHistory: Default
|
||||||
|
|
||||||
|
EnableCodeIndexing: Yes
|
||||||
|
UseSpacesForTab: Yes
|
||||||
|
NumSpacesForTab: 2
|
||||||
|
Encoding: UTF-8
|
||||||
|
|
||||||
|
RnwWeave: Sweave
|
||||||
|
LaTeX: pdfLaTeX
|
||||||
1871
deck_data.csv
Normal file
1871
deck_data.csv
Normal file
File diff suppressed because one or more lines are too long
206
download_tournament.py
Normal file
206
download_tournament.py
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.firefox.options import Options
|
||||||
|
from selenium.webdriver.firefox.service import Service
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
|
||||||
|
def setup_browser(download_dir):
    """Create a Firefox WebDriver that saves downloads into *download_dir*.

    The returned browser writes text/plain, text/csv, and
    application/octet-stream responses straight to disk without prompting.
    """
    # Profile preference -> value pairs applied below.
    prefs = {
        'browser.download.folderList': 2,  # 2 = use a custom download directory
        'browser.download.dir': download_dir,  # tournament-specific directory
        'browser.helperApps.neverAsk.saveToDisk': 'text/plain,text/csv,application/octet-stream',
        'pdfjs.disabled': True,  # never open PDFs in the built-in viewer
        'browser.download.manager.showWhenStarting': False,
        'browser.download.manager.useWindow': False,
        'browser.download.manager.focusWhenStarting': False,
        'browser.download.manager.alertOnEXEOpen': False,
        'browser.download.manager.showAlertOnComplete': False,
        'browser.download.manager.closeWhenDone': False,
    }

    firefox_options = Options()
    for pref_name, pref_value in prefs.items():
        firefox_options.set_preference(pref_name, pref_value)

    # geckodriver is expected at this fixed path on the host.
    gecko_service = Service(executable_path='/usr/local/bin/geckodriver')

    return webdriver.Firefox(service=gecko_service, options=firefox_options)
|
||||||
|
|
||||||
|
def parse_date(date_str):
    """Parse a date like 'November 14th 2024' into a ``datetime.date``.

    Ordinal suffixes (st/nd/rd/th) after a number are stripped first.
    Returns None (after logging) when the string does not match '%B %d %Y'.
    """
    # Drop ordinal suffixes so strptime's '%d' can match the day number.
    date_str = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str)
    try:
        parsed = datetime.strptime(date_str, '%B %d %Y').date()
    except ValueError as e:
        print(f"Error parsing date '{date_str}': {e}")
        return None
    print(f"Parsed date: {parsed}")
    return parsed
|
||||||
|
|
||||||
|
def get_tournaments_after_date(browser, date_threshold):
    """Collect (url, name, date) tuples for tournaments on/after *date_threshold*.

    Scrapes the edhtop16 tournaments listing sorted by date. Entries whose
    date cannot be parsed are skipped. NOTE(review): only the first page of
    results is read — confirm pagination is not needed for the threshold used.
    """
    tournaments = []

    print("Navigating to the tournaments page...")
    tournaments_page_url = 'https://edhtop16.com/tournaments?sortBy=DATE'
    browser.get(tournaments_page_url)

    time.sleep(3)  # crude wait for the client-side app to render

    # Find all tournament entries (one card per tournament on the listing page)
    tournament_entries = browser.find_elements(By.CSS_SELECTOR, "div.group.relative.overflow-hidden.rounded-lg.bg-white.shadow")
    print(f"Found {len(tournament_entries)} tournaments on the page.")

    for entry in tournament_entries:
        try:
            # Extract tournament link, name, and date
            link_element = entry.find_element(By.CSS_SELECTOR, 'a.line-clamp-2.text-xl.font-bold.underline')
            tournament_name = link_element.text
            tournament_url = link_element.get_attribute('href')

            # The first <span> in the card holds the human-readable date.
            date_element = entry.find_element(By.CSS_SELECTOR, 'span')
            tournament_date_str = date_element.text
            print(f"Tournament found: {tournament_name}, Date: {tournament_date_str}, URL: {tournament_url}")

            # Parse the date string; keep only tournaments at/after the threshold.
            tournament_date = parse_date(tournament_date_str)
            if tournament_date and tournament_date >= date_threshold:
                tournaments.append((tournament_url, tournament_name, tournament_date))
        except Exception as e:
            # A malformed entry should not abort the whole listing scrape.
            print(f"Error processing tournament entry: {e}")

    print(f"Total tournaments after {date_threshold}: {len(tournaments)}")
    return tournaments
|
||||||
|
|
||||||
|
def get_tournament_info(browser):
    """Scrape the current tournament page for its name and deck links.

    Returns ``(tournament_name, deck_links)`` where *deck_links* is a list
    of ``(url, deck_name)`` tuples. '/' in the tournament name is replaced
    with '-' so it can be used as a directory/filename component.
    """
    print("Retrieving the tournament name and deck links...")
    time.sleep(3)  # Wait for the page to load

    # The tournament name comes from the page's <h1>; fall back to a generic label.
    try:
        heading = browser.find_element(By.TAG_NAME, 'h1')
        name = heading.text if heading else "Tournament"
    except Exception as e:
        print(f"Error retrieving tournament name: {e}")
        name = "Tournament"

    name = name.replace('/', '-')  # Replace invalid filename characters
    print(f"Tournament Name: {name}")

    # Each deck entry on the page is an anchor styled with these utility classes.
    anchors = browser.find_elements(By.CSS_SELECTOR, "a.line-clamp-2.text-xl.font-bold.underline")
    deck_links = [(anchor.get_attribute('href'), anchor.text) for anchor in anchors]

    print(f"Total decks found: {len(deck_links)}")
    return name, deck_links
|
||||||
|
|
||||||
|
def download_deck(browser, deck_url, rank, total_decks, save_dir):
    """Download one deck's MTGO export into *save_dir*.

    Drives the deck page UI (More -> Export -> 'Download for MTGO'), waits
    up to ~30s for a .txt file to land in *save_dir*, then renames it to
    '<rank>-<total_decks>-<original name>'. Any failure is logged and
    swallowed so the caller can continue with the next deck.

    NOTE(review): the newest-file heuristic below assumes *save_dir* only
    receives this one download at a time — confirm no concurrent writes.
    """
    print(f"Navigating to deck {rank} page...")
    browser.get(deck_url)

    try:
        # Wait for the "More" button and click it (JS click avoids overlay issues)
        print("Waiting for the 'More' button to appear...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'More')]/..")))
        more_button = browser.find_element(By.XPATH, "//span[contains(text(), 'More')]/..")
        browser.execute_script("arguments[0].click();", more_button)
        time.sleep(1)

        # Wait for and click the "Export" option within the dropdown
        print("Waiting for the 'Export' option...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")))
        export_option = browser.find_element(By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")
        browser.execute_script("arguments[0].click();", export_option)
        time.sleep(1)

        # Wait for and click the "Download for MTGO" link
        print("Clicking 'Download for MTGO' link...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")))
        mtgo_download_link = browser.find_element(By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")
        mtgo_download_link.click()
        time.sleep(2)  # Wait for the download to initiate

        # Poll the download directory (1s interval, up to 30 tries) for a .txt file.
        print("Waiting for the file to download...")
        download_wait_time = 0
        downloaded_filepath = None

        # Check the download directory for a new file
        while download_wait_time < 30:
            files = os.listdir(save_dir)
            if files:
                # The most recently created file is assumed to be our download.
                downloaded_filepath = max([os.path.join(save_dir, f) for f in files], key=os.path.getctime)
                if downloaded_filepath.endswith('.txt'):
                    break
            time.sleep(1)
            download_wait_time += 1

        # Move and rename the file if it was found
        if downloaded_filepath and downloaded_filepath.endswith('.txt'):
            original_filename = os.path.basename(downloaded_filepath)
            # Sanitize the original filename (no path separators in the new name)
            sanitized_filename = original_filename.replace('/', '-').replace('\\', '-')
            new_filename = os.path.join(save_dir, f"{rank}-{total_decks}-{sanitized_filename}")
            os.rename(downloaded_filepath, new_filename)
            print(f"Downloaded deck {rank}/{total_decks}: {new_filename}")
        else:
            print(f"Failed to download deck {rank}: Download timed out.")

    except Exception as e:
        # Best-effort: log and move on to the next deck.
        print(f"Error downloading deck {rank}: {e}")
|
||||||
|
|
||||||
|
def main():
    """Entry point: scrape recent tournaments and download every decklist.

    Creates ./downloads/<tournament>/ directories and saves each deck's
    MTGO export there via download_deck(). The Firefox profile's download
    directory is fixed at startup, so the browser is restarted once per
    tournament to retarget it.
    """
    # Set up the base download directory
    base_download_dir = os.path.join(os.getcwd(), 'downloads')
    if not os.path.exists(base_download_dir):
        os.makedirs(base_download_dir)
        print(f"Created base download directory at {base_download_dir}")

    # Only tournaments on/after this date are processed.
    date_threshold = datetime(2024, 11, 14).date()

    # Initialize the browser
    print("Setting up the browser...")
    browser = setup_browser(base_download_dir)  # Initial browser setup
    try:
        # Retrieve list of tournaments after the date threshold
        tournaments = get_tournaments_after_date(browser, date_threshold)

        for tournament_url, tournament_name, tournament_date in tournaments:
            print(f"\nProcessing tournament: {tournament_name} dated {tournament_date}")

            # Create a specific directory for the tournament
            tournament_dir = os.path.join(base_download_dir, tournament_name.replace('/', '-'))
            if not os.path.exists(tournament_dir):
                os.makedirs(tournament_dir)
                print(f"Created tournament directory at {tournament_dir}")

            # Restart the browser so downloads land in the tournament directory
            browser.quit()
            browser = setup_browser(tournament_dir)  # Reinitialize with tournament-specific directory

            # Navigate to the tournament page
            print(f"Navigating to the tournament page {tournament_url}...")
            browser.get(tournament_url)
            time.sleep(3)  # Wait for the page to load

            # Retrieve tournament info (this will get the decks)
            tournament_name, deck_links = get_tournament_info(browser)

            # Download each deck
            total_decks = len(deck_links)
            for idx, (deck_url, deck_name) in enumerate(deck_links, start=1):
                print(f"\nProcessing deck {idx}/{total_decks}: {deck_name}")
                download_deck(browser, deck_url, idx, total_decks, tournament_dir)

    finally:
        # Always release the WebDriver, even if scraping fails midway.
        print("Closing the browser...")
        browser.quit()
|
||||||
|
|
||||||
|
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||||
318
final_lda_Qda.R
Normal file
318
final_lda_Qda.R
Normal file
@ -0,0 +1,318 @@
|
|||||||
|
library(olsrr)
library(MASS)
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)
library(rpart)
library(randomForest)

library(pheatmap)
library(viridis)

# Load the dataset produced by process_decklists.py
data1 <- read.csv("deck_data.csv")
# y = finishing position as a percentage of the field size (smaller = better finish).
data1$y <- (data1$Rank / data1$TournamentSize) * 100

# Convert y to a categorical Performance variable split at the median.
# NOTE(review): y <= median means the BEST finishers get the label "Low" —
# confirm this label direction is intended before interpreting results.
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
data1$Performance <- as.factor(data1$Performance)

# Split the data into 70% training and 30% test sets
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(data1), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
|
||||||
|
|
||||||
|
# Recommend cards to add/remove for one decklist and predict its standing.
#
# Arguments:
#   decklist_row        one row of the deck data frame (needs Commander,
#                       Partner, and the 0/1 card-presence columns)
#   train_data,
#   test_data           train/test splits of the full deck data frame
#
# Fits commander-specific Decision Tree, LDA, QDA, and Random Forest models
# on the card-presence columns, prints test confusion matrices where
# possible, plots the tree, and returns a list with the tree's predicted
# standing, suggested cards to add/remove, and the training confusion
# matrix. Returns NULL when fewer than 10 training decks exist.
recommend_cards <- function(decklist_row, train_data, test_data) {
  # Non-card columns (metadata/derived columns excluded from predictors).
  non_card_columns <- c(
    "Rank", "TournamentSize", "Commander", "Partner",
    "Num_Artifacts", "Num_Creatures", "Num_Enchantments",
    "Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
    "Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
    "Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
    "Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
    "Num_Boardwipe", "y", "Performance"
  )

  commander_input <- as.character(decklist_row$Commander)
  partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))

  # Restrict both splits to decks with the same commander (and partner, if any).
  if (partner_input == "") {
    subset_train_data <- subset(train_data, Commander == commander_input)
    subset_test_data <- subset(test_data, Commander == commander_input)
  } else {
    subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
    subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
  }

  # Bail out when the commander is too rare to model.
  if (nrow(subset_train_data) < 10) {
    warning("Not enough data to build a reliable model.")
    return(NULL)
  }

  # Get card columns (everything that is not metadata).
  card_columns <- setdiff(names(train_data), non_card_columns)

  predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
  response_train <- subset_train_data$Performance

  # Remove near zero variance predictors before LDA/QDA
  nzv <- nearZeroVar(predictors_train_full)
  if (length(nzv) > 0) {
    predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
  } else {
    predictors_train <- predictors_train_full
  }

  # Update card_columns to reflect the removed predictors
  filtered_card_columns <- colnames(predictors_train)

  # LDA/QDA need at least two classes in the training response.
  run_lda_qda <- length(unique(response_train)) > 1

  # Build Decision Tree (always fitted; used for the recommendations below).
  dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")

  # Try LDA (may fail on collinear/sparse predictors; tolerated via tryCatch).
  lda_model <- NULL
  if (run_lda_qda) {
    lda_model <- tryCatch(
      {
        lda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("LDA failed: ", e$message)
        NULL
      }
    )
  } else {
    warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
  }

  # Try QDA (same tolerance for failure as LDA).
  qda_model <- NULL
  if (run_lda_qda) {
    qda_model <- tryCatch(
      {
        qda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("QDA failed: ", e$message)
        NULL
      }
    )
  }

  # Build Random Forest
  rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))

  # Evaluate models on test data if available
  if (nrow(subset_test_data) > 0) {
    # Subset the test data to the same filtered predictors
    predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
    response_test <- subset_test_data$Performance

    # Predictions (Decision Tree always available)
    dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
    cat("\nDecision Tree Confusion Matrix:\n")
    print(confusionMatrix(dt_predictions, response_test))

    if (!is.null(lda_model)) {
      lda_predictions <- predict(lda_model, newdata = predictors_test)$class
      cat("\nLDA Confusion Matrix:\n")
      print(confusionMatrix(lda_predictions, response_test))
    }

    if (!is.null(qda_model)) {
      qda_predictions <- predict(qda_model, newdata = predictors_test)$class
      cat("\nQDA Confusion Matrix:\n")
      print(confusionMatrix(qda_predictions, response_test))
    }

    rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
    cat("\nRandom Forest Confusion Matrix:\n")
    print(confusionMatrix(rf_predictions, response_test))

  } else {
    warning("No test data available for this commander.")
  }

  # Plot the decision tree
  rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
             main = paste("Decision Tree for Commander:", commander_input))

  # Predict performance for the given decklist using the decision tree;
  # the decklist must use the same filtered predictors as training.
  predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
  predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")

  cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
  print(predicted_performance_dt)

  # If variable importance is available from the decision tree,
  # derive add/remove suggestions from it.
  if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
    importance <- data.frame(
      card = names(dt_model$variable.importance),
      importance = dt_model$variable.importance,
      row.names = NULL
    )

    importance <- importance[order(-importance$importance), ]

    # Cards currently in the submitted deck (presence columns equal to 1).
    card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
    indices <- which(card_values == 1)
    deck_cards <- names(card_values)[indices]

    # Important cards the deck does not yet run.
    cards_to_add <- setdiff(importance$card, deck_cards)
    top_cards_to_add <- head(cards_to_add, 5)

    # Deck cards with no recorded importance.
    # NOTE(review): non_important_cards is defined as setdiff(deck_cards,
    # importance$card), so the %in% test below is always FALSE and every
    # importance score here is 0 — the subsequent ordering is a no-op.
    # Confirm whether intersect() was intended instead of setdiff().
    non_important_cards <- setdiff(deck_cards, importance$card)
    non_important_with_scores <- data.frame(
      card = non_important_cards,
      importance = ifelse(non_important_cards %in% importance$card,
                          importance$importance[match(non_important_cards, importance$card)], 0)
    )
    non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
    top_cards_to_remove <- head(non_important_with_scores$card, 5)
  } else {
    warning("No variable importance available from the Decision Tree model.")
    top_cards_to_add <- character(0)
    top_cards_to_remove <- character(0)
  }

  # Confusion matrix for training data (decision tree)
  predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
  cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
  cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
  print(cm_train_dt)

  return(list(
    predicted_standing_dt = predicted_performance_dt,
    cards_to_add = top_cards_to_add,
    cards_to_consider_removing = top_cards_to_remove,
    cm_train_dt = cm_train_dt
  ))
}
|
||||||
|
|
||||||
|
# Example usage: score one test deck and visualize its training confusion matrix.
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)

cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)

cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)

cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)

# Extract the confusion matrix from the model BEFORE printing it.
# (Previously cm_train_dt was printed one line before it was assigned,
# which errored — or silently reused a stale object from the environment.)
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)

# Create a high-contrast color palette using viridis
my_palette <- viridis(100)

# Plot the heatmap of the training confusion matrix
pheatmap(conf_matrix,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         color = my_palette,
         fontsize_row = 8,
         fontsize_col = 8,
         angle_col = 45,
         display_numbers = TRUE,
         number_format = "%.0f",
         main = "Confusion Matrix Heatmap")
|
||||||
|
|
||||||
|
|
||||||
|
# Batch run: repeat the recommendation pipeline for the most-played commanders.

# Identify the 5 most popular commanders (slice(1:5) despite older comments
# mentioning 10).
top_commanders <- data1 %>%
  group_by(Commander) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice(1:5) %>%
  pull(Commander)

# Initialize a list to store per-commander results
model_results_list <- list()

# Loop over the top commanders
for (comm in top_commanders) {
  # Find a test deck for this commander (if available)
  subset_test_decks <- subset(test_data, Commander == comm)

  if (nrow(subset_test_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }

  # Just pick the first test deck for demonstration
  test_deck <- subset_test_decks[1, ]

  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)

  # recommend_cards returns NULL when there is too little training data.
  if (!is.null(recommendations)) {
    # Store relevant parts of the output
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}

# model_results_list now holds one entry per successfully modeled commander.
# Print summary of results
cat("\nSummary of Model Results for Top 10 Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  cat("Predicted Standing (Decision Tree):", res$predicted_standing_dt, "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}

# Single-deck example (already covered by the loop above; kept for reference):
# test_deck <- test_data[3, ]
# print(test_deck$Commander)
# recommendations <- recommend_cards(test_deck, train_data, test_data)
#
# cat("\nCards to Consider Adding:\n")
# print(recommendations$cards_to_add)
#
# cat("\nCards to Consider Removing:\n")
# print(recommendations$cards_to_consider_removing)
#
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
# print(recommendations$predicted_standing_dt)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
266
process_decklists.py
Normal file
266
process_decklists.py
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
import csv
|
||||||
|
import time # Optional: For adding delays between API requests
|
||||||
|
|
||||||
|
# Accumulators shared by the processing steps below.
unique_cards = set()   # every distinct card name seen across all decklists
card_types = {}        # card name -> list of main type words (from Scryfall type_line)
card_tags = {}         # card name -> list of capitalized function tags (e.g. 'Ramp')
deck_data = []         # one dict per processed decklist (built in Step 6)

# Scryfall oracle tags counted per deck (become Num_<Tag> CSV columns).
tags_of_interest = [
    'ramp', 'draw', 'tutor', 'counterspell', 'removal',
    'stax', 'protection', 'boardwipe'
]
|
||||||
|
|
||||||
|
# Split an exported MTGO decklist file into its three sections.
def process_decklist_file(filepath):
    """Split a decklist export into deck, sideboard, and commander lines.

    The export format is: main deck first, then an optional 'SIDEBOARD:'
    section, then the commander(s) after a blank line. Returns three lists
    of raw card lines (e.g. '1 Sol Ring'), each line stripped of whitespace.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Keep every line (including blanks, which mark section breaks);
        # only the trailing newline is removed.
        raw_lines = [entry.rstrip('\n') for entry in handle.readlines()]

    main_deck = []
    sideboard = []
    commanders = []

    mode = 'deck'
    pos = 0
    total = len(raw_lines)
    while pos < total:
        current = raw_lines[pos].strip()

        if mode == 'deck':
            if not current:
                # A blank line ends the deck; look ahead to decide whether a
                # sideboard header or the commander section comes next.
                probe = pos + 1
                while probe < total and not raw_lines[probe].strip():
                    probe += 1
                if probe < total and raw_lines[probe].strip().upper() == 'SIDEBOARD:':
                    mode = 'sideboard'
                    pos = probe  # jump onto the 'SIDEBOARD:' header line
                else:
                    mode = 'commander'
                    pos += 1  # skip past the blank line
                continue
            if current.upper() == 'SIDEBOARD:':
                mode = 'sideboard'
            else:
                main_deck.append(current)
        elif mode == 'sideboard':
            if not current:
                # A blank line after the sideboard starts the commander section.
                mode = 'commander'
                pos += 1
                continue
            if current.upper() != 'SIDEBOARD:':
                sideboard.append(current)
        else:  # mode == 'commander'
            if current:
                commanders.append(current)
        pos += 1

    return main_deck, sideboard, commanders
|
||||||
|
|
||||||
|
# Function to fetch cards for a given tag from Scryfall
def fetch_cards_for_tag(tag):
    """Return the set of EDH-legal card names carrying Scryfall oracle tag *tag*.

    Pages through the Scryfall search API with query 'f:edh otag:<tag>'
    until 'has_more' is false. On any non-200 response the error is logged
    and the partial set collected so far is returned.
    """
    card_names = set()
    page = 1
    has_more = True
    while has_more:
        query = f"f:edh otag:{tag}"
        url = "https://api.scryfall.com/cards/search"
        params = {'q': query, 'page': page}
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            for card in data['data']:
                card_names.add(card['name'])
            # 'has_more' signals another page of results.
            has_more = data.get('has_more', False)
            page += 1
            time.sleep(0.1)  # Sleep to respect rate limits
        else:
            # Keep whatever was collected; do not retry.
            print(f"Error fetching cards for tag: {tag}")
            break
    return card_names
|
||||||
|
|
||||||
|
# Step 4: Collect Unique Card Names
# Decklists are expected alongside this script, named '<rank>-<size>-<name>.txt'.
decklist_dir = os.path.dirname(os.path.abspath(__file__))

for filename in os.listdir(decklist_dir):
    # Process only decklist files matching the pattern
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        # Collect unique cards from the deck and commanders, skip sideboard
        for line in deck_cards + commander_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))  # parsed but unused here
                card_name = match.group(2)
                unique_cards.add(card_name)

# Step 5: Retrieve Card Types Using Scryfall API
# One exact-name lookup per unique card; failures leave an empty type list.
# NOTE(review): exact-name lookups may miss split/double-faced name forms —
# verify the export's naming matches Scryfall's.
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='

for card_name in unique_cards:
    encoded_name = requests.utils.quote(card_name)
    url = scryfall_api_url + encoded_name
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        types = data['type_line']
        # Keep only the part before the em-dash (main types, no subtypes).
        main_types = types.split('—')[0].strip()
        type_list = [t.strip() for t in main_types.split()]
        card_types[card_name] = type_list
    else:
        print(f"Error fetching data for card: {card_name}")
        card_types[card_name] = []
    # Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds

# Step 5b: Build Card Tags Mapping
# Initialize card_tags dictionary with one empty tag list per known card.
card_tags = {card_name: [] for card_name in unique_cards}

# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        if card_name in card_tags:
            card_tags[card_name].append(tag.capitalize())
        else:
            # Handle cases where card name variations exist (case mismatch).
            for unique_card in unique_cards:
                if card_name.lower() == unique_card.lower():
                    card_tags[unique_card].append(tag.capitalize())
                    break
||||||
|
# Step 6: Process Each Decklist to Collect Deck Data
|
||||||
|
# Step 6: Process Each Decklist to Collect Deck Data
for filename in os.listdir(decklist_dir):
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        # Filenames follow '<rank>-<tournament size>-<name>.txt'.
        match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
        if match:
            rank = int(match.group(1))
            tournament_size = int(match.group(2))
        else:
            rank = None
            tournament_size = None
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        deck = {}          # card name -> quantity for the main deck
        commander = None
        partner = None
        # Per-type counts, weighted by card quantity.
        num_artifacts = num_creatures = num_enchantments = 0
        num_instants = num_sorceries = num_planeswalkers = 0
        num_lands = 0

        # Initialize tag card sets to store unique cards per tag
        tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}

        # Process main deck cards
        for line in deck_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                deck[card_name] = quantity
                # A card may contribute to several type counts (e.g. artifact creature).
                types = card_types.get(card_name, [])
                if 'Artifact' in types:
                    num_artifacts += quantity
                if 'Creature' in types:
                    num_creatures += quantity
                if 'Enchantment' in types:
                    num_enchantments += quantity
                if 'Instant' in types:
                    num_instants += quantity
                if 'Sorcery' in types:
                    num_sorceries += quantity
                if 'Planeswalker' in types:
                    num_planeswalkers += quantity
                if 'Land' in types:
                    num_lands += quantity

                # Process tags (each card counted once per tag set)
                tags = card_tags.get(card_name, [])
                for tag in tags:
                    if tag in tag_card_sets:
                        tag_card_sets[tag].add(card_name)

        # The first commander line is the commander, the second (if any) the partner.
        for idx, line in enumerate(commander_cards):
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                card_name = match.group(2)
                if idx == 0:
                    commander = card_name
                elif idx == 1:
                    partner = card_name

        # Compute tag counts as the number of unique cards per tag
        tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}

        deck_row = {
            'rank': rank,
            'tournament_size': tournament_size,
            'commander': commander,
            'partner': partner,
            'num_artifacts': num_artifacts,
            'num_creatures': num_creatures,
            'num_enchantments': num_enchantments,
            'num_instants': num_instants,
            'num_sorceries': num_sorceries,
            'num_planeswalkers': num_planeswalkers,
            'num_lands': num_lands,
            'tag_counts': tag_counts,
            'deck': deck
        }
        deck_data.append(deck_row)

# Step 7: Prepare the CSV Header
# NOTE(review): the R script also expects Average_Mana_Value and related
# columns that are not produced here — confirm which version is current.
header = ['Rank', 'TournamentSize', 'Commander', 'Partner', 'Num_Artifacts', 'Num_Creatures',
          'Num_Enchantments', 'Num_Instants', 'Num_Sorceries',
          'Num_Planeswalkers', 'Num_Lands']

# Add tag columns
header.extend(['Num_' + tag.capitalize() for tag in tags_of_interest])

# One presence column per unique card, in sorted (stable) order.
sorted_cards = sorted(unique_cards)
header.extend(sorted_cards)

# Step 8: Write Data to CSV File
with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [
            deck['rank'],
            deck['tournament_size'],
            deck['commander'],
            deck['partner'],
            deck['num_artifacts'],
            deck['num_creatures'],
            deck['num_enchantments'],
            deck['num_instants'],
            deck['num_sorceries'],
            deck['num_planeswalkers'],
            deck['num_lands']
        ]
        # Add tag counts
        for tag in tags_of_interest:
            row.append(deck['tag_counts'][tag.capitalize()])
        # Add card presence (1 or 0)
        for card_name in sorted_cards:
            row.append(1 if card_name in deck['deck'] else 0)
        # If you prefer to include quantities, use:
        # row.append(deck['deck'].get(card_name, 0))
        writer.writerow(row)
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user