Initial commit: Organized project code and documentation for public release.
This commit is contained in:
commit
e673f32d31
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
.Rproj.user
|
||||
.Rhistory
|
||||
.RData
|
||||
.Ruserdata
|
||||
13
USCLAP.Rproj
Normal file
13
USCLAP.Rproj
Normal file
@ -0,0 +1,13 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
1871
deck_data.csv
Normal file
1871
deck_data.csv
Normal file
File diff suppressed because one or more lines are too long
206
download_tournament.py
Normal file
206
download_tournament.py
Normal file
@ -0,0 +1,206 @@
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from datetime import datetime
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
from selenium.webdriver.firefox.service import Service
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
def setup_browser(download_dir, geckodriver_path='/usr/local/bin/geckodriver'):
    """Create a Firefox WebDriver that saves downloads without prompting.

    Parameters:
        download_dir: directory Firefox writes downloads into.
        geckodriver_path: path to the geckodriver binary (default keeps the
            previous hard-coded '/usr/local/bin/geckodriver').

    Returns:
        A configured selenium.webdriver.Firefox; the caller owns quit().
    """
    firefox_options = Options()
    # folderList=2 makes Firefox use the custom directory set below.
    firefox_options.set_preference('browser.download.folderList', 2)
    firefox_options.set_preference('browser.download.dir', download_dir)  # Use the tournament-specific directory
    # Save these MIME types straight to disk without a dialog.
    firefox_options.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/plain,text/csv,application/octet-stream')
    firefox_options.set_preference('pdfjs.disabled', True)
    # Suppress every download-manager window/alert so automation is never
    # blocked by UI chrome.
    for pref_name in (
        'browser.download.manager.showWhenStarting',
        'browser.download.manager.useWindow',
        'browser.download.manager.focusWhenStarting',
        'browser.download.manager.alertOnEXEOpen',
        'browser.download.manager.showAlertOnComplete',
        'browser.download.manager.closeWhenDone',
    ):
        firefox_options.set_preference(pref_name, False)

    # Specify the path to geckodriver
    gecko_service = Service(executable_path=geckodriver_path)

    browser = webdriver.Firefox(service=gecko_service, options=firefox_options)
    return browser
|
||||
|
||||
def parse_date(date_str):
    """Parse a date like 'November 14th 2024' into a datetime.date.

    Ordinal suffixes (st/nd/rd/th) are stripped before parsing with the
    '%B %d %Y' format. Returns None when parsing fails.
    """
    cleaned = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str)
    try:
        parsed = datetime.strptime(cleaned, '%B %d %Y').date()
    except ValueError as e:
        print(f"Error parsing date '{cleaned}': {e}")
        return None
    print(f"Parsed date: {parsed}")
    return parsed
|
||||
|
||||
def get_tournaments_after_date(browser, date_threshold):
    """Scrape edhtop16.com's tournament listing and keep recent events.

    Parameters:
        browser: an already-initialized Selenium WebDriver.
        date_threshold: a datetime.date; only tournaments dated on or
            after it are kept.

    Returns:
        A list of (url, name, date) tuples for matching tournaments.
    """
    tournaments = []

    print("Navigating to the tournaments page...")
    tournaments_page_url = 'https://edhtop16.com/tournaments?sortBy=DATE'
    browser.get(tournaments_page_url)

    # Fixed sleep instead of an explicit wait; assumes the listing renders
    # within 3 seconds -- TODO confirm this is reliable on slow connections.
    time.sleep(3)

    # Find all tournament entries
    # NOTE(review): these CSS classes mirror the site's current markup and
    # will silently match nothing if the site is redesigned.
    tournament_entries = browser.find_elements(By.CSS_SELECTOR, "div.group.relative.overflow-hidden.rounded-lg.bg-white.shadow")
    print(f"Found {len(tournament_entries)} tournaments on the page.")

    for entry in tournament_entries:
        try:
            # Extract tournament link, name, and date
            link_element = entry.find_element(By.CSS_SELECTOR, 'a.line-clamp-2.text-xl.font-bold.underline')
            tournament_name = link_element.text
            tournament_url = link_element.get_attribute('href')

            # The first <span> inside the entry is assumed to hold the
            # date text -- TODO confirm against the live page.
            date_element = entry.find_element(By.CSS_SELECTOR, 'span')
            tournament_date_str = date_element.text
            print(f"Tournament found: {tournament_name}, Date: {tournament_date_str}, URL: {tournament_url}")

            # Parse the date string
            tournament_date = parse_date(tournament_date_str)
            if tournament_date and tournament_date >= date_threshold:
                tournaments.append((tournament_url, tournament_name, tournament_date))
        except Exception as e:
            # Best-effort: a malformed entry is logged and skipped so one
            # bad card on the page does not abort the whole listing.
            print(f"Error processing tournament entry: {e}")

    print(f"Total tournaments after {date_threshold}: {len(tournaments)}")
    return tournaments
|
||||
|
||||
def get_tournament_info(browser):
    """Read the tournament title and collect (url, name) pairs for its decks.

    Assumes the browser is already on a tournament page. Falls back to the
    generic name "Tournament" when no <h1> heading can be read.
    """
    print("Retrieving the tournament name and deck links...")
    time.sleep(3)  # Wait for the page to load

    # Get the tournament name
    try:
        heading = browser.find_element(By.TAG_NAME, 'h1')
        tournament_name = heading.text if heading else "Tournament"
    except Exception as e:
        print(f"Error retrieving tournament name: {e}")
        tournament_name = "Tournament"

    tournament_name = tournament_name.replace('/', '-')  # Replace invalid filename characters
    print(f"Tournament Name: {tournament_name}")

    # Get the list of decks
    anchors = browser.find_elements(By.CSS_SELECTOR, "a.line-clamp-2.text-xl.font-bold.underline")
    deck_links = [(anchor.get_attribute('href'), anchor.text) for anchor in anchors]

    print(f"Total decks found: {len(deck_links)}")
    return tournament_name, deck_links
|
||||
|
||||
def download_deck(browser, deck_url, rank, total_decks, save_dir):
    """Export one deck as an MTGO .txt file via the deck page's export UI.

    Clicks More -> Export -> 'Download for MTGO', polls save_dir for up to
    30 seconds for the .txt to appear, then renames it to
    '<rank>-<total_decks>-<original name>'. All errors are logged, never
    raised, so one failing deck does not stop the run.
    """
    print(f"Navigating to deck {rank} page...")
    browser.get(deck_url)

    try:
        # Wait for the "More" button and click it
        print("Waiting for the 'More' button to appear...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'More')]/..")))
        more_button = browser.find_element(By.XPATH, "//span[contains(text(), 'More')]/..")
        # JS click avoids interception issues that a native .click() can hit.
        browser.execute_script("arguments[0].click();", more_button)
        time.sleep(1)

        # Wait for and click the "Export" option within the dropdown
        print("Waiting for the 'Export' option...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")))
        export_option = browser.find_element(By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")
        browser.execute_script("arguments[0].click();", export_option)
        time.sleep(1)

        # Wait for and click the "Download for MTGO" link
        print("Clicking 'Download for MTGO' link...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")))
        mtgo_download_link = browser.find_element(By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")
        mtgo_download_link.click()
        time.sleep(2)  # Wait for the download to initiate

        # Wait for the downloaded file to appear in the download directory
        print("Waiting for the file to download...")
        download_wait_time = 0
        downloaded_filepath = None

        # Check the download directory for a new file (poll once per second,
        # up to 30 seconds).
        while download_wait_time < 30:
            files = os.listdir(save_dir)
            if files:
                # Find the most recent file in the directory
                # NOTE(review): assumes save_dir only receives this run's
                # downloads -- a pre-existing newer file would be picked up.
                downloaded_filepath = max([os.path.join(save_dir, f) for f in files], key=os.path.getctime)
                if downloaded_filepath.endswith('.txt'):
                    break
            time.sleep(1)
            download_wait_time += 1

        # Move and rename the file if it was found
        if downloaded_filepath and downloaded_filepath.endswith('.txt'):
            original_filename = os.path.basename(downloaded_filepath)
            # Sanitize the original filename
            sanitized_filename = original_filename.replace('/', '-').replace('\\', '-')
            new_filename = os.path.join(save_dir, f"{rank}-{total_decks}-{sanitized_filename}")
            os.rename(downloaded_filepath, new_filename)
            print(f"Downloaded deck {rank}/{total_decks}: {new_filename}")
        else:
            print(f"Failed to download deck {rank}: Download timed out.")

    except Exception as e:
        print(f"Error downloading deck {rank}: {e}")
|
||||
|
||||
def main():
    """Download every deck from each edhtop16 tournament on/after the threshold.

    Decks are saved as MTGO .txt exports under downloads/<tournament name>/.
    """
    # Set up the base download directory
    base_download_dir = os.path.join(os.getcwd(), 'downloads')
    if not os.path.exists(base_download_dir):
        os.makedirs(base_download_dir)
        print(f"Created base download directory at {base_download_dir}")

    # Only tournaments dated on or after this date are processed.
    date_threshold = datetime(2024, 11, 14).date()

    # Initialize the browser
    print("Setting up the browser...")
    browser = setup_browser(base_download_dir)  # Initial browser setup
    try:
        # Retrieve list of tournaments after the date threshold
        tournaments = get_tournaments_after_date(browser, date_threshold)

        for tournament_url, tournament_name, tournament_date in tournaments:
            print(f"\nProcessing tournament: {tournament_name} dated {tournament_date}")

            # Create a specific directory for the tournament
            tournament_dir = os.path.join(base_download_dir, tournament_name.replace('/', '-'))
            if not os.path.exists(tournament_dir):
                os.makedirs(tournament_dir)
                print(f"Created tournament directory at {tournament_dir}")

            # Reconfigure the browser to use the tournament directory.
            # The download dir is set via preferences at startup, hence the
            # restart of the whole browser per tournament.
            browser.quit()
            browser = setup_browser(tournament_dir)  # Reinitialize with tournament-specific directory

            # Navigate to the tournament page
            print(f"Navigating to the tournament page {tournament_url}...")
            browser.get(tournament_url)
            time.sleep(3)  # Wait for the page to load

            # Retrieve tournament info (this will get the decks)
            tournament_name, deck_links = get_tournament_info(browser)

            # Download each deck
            total_decks = len(deck_links)
            for idx, (deck_url, deck_name) in enumerate(deck_links, start=1):
                print(f"\nProcessing deck {idx}/{total_decks}: {deck_name}")
                download_deck(browser, deck_url, idx, total_decks, tournament_dir)

    finally:
        print("Closing the browser...")
        browser.quit()
|
||||
|
||||
if __name__ == '__main__':
    # Run the downloader only when executed as a script.
    main()
|
||||
318
final_lda_Qda.R
Normal file
318
final_lda_Qda.R
Normal file
@ -0,0 +1,318 @@
|
||||
# Modeling, plotting, and data-wrangling dependencies.
library(olsrr)
library(MASS)          # lda() / qda()
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)         # confusionMatrix(), nearZeroVar()
library(rpart)
library(randomForest)

library(pheatmap)
library(viridis)
# Load the dataset
data1 <- read.csv("deck_data.csv")
# Normalized standing: rank as a percentage of tournament size
# (smaller y = better finish).
data1$y <- (data1$Rank / data1$TournamentSize) * 100

# Convert y to a categorical Performance variable
# NOTE(review): decks at or below the median y (i.e. the BETTER finishers)
# are labeled "Low" -- the label tracks the y value, not the quality of the
# finish; confirm downstream interpretation.
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
data1$Performance <- as.factor(data1$Performance)

# Split the data into training and test sets (70/30)
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(data1), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
|
||||
|
||||
recommend_cards <- function(decklist_row, train_data, test_data) {
  # Build per-commander classifiers (decision tree, LDA, QDA, random forest)
  # on 0/1 card-presence columns, then use the decision tree's variable
  # importance to suggest cards to add/remove for the given decklist.
  #
  # Args:
  #   decklist_row: one row of the deck data (the deck to advise on).
  #   train_data, test_data: the global train/test split.
  # Returns:
  #   list(predicted_standing_dt, cards_to_add, cards_to_consider_removing,
  #   cm_train_dt), or NULL when fewer than 10 training decks exist for the
  #   commander (+ partner) combination.

  # Non-card columns: metadata and engineered features; every remaining
  # column is treated as a card-presence predictor.
  non_card_columns <- c(
    "Rank", "TournamentSize", "Commander", "Partner",
    "Num_Artifacts", "Num_Creatures", "Num_Enchantments",
    "Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
    "Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
    "Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
    "Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
    "Num_Boardwipe", "y", "Performance"
  )

  commander_input <- as.character(decklist_row$Commander)
  partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))

  # Restrict train/test to decks sharing this commander (and partner if any).
  if (partner_input == "") {
    subset_train_data <- subset(train_data, Commander == commander_input)
    subset_test_data <- subset(test_data, Commander == commander_input)
  } else {
    subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
    subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
  }

  if (nrow(subset_train_data) < 10) {
    warning("Not enough data to build a reliable model.")
    return(NULL)
  }

  # Get card columns
  card_columns <- setdiff(names(train_data), non_card_columns)

  predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
  response_train <- subset_train_data$Performance

  # Remove near zero variance predictors before LDA/QDA
  # (cards almost always present or absent carry no signal and break QDA).
  nzv <- nearZeroVar(predictors_train_full)
  if (length(nzv) > 0) {
    predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
  } else {
    predictors_train <- predictors_train_full
  }

  # Update card_columns to reflect the removed predictors
  filtered_card_columns <- colnames(predictors_train)

  # Check if we have at least two classes in response_train (for LDA/QDA)
  run_lda_qda <- length(unique(response_train)) > 1

  # Build Decision Tree
  dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")

  # Try LDA (may fail on collinear/degenerate predictors; warn and continue)
  lda_model <- NULL
  if (run_lda_qda) {
    lda_model <- tryCatch(
      {
        lda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("LDA failed: ", e$message)
        NULL
      }
    )
  } else {
    warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
  }

  # Try QDA (same failure handling as LDA)
  qda_model <- NULL
  if (run_lda_qda) {
    qda_model <- tryCatch(
      {
        qda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("QDA failed: ", e$message)
        NULL
      }
    )
  }

  # Build Random Forest
  rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))

  # Evaluate models on test data if available
  if (nrow(subset_test_data) > 0) {
    # Subset the test data to the same filtered predictors
    predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
    response_test <- subset_test_data$Performance

    # Predictions (Decision Tree always available)
    dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
    cat("\nDecision Tree Confusion Matrix:\n")
    print(confusionMatrix(dt_predictions, response_test))

    if (!is.null(lda_model)) {
      lda_predictions <- predict(lda_model, newdata = predictors_test)$class
      cat("\nLDA Confusion Matrix:\n")
      print(confusionMatrix(lda_predictions, response_test))
    }

    if (!is.null(qda_model)) {
      qda_predictions <- predict(qda_model, newdata = predictors_test)$class
      cat("\nQDA Confusion Matrix:\n")
      print(confusionMatrix(qda_predictions, response_test))
    }

    rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
    cat("\nRandom Forest Confusion Matrix:\n")
    print(confusionMatrix(rf_predictions, response_test))

  } else {
    warning("No test data available for this commander.")
  }

  # Plot the decision tree
  rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
             main = paste("Decision Tree for Commander:", commander_input))

  # Predict performance for the given decklist using the decision tree (example)
  # We must also apply the same filtered predictors to the decklist
  predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
  predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")

  cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
  print(predicted_performance_dt)

  # If variable importance is available from decision tree
  if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
    importance <- data.frame(
      card = names(dt_model$variable.importance),
      importance = dt_model$variable.importance,
      row.names = NULL
    )

    # Sort cards by descending importance.
    importance <- importance[order(-importance$importance), ]

    # Cards currently in the deck (presence columns equal to 1).
    card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
    indices <- which(card_values == 1)
    deck_cards <- names(card_values)[indices]

    # Suggest the 5 most important cards the deck does not already run.
    cards_to_add <- setdiff(importance$card, deck_cards)
    top_cards_to_add <- head(cards_to_add, 5)

    # Cards in the deck with no recorded importance are removal candidates.
    # NOTE(review): by construction non_important_cards are absent from
    # importance$card, so the ifelse below always yields 0 -- confirm intent.
    non_important_cards <- setdiff(deck_cards, importance$card)
    non_important_with_scores <- data.frame(
      card = non_important_cards,
      importance = ifelse(non_important_cards %in% importance$card,
                          importance$importance[match(non_important_cards, importance$card)], 0)
    )
    non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
    top_cards_to_remove <- head(non_important_with_scores$card, 5)
  } else {
    warning("No variable importance available from the Decision Tree model.")
    top_cards_to_add <- character(0)
    top_cards_to_remove <- character(0)
  }

  # Confusion matrix for training data (decision tree)
  predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
  cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
  cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
  print(cm_train_dt)

  return(list(
    predicted_standing_dt = predicted_performance_dt,
    cards_to_add = top_cards_to_add,
    cards_to_consider_removing = top_cards_to_remove,
    cm_train_dt = cm_train_dt
  ))
}
|
||||
|
||||
# Example usage: advise on one test deck and visualize its training
# confusion matrix as a heatmap.
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)

cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)

cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)

cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)

# Extract the confusion matrix from your model
# BUG FIX: cm_train_dt was previously printed BEFORE it was assigned in
# this scope (it only existed inside recommend_cards); extract it from the
# returned list first, then print.
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)

# Create a high-contrast color palette using viridis
my_palette <- viridis(100)

# Plot the heatmap
pheatmap(conf_matrix,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         color = my_palette,
         fontsize_row = 8,
         fontsize_col = 8,
         angle_col = 45,
         display_numbers = TRUE,
         number_format = "%.0f",
         main = "Confusion Matrix Heatmap")
|
||||
|
||||
|
||||
# Loop

# Identify the most popular commanders (slice(1:5) keeps the top 5;
# earlier comments said 10).
top_commanders <- data1 %>%
  group_by(Commander) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice(1:5) %>%
  pull(Commander)

# Initialize a list to store results
model_results_list <- list()

# Loop over the top commanders
for (comm in top_commanders) {
  # Find a test deck for this commander (if available)
  subset_test_decks <- subset(test_data, Commander == comm)

  if (nrow(subset_test_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }

  # Just pick the first test deck for demonstration
  test_deck <- subset_test_decks[1, ]

  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)

  if (!is.null(recommendations)) {
    # Store relevant parts of the output
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}

# After the loop, model_results_list contains the outputs for each commander
# Print summary of results
cat("\nSummary of Model Results for Top 10 Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  cat("Predicted Standing (Decision Tree):", res$predicted_standing_dt, "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}
|
||||
|
||||
# Example usage from your code snippet (already integrated in the loop,
|
||||
# but you could still do individually if desired):
|
||||
# test_deck <- test_data[3, ]
|
||||
# print(test_deck$Commander)
|
||||
# recommendations <- recommend_cards(test_deck, train_data, test_data)
|
||||
#
|
||||
# cat("\nCards to Consider Adding:\n")
|
||||
# print(recommendations$cards_to_add)
|
||||
#
|
||||
# cat("\nCards to Consider Removing:\n")
|
||||
# print(recommendations$cards_to_consider_removing)
|
||||
#
|
||||
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
|
||||
# print(recommendations$predicted_standing_dt)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
266
process_decklists.py
Normal file
266
process_decklists.py
Normal file
@ -0,0 +1,266 @@
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import csv
|
||||
import time # Optional: For adding delays between API requests
|
||||
|
||||
# Initialize variables
unique_cards = set()   # every distinct card name seen across all decklists
card_types = {}        # card name -> list of main type words (e.g. ['Legendary', 'Creature'])
card_tags = {}         # card name -> list of capitalized functional tags
deck_data = []         # one dict per processed decklist

# Tags of interest (Scryfall 'otag' functional categories)
tags_of_interest = [
    'ramp', 'draw', 'tutor', 'counterspell', 'removal',
    'stax', 'protection', 'boardwipe'
]
|
||||
|
||||
def process_decklist_file(filepath):
    """Split a decklist text file into deck, sideboard, and commander lines.

    Expected layout: main-deck lines, then an optional 'SIDEBOARD:' section,
    then a blank-line-separated commander section at the end. Returns three
    lists of stripped card lines (e.g. '1 Sol Ring').
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Keep all lines (including blanks, which are section separators),
        # dropping only the trailing newline.
        raw_lines = [ln.rstrip('\n') for ln in handle]

    deck_cards = []
    sideboard_cards = []
    commander_cards = []

    section = 'deck'
    idx = 0
    total = len(raw_lines)
    while idx < total:
        current = raw_lines[idx].strip()

        if section == 'deck':
            if current == '':
                # A blank line ends the deck; peek ahead to see whether an
                # explicit sideboard header or the commanders come next.
                peek = idx + 1
                while peek < total and raw_lines[peek].strip() == '':
                    peek += 1
                if peek < total and raw_lines[peek].strip().upper() == 'SIDEBOARD:':
                    section = 'sideboard'
                    idx = peek  # land on the 'SIDEBOARD:' header line
                else:
                    section = 'commander'
                    idx += 1  # step past the blank separator
                    continue
            elif current.upper() == 'SIDEBOARD:':
                section = 'sideboard'
            else:
                deck_cards.append(current)
        elif section == 'sideboard':
            if current == '':
                # Blank line ends the sideboard; commanders follow.
                section = 'commander'
                idx += 1
                continue
            elif current.upper() == 'SIDEBOARD:':
                pass  # repeated header inside the sideboard; ignore it
            else:
                sideboard_cards.append(current)
        elif section == 'commander':
            # Commander section: keep every non-empty line.
            if current != '':
                commander_cards.append(current)
        idx += 1

    return deck_cards, sideboard_cards, commander_cards
|
||||
|
||||
def fetch_cards_for_tag(tag):
    """Return the set of card names Scryfall associates with a functional tag.

    Pages through the /cards/search API while 'has_more' is true; on any
    non-200 response the partial result collected so far is returned.
    """
    found = set()
    page_number = 1
    while True:
        response = requests.get(
            "https://api.scryfall.com/cards/search",
            params={'q': f"f:edh otag:{tag}", 'page': page_number},
        )
        if response.status_code != 200:
            print(f"Error fetching cards for tag: {tag}")
            break
        payload = response.json()
        found.update(card['name'] for card in payload['data'])
        page_number += 1
        time.sleep(0.1)  # Sleep to respect rate limits
        if not payload.get('has_more', False):
            break
    return found
|
||||
|
||||
# Step 4: Collect Unique Card Names
# Decklists live alongside this script, named '<rank>-<size>-<name>.txt'
# (the naming produced by download_tournament.py).
decklist_dir = os.path.dirname(os.path.abspath(__file__))

for filename in os.listdir(decklist_dir):
    # Process only decklist files matching the pattern
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        # Collect unique cards from the deck and commanders, skip sideboard
        for line in deck_cards + commander_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                # Quantity is parsed but only the card name matters here.
                quantity = int(match.group(1))
                card_name = match.group(2)
                unique_cards.add(card_name)
|
||||
|
||||
# Step 5: Retrieve Card Types Using Scryfall API
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='

for card_name in unique_cards:
    encoded_name = requests.utils.quote(card_name)
    url = scryfall_api_url + encoded_name
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        types = data['type_line']
        # Keep only the words before the em dash (types/supertypes, dropping
        # subtypes): 'Legendary Creature — Elf' -> ['Legendary', 'Creature'].
        main_types = types.split('—')[0].strip()
        type_list = [t.strip() for t in main_types.split()]
        card_types[card_name] = type_list
    else:
        # NOTE(review): split/double-faced names may fail the exact-name
        # lookup and end up with an empty type list -- confirm acceptable.
        print(f"Error fetching data for card: {card_name}")
        card_types[card_name] = []
    # Optional: Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds
|
||||
|
||||
# Step 5b: Build Card Tags Mapping
# Initialize card_tags dictionary (every unique card starts with no tags).
card_tags = {card_name: [] for card_name in unique_cards}

# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        if card_name in card_tags:
            card_tags[card_name].append(tag.capitalize())
        else:
            # Handle cases where card name variations exist
            # (case-insensitive fallback match against the decklist names).
            for unique_card in unique_cards:
                if card_name.lower() == unique_card.lower():
                    card_tags[unique_card].append(tag.capitalize())
                    break
|
||||
|
||||
# Step 6: Process Each Decklist to Collect Deck Data
for filename in os.listdir(decklist_dir):
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        # Rank and tournament size are encoded in the filename:
        # '<rank>-<size>-<name>.txt'.
        match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
        if match:
            rank = int(match.group(1))
            tournament_size = int(match.group(2))
        else:
            rank = None
            tournament_size = None
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        deck = {}          # card name -> quantity for this deck
        commander = None
        partner = None
        # Per-type counts, weighted by card quantity.
        num_artifacts = num_creatures = num_enchantments = 0
        num_instants = num_sorceries = num_planeswalkers = 0
        num_lands = 0

        # Initialize tag card sets to store unique cards per tag
        tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}

        # Process main deck cards
        for line in deck_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                deck[card_name] = quantity
                # A card with multiple types (e.g. 'Artifact Creature')
                # counts toward every matching type bucket.
                types = card_types.get(card_name, [])
                if 'Artifact' in types:
                    num_artifacts += quantity
                if 'Creature' in types:
                    num_creatures += quantity
                if 'Enchantment' in types:
                    num_enchantments += quantity
                if 'Instant' in types:
                    num_instants += quantity
                if 'Sorcery' in types:
                    num_sorceries += quantity
                if 'Planeswalker' in types:
                    num_planeswalkers += quantity
                if 'Land' in types:
                    num_lands += quantity

                # Process tags
                tags = card_tags.get(card_name, [])
                for tag in tags:
                    if tag in tag_card_sets:
                        tag_card_sets[tag].add(card_name)

        # Process commander cards: first line is the commander, second
        # (if present) the partner.
        for idx, line in enumerate(commander_cards):
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                card_name = match.group(2)
                if idx == 0:
                    commander = card_name
                elif idx == 1:
                    partner = card_name

        # Compute tag counts as the number of unique cards per tag
        tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}

        deck_row = {
            'rank': rank,
            'tournament_size': tournament_size,
            'commander': commander,
            'partner': partner,
            'num_artifacts': num_artifacts,
            'num_creatures': num_creatures,
            'num_enchantments': num_enchantments,
            'num_instants': num_instants,
            'num_sorceries': num_sorceries,
            'num_planeswalkers': num_planeswalkers,
            'num_lands': num_lands,
            'tag_counts': tag_counts,
            'deck': deck
        }
        deck_data.append(deck_row)
|
||||
|
||||
# Step 7: Prepare the CSV Header
# Fixed metadata/feature columns first, then one Num_<Tag> column per tag,
# then one 0/1 presence column per unique card (sorted for stable order).
header = ['Rank', 'TournamentSize', 'Commander', 'Partner', 'Num_Artifacts', 'Num_Creatures',
          'Num_Enchantments', 'Num_Instants', 'Num_Sorceries',
          'Num_Planeswalkers', 'Num_Lands']

# Add tag columns
header.extend(['Num_' + tag.capitalize() for tag in tags_of_interest])

sorted_cards = sorted(unique_cards)
header.extend(sorted_cards)
|
||||
|
||||
# Step 8: Write Data to CSV File
# Rows are written in the same column order as the header built in Step 7.
with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [
            deck['rank'],
            deck['tournament_size'],
            deck['commander'],
            deck['partner'],
            deck['num_artifacts'],
            deck['num_creatures'],
            deck['num_enchantments'],
            deck['num_instants'],
            deck['num_sorceries'],
            deck['num_planeswalkers'],
            deck['num_lands']
        ]
        # Add tag counts
        for tag in tags_of_interest:
            row.append(deck['tag_counts'][tag.capitalize()])
        # Add card presence (1 or 0)
        for card_name in sorted_cards:
            row.append(1 if card_name in deck['deck'] else 0)
            # If you prefer to include quantities, use:
            # row.append(deck['deck'].get(card_name, 0))
        writer.writerow(row)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user