Initial commit: Organized project code and documentation for public release.

This commit is contained in:
Andreas Hauck 2024-12-20 23:41:07 +00:00
commit e673f32d31
6 changed files with 2678 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata

13
USCLAP.Rproj Normal file
View File

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX

1871
deck_data.csv Normal file

File diff suppressed because one or more lines are too long

206
download_tournament.py Normal file
View File

@ -0,0 +1,206 @@
import os
import time
import re
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def setup_browser(download_dir, geckodriver_path='/usr/local/bin/geckodriver'):
    """Create a Firefox WebDriver that saves downloads silently to download_dir.

    Parameters
    ----------
    download_dir : str
        Directory Firefox writes downloaded files into without prompting.
    geckodriver_path : str, optional
        Path to the geckodriver binary. Defaults to the previous hard-coded
        '/usr/local/bin/geckodriver', so existing callers are unaffected.

    Returns
    -------
    selenium.webdriver.Firefox
    """
    firefox_options = Options()
    # folderList=2 -> download into the custom directory set below,
    # instead of the user's default Downloads folder.
    firefox_options.set_preference('browser.download.folderList', 2)
    firefox_options.set_preference('browser.download.dir', download_dir)
    # Save these MIME types straight to disk instead of showing a dialog.
    firefox_options.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/plain,text/csv,application/octet-stream')
    # Don't open PDFs in the built-in viewer.
    firefox_options.set_preference('pdfjs.disabled', True)
    # Suppress every download-manager window/alert so runs are unattended.
    firefox_options.set_preference('browser.download.manager.showWhenStarting', False)
    firefox_options.set_preference('browser.download.manager.useWindow', False)
    firefox_options.set_preference('browser.download.manager.focusWhenStarting', False)
    firefox_options.set_preference('browser.download.manager.alertOnEXEOpen', False)
    firefox_options.set_preference('browser.download.manager.showAlertOnComplete', False)
    firefox_options.set_preference('browser.download.manager.closeWhenDone', False)
    # Specify the path to geckodriver (now parameterized).
    gecko_service = Service(executable_path=geckodriver_path)
    browser = webdriver.Firefox(service=gecko_service, options=firefox_options)
    return browser
def parse_date(date_str):
    """Parse a human-readable date like 'December 1st, 2024' into a date.

    Ordinal suffixes (st/nd/rd/th) and commas are stripped before parsing
    with the '%B %d %Y' format (the original only stripped suffixes, so
    'November 14th, 2024' failed to parse).

    Returns
    -------
    datetime.date or None
        The parsed date, or None when the string cannot be parsed.
    """
    # '1st' -> '1'; also drop commas so '%B %d %Y' matches 'Month D, YYYY'.
    date_str = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str).replace(',', '')
    # Collapse any doubled whitespace left behind by the removals.
    date_str = re.sub(r'\s+', ' ', date_str).strip()
    try:
        tournament_date = datetime.strptime(date_str, '%B %d %Y').date()
        print(f"Parsed date: {tournament_date}")
        return tournament_date
    except ValueError as e:
        print(f"Error parsing date '{date_str}': {e}")
        return None
def get_tournaments_after_date(browser, date_threshold):
    """Scrape edhtop16's tournament listing, keeping events on/after date_threshold.

    Returns a list of (url, name, date) tuples; entries that fail to parse
    are logged and skipped.
    """
    print("Navigating to the tournaments page...")
    browser.get('https://edhtop16.com/tournaments?sortBy=DATE')
    time.sleep(3)
    # Each tournament is rendered as one card div on the listing page.
    cards = browser.find_elements(
        By.CSS_SELECTOR,
        "div.group.relative.overflow-hidden.rounded-lg.bg-white.shadow")
    print(f"Found {len(cards)} tournaments on the page.")
    kept = []
    for card in cards:
        try:
            # The bold underlined anchor carries both the name and the URL.
            anchor = card.find_element(
                By.CSS_SELECTOR, 'a.line-clamp-2.text-xl.font-bold.underline')
            name = anchor.text
            url = anchor.get_attribute('href')
            raw_date = card.find_element(By.CSS_SELECTOR, 'span').text
            print(f"Tournament found: {name}, Date: {raw_date}, URL: {url}")
            parsed = parse_date(raw_date)
            if parsed and parsed >= date_threshold:
                kept.append((url, name, parsed))
        except Exception as e:
            print(f"Error processing tournament entry: {e}")
    print(f"Total tournaments after {date_threshold}: {len(kept)}")
    return kept
def get_tournament_info(browser):
    """Return (tournament_name, deck_links) for the currently loaded tournament page.

    deck_links is a list of (url, deck_name) tuples. The tournament name has
    '/' replaced with '-' so it is safe to use as a directory name.
    """
    print("Retrieving the tournament name and deck links...")
    time.sleep(3)  # allow the page to render
    try:
        heading = browser.find_element(By.TAG_NAME, 'h1')
        tournament_name = heading.text if heading else "Tournament"
    except Exception as e:
        print(f"Error retrieving tournament name: {e}")
        tournament_name = "Tournament"
    tournament_name = tournament_name.replace('/', '-')  # filesystem-safe
    print(f"Tournament Name: {tournament_name}")
    # Every deck on the page uses the same bold underlined anchor style.
    anchors = browser.find_elements(
        By.CSS_SELECTOR, "a.line-clamp-2.text-xl.font-bold.underline")
    deck_links = [(a.get_attribute('href'), a.text) for a in anchors]
    print(f"Total decks found: {len(deck_links)}")
    return tournament_name, deck_links
def download_deck(browser, deck_url, rank, total_decks, save_dir):
    """Download one deck's MTGO-format .txt export from its deck page.

    Clicks More -> Export -> 'Download for MTGO', then polls save_dir (up to
    30s) for the resulting .txt file and renames it to
    '<rank>-<total_decks>-<original name>'. Failures are logged, not raised.
    """
    print(f"Navigating to deck {rank} page...")
    browser.get(deck_url)
    try:
        # Wait for the "More" button and click it
        print("Waiting for the 'More' button to appear...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'More')]/..")))
        more_button = browser.find_element(By.XPATH, "//span[contains(text(), 'More')]/..")
        # JS click avoids 'element not clickable' errors from overlays.
        browser.execute_script("arguments[0].click();", more_button)
        time.sleep(1)
        # Wait for and click the "Export" option within the dropdown
        print("Waiting for the 'Export' option...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")))
        export_option = browser.find_element(By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")
        browser.execute_script("arguments[0].click();", export_option)
        time.sleep(1)
        # Wait for and click the "Download for MTGO" link
        print("Clicking 'Download for MTGO' link...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")))
        mtgo_download_link = browser.find_element(By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")
        mtgo_download_link.click()
        time.sleep(2)  # Wait for the download to initiate
        # Wait for the downloaded file to appear in the download directory
        print("Waiting for the file to download...")
        download_wait_time = 0
        downloaded_filepath = None
        # Check the download directory for a new file (poll once per second)
        while download_wait_time < 30:
            files = os.listdir(save_dir)
            if files:
                # Find the most recent file in the directory.
                # NOTE(review): assumes only this download writes to save_dir
                # during the poll; a concurrent write could be picked up
                # instead — confirm this is single-threaded per directory.
                downloaded_filepath = max([os.path.join(save_dir, f) for f in files], key=os.path.getctime)
                if downloaded_filepath.endswith('.txt'):
                    break
            time.sleep(1)
            download_wait_time += 1
        # Move and rename the file if it was found
        if downloaded_filepath and downloaded_filepath.endswith('.txt'):
            original_filename = os.path.basename(downloaded_filepath)
            # Sanitize the original filename (no path separators)
            sanitized_filename = original_filename.replace('/', '-').replace('\\', '-')
            new_filename = os.path.join(save_dir, f"{rank}-{total_decks}-{sanitized_filename}")
            os.rename(downloaded_filepath, new_filename)
            print(f"Downloaded deck {rank}/{total_decks}: {new_filename}")
        else:
            print(f"Failed to download deck {rank}: Download timed out.")
    except Exception as e:
        print(f"Error downloading deck {rank}: {e}")
def main():
    """Download every decklist of every tournament dated on/after the threshold."""
    # Base folder that will hold one sub-directory per tournament.
    base_download_dir = os.path.join(os.getcwd(), 'downloads')
    if not os.path.exists(base_download_dir):
        os.makedirs(base_download_dir)
        print(f"Created base download directory at {base_download_dir}")
    # Only tournaments on/after this date are processed.
    date_threshold = datetime(2024, 11, 14).date()
    print("Setting up the browser...")
    browser = setup_browser(base_download_dir)  # initial session, base dir
    try:
        tournaments = get_tournaments_after_date(browser, date_threshold)
        for tournament_url, tournament_name, tournament_date in tournaments:
            print(f"\nProcessing tournament: {tournament_name} dated {tournament_date}")
            # One directory per tournament (sanitized name).
            tournament_dir = os.path.join(
                base_download_dir, tournament_name.replace('/', '-'))
            if not os.path.exists(tournament_dir):
                os.makedirs(tournament_dir)
                print(f"Created tournament directory at {tournament_dir}")
            # Firefox's download directory is fixed per session, so restart
            # the browser pointed at the tournament-specific directory.
            browser.quit()
            browser = setup_browser(tournament_dir)
            print(f"Navigating to the tournament page {tournament_url}...")
            browser.get(tournament_url)
            time.sleep(3)  # let the page load
            # Retrieve tournament info (this also collects the deck links).
            tournament_name, deck_links = get_tournament_info(browser)
            total_decks = len(deck_links)
            for idx, (deck_url, deck_name) in enumerate(deck_links, start=1):
                print(f"\nProcessing deck {idx}/{total_decks}: {deck_name}")
                download_deck(browser, deck_url, idx, total_decks, tournament_dir)
    finally:
        print("Closing the browser...")
        browser.quit()


if __name__ == '__main__':
    main()

318
final_lda_Qda.R Normal file
View File

@ -0,0 +1,318 @@
library(olsrr)
library(MASS)
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)
library(rpart)
library(randomForest)
library(pheatmap)
library(viridis)
# Load the dataset
data1 <- read.csv("deck_data.csv")
# Normalized finish: rank as a percentage of the field size.
data1$y <- (data1$Rank / data1$TournamentSize) * 100
# Convert y to a categorical Performance variable.
# NOTE(review): y <= median (i.e. the BETTER finishes) is labelled "Low" —
# confirm the intended direction of the label before interpreting results.
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
data1$Performance <- as.factor(data1$Performance)
# Split the data into training and test sets (70/30)
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(data1), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
# Fit per-commander models (decision tree, LDA, QDA, random forest) on decks
# sharing the given deck's commander (and partner), then use the tree's
# variable importance to suggest cards to add/remove. Returns NULL when fewer
# than 10 matching training decks exist; otherwise returns a list with the
# tree's predicted standing, add/remove suggestions, and the training
# confusion matrix.
recommend_cards <- function(decklist_row, train_data, test_data) {
  # Metadata columns — everything NOT in this list is a card-presence column.
  non_card_columns <- c(
    "Rank", "TournamentSize", "Commander", "Partner",
    "Num_Artifacts", "Num_Creatures", "Num_Enchantments",
    "Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
    "Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
    "Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
    "Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
    "Num_Boardwipe", "y", "Performance"
  )
  commander_input <- as.character(decklist_row$Commander)
  partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))
  # Restrict train/test to decks with the same commander (and partner, if any).
  if (partner_input == "") {
    subset_train_data <- subset(train_data, Commander == commander_input)
    subset_test_data <- subset(test_data, Commander == commander_input)
  } else {
    subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
    subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
  }
  if (nrow(subset_train_data) < 10) {
    warning("Not enough data to build a reliable model.")
    return(NULL)
  }
  # Get card columns
  card_columns <- setdiff(names(train_data), non_card_columns)
  predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
  response_train <- subset_train_data$Performance
  # Remove near zero variance predictors before LDA/QDA
  nzv <- nearZeroVar(predictors_train_full)
  if (length(nzv) > 0) {
    predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
  } else {
    predictors_train <- predictors_train_full
  }
  # Update card_columns to reflect the removed predictors
  filtered_card_columns <- colnames(predictors_train)
  # Check if we have at least two classes in response_train (for LDA/QDA)
  run_lda_qda <- length(unique(response_train)) > 1
  # Build Decision Tree
  dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")
  # Try LDA (may fail, e.g. on collinear predictors — keep NULL on failure)
  lda_model <- NULL
  if (run_lda_qda) {
    lda_model <- tryCatch(
      {
        lda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("LDA failed: ", e$message)
        NULL
      }
    )
  } else {
    warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
  }
  # Try QDA (same failure handling as LDA)
  qda_model <- NULL
  if (run_lda_qda) {
    qda_model <- tryCatch(
      {
        qda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("QDA failed: ", e$message)
        NULL
      }
    )
  }
  # Build Random Forest
  rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))
  # Evaluate models on test data if available
  if (nrow(subset_test_data) > 0) {
    # Subset the test data to the same filtered predictors
    predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
    response_test <- subset_test_data$Performance
    # Predictions (Decision Tree always available)
    dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
    cat("\nDecision Tree Confusion Matrix:\n")
    print(confusionMatrix(dt_predictions, response_test))
    if (!is.null(lda_model)) {
      lda_predictions <- predict(lda_model, newdata = predictors_test)$class
      cat("\nLDA Confusion Matrix:\n")
      print(confusionMatrix(lda_predictions, response_test))
    }
    if (!is.null(qda_model)) {
      qda_predictions <- predict(qda_model, newdata = predictors_test)$class
      cat("\nQDA Confusion Matrix:\n")
      print(confusionMatrix(qda_predictions, response_test))
    }
    rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
    cat("\nRandom Forest Confusion Matrix:\n")
    print(confusionMatrix(rf_predictions, response_test))
  } else {
    warning("No test data available for this commander.")
  }
  # Plot the decision tree
  rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
             main = paste("Decision Tree for Commander:", commander_input))
  # Predict performance for the given decklist using the decision tree;
  # apply the same filtered predictor set as used in training.
  predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
  predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")
  cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
  print(predicted_performance_dt)
  # If variable importance is available from decision tree
  if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
    importance <- data.frame(
      card = names(dt_model$variable.importance),
      importance = dt_model$variable.importance,
      row.names = NULL
    )
    importance <- importance[order(-importance$importance), ]
    # Cards present in the given decklist (presence flag == 1)
    card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
    indices <- which(card_values == 1)
    deck_cards <- names(card_values)[indices]
    # Highest-importance cards NOT already in the deck -> suggestions to add
    cards_to_add <- setdiff(importance$card, deck_cards)
    top_cards_to_add <- head(cards_to_add, 5)
    # NOTE(review): non_important_cards is deck_cards MINUS importance$card,
    # so the %in% test below is always FALSE and every score is 0 — the
    # ordering is effectively arbitrary. Confirm whether the intent was to
    # rank ALL deck cards by importance and drop the lowest-scoring ones.
    non_important_cards <- setdiff(deck_cards, importance$card)
    non_important_with_scores <- data.frame(
      card = non_important_cards,
      importance = ifelse(non_important_cards %in% importance$card,
                          importance$importance[match(non_important_cards, importance$card)], 0)
    )
    non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
    top_cards_to_remove <- head(non_important_with_scores$card, 5)
  } else {
    warning("No variable importance available from the Decision Tree model.")
    top_cards_to_add <- character(0)
    top_cards_to_remove <- character(0)
  }
  # Confusion matrix for training data (decision tree)
  predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
  cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
  cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
  print(cm_train_dt)
  return(list(
    predicted_standing_dt = predicted_performance_dt,
    cards_to_add = top_cards_to_add,
    cards_to_consider_removing = top_cards_to_remove,
    cm_train_dt = cm_train_dt
  ))
}
# Example usage
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)

cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)

cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)

cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)

# BUG FIX: the original printed cm_train_dt here BEFORE extracting it from
# the returned list, but that name only exists inside recommend_cards() —
# extract it from the result first, then print.
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)
# Create a high-contrast color palette using viridis
my_palette <- viridis(100)
# Plot the confusion matrix as a heatmap
pheatmap(conf_matrix,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         color = my_palette,
         fontsize_row = 8,
         fontsize_col = 8,
         angle_col = 45,
         display_numbers = TRUE,
         number_format = "%.0f",
         main = "Confusion Matrix Heatmap")
# Loop
# Identify the 5 most popular commanders by deck count in the full dataset.
# (The original comment said 10, but slice(1:5) keeps the top 5.)
top_commanders <- data1 %>%
  group_by(Commander) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice(1:5) %>%
  pull(Commander)
# Initialize a list to store results
model_results_list <- list()
# Loop over the top commanders
for (comm in top_commanders) {
  # Find a test deck for this commander (if available)
  subset_test_decks <- subset(test_data, Commander == comm)
  if (nrow(subset_test_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }
  # Just pick the first test deck for demonstration
  test_deck <- subset_test_decks[1, ]
  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)
  if (!is.null(recommendations)) {
    # Store relevant parts of the output
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}
# After the loop, model_results_list contains the outputs for each of the
# top commanders. Print a summary of the results.
# BUG FIX: the message said "Top 10" but the loop above keeps the top 5
# (slice(1:5)) — correct the count in the printed header.
cat("\nSummary of Model Results for Top 5 Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  cat("Predicted Standing (Decision Tree):", res$predicted_standing_dt, "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}
# Example usage from your code snippet (already integrated in the loop,
# but you could still do individually if desired):
# test_deck <- test_data[3, ]
# print(test_deck$Commander)
# recommendations <- recommend_cards(test_deck, train_data, test_data)
#
# cat("\nCards to Consider Adding:\n")
# print(recommendations$cards_to_add)
#
# cat("\nCards to Consider Removing:\n")
# print(recommendations$cards_to_consider_removing)
#
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
# print(recommendations$predicted_standing_dt)

266
process_decklists.py Normal file
View File

@ -0,0 +1,266 @@
import os
import re
import requests
import csv
import time # Optional: For adding delays between API requests
# Initialize variables
unique_cards = set()
card_types = {}
card_tags = {}
deck_data = []
# Tags of interest
tags_of_interest = [
'ramp', 'draw', 'tutor', 'counterspell', 'removal',
'stax', 'protection', 'boardwipe'
]
# Function to process a decklist file and separate it into deck, sideboard, and commander sections
def process_decklist_file(filepath):
    """Split a decklist text file into (deck, sideboard, commander) line lists.

    Expected layout: main-deck lines, then optionally a blank line plus a
    'SIDEBOARD:' header with its lines, then a blank line followed by the
    commander line(s). Blank lines act as section separators; returned lines
    are whitespace-stripped.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Keep every line (including blanks) but drop trailing newlines.
        raw = [entry.rstrip('\n') for entry in handle.readlines()]

    deck_cards = []
    sideboard_cards = []
    commander_cards = []
    state = 'deck'
    pos = 0
    total = len(raw)
    while pos < total:
        text = raw[pos].strip()
        if state == 'deck':
            if not text:
                # Blank line: look ahead past consecutive blanks to decide
                # whether a sideboard or the commander section follows.
                ahead = pos + 1
                while ahead < total and not raw[ahead].strip():
                    ahead += 1
                if ahead < total and raw[ahead].strip().upper() == 'SIDEBOARD:':
                    state = 'sideboard'
                    pos = ahead  # jump onto the SIDEBOARD: header line
                else:
                    state = 'commander'
                    pos += 1  # skip the blank separator
                continue
            if text.upper() == 'SIDEBOARD:':
                state = 'sideboard'
            else:
                deck_cards.append(text)
        elif state == 'sideboard':
            if not text:
                # Blank line after the sideboard starts the commander section.
                state = 'commander'
                pos += 1
                continue
            if text.upper() != 'SIDEBOARD:':
                sideboard_cards.append(text)
        else:
            # Commander section: keep every non-blank line.
            if text:
                commander_cards.append(text)
        pos += 1
    return deck_cards, sideboard_cards, commander_cards
# Function to fetch cards for a given tag from Scryfall
def fetch_cards_for_tag(tag):
    """Collect the names of every EDH-legal card carrying a Scryfall oracle tag.

    Pages through the Scryfall search API with the query 'f:edh otag:<tag>'.
    On an HTTP error the partial result gathered so far is returned.
    """
    names = set()
    next_page = 1
    while True:
        response = requests.get(
            "https://api.scryfall.com/cards/search",
            params={'q': f"f:edh otag:{tag}", 'page': next_page},
        )
        if response.status_code != 200:
            print(f"Error fetching cards for tag: {tag}")
            break
        payload = response.json()
        names.update(card['name'] for card in payload['data'])
        more = payload.get('has_more', False)
        next_page += 1
        time.sleep(0.1)  # Sleep to respect rate limits
        if not more:
            break
    return names
# Step 4: Collect Unique Card Names
# Scan every decklist file sitting next to this script and gather the set of
# distinct card names used across all decks (sideboards are skipped).
decklist_dir = os.path.dirname(os.path.abspath(__file__))
for filename in os.listdir(decklist_dir):
    # Process only decklist files matching '<rank>-<size>-<name>.txt'
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        # Collect unique cards from the deck and commanders, skip sideboard
        for line in deck_cards + commander_cards:
            # Lines look like '<quantity> <card name>'
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                unique_cards.add(card_name)
# Step 5: Retrieve Card Types Using Scryfall API
# For each unique card, fetch its type line and keep the primary types — the
# words before the em dash (e.g. 'Legendary Creature — Elf Druid' ->
# ['Legendary', 'Creature']).
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='
for card_name in unique_cards:
    encoded_name = requests.utils.quote(card_name)
    url = scryfall_api_url + encoded_name
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        types = data['type_line']
        # BUG FIX: the original called types.split(''), which raises
        # "ValueError: empty separator" on the first card. Scryfall separates
        # card types from subtypes with an em dash; split on that instead.
        main_types = types.split('—')[0].strip()
        type_list = [t.strip() for t in main_types.split()]
        card_types[card_name] = type_list
    else:
        print(f"Error fetching data for card: {card_name}")
        card_types[card_name] = []
    # Optional: Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds
# Step 5b: Build Card Tags Mapping
# Map each unique card name to the list of interest tags it carries.
# Initialize card_tags dictionary (every card starts with no tags)
card_tags = {card_name: [] for card_name in unique_cards}
# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        if card_name in card_tags:
            card_tags[card_name].append(tag.capitalize())
        else:
            # Handle cases where card name variations exist:
            # fall back to a case-insensitive match against collected names.
            for unique_card in unique_cards:
                if card_name.lower() == unique_card.lower():
                    card_tags[unique_card].append(tag.capitalize())
                    break
# Step 6: Process Each Decklist to Collect Deck Data
# Re-read every decklist and build one summary row per deck: rank/size parsed
# from the filename, per-type card counts, per-tag unique-card counts,
# commander/partner names, and the card->quantity map.
for filename in os.listdir(decklist_dir):
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        # Filenames look like '<rank>-<tournament size>-<deck name>.txt'
        match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
        if match:
            rank = int(match.group(1))
            tournament_size = int(match.group(2))
        else:
            rank = None
            tournament_size = None
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        deck = {}  # card name -> quantity for this deck
        commander = None
        partner = None
        num_artifacts = num_creatures = num_enchantments = 0
        num_instants = num_sorceries = num_planeswalkers = 0
        num_lands = 0
        # Initialize tag card sets to store unique cards per tag
        tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}
        # Process main deck cards
        for line in deck_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                deck[card_name] = quantity
                types = card_types.get(card_name, [])
                # A card may count toward several types (e.g. artifact creature)
                if 'Artifact' in types:
                    num_artifacts += quantity
                if 'Creature' in types:
                    num_creatures += quantity
                if 'Enchantment' in types:
                    num_enchantments += quantity
                if 'Instant' in types:
                    num_instants += quantity
                if 'Sorcery' in types:
                    num_sorceries += quantity
                if 'Planeswalker' in types:
                    num_planeswalkers += quantity
                if 'Land' in types:
                    num_lands += quantity
                # Process tags
                tags = card_tags.get(card_name, [])
                for tag in tags:
                    if tag in tag_card_sets:
                        tag_card_sets[tag].add(card_name)
        # Process commander cards (first line = commander, second = partner)
        for idx, line in enumerate(commander_cards):
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                card_name = match.group(2)
                if idx == 0:
                    commander = card_name
                elif idx == 1:
                    partner = card_name
        # Compute tag counts as the number of unique cards per tag
        tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}
        deck_row = {
            'rank': rank,
            'tournament_size': tournament_size,
            'commander': commander,
            'partner': partner,
            'num_artifacts': num_artifacts,
            'num_creatures': num_creatures,
            'num_enchantments': num_enchantments,
            'num_instants': num_instants,
            'num_sorceries': num_sorceries,
            'num_planeswalkers': num_planeswalkers,
            'num_lands': num_lands,
            'tag_counts': tag_counts,
            'deck': deck
        }
        deck_data.append(deck_row)
# Step 7: Prepare the CSV Header
# Fixed metadata columns first...
header = ['Rank', 'TournamentSize', 'Commander', 'Partner', 'Num_Artifacts', 'Num_Creatures',
          'Num_Enchantments', 'Num_Instants', 'Num_Sorceries',
          'Num_Planeswalkers', 'Num_Lands']
# ...then one count column per tag of interest...
header.extend(['Num_' + tag.capitalize() for tag in tags_of_interest])
# ...then one presence column per unique card (sorted for a stable order).
sorted_cards = sorted(unique_cards)
header.extend(sorted_cards)
# Step 8: Write Data to CSV File
# One row per deck: metadata, tag counts, then 0/1 card-presence flags, in
# the same column order as the header built in Step 7.
with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [
            deck['rank'],
            deck['tournament_size'],
            deck['commander'],
            deck['partner'],
            deck['num_artifacts'],
            deck['num_creatures'],
            deck['num_enchantments'],
            deck['num_instants'],
            deck['num_sorceries'],
            deck['num_planeswalkers'],
            deck['num_lands']
        ]
        # Add tag counts
        for tag in tags_of_interest:
            row.append(deck['tag_counts'][tag.capitalize()])
        # Add card presence (1 or 0)
        for card_name in sorted_cards:
            row.append(1 if card_name in deck['deck'] else 0)
            # If you prefer to include quantities, use:
            # row.append(deck['deck'].get(card_name, 0))
        writer.writerow(row)