Initial commit: Organized project code and documentation for public release.

This commit is contained in:
Andreas Hauck 2024-12-20 23:41:07 +00:00
commit e673f32d31
6 changed files with 2678 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata

13
USCLAP.Rproj Normal file
View File

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX

1871
deck_data.csv Normal file

File diff suppressed because one or more lines are too long

206
download_tournament.py Normal file
View File

@ -0,0 +1,206 @@
import os
import time
import re
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def setup_browser(download_dir, geckodriver_path='/usr/local/bin/geckodriver'):
    """Create a Firefox WebDriver that saves downloads silently to download_dir.

    Parameters
    ----------
    download_dir : str
        Directory Firefox writes downloaded files into without prompting.
    geckodriver_path : str, optional
        Path to the geckodriver binary. Defaults to the previous hard-coded
        '/usr/local/bin/geckodriver', so existing callers are unaffected.

    Returns
    -------
    selenium.webdriver.Firefox
    """
    firefox_options = Options()
    # folderList=2 -> download into the custom directory set below,
    # instead of the user's default Downloads folder.
    firefox_options.set_preference('browser.download.folderList', 2)
    firefox_options.set_preference('browser.download.dir', download_dir)
    # Save these MIME types straight to disk instead of showing a dialog.
    firefox_options.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/plain,text/csv,application/octet-stream')
    # Don't open PDFs in the built-in viewer.
    firefox_options.set_preference('pdfjs.disabled', True)
    # Suppress every download-manager window/alert so runs are unattended.
    firefox_options.set_preference('browser.download.manager.showWhenStarting', False)
    firefox_options.set_preference('browser.download.manager.useWindow', False)
    firefox_options.set_preference('browser.download.manager.focusWhenStarting', False)
    firefox_options.set_preference('browser.download.manager.alertOnEXEOpen', False)
    firefox_options.set_preference('browser.download.manager.showAlertOnComplete', False)
    firefox_options.set_preference('browser.download.manager.closeWhenDone', False)
    # Specify the path to geckodriver (now parameterized).
    gecko_service = Service(executable_path=geckodriver_path)
    browser = webdriver.Firefox(service=gecko_service, options=firefox_options)
    return browser
def parse_date(date_str):
    """Parse a human-readable date like 'December 1st, 2024' into a date.

    Ordinal suffixes (st/nd/rd/th) and commas are stripped before parsing
    with the '%B %d %Y' format (the original only stripped suffixes, so
    'November 14th, 2024' failed to parse).

    Returns
    -------
    datetime.date or None
        The parsed date, or None when the string cannot be parsed.
    """
    # '1st' -> '1'; also drop commas so '%B %d %Y' matches 'Month D, YYYY'.
    date_str = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str).replace(',', '')
    # Collapse any doubled whitespace left behind by the removals.
    date_str = re.sub(r'\s+', ' ', date_str).strip()
    try:
        tournament_date = datetime.strptime(date_str, '%B %d %Y').date()
        print(f"Parsed date: {tournament_date}")
        return tournament_date
    except ValueError as e:
        print(f"Error parsing date '{date_str}': {e}")
        return None
def get_tournaments_after_date(browser, date_threshold):
    """Scrape edhtop16's tournament listing, keeping events on/after date_threshold.

    Returns a list of (url, name, date) tuples; entries that fail to parse
    are logged and skipped.
    """
    print("Navigating to the tournaments page...")
    browser.get('https://edhtop16.com/tournaments?sortBy=DATE')
    time.sleep(3)
    # Each tournament is rendered as one card div on the listing page.
    cards = browser.find_elements(
        By.CSS_SELECTOR,
        "div.group.relative.overflow-hidden.rounded-lg.bg-white.shadow")
    print(f"Found {len(cards)} tournaments on the page.")
    kept = []
    for card in cards:
        try:
            # The bold underlined anchor carries both the name and the URL.
            anchor = card.find_element(
                By.CSS_SELECTOR, 'a.line-clamp-2.text-xl.font-bold.underline')
            name = anchor.text
            url = anchor.get_attribute('href')
            raw_date = card.find_element(By.CSS_SELECTOR, 'span').text
            print(f"Tournament found: {name}, Date: {raw_date}, URL: {url}")
            parsed = parse_date(raw_date)
            if parsed and parsed >= date_threshold:
                kept.append((url, name, parsed))
        except Exception as e:
            print(f"Error processing tournament entry: {e}")
    print(f"Total tournaments after {date_threshold}: {len(kept)}")
    return kept
def get_tournament_info(browser):
    """Return (tournament_name, deck_links) for the currently loaded tournament page.

    deck_links is a list of (url, deck_name) tuples. The tournament name has
    '/' replaced with '-' so it is safe to use as a directory name.
    """
    print("Retrieving the tournament name and deck links...")
    time.sleep(3)  # allow the page to render
    try:
        heading = browser.find_element(By.TAG_NAME, 'h1')
        tournament_name = heading.text if heading else "Tournament"
    except Exception as e:
        print(f"Error retrieving tournament name: {e}")
        tournament_name = "Tournament"
    tournament_name = tournament_name.replace('/', '-')  # filesystem-safe
    print(f"Tournament Name: {tournament_name}")
    # Every deck on the page uses the same bold underlined anchor style.
    anchors = browser.find_elements(
        By.CSS_SELECTOR, "a.line-clamp-2.text-xl.font-bold.underline")
    deck_links = [(a.get_attribute('href'), a.text) for a in anchors]
    print(f"Total decks found: {len(deck_links)}")
    return tournament_name, deck_links
def download_deck(browser, deck_url, rank, total_decks, save_dir):
    """Download one deck's MTGO-format .txt export from its deck page.

    Clicks More -> Export -> 'Download for MTGO', then polls save_dir (up to
    30s) for the resulting .txt file and renames it to
    '<rank>-<total_decks>-<original name>'. Failures are logged, not raised.
    """
    print(f"Navigating to deck {rank} page...")
    browser.get(deck_url)
    try:
        # Wait for the "More" button and click it
        print("Waiting for the 'More' button to appear...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'More')]/..")))
        more_button = browser.find_element(By.XPATH, "//span[contains(text(), 'More')]/..")
        # JS click avoids 'element not clickable' errors from overlays.
        browser.execute_script("arguments[0].click();", more_button)
        time.sleep(1)
        # Wait for and click the "Export" option within the dropdown
        print("Waiting for the 'Export' option...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")))
        export_option = browser.find_element(By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")
        browser.execute_script("arguments[0].click();", export_option)
        time.sleep(1)
        # Wait for and click the "Download for MTGO" link
        print("Clicking 'Download for MTGO' link...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")))
        mtgo_download_link = browser.find_element(By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")
        mtgo_download_link.click()
        time.sleep(2)  # Wait for the download to initiate
        # Wait for the downloaded file to appear in the download directory
        print("Waiting for the file to download...")
        download_wait_time = 0
        downloaded_filepath = None
        # Check the download directory for a new file (poll once per second)
        while download_wait_time < 30:
            files = os.listdir(save_dir)
            if files:
                # Find the most recent file in the directory.
                # NOTE(review): assumes only this download writes to save_dir
                # during the poll; a concurrent write could be picked up
                # instead — confirm this is single-threaded per directory.
                downloaded_filepath = max([os.path.join(save_dir, f) for f in files], key=os.path.getctime)
                if downloaded_filepath.endswith('.txt'):
                    break
            time.sleep(1)
            download_wait_time += 1
        # Move and rename the file if it was found
        if downloaded_filepath and downloaded_filepath.endswith('.txt'):
            original_filename = os.path.basename(downloaded_filepath)
            # Sanitize the original filename (no path separators)
            sanitized_filename = original_filename.replace('/', '-').replace('\\', '-')
            new_filename = os.path.join(save_dir, f"{rank}-{total_decks}-{sanitized_filename}")
            os.rename(downloaded_filepath, new_filename)
            print(f"Downloaded deck {rank}/{total_decks}: {new_filename}")
        else:
            print(f"Failed to download deck {rank}: Download timed out.")
    except Exception as e:
        print(f"Error downloading deck {rank}: {e}")
def main():
    """Download every decklist of every tournament dated on/after the threshold."""
    # Base folder that will hold one sub-directory per tournament.
    base_download_dir = os.path.join(os.getcwd(), 'downloads')
    if not os.path.exists(base_download_dir):
        os.makedirs(base_download_dir)
        print(f"Created base download directory at {base_download_dir}")
    # Only tournaments on/after this date are processed.
    date_threshold = datetime(2024, 11, 14).date()
    print("Setting up the browser...")
    browser = setup_browser(base_download_dir)  # initial session, base dir
    try:
        tournaments = get_tournaments_after_date(browser, date_threshold)
        for tournament_url, tournament_name, tournament_date in tournaments:
            print(f"\nProcessing tournament: {tournament_name} dated {tournament_date}")
            # One directory per tournament (sanitized name).
            tournament_dir = os.path.join(
                base_download_dir, tournament_name.replace('/', '-'))
            if not os.path.exists(tournament_dir):
                os.makedirs(tournament_dir)
                print(f"Created tournament directory at {tournament_dir}")
            # Firefox's download directory is fixed per session, so restart
            # the browser pointed at the tournament-specific directory.
            browser.quit()
            browser = setup_browser(tournament_dir)
            print(f"Navigating to the tournament page {tournament_url}...")
            browser.get(tournament_url)
            time.sleep(3)  # let the page load
            # Retrieve tournament info (this also collects the deck links).
            tournament_name, deck_links = get_tournament_info(browser)
            total_decks = len(deck_links)
            for idx, (deck_url, deck_name) in enumerate(deck_links, start=1):
                print(f"\nProcessing deck {idx}/{total_decks}: {deck_name}")
                download_deck(browser, deck_url, idx, total_decks, tournament_dir)
    finally:
        print("Closing the browser...")
        browser.quit()


if __name__ == '__main__':
    main()

318
final_lda_Qda.R Normal file
View File

@ -0,0 +1,318 @@
library(olsrr)
library(MASS)
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)
library(rpart)
library(randomForest)
library(pheatmap)
library(viridis)
# Load the dataset
data1 <- read.csv("deck_data.csv")
# Normalized finish: rank as a percentage of the field size.
data1$y <- (data1$Rank / data1$TournamentSize) * 100
# Convert y to a categorical Performance variable.
# NOTE(review): y <= median (i.e. the BETTER finishes) is labelled "Low" —
# confirm the intended direction of the label before interpreting results.
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
data1$Performance <- as.factor(data1$Performance)
# Split the data into training and test sets (70/30)
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(data1), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
# Fit per-commander models (decision tree, LDA, QDA, random forest) on decks
# sharing the given deck's commander (and partner), then use the tree's
# variable importance to suggest cards to add/remove. Returns NULL when fewer
# than 10 matching training decks exist; otherwise returns a list with the
# tree's predicted standing, add/remove suggestions, and the training
# confusion matrix.
recommend_cards <- function(decklist_row, train_data, test_data) {
  # Metadata columns — everything NOT in this list is a card-presence column.
  non_card_columns <- c(
    "Rank", "TournamentSize", "Commander", "Partner",
    "Num_Artifacts", "Num_Creatures", "Num_Enchantments",
    "Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
    "Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
    "Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
    "Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
    "Num_Boardwipe", "y", "Performance"
  )
  commander_input <- as.character(decklist_row$Commander)
  partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))
  # Restrict train/test to decks with the same commander (and partner, if any).
  if (partner_input == "") {
    subset_train_data <- subset(train_data, Commander == commander_input)
    subset_test_data <- subset(test_data, Commander == commander_input)
  } else {
    subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
    subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
  }
  if (nrow(subset_train_data) < 10) {
    warning("Not enough data to build a reliable model.")
    return(NULL)
  }
  # Get card columns
  card_columns <- setdiff(names(train_data), non_card_columns)
  predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
  response_train <- subset_train_data$Performance
  # Remove near zero variance predictors before LDA/QDA
  nzv <- nearZeroVar(predictors_train_full)
  if (length(nzv) > 0) {
    predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
  } else {
    predictors_train <- predictors_train_full
  }
  # Update card_columns to reflect the removed predictors
  filtered_card_columns <- colnames(predictors_train)
  # Check if we have at least two classes in response_train (for LDA/QDA)
  run_lda_qda <- length(unique(response_train)) > 1
  # Build Decision Tree
  dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")
  # Try LDA (may fail, e.g. on collinear predictors — keep NULL on failure)
  lda_model <- NULL
  if (run_lda_qda) {
    lda_model <- tryCatch(
      {
        lda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("LDA failed: ", e$message)
        NULL
      }
    )
  } else {
    warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
  }
  # Try QDA (same failure handling as LDA)
  qda_model <- NULL
  if (run_lda_qda) {
    qda_model <- tryCatch(
      {
        qda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("QDA failed: ", e$message)
        NULL
      }
    )
  }
  # Build Random Forest
  rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))
  # Evaluate models on test data if available
  if (nrow(subset_test_data) > 0) {
    # Subset the test data to the same filtered predictors
    predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
    response_test <- subset_test_data$Performance
    # Predictions (Decision Tree always available)
    dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
    cat("\nDecision Tree Confusion Matrix:\n")
    print(confusionMatrix(dt_predictions, response_test))
    if (!is.null(lda_model)) {
      lda_predictions <- predict(lda_model, newdata = predictors_test)$class
      cat("\nLDA Confusion Matrix:\n")
      print(confusionMatrix(lda_predictions, response_test))
    }
    if (!is.null(qda_model)) {
      qda_predictions <- predict(qda_model, newdata = predictors_test)$class
      cat("\nQDA Confusion Matrix:\n")
      print(confusionMatrix(qda_predictions, response_test))
    }
    rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
    cat("\nRandom Forest Confusion Matrix:\n")
    print(confusionMatrix(rf_predictions, response_test))
  } else {
    warning("No test data available for this commander.")
  }
  # Plot the decision tree
  rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
             main = paste("Decision Tree for Commander:", commander_input))
  # Predict performance for the given decklist using the decision tree;
  # apply the same filtered predictor set as used in training.
  predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
  predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")
  cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
  print(predicted_performance_dt)
  # If variable importance is available from decision tree
  if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
    importance <- data.frame(
      card = names(dt_model$variable.importance),
      importance = dt_model$variable.importance,
      row.names = NULL
    )
    importance <- importance[order(-importance$importance), ]
    # Cards present in the given decklist (presence flag == 1)
    card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
    indices <- which(card_values == 1)
    deck_cards <- names(card_values)[indices]
    # Highest-importance cards NOT already in the deck -> suggestions to add
    cards_to_add <- setdiff(importance$card, deck_cards)
    top_cards_to_add <- head(cards_to_add, 5)
    # NOTE(review): non_important_cards is deck_cards MINUS importance$card,
    # so the %in% test below is always FALSE and every score is 0 — the
    # ordering is effectively arbitrary. Confirm whether the intent was to
    # rank ALL deck cards by importance and drop the lowest-scoring ones.
    non_important_cards <- setdiff(deck_cards, importance$card)
    non_important_with_scores <- data.frame(
      card = non_important_cards,
      importance = ifelse(non_important_cards %in% importance$card,
                          importance$importance[match(non_important_cards, importance$card)], 0)
    )
    non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
    top_cards_to_remove <- head(non_important_with_scores$card, 5)
  } else {
    warning("No variable importance available from the Decision Tree model.")
    top_cards_to_add <- character(0)
    top_cards_to_remove <- character(0)
  }
  # Confusion matrix for training data (decision tree)
  predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
  cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
  cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
  print(cm_train_dt)
  return(list(
    predicted_standing_dt = predicted_performance_dt,
    cards_to_add = top_cards_to_add,
    cards_to_consider_removing = top_cards_to_remove,
    cm_train_dt = cm_train_dt
  ))
}
# Example usage
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)

cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)

cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)

cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)

# BUG FIX: the original printed cm_train_dt here BEFORE extracting it from
# the returned list, but that name only exists inside recommend_cards() —
# extract it from the result first, then print.
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)
# Create a high-contrast color palette using viridis
my_palette <- viridis(100)
# Plot the confusion matrix as a heatmap
pheatmap(conf_matrix,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         color = my_palette,
         fontsize_row = 8,
         fontsize_col = 8,
         angle_col = 45,
         display_numbers = TRUE,
         number_format = "%.0f",
         main = "Confusion Matrix Heatmap")
# Loop
# Identify the 5 most popular commanders by deck count in the full dataset.
# (The original comment said 10, but slice(1:5) keeps the top 5.)
top_commanders <- data1 %>%
  group_by(Commander) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice(1:5) %>%
  pull(Commander)
# Initialize a list to store results
model_results_list <- list()
# Loop over the top commanders
for (comm in top_commanders) {
  # Find a test deck for this commander (if available)
  subset_test_decks <- subset(test_data, Commander == comm)
  if (nrow(subset_test_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }
  # Just pick the first test deck for demonstration
  test_deck <- subset_test_decks[1, ]
  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)
  if (!is.null(recommendations)) {
    # Store relevant parts of the output
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}
# After the loop, model_results_list contains the outputs for each of the
# top commanders. Print a summary of the results.
# BUG FIX: the message said "Top 10" but the loop above keeps the top 5
# (slice(1:5)) — correct the count in the printed header.
cat("\nSummary of Model Results for Top 5 Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  cat("Predicted Standing (Decision Tree):", res$predicted_standing_dt, "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}
# Example usage from your code snippet (already integrated in the loop,
# but you could still do individually if desired):
# test_deck <- test_data[3, ]
# print(test_deck$Commander)
# recommendations <- recommend_cards(test_deck, train_data, test_data)
#
# cat("\nCards to Consider Adding:\n")
# print(recommendations$cards_to_add)
#
# cat("\nCards to Consider Removing:\n")
# print(recommendations$cards_to_consider_removing)
#
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
# print(recommendations$predicted_standing_dt)

266
process_decklists.py Normal file
View File

@ -0,0 +1,266 @@
import os
import re
import requests
import csv
import time # Optional: For adding delays between API requests
# Initialize variables
unique_cards = set()
card_types = {}
card_tags = {}
deck_data = []
# Tags of interest
tags_of_interest = [
'ramp', 'draw', 'tutor', 'counterspell', 'removal',
'stax', 'protection', 'boardwipe'
]
# Function to process a decklist file and separate it into deck, sideboard, and commander sections
def process_decklist_file(filepath):
    """Split a decklist text file into (deck, sideboard, commander) line lists.

    Expected layout: main-deck lines, then optionally a blank line plus a
    'SIDEBOARD:' header with its lines, then a blank line followed by the
    commander line(s). Blank lines act as section separators; returned lines
    are whitespace-stripped.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Keep every line (including blanks) but drop trailing newlines.
        raw = [entry.rstrip('\n') for entry in handle.readlines()]

    deck_cards = []
    sideboard_cards = []
    commander_cards = []
    state = 'deck'
    pos = 0
    total = len(raw)
    while pos < total:
        text = raw[pos].strip()
        if state == 'deck':
            if not text:
                # Blank line: look ahead past consecutive blanks to decide
                # whether a sideboard or the commander section follows.
                ahead = pos + 1
                while ahead < total and not raw[ahead].strip():
                    ahead += 1
                if ahead < total and raw[ahead].strip().upper() == 'SIDEBOARD:':
                    state = 'sideboard'
                    pos = ahead  # jump onto the SIDEBOARD: header line
                else:
                    state = 'commander'
                    pos += 1  # skip the blank separator
                continue
            if text.upper() == 'SIDEBOARD:':
                state = 'sideboard'
            else:
                deck_cards.append(text)
        elif state == 'sideboard':
            if not text:
                # Blank line after the sideboard starts the commander section.
                state = 'commander'
                pos += 1
                continue
            if text.upper() != 'SIDEBOARD:':
                sideboard_cards.append(text)
        else:
            # Commander section: keep every non-blank line.
            if text:
                commander_cards.append(text)
        pos += 1
    return deck_cards, sideboard_cards, commander_cards
# Function to fetch cards for a given tag from Scryfall
def fetch_cards_for_tag(tag):
    """Collect the names of every EDH-legal card carrying a Scryfall oracle tag.

    Pages through the Scryfall search API with the query 'f:edh otag:<tag>'.
    On an HTTP error the partial result gathered so far is returned.
    """
    names = set()
    next_page = 1
    while True:
        response = requests.get(
            "https://api.scryfall.com/cards/search",
            params={'q': f"f:edh otag:{tag}", 'page': next_page},
        )
        if response.status_code != 200:
            print(f"Error fetching cards for tag: {tag}")
            break
        payload = response.json()
        names.update(card['name'] for card in payload['data'])
        more = payload.get('has_more', False)
        next_page += 1
        time.sleep(0.1)  # Sleep to respect rate limits
        if not more:
            break
    return names
# Step 4: Collect Unique Card Names
# Scan every decklist file sitting next to this script and gather the set of
# distinct card names used across all decks (sideboards are skipped).
decklist_dir = os.path.dirname(os.path.abspath(__file__))
for filename in os.listdir(decklist_dir):
    # Process only decklist files matching '<rank>-<size>-<name>.txt'
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        # Collect unique cards from the deck and commanders, skip sideboard
        for line in deck_cards + commander_cards:
            # Lines look like '<quantity> <card name>'
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                unique_cards.add(card_name)
# Step 5: Retrieve Card Types Using Scryfall API
# For each unique card, fetch its type line and keep the primary types — the
# words before the em dash (e.g. 'Legendary Creature — Elf Druid' ->
# ['Legendary', 'Creature']).
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='
for card_name in unique_cards:
    encoded_name = requests.utils.quote(card_name)
    url = scryfall_api_url + encoded_name
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        types = data['type_line']
        # BUG FIX: the original called types.split(''), which raises
        # "ValueError: empty separator" on the first card. Scryfall separates
        # card types from subtypes with an em dash; split on that instead.
        main_types = types.split('—')[0].strip()
        type_list = [t.strip() for t in main_types.split()]
        card_types[card_name] = type_list
    else:
        print(f"Error fetching data for card: {card_name}")
        card_types[card_name] = []
    # Optional: Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds
# Step 5b: Build Card Tags Mapping
# Map each unique card name to the list of interest tags it carries.
# Initialize card_tags dictionary (every card starts with no tags)
card_tags = {card_name: [] for card_name in unique_cards}
# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        if card_name in card_tags:
            card_tags[card_name].append(tag.capitalize())
        else:
            # Handle cases where card name variations exist:
            # fall back to a case-insensitive match against collected names.
            for unique_card in unique_cards:
                if card_name.lower() == unique_card.lower():
                    card_tags[unique_card].append(tag.capitalize())
                    break
# Step 6: Process Each Decklist to Collect Deck Data
# Re-read every decklist and build one summary row per deck: rank/size parsed
# from the filename, per-type card counts, per-tag unique-card counts,
# commander/partner names, and the card->quantity map.
for filename in os.listdir(decklist_dir):
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        # Filenames look like '<rank>-<tournament size>-<deck name>.txt'
        match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
        if match:
            rank = int(match.group(1))
            tournament_size = int(match.group(2))
        else:
            rank = None
            tournament_size = None
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        deck = {}  # card name -> quantity for this deck
        commander = None
        partner = None
        num_artifacts = num_creatures = num_enchantments = 0
        num_instants = num_sorceries = num_planeswalkers = 0
        num_lands = 0
        # Initialize tag card sets to store unique cards per tag
        tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}
        # Process main deck cards
        for line in deck_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                deck[card_name] = quantity
                types = card_types.get(card_name, [])
                # A card may count toward several types (e.g. artifact creature)
                if 'Artifact' in types:
                    num_artifacts += quantity
                if 'Creature' in types:
                    num_creatures += quantity
                if 'Enchantment' in types:
                    num_enchantments += quantity
                if 'Instant' in types:
                    num_instants += quantity
                if 'Sorcery' in types:
                    num_sorceries += quantity
                if 'Planeswalker' in types:
                    num_planeswalkers += quantity
                if 'Land' in types:
                    num_lands += quantity
                # Process tags
                tags = card_tags.get(card_name, [])
                for tag in tags:
                    if tag in tag_card_sets:
                        tag_card_sets[tag].add(card_name)
        # Process commander cards (first line = commander, second = partner)
        for idx, line in enumerate(commander_cards):
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                card_name = match.group(2)
                if idx == 0:
                    commander = card_name
                elif idx == 1:
                    partner = card_name
        # Compute tag counts as the number of unique cards per tag
        tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}
        deck_row = {
            'rank': rank,
            'tournament_size': tournament_size,
            'commander': commander,
            'partner': partner,
            'num_artifacts': num_artifacts,
            'num_creatures': num_creatures,
            'num_enchantments': num_enchantments,
            'num_instants': num_instants,
            'num_sorceries': num_sorceries,
            'num_planeswalkers': num_planeswalkers,
            'num_lands': num_lands,
            'tag_counts': tag_counts,
            'deck': deck
        }
        deck_data.append(deck_row)
# Step 7: Prepare the CSV Header
# Fixed metadata columns first...
header = ['Rank', 'TournamentSize', 'Commander', 'Partner', 'Num_Artifacts', 'Num_Creatures',
          'Num_Enchantments', 'Num_Instants', 'Num_Sorceries',
          'Num_Planeswalkers', 'Num_Lands']
# ...then one count column per tag of interest...
header.extend(['Num_' + tag.capitalize() for tag in tags_of_interest])
# ...then one presence column per unique card (sorted for a stable order).
sorted_cards = sorted(unique_cards)
header.extend(sorted_cards)
# Step 8: Write Data to CSV File
# One row per deck: metadata, tag counts, then 0/1 card-presence flags, in
# the same column order as the header built in Step 7.
with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [
            deck['rank'],
            deck['tournament_size'],
            deck['commander'],
            deck['partner'],
            deck['num_artifacts'],
            deck['num_creatures'],
            deck['num_enchantments'],
            deck['num_instants'],
            deck['num_sorceries'],
            deck['num_planeswalkers'],
            deck['num_lands']
        ]
        # Add tag counts
        for tag in tags_of_interest:
            row.append(deck['tag_counts'][tag.capitalize()])
        # Add card presence (1 or 0)
        for card_name in sorted_cards:
            row.append(1 if card_name in deck['deck'] else 0)
            # If you prefer to include quantities, use:
            # row.append(deck['deck'].get(card_name, 0))
        writer.writerow(row)