Initial commit: Organized project code and documentation for public release.
This commit is contained in:
commit
e673f32d31
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
.Rproj.user
|
||||
.Rhistory
|
||||
.RData
|
||||
.Ruserdata
|
||||
13
USCLAP.Rproj
Normal file
13
USCLAP.Rproj
Normal file
@ -0,0 +1,13 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
1871
deck_data.csv
Normal file
1871
deck_data.csv
Normal file
File diff suppressed because one or more lines are too long
206
download_tournament.py
Normal file
206
download_tournament.py
Normal file
@ -0,0 +1,206 @@
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from datetime import datetime
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
from selenium.webdriver.firefox.service import Service
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
def setup_browser(download_dir, geckodriver_path='/usr/local/bin/geckodriver'):
    """Create a Firefox WebDriver that saves downloads without prompting.

    Parameters:
        download_dir: directory Firefox writes downloads into.
        geckodriver_path: path to the geckodriver binary (default keeps the
            previous hard-coded '/usr/local/bin/geckodriver').

    Returns:
        A configured selenium.webdriver.Firefox; the caller owns quit().
    """
    firefox_options = Options()
    # folderList=2 makes Firefox use the custom directory set below.
    firefox_options.set_preference('browser.download.folderList', 2)
    firefox_options.set_preference('browser.download.dir', download_dir)  # Use the tournament-specific directory
    # Save these MIME types straight to disk without a dialog.
    firefox_options.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/plain,text/csv,application/octet-stream')
    firefox_options.set_preference('pdfjs.disabled', True)
    # Suppress every download-manager window/alert so automation is never
    # blocked by UI chrome.
    for pref_name in (
        'browser.download.manager.showWhenStarting',
        'browser.download.manager.useWindow',
        'browser.download.manager.focusWhenStarting',
        'browser.download.manager.alertOnEXEOpen',
        'browser.download.manager.showAlertOnComplete',
        'browser.download.manager.closeWhenDone',
    ):
        firefox_options.set_preference(pref_name, False)

    # Specify the path to geckodriver
    gecko_service = Service(executable_path=geckodriver_path)

    browser = webdriver.Firefox(service=gecko_service, options=firefox_options)
    return browser
|
||||
|
||||
def parse_date(date_str):
    """Parse a date like 'November 14th 2024' into a datetime.date.

    Ordinal suffixes (st/nd/rd/th) are stripped before parsing with the
    '%B %d %Y' format. Returns None when parsing fails.
    """
    cleaned = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str)
    try:
        parsed = datetime.strptime(cleaned, '%B %d %Y').date()
    except ValueError as e:
        print(f"Error parsing date '{cleaned}': {e}")
        return None
    print(f"Parsed date: {parsed}")
    return parsed
|
||||
|
||||
def get_tournaments_after_date(browser, date_threshold):
    """Scrape edhtop16.com's tournament listing and keep recent events.

    Parameters:
        browser: an already-initialized Selenium WebDriver.
        date_threshold: a datetime.date; only tournaments dated on or
            after it are kept.

    Returns:
        A list of (url, name, date) tuples for matching tournaments.
    """
    tournaments = []

    print("Navigating to the tournaments page...")
    tournaments_page_url = 'https://edhtop16.com/tournaments?sortBy=DATE'
    browser.get(tournaments_page_url)

    # Fixed sleep instead of an explicit wait; assumes the listing renders
    # within 3 seconds -- TODO confirm this is reliable on slow connections.
    time.sleep(3)

    # Find all tournament entries
    # NOTE(review): these CSS classes mirror the site's current markup and
    # will silently match nothing if the site is redesigned.
    tournament_entries = browser.find_elements(By.CSS_SELECTOR, "div.group.relative.overflow-hidden.rounded-lg.bg-white.shadow")
    print(f"Found {len(tournament_entries)} tournaments on the page.")

    for entry in tournament_entries:
        try:
            # Extract tournament link, name, and date
            link_element = entry.find_element(By.CSS_SELECTOR, 'a.line-clamp-2.text-xl.font-bold.underline')
            tournament_name = link_element.text
            tournament_url = link_element.get_attribute('href')

            # The first <span> inside the entry is assumed to hold the
            # date text -- TODO confirm against the live page.
            date_element = entry.find_element(By.CSS_SELECTOR, 'span')
            tournament_date_str = date_element.text
            print(f"Tournament found: {tournament_name}, Date: {tournament_date_str}, URL: {tournament_url}")

            # Parse the date string
            tournament_date = parse_date(tournament_date_str)
            if tournament_date and tournament_date >= date_threshold:
                tournaments.append((tournament_url, tournament_name, tournament_date))
        except Exception as e:
            # Best-effort: a malformed entry is logged and skipped so one
            # bad card on the page does not abort the whole listing.
            print(f"Error processing tournament entry: {e}")

    print(f"Total tournaments after {date_threshold}: {len(tournaments)}")
    return tournaments
|
||||
|
||||
def get_tournament_info(browser):
    """Read the tournament title and collect (url, name) pairs for its decks.

    Assumes the browser is already on a tournament page. Falls back to the
    generic name "Tournament" when no <h1> heading can be read.
    """
    print("Retrieving the tournament name and deck links...")
    time.sleep(3)  # Wait for the page to load

    # Get the tournament name
    try:
        heading = browser.find_element(By.TAG_NAME, 'h1')
        tournament_name = heading.text if heading else "Tournament"
    except Exception as e:
        print(f"Error retrieving tournament name: {e}")
        tournament_name = "Tournament"

    tournament_name = tournament_name.replace('/', '-')  # Replace invalid filename characters
    print(f"Tournament Name: {tournament_name}")

    # Get the list of decks
    anchors = browser.find_elements(By.CSS_SELECTOR, "a.line-clamp-2.text-xl.font-bold.underline")
    deck_links = [(anchor.get_attribute('href'), anchor.text) for anchor in anchors]

    print(f"Total decks found: {len(deck_links)}")
    return tournament_name, deck_links
|
||||
|
||||
def download_deck(browser, deck_url, rank, total_decks, save_dir):
    """Export one deck as an MTGO .txt file via the deck page's export UI.

    Clicks More -> Export -> 'Download for MTGO', polls save_dir for up to
    30 seconds for the .txt to appear, then renames it to
    '<rank>-<total_decks>-<original name>'. All errors are logged, never
    raised, so one failing deck does not stop the run.
    """
    print(f"Navigating to deck {rank} page...")
    browser.get(deck_url)

    try:
        # Wait for the "More" button and click it
        print("Waiting for the 'More' button to appear...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'More')]/..")))
        more_button = browser.find_element(By.XPATH, "//span[contains(text(), 'More')]/..")
        # JS click avoids interception issues that a native .click() can hit.
        browser.execute_script("arguments[0].click();", more_button)
        time.sleep(1)

        # Wait for and click the "Export" option within the dropdown
        print("Waiting for the 'Export' option...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")))
        export_option = browser.find_element(By.XPATH, "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]")
        browser.execute_script("arguments[0].click();", export_option)
        time.sleep(1)

        # Wait for and click the "Download for MTGO" link
        print("Clicking 'Download for MTGO' link...")
        WebDriverWait(browser, 10).until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")))
        mtgo_download_link = browser.find_element(By.XPATH, "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]")
        mtgo_download_link.click()
        time.sleep(2)  # Wait for the download to initiate

        # Wait for the downloaded file to appear in the download directory
        print("Waiting for the file to download...")
        download_wait_time = 0
        downloaded_filepath = None

        # Check the download directory for a new file (poll once per second,
        # up to 30 seconds).
        while download_wait_time < 30:
            files = os.listdir(save_dir)
            if files:
                # Find the most recent file in the directory
                # NOTE(review): assumes save_dir only receives this run's
                # downloads -- a pre-existing newer file would be picked up.
                downloaded_filepath = max([os.path.join(save_dir, f) for f in files], key=os.path.getctime)
                if downloaded_filepath.endswith('.txt'):
                    break
            time.sleep(1)
            download_wait_time += 1

        # Move and rename the file if it was found
        if downloaded_filepath and downloaded_filepath.endswith('.txt'):
            original_filename = os.path.basename(downloaded_filepath)
            # Sanitize the original filename
            sanitized_filename = original_filename.replace('/', '-').replace('\\', '-')
            new_filename = os.path.join(save_dir, f"{rank}-{total_decks}-{sanitized_filename}")
            os.rename(downloaded_filepath, new_filename)
            print(f"Downloaded deck {rank}/{total_decks}: {new_filename}")
        else:
            print(f"Failed to download deck {rank}: Download timed out.")

    except Exception as e:
        print(f"Error downloading deck {rank}: {e}")
|
||||
|
||||
def main():
    """Download every deck from each edhtop16 tournament on/after the threshold.

    Decks are saved as MTGO .txt exports under downloads/<tournament name>/.
    """
    # Set up the base download directory
    base_download_dir = os.path.join(os.getcwd(), 'downloads')
    if not os.path.exists(base_download_dir):
        os.makedirs(base_download_dir)
        print(f"Created base download directory at {base_download_dir}")

    # Only tournaments dated on or after this date are processed.
    date_threshold = datetime(2024, 11, 14).date()

    # Initialize the browser
    print("Setting up the browser...")
    browser = setup_browser(base_download_dir)  # Initial browser setup
    try:
        # Retrieve list of tournaments after the date threshold
        tournaments = get_tournaments_after_date(browser, date_threshold)

        for tournament_url, tournament_name, tournament_date in tournaments:
            print(f"\nProcessing tournament: {tournament_name} dated {tournament_date}")

            # Create a specific directory for the tournament
            tournament_dir = os.path.join(base_download_dir, tournament_name.replace('/', '-'))
            if not os.path.exists(tournament_dir):
                os.makedirs(tournament_dir)
                print(f"Created tournament directory at {tournament_dir}")

            # Reconfigure the browser to use the tournament directory.
            # The download dir is set via preferences at startup, hence the
            # restart of the whole browser per tournament.
            browser.quit()
            browser = setup_browser(tournament_dir)  # Reinitialize with tournament-specific directory

            # Navigate to the tournament page
            print(f"Navigating to the tournament page {tournament_url}...")
            browser.get(tournament_url)
            time.sleep(3)  # Wait for the page to load

            # Retrieve tournament info (this will get the decks)
            tournament_name, deck_links = get_tournament_info(browser)

            # Download each deck
            total_decks = len(deck_links)
            for idx, (deck_url, deck_name) in enumerate(deck_links, start=1):
                print(f"\nProcessing deck {idx}/{total_decks}: {deck_name}")
                download_deck(browser, deck_url, idx, total_decks, tournament_dir)

    finally:
        print("Closing the browser...")
        browser.quit()
|
||||
|
||||
if __name__ == '__main__':
    # Run the downloader only when executed as a script.
    main()
|
||||
318
final_lda_Qda.R
Normal file
318
final_lda_Qda.R
Normal file
@ -0,0 +1,318 @@
|
||||
# Modeling, plotting, and data-wrangling dependencies.
library(olsrr)
library(MASS)          # lda() / qda()
library(rpart.plot)
library(ggfortify)
library(ggplot2)
library(tidyverse)
library(car)
library(Rcpp)
library(GGally)
library(leaps)
library(dplyr)
library(caret)         # confusionMatrix(), nearZeroVar()
library(rpart)
library(randomForest)

library(pheatmap)
library(viridis)
# Load the dataset
data1 <- read.csv("deck_data.csv")
# Normalized standing: rank as a percentage of tournament size
# (smaller y = better finish).
data1$y <- (data1$Rank / data1$TournamentSize) * 100

# Convert y to a categorical Performance variable
# NOTE(review): decks at or below the median y (i.e. the BETTER finishers)
# are labeled "Low" -- the label tracks the y value, not the quality of the
# finish; confirm downstream interpretation.
data1$Performance <- ifelse(data1$y <= median(data1$y), "Low", "High")
data1$Performance <- as.factor(data1$Performance)

# Split the data into training and test sets (70/30)
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(data1), size = 0.7 * nrow(data1))
train_data <- data1[train_indices, ]
test_data <- data1[-train_indices, ]
|
||||
|
||||
recommend_cards <- function(decklist_row, train_data, test_data) {
  # Build per-commander classifiers (decision tree, LDA, QDA, random forest)
  # on 0/1 card-presence columns, then use the decision tree's variable
  # importance to suggest cards to add/remove for the given decklist.
  #
  # Args:
  #   decklist_row: one row of the deck data (the deck to advise on).
  #   train_data, test_data: the global train/test split.
  # Returns:
  #   list(predicted_standing_dt, cards_to_add, cards_to_consider_removing,
  #   cm_train_dt), or NULL when fewer than 10 training decks exist for the
  #   commander (+ partner) combination.

  # Non-card columns: metadata and engineered features; every remaining
  # column is treated as a card-presence predictor.
  non_card_columns <- c(
    "Rank", "TournamentSize", "Commander", "Partner",
    "Num_Artifacts", "Num_Creatures", "Num_Enchantments",
    "Num_Instants", "Num_Sorceries", "Num_Planeswalkers",
    "Num_Lands", "Average_Mana_Value", "Average_Mana_Value_Excl",
    "Num_Free_Spells", "Num_Ramp", "Num_Draw", "Num_Tutor",
    "Num_Counterspell", "Num_Removal", "Num_Stax", "Num_Protection",
    "Num_Boardwipe", "y", "Performance"
  )

  commander_input <- as.character(decklist_row$Commander)
  partner_input <- ifelse(is.na(decklist_row$Partner), "", as.character(decklist_row$Partner))

  # Restrict train/test to decks sharing this commander (and partner if any).
  if (partner_input == "") {
    subset_train_data <- subset(train_data, Commander == commander_input)
    subset_test_data <- subset(test_data, Commander == commander_input)
  } else {
    subset_train_data <- subset(train_data, Commander == commander_input & Partner == partner_input)
    subset_test_data <- subset(test_data, Commander == commander_input & Partner == partner_input)
  }

  if (nrow(subset_train_data) < 10) {
    warning("Not enough data to build a reliable model.")
    return(NULL)
  }

  # Get card columns
  card_columns <- setdiff(names(train_data), non_card_columns)

  predictors_train_full <- subset_train_data[, card_columns, drop = FALSE]
  response_train <- subset_train_data$Performance

  # Remove near zero variance predictors before LDA/QDA
  # (cards almost always present or absent carry no signal and break QDA).
  nzv <- nearZeroVar(predictors_train_full)
  if (length(nzv) > 0) {
    predictors_train <- predictors_train_full[, -nzv, drop = FALSE]
  } else {
    predictors_train <- predictors_train_full
  }

  # Update card_columns to reflect the removed predictors
  filtered_card_columns <- colnames(predictors_train)

  # Check if we have at least two classes in response_train (for LDA/QDA)
  run_lda_qda <- length(unique(response_train)) > 1

  # Build Decision Tree
  dt_model <- rpart(response_train ~ ., data = data.frame(response_train, predictors_train), method = "class")

  # Try LDA (may fail on collinear/degenerate predictors; warn and continue)
  lda_model <- NULL
  if (run_lda_qda) {
    lda_model <- tryCatch(
      {
        lda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("LDA failed: ", e$message)
        NULL
      }
    )
  } else {
    warning("Only one class in the training data for this commander. Cannot run LDA/QDA.")
  }

  # Try QDA (same failure handling as LDA)
  qda_model <- NULL
  if (run_lda_qda) {
    qda_model <- tryCatch(
      {
        qda(response_train ~ ., data = data.frame(response_train, predictors_train))
      },
      error = function(e) {
        warning("QDA failed: ", e$message)
        NULL
      }
    )
  }

  # Build Random Forest
  rf_model <- randomForest(response_train ~ ., data = data.frame(response_train, predictors_train))

  # Evaluate models on test data if available
  if (nrow(subset_test_data) > 0) {
    # Subset the test data to the same filtered predictors
    predictors_test <- subset_test_data[, filtered_card_columns, drop = FALSE]
    response_test <- subset_test_data$Performance

    # Predictions (Decision Tree always available)
    dt_predictions <- predict(dt_model, newdata = predictors_test, type = "class")
    cat("\nDecision Tree Confusion Matrix:\n")
    print(confusionMatrix(dt_predictions, response_test))

    if (!is.null(lda_model)) {
      lda_predictions <- predict(lda_model, newdata = predictors_test)$class
      cat("\nLDA Confusion Matrix:\n")
      print(confusionMatrix(lda_predictions, response_test))
    }

    if (!is.null(qda_model)) {
      qda_predictions <- predict(qda_model, newdata = predictors_test)$class
      cat("\nQDA Confusion Matrix:\n")
      print(confusionMatrix(qda_predictions, response_test))
    }

    rf_predictions <- predict(rf_model, newdata = predictors_test, type = "class")
    cat("\nRandom Forest Confusion Matrix:\n")
    print(confusionMatrix(rf_predictions, response_test))

  } else {
    warning("No test data available for this commander.")
  }

  # Plot the decision tree
  rpart.plot(dt_model, type = 3, extra = 101, under = TRUE, fallen.leaves = TRUE,
             main = paste("Decision Tree for Commander:", commander_input))

  # Predict performance for the given decklist using the decision tree (example)
  # We must also apply the same filtered predictors to the decklist
  predictors_decklist <- decklist_row[, filtered_card_columns, drop = FALSE]
  predicted_performance_dt <- predict(dt_model, newdata = predictors_decklist, type = "class")

  cat("\nPredicted Deck's Standing (Decision Tree) for the Given Decklist:\n")
  print(predicted_performance_dt)

  # If variable importance is available from decision tree
  if (!is.null(dt_model$variable.importance) && length(dt_model$variable.importance) > 0) {
    importance <- data.frame(
      card = names(dt_model$variable.importance),
      importance = dt_model$variable.importance,
      row.names = NULL
    )

    # Sort cards by descending importance.
    importance <- importance[order(-importance$importance), ]

    # Cards currently in the deck (presence columns equal to 1).
    card_values <- decklist_row[, filtered_card_columns, drop = FALSE]
    indices <- which(card_values == 1)
    deck_cards <- names(card_values)[indices]

    # Suggest the 5 most important cards the deck does not already run.
    cards_to_add <- setdiff(importance$card, deck_cards)
    top_cards_to_add <- head(cards_to_add, 5)

    # Cards in the deck with no recorded importance are removal candidates.
    # NOTE(review): by construction non_important_cards are absent from
    # importance$card, so the ifelse below always yields 0 -- confirm intent.
    non_important_cards <- setdiff(deck_cards, importance$card)
    non_important_with_scores <- data.frame(
      card = non_important_cards,
      importance = ifelse(non_important_cards %in% importance$card,
                          importance$importance[match(non_important_cards, importance$card)], 0)
    )
    non_important_with_scores <- non_important_with_scores[order(non_important_with_scores$importance), ]
    top_cards_to_remove <- head(non_important_with_scores$card, 5)
  } else {
    warning("No variable importance available from the Decision Tree model.")
    top_cards_to_add <- character(0)
    top_cards_to_remove <- character(0)
  }

  # Confusion matrix for training data (decision tree)
  predictions_train_dt <- predict(dt_model, newdata = predictors_train, type = "class")
  cm_train_dt <- confusionMatrix(predictions_train_dt, response_train)
  cat("\nConfusion Matrix for Training Data (Decision Tree, Same Commander):\n")
  print(cm_train_dt)

  return(list(
    predicted_standing_dt = predicted_performance_dt,
    cards_to_add = top_cards_to_add,
    cards_to_consider_removing = top_cards_to_remove,
    cm_train_dt = cm_train_dt
  ))
}
|
||||
|
||||
# Example usage: advise on one test deck and visualize its training
# confusion matrix as a heatmap.
test_deck <- test_data[3, ]
print(test_deck$Commander)
recommendations <- recommend_cards(test_deck, train_data, test_data)

cat("\nCards to Consider Adding:\n")
print(recommendations$cards_to_add)

cat("\nCards to Consider Removing:\n")
print(recommendations$cards_to_consider_removing)

cat("\nPredicted Deck's Standing (Decision Tree):\n")
print(recommendations$predicted_standing_dt)

# Extract the confusion matrix from your model
# BUG FIX: cm_train_dt was previously printed BEFORE it was assigned in
# this scope (it only existed inside recommend_cards); extract it from the
# returned list first, then print.
cm_train_dt <- recommendations$cm_train_dt
print(cm_train_dt)
conf_matrix <- as.matrix(cm_train_dt$table)

# Create a high-contrast color palette using viridis
my_palette <- viridis(100)

# Plot the heatmap
pheatmap(conf_matrix,
         cluster_rows = FALSE,
         cluster_cols = FALSE,
         color = my_palette,
         fontsize_row = 8,
         fontsize_col = 8,
         angle_col = 45,
         display_numbers = TRUE,
         number_format = "%.0f",
         main = "Confusion Matrix Heatmap")
|
||||
|
||||
|
||||
# Loop

# Identify the most popular commanders (slice(1:5) keeps the top 5;
# earlier comments said 10).
top_commanders <- data1 %>%
  group_by(Commander) %>%
  summarize(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice(1:5) %>%
  pull(Commander)

# Initialize a list to store results
model_results_list <- list()

# Loop over the top commanders
for (comm in top_commanders) {
  # Find a test deck for this commander (if available)
  subset_test_decks <- subset(test_data, Commander == comm)

  if (nrow(subset_test_decks) == 0) {
    cat("\nNo test deck available for commander:", comm, "\n")
    next
  }

  # Just pick the first test deck for demonstration
  test_deck <- subset_test_decks[1, ]

  cat("\nRunning models for commander:", comm, "\n")
  recommendations <- recommend_cards(test_deck, train_data, test_data)

  if (!is.null(recommendations)) {
    # Store relevant parts of the output
    model_results_list[[comm]] <- list(
      commander = comm,
      predicted_standing_dt = recommendations$predicted_standing_dt,
      cards_to_add = recommendations$cards_to_add,
      cards_to_consider_removing = recommendations$cards_to_consider_removing
    )
  }
}

# After the loop, model_results_list contains the outputs for each commander
# Print summary of results
cat("\nSummary of Model Results for Top 10 Commanders:\n")
for (comm in names(model_results_list)) {
  res <- model_results_list[[comm]]
  cat("\nCommander:", res$commander, "\n")
  cat("Predicted Standing (Decision Tree):", res$predicted_standing_dt, "\n")
  cat("Top Cards to Add:", paste(res$cards_to_add, collapse = ", "), "\n")
  cat("Top Cards to Remove:", paste(res$cards_to_consider_removing, collapse = ", "), "\n")
}
|
||||
|
||||
# Example usage from your code snippet (already integrated in the loop,
|
||||
# but you could still do individually if desired):
|
||||
# test_deck <- test_data[3, ]
|
||||
# print(test_deck$Commander)
|
||||
# recommendations <- recommend_cards(test_deck, train_data, test_data)
|
||||
#
|
||||
# cat("\nCards to Consider Adding:\n")
|
||||
# print(recommendations$cards_to_add)
|
||||
#
|
||||
# cat("\nCards to Consider Removing:\n")
|
||||
# print(recommendations$cards_to_consider_removing)
|
||||
#
|
||||
# cat("\nPredicted Deck's Standing (Decision Tree):\n")
|
||||
# print(recommendations$predicted_standing_dt)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
266
process_decklists.py
Normal file
266
process_decklists.py
Normal file
@ -0,0 +1,266 @@
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import csv
|
||||
import time # Optional: For adding delays between API requests
|
||||
|
||||
# Initialize variables
unique_cards = set()   # every distinct card name seen across all decklists
card_types = {}        # card name -> list of main type words (e.g. ['Legendary', 'Creature'])
card_tags = {}         # card name -> list of capitalized functional tags
deck_data = []         # one dict per processed decklist

# Tags of interest (Scryfall 'otag' functional categories)
tags_of_interest = [
    'ramp', 'draw', 'tutor', 'counterspell', 'removal',
    'stax', 'protection', 'boardwipe'
]
|
||||
|
||||
def process_decklist_file(filepath):
    """Split a decklist text file into deck, sideboard, and commander lines.

    Expected layout: main-deck lines, then an optional 'SIDEBOARD:' section,
    then a blank-line-separated commander section at the end. Returns three
    lists of stripped card lines (e.g. '1 Sol Ring').
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        # Keep all lines (including blanks, which are section separators),
        # dropping only the trailing newline.
        raw_lines = [ln.rstrip('\n') for ln in handle]

    deck_cards = []
    sideboard_cards = []
    commander_cards = []

    section = 'deck'
    idx = 0
    total = len(raw_lines)
    while idx < total:
        current = raw_lines[idx].strip()

        if section == 'deck':
            if current == '':
                # A blank line ends the deck; peek ahead to see whether an
                # explicit sideboard header or the commanders come next.
                peek = idx + 1
                while peek < total and raw_lines[peek].strip() == '':
                    peek += 1
                if peek < total and raw_lines[peek].strip().upper() == 'SIDEBOARD:':
                    section = 'sideboard'
                    idx = peek  # land on the 'SIDEBOARD:' header line
                else:
                    section = 'commander'
                    idx += 1  # step past the blank separator
                    continue
            elif current.upper() == 'SIDEBOARD:':
                section = 'sideboard'
            else:
                deck_cards.append(current)
        elif section == 'sideboard':
            if current == '':
                # Blank line ends the sideboard; commanders follow.
                section = 'commander'
                idx += 1
                continue
            elif current.upper() == 'SIDEBOARD:':
                pass  # repeated header inside the sideboard; ignore it
            else:
                sideboard_cards.append(current)
        elif section == 'commander':
            # Commander section: keep every non-empty line.
            if current != '':
                commander_cards.append(current)
        idx += 1

    return deck_cards, sideboard_cards, commander_cards
|
||||
|
||||
def fetch_cards_for_tag(tag):
    """Return the set of card names Scryfall associates with a functional tag.

    Pages through the /cards/search API while 'has_more' is true; on any
    non-200 response the partial result collected so far is returned.
    """
    found = set()
    page_number = 1
    while True:
        response = requests.get(
            "https://api.scryfall.com/cards/search",
            params={'q': f"f:edh otag:{tag}", 'page': page_number},
        )
        if response.status_code != 200:
            print(f"Error fetching cards for tag: {tag}")
            break
        payload = response.json()
        found.update(card['name'] for card in payload['data'])
        page_number += 1
        time.sleep(0.1)  # Sleep to respect rate limits
        if not payload.get('has_more', False):
            break
    return found
|
||||
|
||||
# Step 4: Collect Unique Card Names
# Decklists live alongside this script, named '<rank>-<size>-<name>.txt'
# (the naming produced by download_tournament.py).
decklist_dir = os.path.dirname(os.path.abspath(__file__))

for filename in os.listdir(decklist_dir):
    # Process only decklist files matching the pattern
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        # Collect unique cards from the deck and commanders, skip sideboard
        for line in deck_cards + commander_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                # Quantity is parsed but only the card name matters here.
                quantity = int(match.group(1))
                card_name = match.group(2)
                unique_cards.add(card_name)
|
||||
|
||||
# Step 5: Retrieve Card Types Using Scryfall API
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='

for card_name in unique_cards:
    encoded_name = requests.utils.quote(card_name)
    url = scryfall_api_url + encoded_name
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        types = data['type_line']
        # Keep only the words before the em dash (types/supertypes, dropping
        # subtypes): 'Legendary Creature — Elf' -> ['Legendary', 'Creature'].
        main_types = types.split('—')[0].strip()
        type_list = [t.strip() for t in main_types.split()]
        card_types[card_name] = type_list
    else:
        # NOTE(review): split/double-faced names may fail the exact-name
        # lookup and end up with an empty type list -- confirm acceptable.
        print(f"Error fetching data for card: {card_name}")
        card_types[card_name] = []
    # Optional: Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds
|
||||
|
||||
# Step 5b: Build Card Tags Mapping
# Initialize card_tags dictionary (every unique card starts with no tags).
card_tags = {card_name: [] for card_name in unique_cards}

# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        if card_name in card_tags:
            card_tags[card_name].append(tag.capitalize())
        else:
            # Handle cases where card name variations exist
            # (case-insensitive fallback match against the decklist names).
            for unique_card in unique_cards:
                if card_name.lower() == unique_card.lower():
                    card_tags[unique_card].append(tag.capitalize())
                    break
|
||||
|
||||
# Step 6: Process Each Decklist to Collect Deck Data
for filename in os.listdir(decklist_dir):
    if filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename):
        # Rank and tournament size are encoded in the filename:
        # '<rank>-<size>-<name>.txt'.
        match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
        if match:
            rank = int(match.group(1))
            tournament_size = int(match.group(2))
        else:
            rank = None
            tournament_size = None
        filepath = os.path.join(decklist_dir, filename)
        deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
        deck = {}          # card name -> quantity for this deck
        commander = None
        partner = None
        # Per-type counts, weighted by card quantity.
        num_artifacts = num_creatures = num_enchantments = 0
        num_instants = num_sorceries = num_planeswalkers = 0
        num_lands = 0

        # Initialize tag card sets to store unique cards per tag
        tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}

        # Process main deck cards
        for line in deck_cards:
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                quantity = int(match.group(1))
                card_name = match.group(2)
                deck[card_name] = quantity
                # A card with multiple types (e.g. 'Artifact Creature')
                # counts toward every matching type bucket.
                types = card_types.get(card_name, [])
                if 'Artifact' in types:
                    num_artifacts += quantity
                if 'Creature' in types:
                    num_creatures += quantity
                if 'Enchantment' in types:
                    num_enchantments += quantity
                if 'Instant' in types:
                    num_instants += quantity
                if 'Sorcery' in types:
                    num_sorceries += quantity
                if 'Planeswalker' in types:
                    num_planeswalkers += quantity
                if 'Land' in types:
                    num_lands += quantity

                # Process tags
                tags = card_tags.get(card_name, [])
                for tag in tags:
                    if tag in tag_card_sets:
                        tag_card_sets[tag].add(card_name)

        # Process commander cards: first line is the commander, second
        # (if present) the partner.
        for idx, line in enumerate(commander_cards):
            match = re.match(r'(\d+)\s+(.*)', line)
            if match:
                card_name = match.group(2)
                if idx == 0:
                    commander = card_name
                elif idx == 1:
                    partner = card_name

        # Compute tag counts as the number of unique cards per tag
        tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}

        deck_row = {
            'rank': rank,
            'tournament_size': tournament_size,
            'commander': commander,
            'partner': partner,
            'num_artifacts': num_artifacts,
            'num_creatures': num_creatures,
            'num_enchantments': num_enchantments,
            'num_instants': num_instants,
            'num_sorceries': num_sorceries,
            'num_planeswalkers': num_planeswalkers,
            'num_lands': num_lands,
            'tag_counts': tag_counts,
            'deck': deck
        }
        deck_data.append(deck_row)
|
||||
|
||||
# Step 7: Prepare the CSV Header
# Fixed metadata/feature columns first, then one Num_<Tag> column per tag,
# then one 0/1 presence column per unique card (sorted for stable order).
header = ['Rank', 'TournamentSize', 'Commander', 'Partner', 'Num_Artifacts', 'Num_Creatures',
          'Num_Enchantments', 'Num_Instants', 'Num_Sorceries',
          'Num_Planeswalkers', 'Num_Lands']

# Add tag columns
header.extend(['Num_' + tag.capitalize() for tag in tags_of_interest])

sorted_cards = sorted(unique_cards)
header.extend(sorted_cards)
|
||||
|
||||
# Step 8: Write Data to CSV File
# Rows are written in the same column order as the header built in Step 7.
with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [
            deck['rank'],
            deck['tournament_size'],
            deck['commander'],
            deck['partner'],
            deck['num_artifacts'],
            deck['num_creatures'],
            deck['num_enchantments'],
            deck['num_instants'],
            deck['num_sorceries'],
            deck['num_planeswalkers'],
            deck['num_lands']
        ]
        # Add tag counts
        for tag in tags_of_interest:
            row.append(deck['tag_counts'][tag.capitalize()])
        # Add card presence (1 or 0)
        for card_name in sorted_cards:
            row.append(1 if card_name in deck['deck'] else 0)
            # If you prefer to include quantities, use:
            # row.append(deck['deck'].get(card_name, 0))
        writer.writerow(row)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user