# USCLAP/process_decklists.py

import os
import re
import requests
import csv
import time  # Optional: For adding delays between API requests

# Shared state filled in by the processing steps further down
unique_cards = set()  # card names found in main decks and commander sections
card_types = {}       # card name -> list of type words
card_tags = {}        # card name -> list of capitalized tag labels
deck_data = []        # one summary dict per processed decklist

# Tags that are looked up on Scryfall and counted for every deck
tags_of_interest = [
    'ramp',
    'draw',
    'tutor',
    'counterspell',
    'removal',
    'stax',
    'protection',
    'boardwipe',
]

def process_decklist_file(filepath):
    """Split a decklist text file into deck, sideboard and commander lines.

    The file is read as UTF-8 and every line is stripped of surrounding
    whitespace. Parsing starts in the deck section and moves on as follows:

    * A blank line in the deck section switches to the sideboard when the
      next non-blank line is ``SIDEBOARD:`` (any letter case); otherwise it
      switches to the commander section.
    * A ``SIDEBOARD:`` line met directly in the deck section switches to the
      sideboard; a repeated marker inside the sideboard is ignored.
    * A blank line in the sideboard section switches to the commander
      section.
    * The commander section keeps every non-blank line up to the end of the
      file.

    Args:
        filepath: Path of the decklist file to read.

    Returns:
        A tuple ``(deck_cards, sideboard_cards, commander_cards)`` of lists
        holding the stripped, unparsed lines of each section.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        entries = [raw.strip() for raw in handle]

    buckets = {'deck': [], 'sideboard': [], 'commander': []}
    state = 'deck'
    total = len(entries)
    pos = 0

    while pos < total:
        entry = entries[pos]

        if state == 'commander':
            # Once in the commander section nothing switches it again.
            if entry:
                buckets['commander'].append(entry)
        elif not entry:
            if state == 'deck':
                # Look past any run of blank lines for a sideboard marker.
                upcoming = next(
                    (k for k in range(pos + 1, total) if entries[k]), total
                )
                if upcoming < total and entries[upcoming].upper() == 'SIDEBOARD:':
                    state = 'sideboard'
                    pos = upcoming  # resume on the marker line itself
                    continue
            # A blank line in the sideboard, or one in the deck that is not
            # followed by a marker, opens the commander section.
            state = 'commander'
        elif entry.upper() == 'SIDEBOARD:':
            state = 'sideboard'
        else:
            buckets[state].append(entry)

        pos += 1

    return buckets['deck'], buckets['sideboard'], buckets['commander']

def fetch_cards_for_tag(tag, timeout=30):
    """Collect the names of cards Scryfall returns for ``f:edh otag:<tag>``.

    Pages through the ``/cards/search`` endpoint until the response no longer
    reports ``has_more``. The lookup is best effort: a non-200 response or a
    transport failure (including a timeout) prints an error and stops paging,
    and whatever was collected so far is returned.

    Args:
        tag: Tag name inserted after ``otag:`` in the search query.
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled connection would block the script indefinitely.

    Returns:
        A set with the ``name`` field of every card object received.
    """
    # Both values are the same for every page, so build them once.
    search_url = "https://api.scryfall.com/cards/search"
    query = f"f:edh otag:{tag}"

    card_names = set()
    page = 1
    has_more = True
    while has_more:
        try:
            response = requests.get(
                search_url,
                params={'q': query, 'page': page},
                timeout=timeout,
            )
        except requests.RequestException:
            # Treat network failures like a bad status code instead of
            # letting them abort the whole run.
            print(f"Error fetching cards for tag: {tag}")
            break

        if response.status_code != 200:
            print(f"Error fetching cards for tag: {tag}")
            break

        data = response.json()
        # Tolerate a payload without a 'data' list rather than raising.
        card_names.update(card['name'] for card in data.get('data', []))
        has_more = data.get('has_more', False)
        page += 1
        time.sleep(0.1)  # Sleep to respect rate limits
    return card_names

# Step 4: Collect Unique Card Names
# Decklists are looked up in the directory that contains this script.
decklist_dir = os.path.dirname(os.path.abspath(__file__))

for filename in os.listdir(decklist_dir):
    # Process only decklist files named "<digits>-<digits>-<anything>.txt"
    if not (filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename)):
        continue
    filepath = os.path.join(decklist_dir, filename)
    deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
    # Collect unique cards from the deck and commanders, skip sideboard
    for line in deck_cards + commander_cards:
        # Lines look like "<quantity> <card name>". Only the name matters
        # here, so the quantity is matched but not captured or converted.
        match = re.match(r'\d+\s+(.*)', line)
        if match:
            card_name = match.group(1)
            unique_cards.add(card_name)

# Step 5: Retrieve Card Types Using Scryfall API
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='

for card_name in unique_cards:
    url = scryfall_api_url + requests.utils.quote(card_name)
    try:
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Network failures take the same path as a bad status code below.
        response = None

    type_line = None
    if response is not None and response.status_code == 200:
        # .get() so a card object without a top-level 'type_line' is
        # reported as an error instead of raising KeyError.
        type_line = response.json().get('type_line')

    if type_line is None:
        print(f"Error fetching data for card: {card_name}")
        card_types[card_name] = []
    else:
        # Keep only the words before the first em dash (everything after it
        # is dropped); split() already discards surrounding whitespace.
        main_types = type_line.split('—')[0]
        card_types[card_name] = main_types.split()

    # Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds

# Step 5b: Build Card Tags Mapping
# Every known card starts with an empty tag list.
card_tags = {card_name: [] for card_name in unique_cards}

# Case-insensitive index used when Scryfall's spelling differs from the
# decklist's only by letter case. setdefault keeps the first spelling met
# while iterating unique_cards, which is the same card a linear scan with
# an early break would pick, but each lookup is now O(1) instead of O(n).
names_by_lowercase = {}
for unique_card in unique_cards:
    names_by_lowercase.setdefault(unique_card.lower(), unique_card)

# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tag_label = tag.capitalize()
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        if card_name in card_tags:
            card_tags[card_name].append(tag_label)
            continue
        # Handle cases where card name variations exist (letter case only)
        unique_card = names_by_lowercase.get(card_name.lower())
        if unique_card is not None:
            card_tags[unique_card].append(tag_label)

# Step 6: Process Each Decklist to Collect Deck Data
for filename in os.listdir(decklist_dir):
    # A single match both selects decklist files and captures the two leading
    # numbers of "<rank>-<tournament size>-<anything>.txt", so no fallback for
    # a failed second match is needed.
    filename_match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
    if not (filename.endswith('.txt') and filename_match):
        continue
    rank = int(filename_match.group(1))
    tournament_size = int(filename_match.group(2))

    filepath = os.path.join(decklist_dir, filename)
    deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)

    deck = {}
    commander = None
    partner = None

    # Number of main-deck copies per card type, keyed by the type word that is
    # looked up in card_types. A card with several types counts towards each.
    type_totals = {
        'Artifact': 0,
        'Creature': 0,
        'Enchantment': 0,
        'Instant': 0,
        'Sorcery': 0,
        'Planeswalker': 0,
        'Land': 0,
    }

    # Initialize tag card sets to store unique cards per tag
    tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}

    # Process main deck cards; lines that are not "<quantity> <name>" are skipped
    for line in deck_cards:
        match = re.match(r'(\d+)\s+(.*)', line)
        if not match:
            continue
        quantity = int(match.group(1))
        card_name = match.group(2)
        # Add up quantities so a card listed on several lines ends with its
        # total, consistent with the per-type totals below.
        deck[card_name] = deck.get(card_name, 0) + quantity

        types = card_types.get(card_name, [])
        for type_name in type_totals:
            if type_name in types:
                type_totals[type_name] += quantity

        # Process tags
        for tag in card_tags.get(card_name, []):
            if tag in tag_card_sets:
                tag_card_sets[tag].add(card_name)

    # Process commander cards: the first commander line is the commander and
    # the second the partner; any further lines are ignored.
    for idx, line in enumerate(commander_cards):
        match = re.match(r'(\d+)\s+(.*)', line)
        if match:
            card_name = match.group(2)
            if idx == 0:
                commander = card_name
            elif idx == 1:
                partner = card_name

    # Compute tag counts as the number of unique cards per tag
    tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}

    deck_row = {
        'rank': rank,
        'tournament_size': tournament_size,
        'commander': commander,
        'partner': partner,
        'num_artifacts': type_totals['Artifact'],
        'num_creatures': type_totals['Creature'],
        'num_enchantments': type_totals['Enchantment'],
        'num_instants': type_totals['Instant'],
        'num_sorceries': type_totals['Sorcery'],
        'num_planeswalkers': type_totals['Planeswalker'],
        'num_lands': type_totals['Land'],
        'tag_counts': tag_counts,
        'deck': deck
    }
    deck_data.append(deck_row)

# Step 7: Prepare the CSV Header
# Card columns follow the alphabetical order of the collected card names.
sorted_cards = sorted(unique_cards)

# Fixed per-deck columns come first.
header = [
    'Rank',
    'TournamentSize',
    'Commander',
    'Partner',
    'Num_Artifacts',
    'Num_Creatures',
    'Num_Enchantments',
    'Num_Instants',
    'Num_Sorceries',
    'Num_Planeswalkers',
    'Num_Lands',
]

# Then one column per tag, and finally one column per card.
for tag in tags_of_interest:
    header.append('Num_' + tag.capitalize())
header += sorted_cards

# Step 8: Write Data to CSV File
# Keys of the per-deck summary, in the same order as the fixed header columns.
stat_keys = (
    'rank',
    'tournament_size',
    'commander',
    'partner',
    'num_artifacts',
    'num_creatures',
    'num_enchantments',
    'num_instants',
    'num_sorceries',
    'num_planeswalkers',
    'num_lands',
)

with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [deck[key] for key in stat_keys]
        # Tag counts, in the order of tags_of_interest
        row.extend(deck['tag_counts'][tag.capitalize()] for tag in tags_of_interest)
        # Card presence flag (1 or 0) for every card column. To write the
        # quantity instead, use deck['deck'].get(card_name, 0).
        row.extend(
            1 if card_name in deck['deck'] else 0
            for card_name in sorted_cards
        )
        writer.writerow(row)