# USCLAP/process_decklists.py
#
# 267 lines
# 9.5 KiB
# Python

import os
import re
import requests
import csv
import time # Optional: For adding delays between API requests
# Shared state that the processing steps below fill in
unique_cards = set()  # card names seen in any main deck or commander section
card_types = {}       # card name -> list of main type words for that card
card_tags = {}        # card name -> list of capitalized tag names
deck_data = []        # one summary dict per processed decklist file

# Tags of interest: each one is searched on Scryfall and counted per deck
tags_of_interest = [
    'ramp',
    'draw',
    'tutor',
    'counterspell',
    'removal',
    'stax',
    'protection',
    'boardwipe',
]
# Function to process a decklist file and separate it into deck, sideboard, and commander sections
def process_decklist_file(filepath):
    """Split a decklist text file into deck, sideboard and commander lines.

    The file is read top to bottom as blank-line separated sections:

    1. main-deck lines, starting at the top of the file;
    2. optionally a ``SIDEBOARD:`` header (matched case-insensitively)
       followed by sideboard lines;
    3. commander lines: everything after the blank line that ends the
       sideboard, or after the blank line(s) that end the main deck when
       no ``SIDEBOARD:`` header follows them.

    Blank lines that appear before the first main-deck entry are ignored,
    so a stray leading blank line does not push the whole deck into the
    commander section.

    Args:
        filepath: Path of the decklist file. It is decoded as UTF-8; a
            leading byte-order mark, if present, is dropped.

    Returns:
        A ``(deck_cards, sideboard_cards, commander_cards)`` tuple of lists.
        Each entry is one non-empty line with surrounding whitespace
        removed; the lines are not parsed any further.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but strips a BOM, which
    # would otherwise stay attached to the first line of the file.
    with open(filepath, 'r', encoding='utf-8-sig') as file:
        lines = [raw_line.strip() for raw_line in file]

    deck_cards = []
    sideboard_cards = []
    commander_cards = []

    # States: 'deck' -> 'after_deck' -> ('sideboard' ->) 'commander'
    section = 'deck'
    for line in lines:
        is_sideboard_header = line.upper() == 'SIDEBOARD:'
        if section == 'deck':
            if line == '':
                # A blank line ends the main deck, but only once the deck
                # has started; leading blank lines are skipped.
                if deck_cards:
                    section = 'after_deck'
            elif is_sideboard_header:
                section = 'sideboard'
            else:
                deck_cards.append(line)
        elif section == 'after_deck':
            # The first non-empty line after the deck decides what follows:
            # a sideboard header, or the first commander.
            if line == '':
                continue
            if is_sideboard_header:
                section = 'sideboard'
            else:
                section = 'commander'
                commander_cards.append(line)
        elif section == 'sideboard':
            if line == '':
                # Empty line after the sideboard indicates commanders
                section = 'commander'
            elif not is_sideboard_header:
                sideboard_cards.append(line)
        elif line != '':
            commander_cards.append(line)

    return deck_cards, sideboard_cards, commander_cards
# Function to fetch cards for a given tag from Scryfall
def fetch_cards_for_tag(tag):
    """Return the names of all cards matching a tag search on Scryfall.

    Runs the Scryfall card search ``f:edh otag:<tag>`` and follows the
    paginated result for as long as the response reports ``has_more``.

    Args:
        tag: Tag name interpolated into the ``otag:`` search term.

    Returns:
        A set with the ``name`` of every card returned. If a request fails
        (transport error, timeout, or a non-200 status) an error is printed
        and the names collected so far are returned, so the set may be
        partial or empty.
    """
    # Loop-invariant request pieces are built once.
    url = "https://api.scryfall.com/cards/search"
    query = f"f:edh otag:{tag}"

    card_names = set()
    page = 1
    has_more = True
    while has_more:
        params = {'q': query, 'page': page}
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, params=params, timeout=30)
        except requests.RequestException as error:
            print(f"Error fetching cards for tag: {tag} ({error})")
            break
        if response.status_code != 200:
            print(f"Error fetching cards for tag: {tag}")
            break
        data = response.json()
        for card in data.get('data', []):
            card_names.add(card['name'])
        has_more = data.get('has_more', False)
        page += 1
        time.sleep(0.1)  # Sleep to respect rate limits
    return card_names
# Step 4: Collect Unique Card Names
# Decklists are looked up in the directory that contains this script.
decklist_dir = os.path.dirname(os.path.abspath(__file__))
for filename in os.listdir(decklist_dir):
    # Process only decklist files matching the pattern
    # "<digits>-<digits>-<anything>.txt"
    if not (filename.endswith('.txt') and re.match(r'\d+-\d+-.*\.txt', filename)):
        continue
    filepath = os.path.join(decklist_dir, filename)
    deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
    # Collect unique cards from the deck and commanders, skip sideboard
    for line in deck_cards + commander_cards:
        # Each entry is "<quantity> <card name>"; only the name matters here,
        # so the quantity is not parsed.
        match = re.match(r'(\d+)\s+(.*)', line)
        if match:
            unique_cards.add(match.group(2))
# Step 5: Retrieve Card Types Using Scryfall API
scryfall_api_url = 'https://api.scryfall.com/cards/named?exact='
for card_name in unique_cards:
    encoded_name = requests.utils.quote(card_name)
    url = scryfall_api_url + encoded_name
    # Cards that cannot be fetched are recorded with no types so later
    # lookups still find an entry for them.
    type_list = []
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        print(f"Error fetching data for card: {card_name} ({error})")
    else:
        if response.status_code == 200:
            data = response.json()
            type_line = data.get('type_line', '')
            # A Scryfall type line reads "<types> — <subtypes>". Keep only
            # the words before the em dash. The separator is written as an
            # escape so it cannot be lost to an encoding mishap again
            # (an empty separator makes str.split raise ValueError).
            type_list = type_line.split('\u2014')[0].split()
        else:
            print(f"Error fetching data for card: {card_name}")
    card_types[card_name] = type_list
    # Optional: Add a delay to respect API rate limits
    time.sleep(0.1)  # Sleep for 100 milliseconds
# Step 5b: Build Card Tags Mapping
# Initialize card_tags dictionary
card_tags = {card_name: [] for card_name in unique_cards}
# Index the decklist card names by their lowercase form once, so each tagged
# card is matched with a dictionary lookup instead of a scan over every
# unique card. All spellings that differ only by case share one entry.
cards_by_lowered_name = {}
for unique_card in unique_cards:
    cards_by_lowered_name.setdefault(unique_card.lower(), []).append(unique_card)
# Fetch cards for each tag and build the mapping
for tag in tags_of_interest:
    print(f"Fetching cards for tag: {tag}")
    tag_label = tag.capitalize()
    tagged_cards = fetch_cards_for_tag(tag)
    for card_name in tagged_cards:
        # Handle cases where card name variations exist: every decklist
        # spelling that matches case-insensitively receives the tag.
        for unique_card in cards_by_lowered_name.get(card_name.lower(), ()):
            card_tags[unique_card].append(tag_label)
# Step 6: Process Each Decklist to Collect Deck Data
# Card types tallied per deck, mapped to the deck_row key that holds each
# tally. The order here is the order of the keys in deck_row.
counted_types = {
    'Artifact': 'num_artifacts',
    'Creature': 'num_creatures',
    'Enchantment': 'num_enchantments',
    'Instant': 'num_instants',
    'Sorcery': 'num_sorceries',
    'Planeswalker': 'num_planeswalkers',
    'Land': 'num_lands',
}
for filename in os.listdir(decklist_dir):
    # Decklist files are named "<rank>-<tournament size>-<anything>.txt";
    # one match both filters the file and yields the two numbers.
    match = re.match(r'(\d+)-(\d+)-.*\.txt', filename)
    if not (match and filename.endswith('.txt')):
        continue
    rank = int(match.group(1))
    tournament_size = int(match.group(2))
    filepath = os.path.join(decklist_dir, filename)
    deck_cards, sideboard_cards, commander_cards = process_decklist_file(filepath)
    deck = {}
    type_counts = dict.fromkeys(counted_types.values(), 0)
    # Initialize tag card sets to store unique cards per tag
    tag_card_sets = {tag.capitalize(): set() for tag in tags_of_interest}
    # Process main deck cards
    for line in deck_cards:
        match = re.match(r'(\d+)\s+(.*)', line)
        if not match:
            continue
        quantity = int(match.group(1))
        card_name = match.group(2)
        # Accumulate so a card listed on several lines keeps its total
        # quantity, in line with the type tallies below.
        deck[card_name] = deck.get(card_name, 0) + quantity
        types = card_types.get(card_name, [])
        # A card with several counted types adds to each of those tallies.
        for type_name, count_key in counted_types.items():
            if type_name in types:
                type_counts[count_key] += quantity
        # Process tags
        for tag in card_tags.get(card_name, []):
            if tag in tag_card_sets:
                tag_card_sets[tag].add(card_name)
    # Process commander cards: the first parsable entry is the commander and
    # the second is the partner. Unparsable lines are skipped rather than
    # counted, so they cannot shift or blank out the commander slot.
    command_zone = []
    for line in commander_cards:
        match = re.match(r'(\d+)\s+(.*)', line)
        if match:
            command_zone.append(match.group(2))
    commander = command_zone[0] if command_zone else None
    partner = command_zone[1] if len(command_zone) > 1 else None
    # Compute tag counts as the number of unique cards per tag
    tag_counts = {tag: len(cards) for tag, cards in tag_card_sets.items()}
    deck_row = {
        'rank': rank,
        'tournament_size': tournament_size,
        'commander': commander,
        'partner': partner,
        **type_counts,
        'tag_counts': tag_counts,
        'deck': deck,
    }
    deck_data.append(deck_row)
# Step 7: Prepare the CSV Header
# Column order: the fixed summary columns, then one count column per tag of
# interest, then one column per unique card name in sorted order.
summary_columns = [
    'Rank', 'TournamentSize', 'Commander', 'Partner',
    'Num_Artifacts', 'Num_Creatures', 'Num_Enchantments',
    'Num_Instants', 'Num_Sorceries', 'Num_Planeswalkers', 'Num_Lands',
]
tag_columns = ['Num_' + tag.capitalize() for tag in tags_of_interest]
sorted_cards = sorted(unique_cards)
header = summary_columns + tag_columns + sorted_cards
# Step 8: Write Data to CSV File
# Keys of each deck summary, in the same order as the fixed header columns.
summary_keys = (
    'rank',
    'tournament_size',
    'commander',
    'partner',
    'num_artifacts',
    'num_creatures',
    'num_enchantments',
    'num_instants',
    'num_sorceries',
    'num_planeswalkers',
    'num_lands',
)
with open('deck_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for deck in deck_data:
        row = [deck[key] for key in summary_keys]
        # One count per tag of interest, in header order
        counts_by_tag = deck['tag_counts']
        row.extend(counts_by_tag[tag.capitalize()] for tag in tags_of_interest)
        # One presence flag (1 or 0) per known card, in header order.
        # To write quantities instead, use decklist.get(card_name, 0).
        decklist = deck['deck']
        row.extend(int(card_name in decklist) for card_name in sorted_cards)
        writer.writerow(row)