207 lines
9.4 KiB
Python
207 lines
9.4 KiB
Python
import os
|
|
import time
|
|
import re
|
|
from datetime import datetime
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.firefox.options import Options
|
|
from selenium.webdriver.firefox.service import Service
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
|
def setup_browser(download_dir):
    """Start a Firefox WebDriver configured to download into *download_dir*.

    Args:
        download_dir: Directory passed to Firefox as ``browser.download.dir``.

    Returns:
        The ``webdriver.Firefox`` instance, launched through the geckodriver
        binary at ``/usr/local/bin/geckodriver``.
    """
    # Download-related Firefox preferences, applied in the order listed.
    download_prefs = (
        ('browser.download.folderList', 2),
        ('browser.download.dir', download_dir),
        ('browser.helperApps.neverAsk.saveToDisk', 'text/plain,text/csv,application/octet-stream'),
        ('pdfjs.disabled', True),
        ('browser.download.manager.showWhenStarting', False),
        ('browser.download.manager.useWindow', False),
        ('browser.download.manager.focusWhenStarting', False),
        ('browser.download.manager.alertOnEXEOpen', False),
        ('browser.download.manager.showAlertOnComplete', False),
        ('browser.download.manager.closeWhenDone', False),
    )

    profile_options = Options()
    for pref_name, pref_value in download_prefs:
        profile_options.set_preference(pref_name, pref_value)

    # The geckodriver location is fixed; it is not looked up on PATH.
    driver_service = Service(executable_path='/usr/local/bin/geckodriver')

    return webdriver.Firefox(service=driver_service, options=profile_options)
|
|
|
|
def parse_date(date_str):
    """Convert a date such as ``"November 14th 2024"`` into a ``date``.

    Ordinal suffixes (st/nd/rd/th) after the day number, commas, and
    surrounding or repeated whitespace are ignored, so
    ``"November 14th, 2024"`` is accepted as well.

    Args:
        date_str: Text in "<full month name> <day> <year>" form.

    Returns:
        The parsed ``datetime.date``, or ``None`` when *date_str* is not a
        string or does not match the expected layout. Failures are reported
        with ``print`` rather than raised.
    """
    # re.sub would raise TypeError on None/non-text input; report it the same
    # way as any other unparseable value instead.
    if not isinstance(date_str, str):
        print(f"Error parsing date '{date_str}': expected a string")
        return None

    # "14th" -> "14"
    date_str = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', date_str)
    # Drop commas and collapse/trim whitespace so strptime sees "Month D YYYY".
    date_str = ' '.join(date_str.replace(',', ' ').split())

    try:
        tournament_date = datetime.strptime(date_str, '%B %d %Y').date()
    except ValueError as e:
        print(f"Error parsing date '{date_str}': {e}")
        return None

    print(f"Parsed date: {tournament_date}")
    return tournament_date
|
|
|
|
def get_tournaments_after_date(browser, date_threshold):
    """Collect tournaments from the edhtop16 listing dated on/after a cutoff.

    Args:
        browser: Selenium WebDriver used to load and query the listing page.
        date_threshold: ``date`` to compare against; a tournament is kept
            when its parsed date is greater than or equal to this value.

    Returns:
        A list of ``(tournament_url, tournament_name, tournament_date)``
        tuples. Entries that raise while being read are reported with
        ``print`` and skipped.
    """
    listing_url = 'https://edhtop16.com/tournaments?sortBy=DATE'
    card_selector = "div.group.relative.overflow-hidden.rounded-lg.bg-white.shadow"
    title_selector = 'a.line-clamp-2.text-xl.font-bold.underline'

    print("Navigating to the tournaments page...")
    browser.get(listing_url)

    # Fixed pause before querying the page for tournament cards.
    time.sleep(3)

    cards = browser.find_elements(By.CSS_SELECTOR, card_selector)
    print(f"Found {len(cards)} tournaments on the page.")

    selected = []
    for card in cards:
        try:
            title_link = card.find_element(By.CSS_SELECTOR, title_selector)
            name = title_link.text
            url = title_link.get_attribute('href')

            # The first <span> inside the card is read as the date text.
            date_text = card.find_element(By.CSS_SELECTOR, 'span').text
            print(f"Tournament found: {name}, Date: {date_text}, URL: {url}")

            parsed = parse_date(date_text)
            if not parsed or not (parsed >= date_threshold):
                continue
            selected.append((url, name, parsed))
        except Exception as e:
            print(f"Error processing tournament entry: {e}")

    print(f"Total tournaments after {date_threshold}: {len(selected)}")
    return selected
|
|
|
|
def get_tournament_info(browser):
    """Read the tournament title and deck links from the currently loaded page.

    Args:
        browser: Selenium WebDriver already navigated to a tournament page.

    Returns:
        A ``(tournament_name, deck_links)`` tuple. ``tournament_name`` is the
        text of the page's ``<h1>`` with ``/`` replaced by ``-`` (or
        ``"Tournament"`` if the heading lookup fails); ``deck_links`` is a
        list of ``(deck_url, deck_name)`` pairs in the order Selenium returns
        the matching anchors.
    """
    fallback_name = "Tournament"

    print("Retrieving the tournament name and deck links...")
    time.sleep(3)  # Fixed pause to give the page time to load

    try:
        heading = browser.find_element(By.TAG_NAME, 'h1')
        if heading:
            title = heading.text
        else:
            title = fallback_name
    except Exception as e:
        print(f"Error retrieving tournament name: {e}")
        title = fallback_name

    # '/' is not valid inside a file or directory name.
    title = title.replace('/', '-')
    print(f"Tournament Name: {title}")

    anchors = browser.find_elements(By.CSS_SELECTOR, "a.line-clamp-2.text-xl.font-bold.underline")
    deck_links = [(anchor.get_attribute('href'), anchor.text) for anchor in anchors]

    print(f"Total decks found: {len(deck_links)}")
    return title, deck_links
|
|
|
|
def _wait_for_new_download(save_dir, known_files, timeout=30, poll_interval=1):
    """Poll *save_dir* until a finished ``.txt`` file not in *known_files* appears.

    Args:
        save_dir: Directory the browser downloads into.
        known_files: Set of file names that were present before the download
            was triggered; these are never returned.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between directory listings.

    Returns:
        Full path of the newly downloaded file, or ``None`` on timeout.
    """
    waited = 0
    while waited < timeout:
        new_names = set(os.listdir(save_dir)) - known_files
        # NOTE(review): Firefox is expected to keep a "<name>.part" file next
        # to the target while a transfer is still running - confirm for the
        # Firefox version in use.
        still_writing = any(name.endswith('.part') for name in new_names)
        finished = [name for name in new_names if name.endswith('.txt')]
        if finished and not still_writing:
            return max(
                (os.path.join(save_dir, name) for name in finished),
                key=os.path.getctime,
            )
        time.sleep(poll_interval)
        waited += poll_interval
    return None


def download_deck(browser, deck_url, rank, total_decks, save_dir):
    """Open a deck page, export it for MTGO, and rename the downloaded file.

    The page is driven through "More" -> "Export" -> "Download for MTGO".
    The resulting ``.txt`` file in *save_dir* is renamed to
    ``"<rank>-<total_decks>-<original name>"``.

    Only files that were not in *save_dir* before the download link was
    clicked are considered, so a file left by an earlier deck is never
    mistaken for the current download.

    Args:
        browser: Selenium WebDriver whose download directory is *save_dir*.
        deck_url: URL of the deck page.
        rank: 1-based position of the deck, used in messages and the file name.
        total_decks: Number of decks in the tournament, used in the file name.
        save_dir: Directory the browser saves downloads into.

    Returns:
        None. Errors after the initial navigation are reported with ``print``
        and swallowed so the caller can continue with the next deck.
    """
    more_xpath = "//span[contains(text(), 'More')]/.."
    export_xpath = "//a[contains(@class, 'dropdown-item') and contains(text(), 'Export')]"
    mtgo_xpath = "//a[contains(@class, 'btn btn-primary') and contains(text(), 'Download for MTGO')]"

    print(f"Navigating to deck {rank} page...")
    browser.get(deck_url)

    try:
        wait = WebDriverWait(browser, 10)

        # Open the "More" menu (clicked through JavaScript).
        print("Waiting for the 'More' button to appear...")
        more_button = wait.until(EC.visibility_of_element_located((By.XPATH, more_xpath)))
        browser.execute_script("arguments[0].click();", more_button)
        time.sleep(1)

        # Pick "Export" from the dropdown.
        print("Waiting for the 'Export' option...")
        export_option = wait.until(EC.visibility_of_element_located((By.XPATH, export_xpath)))
        browser.execute_script("arguments[0].click();", export_option)
        time.sleep(1)

        print("Clicking 'Download for MTGO' link...")
        mtgo_download_link = wait.until(EC.visibility_of_element_located((By.XPATH, mtgo_xpath)))

        # Snapshot the directory BEFORE triggering the download so that files
        # from previous decks cannot be picked up as this deck's download.
        known_files = set(os.listdir(save_dir))
        mtgo_download_link.click()

        print("Waiting for the file to download...")
        downloaded_filepath = _wait_for_new_download(save_dir, known_files)

        if downloaded_filepath is None:
            print(f"Failed to download deck {rank}: Download timed out.")
            return

        original_filename = os.path.basename(downloaded_filepath)
        # Sanitize the original filename
        sanitized_filename = original_filename.replace('/', '-').replace('\\', '-')
        new_filename = os.path.join(save_dir, f"{rank}-{total_decks}-{sanitized_filename}")
        os.rename(downloaded_filepath, new_filename)
        print(f"Downloaded deck {rank}/{total_decks}: {new_filename}")

    except Exception as e:
        # Best-effort: one failing deck must not abort the whole run.
        print(f"Error downloading deck {rank}: {e}")
|
|
|
|
def main():
    """Download every deck of each tournament dated on or after the cutoff.

    Creates ``./downloads`` (relative to the current working directory) and
    one sub-directory per tournament, restarts the browser for each
    tournament so its download directory points at that sub-directory, and
    downloads the tournament's decks one by one. The browser is always
    closed on exit.
    """
    downloads_root = os.path.join(os.getcwd(), 'downloads')
    if not os.path.exists(downloads_root):
        os.makedirs(downloads_root)
        print(f"Created base download directory at {downloads_root}")

    # Only tournaments dated on or after this day are processed.
    cutoff = datetime(2024, 11, 14).date()

    print("Setting up the browser...")
    browser = setup_browser(downloads_root)
    try:
        selected_tournaments = get_tournaments_after_date(browser, cutoff)

        for event_url, event_name, event_date in selected_tournaments:
            print(f"\nProcessing tournament: {event_name} dated {event_date}")

            # One directory per tournament; '/' cannot appear in a directory name.
            event_dir = os.path.join(downloads_root, event_name.replace('/', '-'))
            if not os.path.exists(event_dir):
                os.makedirs(event_dir)
                print(f"Created tournament directory at {event_dir}")

            # The download directory is set when the browser is created, so a
            # fresh browser is started for every tournament.
            browser.quit()
            browser = setup_browser(event_dir)

            print(f"Navigating to the tournament page {event_url}...")
            browser.get(event_url)
            time.sleep(3)  # Fixed pause to give the page time to load

            # Only the deck links are needed here; the returned name is unused.
            _page_title, deck_links = get_tournament_info(browser)

            deck_count = len(deck_links)
            for position, (deck_url, deck_name) in enumerate(deck_links, start=1):
                print(f"\nProcessing deck {position}/{deck_count}: {deck_name}")
                download_deck(browser, deck_url, position, deck_count, event_dir)
    finally:
        print("Closing the browser...")
        browser.quit()
|
|
|
|
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|