去年、スキーチケットの価格を楽に調査しようと思ってPythonとSeleniumで作ったウェブスクレイピングのスクリプトはこちらでした。
そして、無事取得できたことがうれしくて、何度も実行した結果、Akamaiから不正アクセスIPとして認定されるという苦痛を味わったのは、そこに書いた通りです。それ以来Akamaiは嫌いです。Akamai使ってなくてもレスポンス良いサーバーなんていくらでもあると思う。
1週間ぐらいおとなしくしていると無事にいろんなアクセス制限が解除されたので、今回のところは最悪1週間待てばなんとかなる、という安心感?のもと、待ち時間をたっぷり入れて人間がアクセスしてるのと変わらない感じにしておきます。去年とは価格が書かれているサイトの構造がいくつか変わっていたので更新しました。
というわけで、全ソースコードを貼っておきます。実行する場合は、短時間に何度もアクセスしてアクセス制限を受けないよう、くれぐれもご注意ください。
# Scrape ski pass / lift ticket prices with headless Chrome and print them.
#
# Each price page is opened in its own short-lived browser session, and the
# script pauses a few seconds between pages so the traffic looks like a person
# browsing rather than a burst of automated requests.

import re
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.ui import WebDriverWait

url_epic4 = "https://www.epicpass.com/passes/epic-day-pass.aspx?days=4"
url_epic7 = "https://www.epicpass.com/passes/epic-day-pass.aspx?days=7"
url_epic_tahoe_local = "https://www.epicpass.com/passes/tahoe-local-pass.aspx"
url_epic_tahoe_value = "https://www.epicpass.com/passes/tahoe-value-pass.aspx"
url_epic = "https://www.epicpass.com/passes/epic-pass.aspx"
url_tahoe = "https://squawalpine.com/tickets-passes/lift-tickets/tahoe-super-4-lift-ticket-pack"
# NOTE: the misspelled names below are kept as-is so existing references keep working.
url_sugar_unristriced = "https://estore.sugarbowl.com/eStore/Content/Commerce/Products/DisplayProducts.aspx?ProductGroupCode=1101&AspxAutoDetectCookieSupport=1"
url_sugar_slightly = "https://estore.sugarbowl.com/eStore/Content/Commerce/Products/DisplayProducts.aspx?ProductGroupCode=1101&ProductCategoryCode=8023"
url_ikon = "https://www.ikonpass.com/en/shop-passes/ikon-pass-2019-2020"
url_ikon_base = "https://www.ikonpass.com/en/shop-passes/ikon-base-pass-2019-2020"

# Seconds to wait for the price element to become visible on a page.
_WAIT_SECONDS = 10

# CSS selectors of the element that carries the price on each kind of page.
_EPIC_DAY_SELECTOR = (
    "#epic_day_pass_detail__category__product_1"
    " > div.epic_day_pass_detail__category__product_context.col-xs-7"
    " > div.epic_day_pass_detail__category__product_price.c143__price--v1"
)
_EPIC_SELECTOR = (
    "#c27_Product_Detail_0"
    " > div.col-xs-3.hidden-xs.pass_category_detail__price_col"
    " > span"
)
_TAHOE_SELECTOR = (
    "#content > div.wrapper.clearfix > article"
    " > div.field.field-name-body.field-type-text-with-summary.field-label-hidden"
    " > div > div > div > div > table > tbody > tr:nth-child(1) > th"
)
_SUGAR_SELECTOR = (
    "#ctl00_ctl00_ctl00_ctl00_ContentPlaceHolder1_ContentPlaceHolder1"
    "_contentMain_commerceMain_ProductSelectControl_ProductSelectList"
    "_gvProducts_ctl02_lblPrice"
)
_IKON_SELECTOR = (
    "#pass-cart-widget > div.pass-cart-widget-left"
    " > div.pass-cart-widget-passes > div:nth-child(1)"
    " > div.pass-cart-widget-pass-actions > h4"
)

# Raw string: "\$" in a normal string literal is an invalid escape sequence.
_DOLLAR_AMOUNT = re.compile(r"\$[0-9]+")


def _visible_text(url, selector):
    """Open *url* in headless Chrome and return the text of *selector*.

    Waits up to ``_WAIT_SECONDS`` for the element matching the CSS selector
    to become visible. The wait raises Selenium's ``TimeoutException`` if it
    does not. The browser session is always shut down, even on failure.
    """
    options = Options()
    # The `headless` property was removed from recent Selenium releases;
    # the command-line switch works on old and new versions alike.
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        locator = (By.CSS_SELECTOR, selector)
        element = WebDriverWait(driver, _WAIT_SECONDS).until(
            expected_conditions.visibility_of_element_located(locator)
        )
        return element.text
    finally:
        # quit() ends the whole session (browser and driver process);
        # close() would only close the current window.
        driver.quit()


def _first_dollar_amount(text):
    """Return the first ``$<digits>`` token found in *text*.

    Raises:
        ValueError: if *text* contains no dollar amount.
    """
    found = _DOLLAR_AMOUNT.search(text)
    if found is None:
        raise ValueError("no dollar amount found in: " + repr(text))
    return found.group()


def epic_day(url):
    """Return the price text shown on an Epic Day Pass page."""
    return _visible_text(url, _EPIC_DAY_SELECTOR)


def epic(url):
    """Return the price text shown on an Epic season pass page."""
    return _visible_text(url, _EPIC_SELECTOR)


def epic4():
    """Print the price from the 4-day Epic Day Pass page."""
    print("epic4: " + epic_day(url_epic4))


def epic7():
    """Print the price from the 7-day Epic Day Pass page."""
    print("epic7: " + epic_day(url_epic7))


def epic_tahoe_local():
    """Print the price from the Tahoe Local Pass page."""
    print("epic tahoe local: " + epic(url_epic_tahoe_local))


def epic_tahoe_value():
    """Print the price from the Tahoe Value Pass page."""
    print("epic tahoe value: " + epic(url_epic_tahoe_value))


def epic_epic():
    """Print the price from the Epic Pass page."""
    print("epic: " + epic(url_epic))


def tahoe4(url):
    """Print the first dollar amount in the first table header cell at *url*.

    Raises:
        ValueError: if the cell text contains no ``$<digits>`` amount.
    """
    cell_text = _visible_text(url, _TAHOE_SELECTOR)
    print("tahoe super4: " + _first_dollar_amount(cell_text))


def sugar(url):
    """Return the price label text of the first product listed at *url*."""
    return _visible_text(url, _SUGAR_SELECTOR)


def sugar_unristricted():
    """Print the price from the Sugar Bowl product page at ``url_sugar_unristriced``."""
    print("Sugar unristricted: " + sugar(url_sugar_unristriced))


def sugar_slightly():
    """Print the price from the Sugar Bowl product page at ``url_sugar_slightly``."""
    print("Sugar slightly: " + sugar(url_sugar_slightly))


def ikon(url):
    """Return the price heading text from the Ikon pass cart widget at *url*."""
    return _visible_text(url, _IKON_SELECTOR)


def ikon_ikon():
    """Print the price from the Ikon Pass page."""
    print("Ikon: " + ikon(url_ikon))


def ikon_base():
    """Print the price from the Ikon Base Pass page."""
    print("Ikon base: " + ikon(url_ikon_base))


def main():
    """Fetch and print every price, pausing between pages."""
    # (job, seconds to sleep afterwards); the pauses keep the request rate low.
    schedule = [
        (epic4, 5),
        (epic7, 6),
        (epic_tahoe_local, 7),
        (epic_tahoe_value, 5),
        (epic_epic, 6),
        (lambda: tahoe4(url_tahoe), 7),
        (sugar_unristricted, 5),
        (sugar_slightly, 6),
        (ikon_ikon, 7),
        (ikon_base, 0),
    ]
    for job, pause in schedule:
        job()
        if pause:
            time.sleep(pause)


if __name__ == "__main__":
    main()