東証の変化率ランキングをスクレイピング【Python】【Selenium】【BeautifulSoup】

2022年5月17日 00:04

Pythonの勉強として、ヤフーファイナンスから値上がり率上位・値下がり率上位の銘柄を自動抽出してテーブルに並べるプログラムを作った。

好きなタイミングで実行できるので、タスクスケジューラやLINE Notifyと組み合わせて、指定時刻での銘柄監視の自動化に役立てたい。

前段：Seleniumを使って上昇率上位を抽出
後段：BeautifulSoupを使って下落率上位を抽出
※それぞれ作成したテーブルを指定ファイルに画像保存

from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep
import pandas as pd
import japanize_matplotlib
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

#selenium version
def jp_stock_raking_morning_ascending_selinium():
    """Scrape the Yahoo! Finance Japan gainers ranking with Selenium and save it as an image.

    Opens the ranking page in Chrome, collects the text of every ``<td>``
    element, regroups the flat list of text fragments into columns, and
    renders them as a matplotlib table written to ``JPEQRankUp.png`` in the
    current working directory.

    Returns:
        None. The only outputs are the saved PNG file and the (temporary)
        browser session.
    """
    # Imported here so the function works without touching the file-level
    # import block. Passing the driver path positionally to webdriver.Chrome
    # is deprecated in Selenium 4; Service is the supported way.
    from selenium.webdriver.chrome.service import Service

    url = 'https://info.finance.yahoo.co.jp/ranking/?kd=1&mk=2&tm=d&vl=a'

    # Download/locate a matching chromedriver and start the browser.
    browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        browser.get(url)

        # Fixed pause to give the page time to finish loading.
        sleep(4)

        # Flatten the text of every table cell into one list of fragments.
        # Element text must be read before the browser is closed.
        keys = []
        for elem_td in browser.find_elements(by=By.TAG_NAME, value="td"):
            # Strip the "(株)" marker first, then any remaining "株" character.
            text = elem_td.text.replace("(株)", "").replace("株", "")
            keys.extend(text.split("\n"))
    finally:
        # Always shut the browser down, even if loading or parsing fails,
        # so repeated scheduled runs do not pile up Chrome processes.
        browser.quit()

    # The fragments are read with a stride of 9 up to index 135 (9 x 15).
    # NOTE(review): this presumes each ranking row yields exactly 9 text
    # fragments and that 15 rows are wanted -- confirm against the live page.
    stride = 9
    stop = 135

    def column(offset):
        """Return every ``stride``-th fragment starting at ``offset``."""
        return keys[offset:stop:stride]

    # Company names are truncated to 7 characters so they fit in the table.
    df = pd.DataFrame({
        "証券コード": column(1),
        "企業": [company[:7] for company in column(0)],
        "価格": column(4),
        "時間": column(5),
        "価格差": column(6),
        "前日比": column(7),
        "出来高": column(8),
    })

    # Render the DataFrame as a full-figure table and save it as a PNG.
    fig, ax = plt.subplots(figsize=(10, 14))
    ax.axis('off')
    ax.axis('tight')
    table = ax.table(
        cellText=df.values,
        colLabels=df.columns,
        loc='center',
        bbox=[0, 0, 1, 1],
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    ax.set_title("東証上昇銘柄", fontsize=30)
    fig.savefig('JPEQRankUp.png')

#beautifulsoup version
def jp_stock_raking_morning_descending_beautifulsoup():
    """Scrape the Yahoo! Finance Japan losers ranking with BeautifulSoup and save it as an image.

    Fetches the ranking page with a plain HTTP GET, extracts each column by
    tag/attribute lookups, and renders the result as a matplotlib table
    written to ``JPEQRankDown.png`` in the current working directory.

    Returns:
        None. The only output is the saved PNG file.

    Raises:
        requests.exceptions.RequestException: if the request times out, the
            connection fails, or the server answers with an HTTP error status.
    """
    url = 'https://info.finance.yahoo.co.jp/ranking/?kd=2&mk=2&tm=d&vl=a'

    # A timeout keeps an unattended (scheduled) run from hanging forever,
    # and raise_for_status avoids silently parsing an error page.
    res = requests.get(url, timeout=10)
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'html.parser')

    def texts(tag, attrs):
        """Return the text of every ``tag`` element matching ``attrs``."""
        return [node.text for node in soup.find_all(tag, attrs=attrs)]

    # NOTE(review): the class names below look auto-generated by the site and
    # the slice strides encode how many matching elements appear per row;
    # both presumably break when the page markup changes -- confirm against
    # the live page before relying on the output.
    codes_li = texts('li', {'class': '_2AbqlM50'})[0:45:3]

    # Drop the "(株)" marker and truncate names to 7 characters to fit the table.
    names_a = [
        company.replace("(株)", "")[:7]
        for company in texts('a', {'data-ylk': 'slk:name;pos:0'})
    ][:15]

    # Price and price change share the same span class, so query it once and
    # pick them apart by offset within each group of 4.
    numeric_cells = texts('span', {'class': '_3rXWJKZF'})
    prices_span = numeric_cells[:60:4]
    changes_span = numeric_cells[1:60:4]

    times_span = texts('span', {'class': '_3Kcv5jz2'})[:15]
    percents_span = texts('span', {'class': '_1-yujUee Sc83af6i'})[1:30:2]
    volumes_span = texts('span', {'class': '_1-yujUee'})[3:60:4]

    df = pd.DataFrame({
        "証券コード": codes_li,
        "企業": names_a,
        "価格": prices_span,
        "時間": times_span,
        "価格差": changes_span,
        "前日比": percents_span,
        "出来高": volumes_span,
    })

    # Render the DataFrame as a full-figure table and save it as a PNG.
    fig, ax = plt.subplots(figsize=(10, 14))
    ax.axis('off')
    ax.axis('tight')
    table = ax.table(
        cellText=df.values,
        colLabels=df.columns,
        loc='center',
        bbox=[0, 0, 1, 1],
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    ax.set_title("東証下落銘柄", fontsize=30)
    fig.savefig('JPEQRankDown.png')

# Run both scrapers in order: gainers (Selenium) first, then losers (BeautifulSoup).
for _scraper in (
    jp_stock_raking_morning_ascending_selinium,
    jp_stock_raking_morning_descending_beautifulsoup,
):
    _scraper()

東証の変化率ランキングをスクレイピング【Python】【Selenium】【BeautifulSoup】

いいなと思ったら応援しよう！