【Python】YoutubeAPIでYoutubeのバズった動画を分析、調査しCSV出力する方法

Python専門データ収集/スクレイピング【Sarukyo】

2024年3月19日 16:58

基本の仕組み

YouTubeでは、すでに登録者数が多いチャンネルの動画は、必然的にファンが多いため再生回数が伸びやすいです。

そこで、チャンネル登録者数が少なく、チャンネルの再生回数もまだあまりないチャンネルから「○○万再生以上の動画がでた」

これはバズ動画だと思います。自分の興味のあるジャンルや、これから始めてみたいカテゴリーの動画を収集するのに役立ててください。

準備

googleアカウント
youtube API

以前、Google Sheets APIをGoogle Cloudで取得しましたが、やり方は非常に似ています。

ググってもYoutubeAPIの取得方法は出てきますので参考にしてみてください。

import urllib.request
import urllib.parse
import requests
import json
import csv
import datetime
import logging
import os

from api import variable

# -----------------------------------------------------------
# Logging setup
# -----------------------------------------------------------
# Messages of level INFO and above go to "youtube_short.log" in the same
# directory as this script; the file is truncated on every run (filemode "w").
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s:%(message)s",
    filename=os.path.join(os.path.dirname(__file__), "youtube_short.log"),
    filemode="w",
    encoding="utf-8",
)

# -----------------------------------------------------------
# Run-scoped state shared with main()
# -----------------------------------------------------------
# Timestamp taken at start-up, formatted as YYYYMMDD_HHMMSS; used in the CSV file name.
dt_now = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"

# Videos accepted so far, and the URLs already written (used to skip duplicates).
short_buzz_lists_count, dobled_list = [], []

# Header rows for the CSV; kept as a list of rows because it is passed to writerows().
csv_outputs_header = [
    [
        'title',
        'description',
        'viewCount',
        'publishedAt',
        'thumbnail',
        'video_url',
        'name',
        'subscriberCount',
        'channel_url',
    ],
]

# -----------------------------------------------------------
# YouTube Data API settings
# -----------------------------------------------------------
# API key, read from the local `api.variable` module.
APIKEY = variable.youtube_api_key
# Alternatively, assign the key string directly:
# APIKEY = "049i12jiu9hfa9iurfy89qjh4512u4gyt89eijd"

# --- Endpoints ---
_API_ROOT = "https://www.googleapis.com/youtube/v3"
SEARCH_URL = f"{_API_ROOT}/search"
VIDEO_URL = f"{_API_ROOT}/videos"
CHANNEL_URL = f"{_API_ROOT}/channels"

# --- Search filters sent to the search endpoint ---
key_word = ""
regionCode = "JP"
publishedAfter = "2024-01-14T00:00:00.000Z"
publishedBefore = "2024-03-13T00:00:00.000Z"
videoCategoryId = ""  # only added to the request when non-empty
channelId = ""

# --- Thresholds applied by main() before a row is written ---
# Keep videos whose view count is at least this value.
view_number = 10_000
# Keep channels whose subscriber count is at most this value.
subscribercount_level = 10
# Number of collected videos that main() compares against to stop collecting.
short_video_count = 10
# Upper bound for the channel-level check in main(); the original note describes
# it as the channel's total view count (default: 100 million).
ChannelVideoCount = 100_000_000

# True collects Shorts; set to False to collect regular videos instead.
YOUTUBE_SHORT_FLAG = True
# -----------------------------------------------------------
# End of YouTube Data API settings
# -----------------------------------------------------------

def _request_json(url, params, timeout=30):
    """Send a GET request and return the decoded JSON body.

    Raises:
        requests.RequestException: on connection errors, time-outs and
            non-2xx responses (for example an exhausted API quota).
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(url, params=params, timeout=timeout)
    # requests.get() does not raise on HTTP error statuses by itself.
    response.raise_for_status()
    return response.json()


def _search_page(page_token):
    """Fetch one page (up to 50 hits) of search results for the configured filters."""
    params = {
        'part': 'snippet',
        'q': key_word,
        'regionCode': regionCode,
        'maxResults': 50,
        'order': 'viewCount',
        'publishedAfter': publishedAfter,
        'publishedBefore': publishedBefore,
        'type': 'video',
        'channelId': channelId,
        'videoCategoryId': videoCategoryId,
        'pageToken': page_token,
        'key': APIKEY,
    }
    # Optional filters left empty in the settings are not sent at all.
    params = {name: value for name, value in params.items() if value not in ('', None)}
    return _request_json(SEARCH_URL, params)


def _fetch_items_by_id(url, ids):
    """Return ``{resource id: resource}`` for the given video or channel ids."""
    unique_ids = list(dict.fromkeys(ids))  # drop duplicates, keep order
    if not unique_ids:
        return {}
    body = _request_json(url, {
        'part': 'snippet,statistics',
        'id': ",".join(unique_ids),
        'key': APIKEY,
    })
    return {item['id']: item for item in body.get('items', [])}


def _build_record(video, channel):
    """Merge a video resource and its channel resource into one flat record.

    Returns None when the channel does not expose its subscriber count,
    because the subscriber threshold cannot be evaluated in that case.
    """
    channel_stats = channel.get('statistics', {})
    if 'subscriberCount' not in channel_stats:
        return None
    snippet = video['snippet']
    return {
        'videoId': video['id'],
        'channelId': channel['id'],
        'title': snippet['title'],
        'description': snippet['description'],
        # The view count can be missing from the statistics; treat it as 0.
        'viewCount': video.get('statistics', {}).get('viewCount', 0),
        'publishedAt': snippet['publishedAt'],
        'thumbnails': snippet.get('thumbnails', {}).get('high', {}).get('url', ''),
        'video_id': video['id'],
        'name': channel['snippet']['title'],
        'subscriberCount': channel_stats['subscriberCount'],
        'videoCount': channel_stats.get('videoCount', 0),
        # Total views of the channel; this is what ChannelVideoCount limits.
        'channelViewCount': channel_stats.get('viewCount', 0),
        'channel_url': 'https://www.youtube.com/channel/' + channel['id'],
    }


def _meets_thresholds(record):
    """Check the view, subscriber and channel-size limits from the settings."""
    return (int(record['viewCount']) >= view_number
            and int(record['subscriberCount']) <= subscribercount_level
            and int(record['channelViewCount']) <= ChannelVideoCount)


def _is_short(video_id, timeout=30):
    """Return True when the /shorts/ URL of the video is served without a redirect."""
    response = requests.get('https://www.youtube.com/shorts/' + video_id, timeout=timeout)
    return not response.history


def main(csv_path):
    """Search YouTube for "buzz" videos and write the matches to a CSV file.

    Pages through the search results (ordered by view count), looks up the
    details of every hit and of its channel, and writes one CSV row for each
    video that passes the configured thresholds and matches
    YOUTUBE_SHORT_FLAG. Collection stops once ``short_video_count`` videos
    have been written, when the search has no further page, or when an API
    request fails (the failure is logged).

    Accepted records are also appended to the module-level
    ``short_buzz_lists_count`` list and their URLs to ``dobled_list``.

    Args:
        csv_path: Path of the CSV file to create (overwritten if it exists).
    """
    want_shorts = bool(YOUTUBE_SHORT_FLAG)
    url_prefix = ('https://www.youtube.com/shorts/' if want_shorts
                  else 'https://www.youtube.com/watch?v=')
    nextPageToken = ""
    with open(csv_path, 'w', newline='', encoding='UTF-8') as f:
        writer = csv.writer(f)
        writer.writerows(csv_outputs_header)
        while len(short_buzz_lists_count) < short_video_count:
            try:
                search_body = _search_page(nextPageToken)
                hits = [(item['id']['videoId'], item['snippet']['channelId'])
                        for item in search_body.get('items', [])]
                # Key the details by id: the API may omit or reorder items, so
                # matching them to the search hits by position is not reliable.
                videos = _fetch_items_by_id(VIDEO_URL, [video_id for video_id, _ in hits])
                channels = _fetch_items_by_id(CHANNEL_URL, [channel_id for _, channel_id in hits])
            except (requests.RequestException, ValueError, KeyError) as e:
                logging.exception(e)
                break

            for video_id, channel_id in hits:
                video = videos.get(video_id)
                channel = channels.get(channel_id)
                if video is None or channel is None:
                    continue
                try:
                    record = _build_record(video, channel)
                except KeyError as e:
                    # One malformed item should not abort the whole run.
                    logging.exception(e)
                    continue
                if record is None or not _meets_thresholds(record):
                    continue

                video_url = url_prefix + record['video_id']
                if video_url in dobled_list:
                    continue
                try:
                    if _is_short(record['video_id']) != want_shorts:
                        continue
                except requests.RequestException as e:
                    logging.exception(e)
                    continue

                short_buzz_lists_count.append(record)
                writer.writerow([record['title'], record['description'], record['viewCount'],
                                 record['publishedAt'], record['thumbnails'], video_url,
                                 record['name'], record['subscriberCount'], record['channel_url']])
                # Remember the URL so the same video is not written twice.
                dobled_list.append(video_url)
                print(len(short_buzz_lists_count), "個目の動画を見つけました。")
                if len(short_buzz_lists_count) >= short_video_count:
                    # Target reached: stop paging as well, not just this page.
                    return

            # Stop when the search has no further page.
            nextPageToken = search_body.get('nextPageToken', '')
            if not nextPageToken:
                logging.warning("NextPageTokenがありません。指定した条件では取得できる動画が少ないです。")
                break
            
if __name__ == "__main__":
    # The CSV is written next to this script, prefixed with the start-up timestamp.
    output_name = f"{dt_now}_youtube-short_popular_list.csv"
    main(os.path.join(os.path.dirname(__file__), output_name))

取得する動画を指定する

# API key, read from the local `api.variable` module.
APIKEY = variable.youtube_api_key
# Alternatively, assign the key string directly:
# APIKEY = "049i12jiu9hfa9iurfy89qjh4512u4gyt89eijd"
key_word = ''
regionCode = 'JP'
publishedAfter = '2024-01-14T00:00:00.000Z'
publishedBefore = '2024-03-13T00:00:00.000Z'
view_number = 10000
# Channel subscriber count: at most this many
subscribercount_level = 10
short_video_count = 10
videoCategoryId = ''
# Total view count of the channel's videos: at most this many (default: 100 million)
ChannelVideoCount = 100000000
channelId = ''
SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
VIDEO_URL = "https://www.googleapis.com/youtube/v3/videos"
CHANNEL_URL = "https://www.googleapis.com/youtube/v3/channels"
# Set to False to collect regular videos; leave as True to collect Shorts
YOUTUBE_SHORT_FLAG = True

# API key, read from the local `api.variable` module.
APIKEY = variable.youtube_api_key
# Alternatively, assign the key string directly:
# APIKEY = "049i12jiu9hfa9iurfy89qjh4512u4gyt89eijd"

まずはAPIKEYを上記の部分に上書きします。

view_number = 10000
# Channel subscriber count: at most this many
subscribercount_level = 10
short_video_count = 10

view_numberを指定すると、再生回数が○○回以上の動画に絞り込みます。
subscribercount_levelはチャンネル登録者数の上限で、登録者数が○○人以下のチャンネルを探します。
short_video_countは取得する動画数です。

# Total view count of the channel's videos: at most this many (default: 100 million)
ChannelVideoCount = 100000000

ChannelVideoCountは、チャンネルの動画再生総数の上限です（この値以下のチャンネルが対象になります）。1億回以上も再生されているチャンネルは大きすぎですよね？

ショート動画の取得

# Set to False to collect regular videos; leave as True to collect Shorts
YOUTUBE_SHORT_FLAG = True

Trueとなっている場合は、ショート動画のみを取得。Falseと書き換えると普通の尺の動画を取得します。

最近ではショート動画をやってみたい方など増えていると思いますので、別々で取得できるようにしました。