爆サイのレス投稿時刻のヒストグラム

2024年12月8日 20:30

とりあえず。これで。

import requests
from bs4 import BeautifulSoup
import datetime
from urllib.parse import urljoin
from datetime import datetime
from collections import Counter

# BASE_URL = "https://bakusai.com/thr_res/acode=3/ctgid=103/bid=416/tid=12316263/tp=1/rw=1/" # 最新
BASE_URL = "https://bakusai.com/thr_res/acode=3/ctgid=103/bid=416/tid=12301117/tp=1/rw=1/" # 前のスレ

# https://bakusai.com/thr_res/acode=3/ctgid=103/bid=416/tid=12316263/p=2/tp=1/rw=1/

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
}

def main():
    all_hour_list = []
    for i in range(1,21):
        url = f"{BASE_URL}p={i}/"
        # print(f"retriving on {url}")
        # all_hour_list.append(get_post_times(url))
        all_hour_list = all_hour_list + get_post_times(url)
        
        print("final result")
        print(all_hour_list)
        
        counter = Counter(all_hour_list)
        
        for num, count in sorted(counter.items()):
            print(f'{num} : {"." * count}')
    
        

def get_post_times(url):
    response = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(response.text, 'html.parser')
    elements = soup.select('dt > div > span[itemprop="commentTime"]')
    
    page_hour_list = []
    for element in elements:
        
        time_obj = datetime.strptime(element.text, '%Y/%m/%d %H:%M')
        hour = time_obj.hour
        page_hour_list.append(hour)
    
    print(page_hour_list)
    return page_hour_list



if __name__ == "__main__":
    main()

1Round前スレ

Cute前スレ

cute前スレ分との相関係数。

hour 1r cute
0 26 12
1 13 15
2 9 5
3 6 4
4 6 2
5 10 2
6 27 3
7 30 21
8 104 36
9 83 29
10 44 13
11 38 22
12 55 71
13 29 28
14 33 70
15 47 35
16 38 33
17 21 79
18 45 127
19 50 103
20 60 81
21 98 50
22 67 93
23 45 23

1rとcuteの相関係数rは、r=0.408でした。

爆サイのレス投稿時刻のヒストグラム

いいなと思ったら応援しよう！