バヌアツの法則をコードインタープリターで確かめてみる コード編
こちらでは、バヌアツの法則を確かめるためにPythonが生成してくれたコードを示します。
ちなみに、本編はこちら。
バヌアツの法則をコードインタープリター で確かめてみる|usamix (note.com)
ChatGPTとのやりとりは、以下から参照できます。
国名の分類処理
https://chat.openai.com/share/e7b9c990-db92-45ea-9542-2ec67a812bd6
バヌアツの法則検証
https://chat.openai.com/share/f75edf77-618d-4d85-a26e-0ea03b6f8fa0
データはUSGSのサイトからダウンロードしました。
Search Earthquake Catalog (usgs.gov)
以下は、コードの主要部分を抜粋したものです。
前処理
# Convert the time column to datetime format
df['time'] = pd.to_datetime(df['time'])
# Extract only earthquakes in Vanuatu and Japan
df_vanuatu = df[df['countries'] == 'Vanuatu']
df_japan = df[df['countries'] == 'Japan']
# Compute the number of days between the first and last earthquake
total_days = (df['time'].max() - df['time'].min()).days
df_vanuatu.head(), df_japan.head(), total_days
頻度計算
import numpy as np
from datetime import timedelta
# Define a function to count the number of earthquakes in Japan within 2 weeks after each earthquake in Vanuatu
def count_earthquakes_in_japan_within_2_weeks(row):
start_time = row['time']
end_time = start_time + timedelta(weeks=2)
return ((df_japan['time'] >= start_time) & (df_japan['time'] <= end_time)).sum()
# Apply the function to each row in the Vanuatu earthquake data
df_vanuatu['num_japan_earthquakes_within_2_weeks'] = df_vanuatu.apply(count_earthquakes_in_japan_within_2_weeks, axis=1)
# Calculate the average number of earthquakes per day in Japan within 2 weeks after each earthquake in Vanuatu
avg_num_japan_earthquakes_per_day_after_vanuatu = df_vanuatu['num_japan_earthquakes_within_2_weeks'].mean() / 14
# Calculate the average number of earthquakes per day in Japan over the total period
avg_num_japan_earthquakes_per_day_total = df_japan.shape[0] / total_days
avg_num_japan_earthquakes_per_day_after_vanuatu, avg_num_japan_earthquakes_per_day_total
グラフ表示
import matplotlib.pyplot as plt
# Create the figure and the axes
fig, ax = plt.subplots()
# The labels for the bars
labels = ['After Vanuatu Earthquake', 'Total Period']
# The values for the bars
values = [avg_num_japan_earthquakes_per_day_after_vanuatu, avg_num_japan_earthquakes_per_day_total]
# Create the bar chart
bars = ax.bar(labels, values, color=['blue', 'orange'])
# Add the value of each bar on top
for bar in bars:
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width() / 2, height,
round(height, 4), ha='center', va='bottom')
# Set the title and the labels
ax.set_title('Average Number of Earthquakes per Day in Japan')
ax.set_ylabel('Average Number of Earthquakes per Day')
# Show the plot
plt.show()
2011年のデータの削除
# Remove rows with earthquakes that happened in 2011
df_no_2011 = df[df['time'].dt.year != 2011]
df_no_2011.head(), df_no_2011.shape
頻度の比較(2011年を除く)
# Extract only earthquakes in Vanuatu and Japan from the data without 2011
df_no_2011_vanuatu = df_no_2011[df_no_2011['countries'] == 'Vanuatu']
df_no_2011_japan = df_no_2011[df_no_2011['countries'] == 'Japan']
# Compute the number of days between the first and last earthquake excluding 2011
total_days_no_2011 = total_days - 365 # subtract the number of days in 2011
# Apply the function to each row in the Vanuatu earthquake data excluding 2011
df_no_2011_vanuatu['num_japan_earthquakes_within_2_weeks'] = df_no_2011_vanuatu.apply(count_earthquakes_in_japan_within_2_weeks, axis=1)
# Calculate the average number of earthquakes per day in Japan within 2 weeks after each earthquake in Vanuatu excluding 2011
avg_num_japan_earthquakes_per_day_after_vanuatu_no_2011 = df_no_2011_vanuatu['num_japan_earthquakes_within_2_weeks'].mean() / 14
# Calculate the average number of earthquakes per day in Japan over the total period excluding 2011
avg_num_japan_earthquakes_per_day_total_no_2011 = df_no_2011_japan.shape[0] / total_days_no_2011
avg_num_japan_earthquakes_per_day_after_vanuatu_no_2011, avg_num_japan_earthquakes_per_day_total_no_2011
逆計算(日本→バヌアツ)
# Define a function to count the number of earthquakes in Vanuatu within 2 weeks after each earthquake in Japan
def count_earthquakes_in_vanuatu_within_2_weeks(row):
start_time = row['time']
end_time = start_time + timedelta(weeks=2)
return ((df_no_2011_vanuatu['time'] >= start_time) & (df_no_2011_vanuatu['time'] <= end_time)).sum()
# Apply the function to each row in the Japan earthquake data excluding 2011
df_no_2011_japan['num_vanuatu_earthquakes_within_2_weeks'] = df_no_2011_japan.apply(count_earthquakes_in_vanuatu_within_2_weeks, axis=1)
# Calculate the average number of earthquakes per day in Vanuatu within 2 weeks after each earthquake in Japan excluding 2011
avg_num_vanuatu_earthquakes_per_day_after_japan_no_2011 = df_no_2011_japan['num_vanuatu_earthquakes_within_2_weeks'].mean() / 14
# Calculate the average number of earthquakes per day in Vanuatu over the total period excluding 2011
avg_num_vanuatu_earthquakes_per_day_total_no_2011 = df_no_2011_vanuatu.shape[0] / total_days_no_2011
avg_num_vanuatu_earthquakes_per_day_after_japan_no_2011, avg_num_vanuatu_earthquakes_per_day_total_no_2011
国別の頻度計算
# Define a function to count the number of earthquakes in Japan within 2 weeks after each earthquake in a specific country
def count_earthquakes_in_japan_within_2_weeks_for_country(df_country):
df_country['num_japan_earthquakes_within_2_weeks'] = df_country.apply(count_earthquakes_in_japan_within_2_weeks, axis=1)
avg_num_japan_earthquakes_per_day_after_country = df_country['num_japan_earthquakes_within_2_weeks'].mean() / 14
return avg_num_japan_earthquakes_per_day_after_country
# List of countries
countries = df_no_2011['countries'].unique()
# Dictionary to store the average number of earthquakes per day in Japan within 2 weeks after each earthquake in each country
avg_num_japan_earthquakes_per_day_after_each_country = {}
# For each country, calculate the average number of earthquakes per day in Japan within 2 weeks after each earthquake in that country
for country in countries:
df_country = df_no_2011[df_no_2011['countries'] == country]
if df_country.shape[0] >= 50: # only consider countries with at least 50 earthquakes
avg_num_japan_earthquakes_per_day_after_each_country[country] = count_earthquakes_in_japan_within_2_weeks_for_country(df_country)
# Sort the dictionary by the average number of earthquakes
sorted_avg_num_japan_earthquakes_per_day_after_each_country = dict(sorted(avg_num_japan_earthquakes_per_day_after_each_country.items(), key=lambda item: item[1], reverse=True))
sorted_avg_num_japan_earthquakes_per_day_after_each_country
国別グラフ表示
# Create the figure and the axes
fig, ax = plt.subplots(figsize=(8, 6))
# Reverse the labels and the values for the bars to have the highest values on top
labels_no_2011_countries.reverse()
values_no_2011_countries.reverse()
# Create the horizontal bar chart
bars_no_2011_countries = ax.barh(labels_no_2011_countries, values_no_2011_countries, color='blue')
# Add the value of each bar next to it
for bar in bars_no_2011_countries:
width = bar.get_width()
ax.text(width, bar.get_y() + bar.get_height() / 2,
round(width, 4), ha='left', va='center')
# Set the title and the labels
ax.set_title('Average Number of Earthquakes per Day in Japan After Each Country\'s Earthquake (Excluding 2011)')
ax.set_xlabel('Average Number of Earthquakes per Day')
# Show the plot
plt.show()
ランダムな2週間をとった場合のシミュレーション
# Define a function to generate random 2-week intervals excluding 2011 and calculate the frequency of earthquakes in Japan during these intervals
def simulate_random_intervals_and_calculate_frequency(df_japan, num_intervals, num_simulations):
# Define the start and end dates of the period excluding 2011
start_date = pd.to_datetime('2003-07-29')
end_date_2010 = pd.to_datetime('2010-12-31')
start_date_2012 = pd.to_datetime('2012-01-01')
end_date = pd.to_datetime('2023-07-30')
# Calculate the number of days in these periods
num_days_2003_2010 = (end_date_2010 - start_date).days
num_days_2012_2023 = (end_date - start_date_2012).days
# Initialize a list to store the average frequency of earthquakes in Japan during the random intervals in each simulation
avg_frequencies = []
# Perform the simulations
for _ in range(num_simulations):
# Initialize a list to store the number of earthquakes in Japan during each random interval
num_earthquakes = []
# Generate the random intervals and count the number of earthquakes in Japan during these intervals
for _ in range(num_intervals):
# Randomly choose a start day from 2003 to 2010 or from 2012 to 2023
if np.random.rand() < num_days_2003_2010 / (num_days_2003_2010 + num_days_2012_2023):
start_day = np.random.randint(num_days_2003_2010)
start_time = start_date + timedelta(days=start_day)
else:
start_day = np.random.randint(num_days_2012_2023)
start_time = start_date_2012 + timedelta(days=start_day)
# The end time is 2 weeks after the start time
end_time = start_time + timedelta(weeks=2)
# Count the number of earthquakes in Japan during this interval and add it to the list
num_earthquakes.append(((df_japan['time'] >= start_time) & (df_japan['time'] <= end_time)).sum())
# Calculate the average frequency of earthquakes in Japan during the random intervals and add it to the list
avg_frequencies.append(np.mean(num_earthquakes) / 14)
return avg_frequencies
# Perform the simulation and calculate the average frequency of earthquakes in Japan during random 2-week intervals excluding 2011
avg_frequencies = simulate_random_intervals_and_calculate_frequency(df_no_2011_japan, num_earthquakes_vanuatu_no_2011, 1000)
# Calculate the percentiles
percentiles = np.percentile(avg_frequencies, [5, 25, 75, 95])
avg_frequencies, percentiles
修正パート
# Remove timezone information from the 'time' column in df_no_2011_japan
df_no_2011_japan['time'] = df_no_2011_japan['time'].dt.tz_convert(None)
# Perform the simulation again and calculate the average frequency of earthquakes in Japan during random 2-week intervals excluding 2011
avg_frequencies = simulate_random_intervals_and_calculate_frequency(df_no_2011_japan, num_earthquakes_vanuatu_no_2011, 1000)
# Calculate the percentiles
percentiles = np.percentile(avg_frequencies, [5, 25, 75, 95])
avg_frequencies, percentiles
グラフ表示
# Plot histogram of average frequencies
plt.figure(figsize=(10, 6))
plt.hist(avg_frequencies, bins=20, edgecolor='black')
plt.axvline(percentiles[0], color='r', linestyle='dashed', linewidth=1, label='5% percentile')
plt.axvline(percentiles[1], color='orange', linestyle='dashed', linewidth=1, label='25% percentile')
plt.axvline(percentiles[2], color='blue', linestyle='dashed', linewidth=1, label='75% percentile')
plt.axvline(percentiles[3], color='purple', linestyle='dashed', linewidth=1, label='95% percentile')
plt.title('Distribution of Average Earthquake Frequencies in Japan (Excluding 2011)')
plt.xlabel('Average Frequency')
plt.ylabel('Count')
plt.legend()
plt.show()
この記事が気に入ったらサポートをしてみませんか?