月次のアクセスデータを集計したり、しなかったり。


import pandas as pd

wkpath = r"C:\pythonwork\data\parquet\visitor.parquet"

# DAU (daily active users): one row per day with its visitor count,
# followed by one extra row holding the average of those daily counts.
df = pd.read_parquet(wkpath)
# Drop exact duplicate rows so a repeated record is only counted once.
# NOTE(review): this yields "distinct users per day" only if a row is fully
# identified by (id, day); if the file has further columns, dedupe on
# subset=['day', 'id'] instead -- confirm against the parquet schema.
df = df.drop_duplicates()
# Number of remaining rows per day -> columns: day, count.
df_sum = df.groupby(['day']).size().reset_index(name='count')
# Average the per-day counts. The mean must be taken over 'count'; averaging
# the 'day' column would report the mean of the day values themselves.
df_mean = df_sum[['count']].mean().reset_index(name='count')
# reset_index() adds an 'index' column holding the source column name;
# keep only the averaged value.
df_mean = df_mean[['count']]
# Append the average as a final row; its 'day' cell is left empty (NaN/NaT).
df_dau = pd.concat([df_sum, df_mean])
print(df_dau)

# MAU (monthly active users): number of distinct, non-null ids in the file.
# NOTE(review): presumably the file holds exactly one month of data -- confirm.
df = pd.read_parquet(wkpath)
# Keep a single row per id so that counting rows counts users.
dfnodup = df.drop_duplicates(subset=['id'])
# count() on the one-column frame gives a Series indexed by the column name.
id_total = dfnodup[['id']].count()
# Turn it into a one-row frame with columns: index, count.
df_mau = id_total.reset_index(name='count')
print(df_mau)

いいなと思ったら応援しよう!