# Export one timestamp per message known to `mu` (fields "d,m": presumably date and
# maildir -- confirm against the mu-find manual) and split them into two CSV files.
# Rows whose line matches /sent/ go to /tmp/sent.csv, every other row goes to
# /tmp/received.csv. `next` stops awk from also running the pattern-less second action
# on sent rows; without it every sent message was written to received.csv as well.
# LC_TIME=C forces C-locale date formatting for the exported dates.
!LC_TIME=C mu find -f "d,m" "" | awk -F, '/sent/{print $1>"/tmp/sent.csv"; next}{print $1>"/tmp/received.csv"}'


import pandas as pd


def get_mail_count(fn):
    """Count the timestamps stored in a CSV file per hour.

    Args:
        fn: Path (or anything ``pd.read_csv`` accepts) of a header-less CSV whose
            single column holds one parseable timestamp per row.

    Returns:
        A one-column DataFrame indexed by the timestamps floored to the hour and
        sorted chronologically; the column holds the number of rows per hour.
        Hours without any row do not appear in the result.
    """
    timestamps = pd.read_csv(fn, names=["date"], header=None, parse_dates=["date"])
    per_hour = timestamps["date"].dt.floor("h").value_counts()
    return per_hour.to_frame().sort_index()


# Hourly message counts built from the two CSV files written by the shell step above.
sent = get_mail_count("/tmp/sent.csv")
received = get_mail_count("/tmp/received.csv")
# Preview of the first rows (shown as the cell output when run in a notebook).
received.head()


import calendar
import datetime

import numpy as np
import plotly.graph_objs as go
import seaborn as sns
from plotly.subplots import make_subplots

def display_year(
    z,
    year: int,
    month_lines: bool = True,
    fig=None,
    row: int = None,
    title: str = None,
    palette: str = "rocket_r",
):
    """Draw a calendar heatmap for one year.

    Every day of ``year`` becomes one heatmap cell: the x coordinate is the week
    of the year, the y coordinate is the weekday (Monday first, axis reversed so
    Monday is on top) and the colour encodes the matching entry of ``z``.

    Args:
        z: Values to colour the cells with; expected to hold one entry per day
            of ``year`` in calendar order, starting on 1 January.
        year: Calendar year to lay out.
        month_lines: When true, draw thin grey separators along month boundaries.
        fig: Existing figure to draw into. A new figure is created when omitted.
        row: Zero-based subplot row of ``fig`` to draw into. Required when
            ``fig`` is given, ignored otherwise.
        title: Figure title.
        palette: Name of the seaborn palette used to build the colour scale.

    Returns:
        The figure holding the heatmap (``fig`` itself when one was passed in).

    Raises:
        ValueError: If ``fig`` is given without ``row``.
    """
    if fig is not None and row is None:
        raise ValueError("`row` is required when drawing into an existing `fig`")

    month_names = [calendar.month_abbr[i] for i in range(1, 13)]
    days_per_month = [calendar.monthrange(year, i)[1] for i in range(1, 13)]
    # Approximate week index of the middle of each month, used as x tick position.
    month_positions = (np.cumsum(days_per_month) - 15) / 7

    first_day = datetime.date(year, 1, 1)
    dates_in_year = [
        first_day + datetime.timedelta(days=offset) for offset in range(sum(days_per_month))
    ]
    weekdays_in_year = [day.weekday() for day in dates_in_year]

    weeknumber_of_dates = []
    for day in dates_in_year:
        # Index access instead of `.week` so this also runs on Python < 3.9.
        iso_week = day.isocalendar()[1]
        if iso_week >= 52 and day.month == 1:
            # Early January days that ISO assigns to the previous year's last week.
            weeknumber_of_dates.append(0)
        elif iso_week == 1 and day.month == 12:
            # Late December days that ISO assigns to week 1 of the next year.
            weeknumber_of_dates.append(53)
        else:
            weeknumber_of_dates.append(iso_week)

    data = [
        go.Heatmap(
            x=weeknumber_of_dates,
            y=weekdays_in_year,
            z=z,
            text=[str(day) for day in dates_in_year],
            hoverinfo="text+z",
            xgap=3,
            ygap=3,
            showscale=False,
            # Light grey for the lowest values, then the requested palette.
            colorscale=["#eeeeee"] + sns.color_palette(palette).as_hex(),
        )
    ]

    if month_lines:
        line_style = dict(
            mode="lines",
            line=dict(
                color="#9e9e9e",
                width=1,
            ),
            hoverinfo="skip",
        )

        for day, dow, wkn in zip(dates_in_year, weekdays_in_year, weeknumber_of_dates):
            if day.day != 1:
                continue
            # Left edge of the first day of the month, down to the bottom of the column.
            data.append(
                go.Scatter(
                    x=[wkn - 0.5, wkn - 0.5],
                    y=[dow - 0.5, 6.5],
                    **line_style,
                )
            )
            if dow:
                # The month starts mid-week: add the step over the first day and the
                # right edge of the days above it, which belong to the previous month.
                data.extend(
                    [
                        go.Scatter(
                            x=[wkn - 0.5, wkn + 0.5],
                            y=[dow - 0.5, dow - 0.5],
                            **line_style,
                        ),
                        go.Scatter(
                            x=[wkn + 0.5, wkn + 0.5],
                            y=[dow - 0.5, -0.5],
                            **line_style,
                        ),
                    ]
                )

    layout = go.Layout(
        title=title,
        height=250,
        yaxis=dict(
            showline=False,
            showgrid=False,
            zeroline=False,
            tickmode="array",
            ticktext=[calendar.day_abbr[i] for i in range(7)],
            tickvals=list(range(7)),
            autorange="reversed",
        ),
        xaxis=dict(
            showline=False,
            showgrid=False,
            zeroline=False,
            tickmode="array",
            ticktext=month_names,
            tickvals=month_positions,
        ),
        font={"size": 10, "color": "#9e9e9e"},
        plot_bgcolor="#fff",
        margin=dict(t=40),
        showlegend=False,
    )

    if fig is None:
        fig = go.Figure(data=data, layout=layout)
    else:
        # Subplot rows are 1-based in plotly while `row` is 0-based.
        fig.add_traces(data, rows=[row + 1] * len(data), cols=[1] * len(data))
        fig.update_layout(layout)
        fig.update_xaxes(layout["xaxis"])
        fig.update_yaxes(layout["yaxis"])

    return fig


def display_years(df, palette):
    """Stack one calendar heatmap per year found in ``df``.

    Args:
        df: Frame indexed by timestamps (a ``DatetimeIndex``) with a ``count``
            column. Several rows may fall on the same day (e.g. hourly counts);
            they are summed into one daily value.
        palette: Name of the seaborn palette forwarded to ``display_year``.

    Returns:
        A figure with one subplot row per distinct year of the index, in order
        of first appearance.

    Raises:
        ValueError: If ``df`` has no rows, since there is no year to draw.
    """
    years = df.index.year.unique().tolist()
    if not years:
        raise ValueError("df contains no rows to plot")

    fig = make_subplots(
        rows=len(years), cols=1, subplot_titles=years, vertical_spacing=0.2 / len(years)
    )
    for i, year in enumerate(years):
        in_year = df[df.index.year == year]
        data = np.zeros(365 + calendar.isleap(year))
        # Accumulate instead of assigning: with more than one row per day a plain
        # assignment would keep only the last row's count for that day.
        np.add.at(data, in_year.index.dayofyear.to_numpy() - 1, in_year["count"].to_numpy())
        display_year(data, year=year, fig=fig, row=i, palette=palette)

    # `display_year` resets the height to a single row on every call, so set the
    # total height once, after the last subplot has been drawn.
    fig.update_layout(height=250 * len(years))
    return fig


# Calendar heatmap of the received mails, one subplot row per year.
display_years(received, palette="Greens")


# Same view for the sent mails.
display_years(sent, palette="Blues")


# Make `DataFrame.plot` (used by the charts below) render through plotly.
pd.options.plotting.backend = "plotly"


def get_kde_resample(sample="D"):
    """Resample and smooth the ``received`` and ``sent`` count frames.

    Both frames are looked up by name in the module globals, summed per
    ``sample`` period, smoothed with a centred 50-period Gaussian rolling mean
    (``std=10``) and tagged with a ``flag`` column holding the frame's name.

    Args:
        sample: Resampling frequency passed to ``DataFrame.resample``.

    Returns:
        The two smoothed frames concatenated, ``received`` first.
    """
    smoothed = []
    for flag in ("received", "sent"):
        totals = globals()[flag].resample(sample).sum()
        window = totals.rolling(50, center=True, win_type="gaussian")
        smoothed.append(window.mean(std=10).assign(flag=flag))
    return pd.concat(smoothed)


df = get_kde_resample()

# Plot and layout options, kept as module-level names because later charts reuse them.
kwargs = {
    "y": "count",
    "color": "flag",
    "labels": {"count": "number of mails", "flag": ""},
    "template": "seaborn",
}
layout_kwargs = {
    "xaxis_title": None,
    "legend": {"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
}

fig = df.plot(**kwargs).update_layout(**layout_kwargs)

# Shade July and August of every year present in the data.
for year in df.index.year.unique():
    fig.add_vrect(x0=f"{year}-07-01", x1=f"{year}-08-31", fillcolor="grey", opacity=0.25)
fig


# Stack both hourly count frames into one, tagging each row with its origin in `flag`.
df = pd.concat([globals()[flag].assign(flag=flag) for flag in ["received", "sent"]])


# Total number of mails per year: one row per year, one column per flag.
by_year = (
    df.groupby(by=["flag", df.index.year])
    .sum()
    .unstack()  # move the year level into the columns
    .fillna(0)  # a flag with no rows in a given year counts as zero
    .astype(int)
    .transpose()  # years (under the "count" level) become the rows
    .droplevel(0)  # drop the "count" level so the index is the year alone
)
# Colour each column independently (axis=0) with a diverging palette.
by_year.style.background_gradient(axis=0, cmap=sns.diverging_palette(220, 20, as_cmap=True))


# Yearly totals as a line chart: melt `by_year` back to long form (one row per
# year/flag pair) so the shared `y="count"` / `color="flag"` options apply.
(
    pd.melt(by_year, value_name="count", ignore_index=False)
    .plot(**kwargs)
    .update_layout(**layout_kwargs)
)


# Mean of the hourly counts per time of day.
# NOTE(review): hours without any mail have no row in `df`, so they do not enter the
# mean -- confirm this is the intended statistic.
(
    df.groupby(by=["flag", df.index.time])
    .mean()
    .reset_index(level=0)
    .plot(**kwargs)
    .update_layout(**layout_kwargs)
)


# Mean of the hourly counts per day of the week, as a bar chart with the numeric
# day-of-week ticks (0-6) relabelled with day names.
(
    df.groupby(by=["flag", df.index.dayofweek])
    .mean()
    .reset_index(level=0)
    .plot.bar(**kwargs)
    .update_layout(
        xaxis=dict(
            title=None, tickvals=list(range(7)), ticktext=[calendar.day_name[i] for i in range(7)]
        ),
        **layout_kwargs
    )
)

	count
date
2005-04-06 15:00:00	2
2005-06-01 11:00:00	1
2005-07-04 17:00:00	1
2005-09-21 21:00:00	1
2005-09-27 14:00:00	1

flag	received	sent
date
2005	31	0
2006	252	0
2007	490	0
2008	1019	0
2009	3533	0
2010	4196	0
2011	6289	1198
2012	7559	799
2013	7601	0
2014	12446	2028
2015	13716	2315
2016	8586	1973
2017	10565	2533
2018	12196	2869
2019	12102	2798
2020	12514	2349
2021	7884	944
2022	7698	984
2023	5046	654

Getting the timestamps of every mail¶

Interactive chart with `plotly`¶

Linear visualization of the variation in the number of mails¶

Getting the timestamps of every mail¶

Interactive chart with plotly¶

Linear visualization of the variation in the number of mails¶

Interactive chart with `plotly`¶