"# Contiguous periods leave only a negligible gap between the end of one and the\n",
"# start of the next, so a gap above one second means at least one period is missing.\n",
"for p1, p2 in zip(sorted_data.index[:-1], sorted_data.index[1:]):\n",
"    delta = p2.to_timestamp() - p1.end_time\n",
"    if delta > pd.Timedelta('1s'):\n",
"        print(p1, p2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As expected, there is only one pair of consecutive periods separated by a gap of more than one second: it corresponds to the week that was removed from the dataset due to missing values.\n",
"Everything seems consistent, we can now analyze the sorted dataset."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**TODO:** Change week format\n",
"\n",
"# Data Analysis"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# One weekly period per year, each containing September 1st; the range stops\n",
"# before the year of the last observation in the sorted data.\n",
"first_september_week = [\n",
"    pd.Period(pd.Timestamp(yr, 9, 1), \"W\")\n",
"    for yr in range(1991, sorted_data.index[-1].year)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"year = []\n",
"yearly_incidence = []\n",
"for week1, week2 in zip(first_september_week[:-1],\n",