From 2a256dd79b033e93f8e6f749e0ea1d497d158300 Mon Sep 17 00:00:00 2001
From: 7eba932125d7468e05c00632ef18215f
<7eba932125d7468e05c00632ef18215f@app-learninglab.inria.fr>
Date: Fri, 11 Jun 2021 15:53:03 +0000
Subject: [PATCH] =?UTF-8?q?Ajout=20Extrapol=20lin=C3=A9aire?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
module3/exo3/exercice.ipynb | 257 ++++++++++++++++++++----------------
1 file changed, 144 insertions(+), 113 deletions(-)
diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb
index c4cc194..07d0fa4 100644
--- a/module3/exo3/exercice.ipynb
+++ b/module3/exo3/exercice.ipynb
@@ -84,7 +84,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -206,7 +206,7 @@
"4 314.91 315.70 314.44 "
]
},
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -225,7 +225,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {
"scrolled": true
},
@@ -349,7 +349,7 @@
"6 315.07 317.51 314.70 "
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -374,7 +374,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -511,7 +511,7 @@
"1958-05 315.07 317.51 314.70 "
]
},
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -534,7 +534,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -549,12 +549,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Toutes les périodes sont bien renseignées. Quand il n'y a pas de données pour la période, la valeur -99.99 est entrée. Nous enlevons pour le moment ces valeurs. Mais avant cela, il faut convertir les valeurs de CO2 en données numériques:"
+ "Toutes les périodes sont bien renseignées. Quand il n'y a pas de données pour la période, la valeur -99.99 est entrée. Nous enlevons pour le moment ces valeurs. La colonne `'index'` est créée avant cette suppression pour tenir compte de l'espacement irrégulier des périodes restantes. Il faut aussi convertir les valeurs de CO2 en données numériques:"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -588,6 +588,7 @@
"
CO2_4 \n",
" C02_5 \n",
" CO2_6 \n",
+ " index \n",
" \n",
" \n",
" period \n",
@@ -601,6 +602,7 @@
" \n",
" \n",
" \n",
+ " \n",
" \n",
" \n",
" \n",
@@ -616,6 +618,7 @@
" 314.91 \n",
" 315.70 \n",
" 314.44 \n",
+ " 3.0 \n",
" \n",
" \n",
" 1958-04 \n",
@@ -629,6 +632,7 @@
" 314.99 \n",
" 317.45 \n",
" 315.16 \n",
+ " 4.0 \n",
" \n",
" \n",
" 1958-05 \n",
@@ -642,6 +646,7 @@
" 315.07 \n",
" 317.51 \n",
" 314.70 \n",
+ " 5.0 \n",
" \n",
" \n",
" 1958-07 \n",
@@ -655,6 +660,7 @@
" 315.22 \n",
" 315.86 \n",
" 315.19 \n",
+ " 7.0 \n",
" \n",
" \n",
" 1958-08 \n",
@@ -668,6 +674,7 @@
" 315.29 \n",
" 314.93 \n",
" 316.19 \n",
+ " 8.0 \n",
" \n",
" \n",
"\n",
@@ -682,16 +689,16 @@
"1958-07 1958 07 21381 1958.5370 315.86 315.19 315.86 \n",
"1958-08 1958 08 21412 1958.6219 314.93 316.19 313.99 \n",
"\n",
- " CO2_4 C02_5 CO2_6 \n",
- "period \n",
- "1958-03 314.91 315.70 314.44 \n",
- "1958-04 314.99 317.45 315.16 \n",
- "1958-05 315.07 317.51 314.70 \n",
- "1958-07 315.22 315.86 315.19 \n",
- "1958-08 315.29 314.93 316.19 "
+ " CO2_4 C02_5 CO2_6 index \n",
+ "period \n",
+ "1958-03 314.91 315.70 314.44 3.0 \n",
+ "1958-04 314.99 317.45 315.16 4.0 \n",
+ "1958-05 315.07 317.51 314.70 5.0 \n",
+ "1958-07 315.22 315.86 315.19 7.0 \n",
+ "1958-08 315.29 314.93 316.19 8.0 "
]
},
- "execution_count": 7,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -699,6 +706,7 @@
"source": [
"data_valuesonly = data.copy()\n",
"data_valuesonly['CO2'] = pd.to_numeric(data_valuesonly['CO2'])\n",
+ "data_valuesonly['index'] = np.linspace(1,len(data_valuesonly['CO2']), len(data_valuesonly['CO2']))\n",
"\n",
"periods_novalue = []\n",
"for i in data_valuesonly.index:\n",
@@ -710,16 +718,16 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 8,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
},
@@ -750,16 +758,16 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 9,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
@@ -785,7 +793,7 @@
"metadata": {},
"source": [
"## La contribution lente \n",
- "On veut maintenant extraire la contribution lente et l'extrapoler à 2025. Une première approche est une évolution linéaire à partir de l'année 2000."
+ "On veut extraire la contribution lente et l'extrapoler à 2025. Une première approche est une évolution linéaire à partir de 1958."
]
},
{
@@ -822,7 +830,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -840,16 +848,16 @@
" Model Family: Gaussian Df Model: 1 \n",
"\n",
"\n",
- " Link Function: identity Scale: 19.998 \n",
+ " Link Function: identity Scale: 20.521 \n",
" \n",
"\n",
- " Method: IRLS Log-Likelihood: -2195.3 \n",
+ " Method: IRLS Log-Likelihood: -2205.0 \n",
" \n",
"\n",
- " Date: Thu, 10 Jun 2021 Deviance: 15019. \n",
+ " Date: Fri, 11 Jun 2021 Deviance: 15412. \n",
" \n",
"\n",
- " Time: 13:48:15 Pearson chi2: 1.50e+04 \n",
+ " Time: 15:22:50 Pearson chi2: 1.54e+04 \n",
" \n",
"\n",
" No. Iterations: 3 Covariance Type: nonrobust \n",
@@ -860,10 +868,10 @@
" coef std err z P>|z| [0.025 0.975] \n",
" \n",
"\n",
- " Intercept 306.1259 0.326 938.290 0.000 305.486 306.765 \n",
+ " Intercept 305.3562 0.334 913.159 0.000 304.701 306.012 \n",
" \n",
"\n",
- " index 0.1329 0.001 177.232 0.000 0.131 0.134 \n",
+ " index 0.1326 0.001 174.904 0.000 0.131 0.134 \n",
" \n",
""
],
@@ -875,21 +883,21 @@
"Dep. Variable: CO2 No. Observations: 753\n",
"Model: GLM Df Residuals: 751\n",
"Model Family: Gaussian Df Model: 1\n",
- "Link Function: identity Scale: 19.998\n",
- "Method: IRLS Log-Likelihood: -2195.3\n",
- "Date: Thu, 10 Jun 2021 Deviance: 15019.\n",
- "Time: 13:48:15 Pearson chi2: 1.50e+04\n",
+ "Link Function: identity Scale: 20.521\n",
+ "Method: IRLS Log-Likelihood: -2205.0\n",
+ "Date: Fri, 11 Jun 2021 Deviance: 15412.\n",
+ "Time: 15:22:50 Pearson chi2: 1.54e+04\n",
"No. Iterations: 3 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
- "Intercept 306.1259 0.326 938.290 0.000 305.486 306.765\n",
- "index 0.1329 0.001 177.232 0.000 0.131 0.134\n",
+ "Intercept 305.3562 0.334 913.159 0.000 304.701 306.012\n",
+ "index 0.1326 0.001 174.904 0.000 0.131 0.134\n",
"==============================================================================\n",
"\"\"\""
]
},
- "execution_count": 11,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -898,19 +906,18 @@
"import statsmodels.api as sm\n",
"\n",
"data_valuesonly[\"Intercept\"]=1\n",
- "data_valuesonly['index'] = np.linspace(1,len(data_valuesonly['CO2']), len(data_valuesonly['CO2']))\n",
"logmodel=sm.GLM(data_valuesonly['CO2'], data_valuesonly[['Intercept','index']]).fit()\n",
"logmodel.summary()"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
- "image/png": "\n",
+ "image/png": "\n",
"text/plain": [
""
]
@@ -935,97 +942,121 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Avec cette prédiction, la teneur en CO2 dans l'atmosphère en avril 2025 serait de $$"
+ "Avec cette prédiction, la teneur en CO2 dans l'atmosphère en avril 2025 serait de $412\\ ppm$. On reste donc en dessous des dernières valeurs atteintes. Pour être plus réalistes, on veut maintenant estimer la teneur en CO2 avec une approximation linéaire à partir de l'an 2000 (index 505 pour le mois de janvier 2000) au vu de la croissance plus rapide sur les dernières années:"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Intercept \n",
- " index \n",
- " CO2 \n",
- " period \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 748 \n",
- " 1 \n",
- " 749.0 \n",
- " 405.648073 \n",
- " 2020-12 \n",
- " \n",
- " \n",
- " 749 \n",
- " 1 \n",
- " 750.0 \n",
- " 405.780947 \n",
- " 2021-01 \n",
- " \n",
- " \n",
- " 750 \n",
- " 1 \n",
- " 751.0 \n",
- " 405.913820 \n",
- " 2021-02 \n",
- " \n",
- " \n",
- " 751 \n",
- " 1 \n",
- " 752.0 \n",
- " 406.046693 \n",
- " 2021-03 \n",
- " \n",
- " \n",
- " 752 \n",
- " 1 \n",
- " 753.0 \n",
- " 406.179567 \n",
- " 2021-04 \n",
- " \n",
- " \n",
+ "\n",
+ "Generalized Linear Model Regression Results \n",
+ "\n",
+ " Dep. Variable: CO2 No. Observations: 248 \n",
+ " \n",
+ "\n",
+ " Model: GLM Df Residuals: 246 \n",
+ " \n",
+ "\n",
+ " Model Family: Gaussian Df Model: 1 \n",
+ " \n",
+ "\n",
+ " Link Function: identity Scale: 5.4151 \n",
+ " \n",
+ "\n",
+ " Method: IRLS Log-Likelihood: -560.35 \n",
+ " \n",
+ "\n",
+ " Date: Fri, 11 Jun 2021 Deviance: 1332.1 \n",
+ " \n",
+ "\n",
+ " Time: 15:34:11 Pearson chi2: 1.33e+03 \n",
+ " \n",
+ "\n",
+ " No. Iterations: 3 Covariance Type: nonrobust \n",
+ " \n",
"
\n",
- ""
+ "\n",
+ "\n",
+ " coef std err z P>|z| [0.025 0.975] \n",
+ " \n",
+ "\n",
+ " Intercept 272.7958 1.322 206.343 0.000 270.205 275.387 \n",
+ " \n",
+ "\n",
+ " index 0.1866 0.002 90.406 0.000 0.183 0.191 \n",
+ " \n",
+ "
"
],
"text/plain": [
- " Intercept index CO2 period\n",
- "748 1 749.0 405.648073 2020-12\n",
- "749 1 750.0 405.780947 2021-01\n",
- "750 1 751.0 405.913820 2021-02\n",
- "751 1 752.0 406.046693 2021-03\n",
- "752 1 753.0 406.179567 2021-04"
+ "\n",
+ "\"\"\"\n",
+ " Generalized Linear Model Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: CO2 No. Observations: 248\n",
+ "Model: GLM Df Residuals: 246\n",
+ "Model Family: Gaussian Df Model: 1\n",
+ "Link Function: identity Scale: 5.4151\n",
+ "Method: IRLS Log-Likelihood: -560.35\n",
+ "Date: Fri, 11 Jun 2021 Deviance: 1332.1\n",
+ "Time: 15:34:11 Pearson chi2: 1.33e+03\n",
+ "No. Iterations: 3 Covariance Type: nonrobust\n",
+ "==============================================================================\n",
+ " coef std err z P>|z| [0.025 0.975]\n",
+ "------------------------------------------------------------------------------\n",
+ "Intercept 272.7958 1.322 206.343 0.000 270.205 275.387\n",
+ "index 0.1866 0.002 90.406 0.000 0.183 0.191\n",
+ "==============================================================================\n",
+ "\"\"\""
]
},
- "execution_count": 17,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "data_pred.tail()"
+ "logmodel=sm.GLM(data_valuesonly['CO2'][505:], data_valuesonly[['Intercept','index']][505:]).fit()\n",
+ "logmodel.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "data_pred = pd.DataFrame({'index': data_valuesonly['index'][505:],\n",
+ " 'Intercept': 1})\n",
+ "data_pred['CO2'] = logmodel.predict(data_pred[['Intercept','index']])\n",
+ "data_pred['period'] = data_valuesonly.index[505:]\n",
+ "data_pred.plot(x=\"period\",y=\"CO2\",kind='line',color='r')\n",
+ "plt.scatter(x=data_valuesonly.index,y = data_valuesonly[\"CO2\"])\n",
+ "plt.grid(True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Cette fois, la teneur en CO2 en avril 2025 est estimée à $422\\ ppm$. "
]
},
{
--
2.18.1