diff --git a/module3/exo3/exercice_en.ipynb b/module3/exo3/exercice_en.ipynb index 1c05f032e7fc74c819a754ad03d601187c14f444..4ed08fb3214812e57e093275ef16ceeee759ab8d 100644 --- a/module3/exo3/exercice_en.ipynb +++ b/module3/exo3/exercice_en.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -127,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -221,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -255,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -288,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -326,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -383,7 +383,7 @@ "source": [ "# Slow contribution. 
Creating the graph to show the average CO2 concentration per year over time\n", "plt.figure(figsize=(10, 6))\n", - "plt.plot(annual_mean_co2.index, annual_mean_co2.values, marker='o', linestyle='-', label='Mean CO2 Concentration per Year Over Time')\n", + "plt.plot(annual_mean_co2.index, annual_mean_co2.values, label='Mean CO2 Concentration per Year Over Time', marker='o', linestyle='-')\n", "plt.title('Mean CO2 Concentration per Year Over Time')\n", "plt.xlabel('Year')\n", "plt.ylabel('CO2 Concentration (ppm)')\n", @@ -402,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -425,9 +425,16 @@ "print(data.head())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A graph is made with the oscilation of the CO2 concentration over the time" + ] + }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -444,7 +451,7 @@ } ], "source": [ - "# Periodic oscillation. Creation of the graph to monitor the oscillation of the CO2 concentration over the time\n", + "# Periodic oscillation. Creation of the graph to show the oscillation of the CO2 concentration over the time\n", "plt.figure(figsize=(10, 6))\n", "plt.plot(data['Date'], data['Oscilation'], label='Oscilation CO2 Concentration Over Time')\n", "plt.title('Oscilation CO2 Concentration Over Time')\n", @@ -454,6 +461,13 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For a better visualization of the periodic oscillation, only the last 300 rows of the table are graphed." + ] + }, { "cell_type": "code", "execution_count": 41, @@ -473,7 +487,7 @@ } ], "source": [ - "# Periodic oscillation. Creation of the graph to monitor the oscillation of the CO2 concentration over the time (last 300 rows of the table)\n", + "# Periodic oscillation. 
Creation of the graph to show the oscillation of the CO2 concentration over the time (last 300 rows of the table)\n", "plt.figure(figsize=(10, 6))\n", "plt.plot(data['Date'][-300:], data['Oscilation'][-300:], label='Oscilation CO2 Concentration Over Time')\n", "plt.title('Oscilation CO2 Concentration Over Time')\n", @@ -494,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -521,28 +535,28 @@ } ], "source": [ - "# Contribution lente. Modéliser une évolution lente en utilisant la régression linéaire\n", - "X = np.arange(len(data)).reshape(-1, 1) # Variable indépendante : nombre de semaines\n", - "y = data['Mean_CO2_Concentration'].values.reshape(-1, 1) # Variable dépendante : concentration de CO2\n", + "# Slow contribution. Modelling slow change using linear regression\n", + "X = np.arange(len(data)).reshape(-1, 1) # Independent variable: number of weeks\n", + "y = data['Mean_CO2_Concentration'].values.reshape(-1, 1) # Dependent variable: CO2 concentration\n", "\n", - "# Ajuster le modèle de régression linéaire\n", + "# Fit the linear regression model\n", "model = LinearRegression()\n", "model.fit(X, y)\n", "\n", - "# Paramètres du modèle de régression linéaire\n", - "# Obtenez les coefficients de régression et l'interception\n", + "# Linear regression model parameters\n", + "# Get the regression coefficients and the intercept\n", "coeficiente = model.coef_[0][0]\n", "intercepto = model.intercept_[0]\n", "\n", - "# Obtenir le coefficient de détermination (R²)\n", + "# Obtain the coefficient of determination (R²)\n", "r_cuadrado = model.score(X, y)\n", "\n", - "# Imprimer les paramètres du modèle\n", + "# Print model parameters\n", "print(\"Regression coefficient:\", coeficiente)\n", "print(\"Intercept:\", intercepto)\n", "print(\"R-squared (R²):\", r_cuadrado)\n", "\n", - "# Prédire la concentration de CO2 en 2025\n", + "# Predicting CO2 concentration in 2025\n", "weeks_in_2025 = 
(2025 - data['Date'].dt.year.min()) * 52\n", "predicted_CO2_2025 = model.predict([[weeks_in_2025]])\n", "\n", @@ -560,17 +574,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Pour amelliorer le document un plot est plus utile que les paramètres de la régression." + "To improve the document, a plot is more useful than the regression parameters" ] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -582,29 +596,38 @@ } ], "source": [ - "# Visualizar la evolución temporal de la concentración de CO2\n", - "plt.figure(figsize=(12, 8)) # Aumentar el tamaño de la figura\n", - "plt.plot(data['Date'], data['Concentration'], label='Concentración de CO2', color='blue')\n", - "plt.xlabel('Fecha')\n", - "plt.ylabel('Concentración de CO2 (ppm)')\n", - "plt.title('Evolución temporal de la concentración de CO2')# Graficar el modelo de regresión lineal y la proyección\n", + "# Visualisation of the temporal evolution of CO2 concentration\n", + "plt.figure(figsize=(12, 8)) # Size of the figure\n", + "plt.plot(data['Date'], data['Concentration'], label='CO2 Concentration', color='blue')\n", + "plt.title('CO2 Concentration Over Time')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('CO2 Concentration (ppm)')\n", "\n", - "plt.plot(data['Date'], model.predict(X), label='Modelo de regresión lineal', color='red')\n", - "plt.plot(pd.to_datetime(['2025-01-01']), predicted_CO2_2025, marker='o', markersize=8, label='Proyección a 2025', color='green')\n", + "# Plot the linear regression model and the projection\n", + "plt.plot(data['Date'], model.predict(X), label='Linear regression model', color='red')\n", + "plt.plot(pd.to_datetime(['2025-01-01']), predicted_CO2_2025, marker='o', markersize=8, label='Projection to 2025', color='green')\n", "\n", "plt.legend()\n", "plt.grid(True)\n", "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Periodic oscillation. For this step, we have already separated the oscillation phenomena in the \"oscillation\" column of the pandas table and now we characterised them by their amplitude and frequency." + ] + }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ - "# Oscillation périodique. Caractériser l’oscillation\n", - "import numpy as np\n", + "# Periodic oscillation. 
Characterisation of the oscillation\n", + "# Importing the necessary libraries\n", + "# import numpy as np\n", + "# from scipy.fft import fft " ] }, @@ -612,13 +635,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "fft de scipy.fft, a des problèmes de chargement, il se peut que nous utilisions une version de SciPy antérieure à 1.4.0. La fonction fft a été ajoutée dans SciPy version 1.4.0.\n", - "Pour résoudre le problème, nous pouvons utiliser la fonction fft de numpy à la place, puisque numpy fournit également des fonctions pour effectuer la transformée de Fourier" + "\"numpy as np\" is already imported above, where the linear regression model is determined\n", + "\n", + "\"from scipy.fft import fft\" fails to load; we may be using a version of SciPy earlier than 1.4.0. The scipy.fft module was added in SciPy version 1.4.0.\n", + "To solve the problem, we can use numpy's fft function instead, since numpy also provides functions to perform the Fourier transform" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -635,14 +660,24 @@ } ], "source": [ - "# Oscillation périodique. 
Caractériser l’oscillation\n", - "# Affichage des premières lignes des données pour vérification\n", + "# Periodic oscillation\n", + "# Displaying the first rows of data for verification\n", "print(data.head())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Periodic oscillation\n", + " \n", + " Characterisation of the periodic oscillation: we use the Fourier transform to find its dominant frequency and magnitude\n", + " " + ] + }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -650,51 +685,42 @@ "output_type": "stream", "text": [ "Dominant oscillation frequency: 51.661538461538456 week/cycles\n", - "Maximum oscillation amplitude in CO2 anomalies: 3207.2901558554163 ppm\n" + "Maximum oscillation amplitude in CO2 oscilation: 3207.2901558554163 ppm\n" ] } ], "source": [ - "# Oscillation périodique. Calculer la transformée de Fourier de la série chronologique des anomalies CO2\n", + "# Periodic oscillation. Calculate the Fourier transform of the CO2 oscillation time series.\n", "co2_oscilation_fft = np.fft.fft(data['Oscilation'])\n", "\n", - "# Calculer les fréquences correspondant aux composantes de Fourier\n", + "# Calculate the frequencies corresponding to the Fourier components\n", "n = len(data)\n", - "frequencies = np.fft.fftfreq(n, d=1) # Fréquences en cycles par semaine\n", + "frequencies = np.fft.fftfreq(n, d=1) # Frequency in cycles per week\n", "\n", - "# Trouver la fréquence et l'amplitude maximales\n", + "# Find the dominant frequency component and its raw (unnormalised) FFT magnitude\n", "max_freq_index = np.argmax(np.abs(co2_oscilation_fft))\n", "max_freq = frequencies[max_freq_index]\n", "max_amplitude = np.abs(co2_oscilation_fft[max_freq_index])\n", "\n", - "# Calcula la frecuencia inversa en semanas por ciclo\n", + "# Convert the dominant frequency to a period, in weeks per cycle.\n", "max_freq_week_cycles = 1 / max_freq\n", "\n", "print(\"Dominant oscillation frequency:\", max_freq_week_cycles, \"week/cycles\")\n", -
"print(\"Maximum oscillation amplitude in CO2 anomalies:\", max_amplitude, \"ppm\")" + "print(\"Maximum oscillation amplitude in CO2 oscilation:\", max_amplitude, \"ppm\")" ] }, { - "cell_type": "code", - "execution_count": 22, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Comment caractériser l'oscillation périodique en calculant la transformée de Fourier donne des valeurs pas faciles à comprendre a priori, une autre manière est choisie pour caractériser l'oscillation\n" - ] - } - ], "source": [ - "# Oscillation périodique. Comment caractériser l'oscillation en calculant la transformée de Fourier donne des valeurs pas faciles à comprendre a priori, une autre manière est choisie pour caractériser l'oscillation\n", - "print(\"Comment caractériser l'oscillation périodique en calculant la transformée de Fourier donne des valeurs pas faciles à comprendre a priori, une autre manière est choisie pour caractériser l'oscillation\")" + "Characterising a periodic oscillation by calculating the Fourier transform gives values that are difficult to interpret a priori: the raw FFT magnitude printed above is not an amplitude in ppm, because it grows with the number of samples and would first have to be scaled by roughly 2/n. Another method is therefore chosen to characterise the oscillation\n", + "\n", + "As a reminder, here is the periodic oscillation in CO2 concentration (column \"Oscilation\" in the pandas table)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -711,7 +737,7 @@ } ], "source": [ - "# Oscillation périodique. Création du graphique pour montrer l'oscillation de la concentration de CO2 au fil du temps (300 dernières lignes du tableau)\n", + "# Periodic oscillation. 
Creation of the graph to show the oscillation of the CO2 concentration over the time (last 300 rows of the table)\n", "plt.figure(figsize=(10, 6))\n", "plt.plot(data['Date'][-300:], data['Oscilation'][-300:], label='Oscilation CO2 Concentration Over Time')\n", "plt.title('Oscilation CO2 Concentration Over Time')\n", @@ -721,27 +747,34 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Characterisation of the periodic oscillation, determination of the amplitudes max and min" + ] + }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Maximum oscillation amplitude in CO2 anomalies: 4.419807692307586 ppm\n", - "Minimum oscillation amplitude in CO2 anomalies: -4.191923076923047 ppm\n" + "Maximum oscillation amplitude in CO2 periodic oscilation: 4.419807692307586 ppm\n", + "Minimum oscillation amplitude in CO2 periodic oscilation: -4.191923076923047 ppm\n" ] } ], "source": [ - "# Oscillation périodique. Calculez la valeur maximale et minimale de la colonne 'Oscillation'\n", + "# Periodic oscillation. 
Calculate the maximum and minimum value of the 'Oscilation' column\n", "maximum_value = data['Oscilation'].max()\n", "minimum_value = data['Oscilation'].min()\n", "\n", - "print(\"Maximum oscillation amplitude in CO2 anomalies:\", maximum_value, \"ppm\")\n", - "print(\"Minimum oscillation amplitude in CO2 anomalies:\", minimum_value, \"ppm\")" + "print(\"Maximum oscillation amplitude in CO2 periodic oscilation:\", maximum_value, \"ppm\")\n", + "print(\"Minimum oscillation amplitude in CO2 periodic oscilation:\", minimum_value, \"ppm\")" ] }, { @@ -751,39 +784,105 @@ "From the graph it is observed that maximum oscillation values around 3 ppm are frequent and the same is true for minimum oscillation values around -3 ppm\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Characterisation of the periodic oscillation, determination of the oscillation frequency\n", + " \n", + " Searching for concentrations equal to zero" + ] + }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 61, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cette forme de détermination ne donne pas de résultats car les données de concentration de CO2 de l'oscillation ne valent pas zéro mais ont de faibles valeurs, à la fois positives et négatives, donc la méthode correcte doit détecter ces passages par zéro\n" - ] - } - ], + "outputs": [], "source": [ - "# Oscillation périodique. Trouver les indices où la concentration est égale à zéro\n", + "# Periodic oscillation. 
Find the indices where the concentration is equal to zero\n", "zero_indices = data.index[data['Oscilation'] == 0].tolist()\n", "\n", - "# Calculer les temps entre deux passages à zéro consécutifs\n", + "# Determination of the time between two consecutive zero crossings\n", "times_between_steps = []\n", "for i in range(1, len(zero_indices)):\n", " time_between_steps = data.iloc[zero_indices[i]]['Date'] - data.iloc[zero_indices[i-1]]['Date']\n", " times_between_steps.append(time_between_steps)\n", "\n", - "# Calculer le temps moyen entre les passages à zéro\n", + "# Determination of the average time between zero crossings\n", "# average_time_between_steps = sum(times_between_steps, pd.Timedelta(0)) / len(times_between_steps)\n", "\n", - "# print(\"Average time between zero steps:\", average_time_between_steps)\n", - "print(\"Cette forme de détermination ne donne pas de résultats car les données de concentration de CO2 de l'oscillation ne valent pas zéro mais ont de faibles valeurs, à la fois positives et négatives, donc la méthode correcte doit détecter ces passages par zéro\")" + "# print(\"Average time between zero steps:\", average_time_between_steps)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This form of determination does not give results because the CO2 concentration data from the oscillation are not zero but have low values, both positive and negative, so the correct method must detect these zero crossings\".\n", + "\n", + "Searching for zero crossings\n" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "crosses_by_zero is of type : \n" + ] + } + ], + "source": [ + "# Periodic oscillation. 
Identifying zero crossings\n", + "crosses_by_zero = (data['Oscilation'] * data['Oscilation'].shift(1) < 0) & (data['Oscilation'] != 0) \n", + "# Displaying verification of type\n", + "print(\"crosses_by_zero is of type :\",type(crosses_by_zero))" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "Name: Oscilation, dtype: bool\n" + ] + } + ], + "source": [ + "# Displaying the first rows of data for verification\n", + "print(crosses_by_zero.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"crosses_by_zero\" is a pandas Series of boolean values: True in the rows where a zero crossing occurs in the \"Oscilation\" column of the DataFrame and False in the other rows. This Series is then used to filter the DataFrame and obtain the rows corresponding to the zero crossings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally we get the average time between zero crossings. A periodic signal crosses zero twice per cycle, so this average corresponds to about half of the oscillation period" + ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -795,25 +894,15 @@ } ], "source": [ - "# Oscillation périodique. Identifier les passages à zéro\n", - "crosses_by_zero = (data['Oscilation'] * data['Oscilation'].shift(1) < 0) & (data['Oscilation'] != 0) # Série booléenne qui contient True dans les lignes où un passage à zéro se produit dans la colonne « Oscilation » du DataFrame et False dans les autres lignes. 
Cette série est ensuite utilisée pour filtrer le DataFrame et obtenir les lignes qui correspondent aux passages par zéro\n", - "\n", - "# Filtrer les lignes contenant des passages à zéro\n", + "# Filter lines containing zero crossings\n", "data_crosses_by_zero = data[crosses_by_zero]\n", "\n", - "# Calculer les temps entre les passages à zéro consécutifs\n", + "# Calculate the times between consecutive zero crossings\n", "times_between_crosses = data_crosses_by_zero['Date'].diff().mean()\n", "\n", "print(\"Average time between zero crossings:\", times_between_crosses)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -823,7 +912,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -836,7 +925,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -851,7 +940,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -881,7 +970,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -911,7 +1000,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -933,7 +1022,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -963,7 +1052,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -993,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1022,7 +1111,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -1051,7 +1140,7 @@ }, { "cell_type": "code", - "execution_count": 33, + 
"execution_count": 37, "metadata": {}, "outputs": [ { @@ -1080,7 +1169,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1114,6 +1203,13 @@ "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null,