{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook Python R\n",
"\n",
"## Import des données dans Python"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" speed | \n",
" dist | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 4 | \n",
" 10 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 7 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 7 | \n",
" 22 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 8 | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 speed dist\n",
"0 1 4 2\n",
"1 2 4 10\n",
"2 3 7 4\n",
"3 4 7 22\n",
"4 5 8 16"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"# data_url = \"https://forge.scilab.org/index.php/p/rdataset/source/file/master/csv/datasets/cars.csv\"\n",
"data_url = \"cars.csv\"\n",
"df_python = pd.read_csv(data_url)\n",
"df_python.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Supression de la première colonne"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" speed | \n",
" dist | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 | \n",
" 10 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 7 | \n",
" 22 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" speed dist\n",
"0 4 2\n",
"1 4 10\n",
"2 7 4\n",
"3 7 22\n",
"4 8 16"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_python.drop(df_python.columns[[0]], axis=1, inplace=True)\n",
"df_python.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary avec Python"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" speed | \n",
" dist | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 50.000000 | \n",
" 50.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 15.400000 | \n",
" 42.980000 | \n",
"
\n",
" \n",
" std | \n",
" 5.287644 | \n",
" 25.769377 | \n",
"
\n",
" \n",
" min | \n",
" 4.000000 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 12.000000 | \n",
" 26.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 15.000000 | \n",
" 36.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 19.000000 | \n",
" 56.000000 | \n",
"
\n",
" \n",
" max | \n",
" 25.000000 | \n",
" 120.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" speed dist\n",
"count 50.000000 50.000000\n",
"mean 15.400000 42.980000\n",
"std 5.287644 25.769377\n",
"min 4.000000 2.000000\n",
"25% 12.000000 26.000000\n",
"50% 15.000000 36.000000\n",
"75% 19.000000 56.000000\n",
"max 25.000000 120.000000"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_python.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary avec R"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from rpy2.robjects import pandas2ri\n",
"pandas2ri.activate()\n",
"from rpy2.robjects.packages import importr"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" speed dist \n",
"\r\n",
" Min. : 4.0 Min. : 2.00 \n",
"\r\n",
" 1st Qu.:12.0 1st Qu.: 26.00 \n",
"\r\n",
" Median :15.0 Median : 36.00 \n",
"\r\n",
" Mean :15.4 Mean : 42.98 \n",
"\r\n",
" 3rd Qu.:19.0 3rd Qu.: 56.00 \n",
"\r\n",
" Max. :25.0 Max. :120.00 \n",
"\n"
]
}
],
"source": [
"base = importr('base')\n",
"print(base.summary(df_python))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\r\n",
" Numeric \n",
"\r\n",
" mean median var sd valid.n\n",
"\r\n",
"speed 15.40 15 27.96 5.29 50\n",
"\r\n",
"dist 42.98 36 664.06 25.77 50\n",
"\n"
]
}
],
"source": [
"prettyR = importr('prettyR')\n",
"print(prettyR.describe(df_python))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Liens utiles\n",
"\n",
"- http://rpy.sourceforge.net/rpy2/doc-2.4/html/introduction.html\n",
"- https://rpy2.readthedocs.io/en/version_2.8.x/"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}