{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Notebook Python R\n", "\n", "## Import des données dans Python" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0speeddist
0142
12410
2374
34722
45816
\n", "
" ], "text/plain": [ " Unnamed: 0 speed dist\n", "0 1 4 2\n", "1 2 4 10\n", "2 3 7 4\n", "3 4 7 22\n", "4 5 8 16" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "# data_url = \"https://forge.scilab.org/index.php/p/rdataset/source/file/master/csv/datasets/cars.csv\"\n", "data_url = \"cars.csv\"\n", "df_python = pd.read_csv(data_url)\n", "df_python.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Suppression de la première colonne" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
speeddist
042
1410
274
3722
4816
\n", "
" ], "text/plain": [ " speed dist\n", "0 4 2\n", "1 4 10\n", "2 7 4\n", "3 7 22\n", "4 8 16" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_python.drop(df_python.columns[[0]], axis=1, inplace=True)\n", "df_python.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Summary avec Python" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
speeddist
count50.00000050.000000
mean15.40000042.980000
std5.28764425.769377
min4.0000002.000000
25%12.00000026.000000
50%15.00000036.000000
75%19.00000056.000000
max25.000000120.000000
\n", "
" ], "text/plain": [ " speed dist\n", "count 50.000000 50.000000\n", "mean 15.400000 42.980000\n", "std 5.287644 25.769377\n", "min 4.000000 2.000000\n", "25% 12.000000 26.000000\n", "50% 15.000000 36.000000\n", "75% 19.000000 56.000000\n", "max 25.000000 120.000000" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_python.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Summary avec R" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from rpy2.robjects import pandas2ri\n", "pandas2ri.activate()\n", "from rpy2.robjects.packages import importr" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " speed dist \n", " Min. : 4.0 Min. : 2.00 \n", " 1st Qu.:12.0 1st Qu.: 26.00 \n", " Median :15.0 Median : 36.00 \n", " Mean :15.4 Mean : 42.98 \n", " 3rd Qu.:19.0 3rd Qu.: 56.00 \n", " Max. :25.0 Max. 
:120.00 \n", "\n" ] } ], "source": [ "base = importr('base')\n", "print(base.summary(df_python))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "ename": "RRuntimeError", "evalue": "Error in loadNamespace(name) : there is no package called ‘prettyR’\n", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprettyR\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimportr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'prettyR'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprettyR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdescribe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_python\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/rpy2/robjects/packages.py\u001b[0m in \u001b[0;36mimportr\u001b[0;34m(name, lib_loc, robject_translations, signature_translation, suppress_messages, on_conflict, symbol_r2python, symbol_check_after, data)\u001b[0m\n\u001b[1;32m 451\u001b[0m if _package_has_namespace(rname, \n\u001b[1;32m 452\u001b[0m _system_file(package = rname)):\n\u001b[0;32m--> 453\u001b[0;31m \u001b[0menv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_namespace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 454\u001b[0m \u001b[0mversion\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_namespace_version\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[0mexported_names\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_get_namespace_exports\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRRuntimeError\u001b[0m: Error in loadNamespace(name) : there is no package called ‘prettyR’\n" ] } ], "source": [ "prettyR = importr('prettyR')\n", "print(prettyR.describe(df_python))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Liens utiles\n", "\n", "- http://rpy.sourceforge.net/rpy2/doc-2.4/html/introduction.html\n", "- https://rpy2.readthedocs.io/en/version_2.8.x/" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }