From 6c0f247ebb848c12f3f11c05be268e8155fa5613 Mon Sep 17 00:00:00 2001
From: c6b76b23b68ff2880e300802cab9a6e3
 <c6b76b23b68ff2880e300802cab9a6e3@app-learninglab.inria.fr>
Date: Wed, 12 Aug 2020 15:24:19 +0000
Subject: [PATCH] MAJ exercice 4 (module 2)

---
 module2/exo4/datasets.csv   |   6 +-
 module2/exo4/exercice.ipynb | 205 +++++++++++++++++++++++++++++++++++-
 2 files changed, 204 insertions(+), 7 deletions(-)

diff --git a/module2/exo4/datasets.csv b/module2/exo4/datasets.csv
index 105c017..6e23410 100644
--- a/module2/exo4/datasets.csv
+++ b/module2/exo4/datasets.csv
@@ -2,6 +2,6 @@ name;description;website;tags
 KITTI Vision Benchmark Suite;We take advantage of our autonomous driving platform Annieway to develop novel challenging real-world computer vision benchmarks. Our tasks of interest are: stereo, optical flow, visual odometry, 3D object detection and 3D tracking;http://www.cvlibs.net/datasets/kitti/;stereo,flow,odometry,tracking,detection,road,maps,city
 Audi Autonomous Driving Dataset;We have published the Audi Autonomous Driving Dataset (A2D2) to support startups and academic researchers working on autonomous driving. Equipping a vehicle with a multimodal sensor suite, recording a large dataset, and labelling it, is time and labour intensive.;https://www.a2d2.audi/a2d2/en.html;semantic,cloud,segmentation,detection,road,maps,city
 ApolloScape Dataset;Trajectory dataset, 3D Perception Lidar Object Detection and Tracking dataset including about 100K image frames, 80k lidar point cloud and 1000km trajectories for urban traffic. The dataset consisting of varying conditions and traffic densities which includes many challenging scenarios where vehicles, bicycles, and pedestrians move among one another.;http://apolloscape.auto/;stereo,flow,semantic,cloud,segmentation,detection,road,maps,city
-Velodyne SLAM;Here, you can find two challenging datasets recorded with the Velodyne HDL64E-S2 scanner in the city of Karlsruhe, Germany.;http://www.mrt.kit.edu/z/publ/download/velodyneslam/dataset.html;
-Daimler Urban Segmentation Dataset;The Daimler Urban Segmentation Dataset consists of video sequences recorded in urban traffic. The dataset consists of 5000 rectified stereo image pairs with a resolution of 1024x440. 500 frames come with pixel-level semantic class annotations into 5 classes: ground, building, vehicle, pedestrian, sky. Dense disparity maps are provided as a reference, however these are not manually annotated but computed using semi-global matching.;http://www.6d-vision.com/scene-labeling
-nuScenes dataset;The nuScenes dataset is a public large-scale dataset for autonomous driving developed by Aptiv Autonomous Mobility. By releasing a subset of our data to the public, Aptiv aims to support public research into computer vision and autonomous driving.;https://www.nuscenes.org/;
+Velodyne SLAM;Here, you can find two challenging datasets recorded with the Velodyne HDL64E-S2 scanner in the city of Karlsruhe, Germany.;http://www.mrt.kit.edu/z/publ/download/velodyneslam/dataset.html;detection,images,city
+Daimler Urban Segmentation Dataset;The Daimler Urban Segmentation Dataset consists of video sequences recorded in urban traffic. The dataset consists of 5000 rectified stereo image pairs with a resolution of 1024x440. 500 frames come with pixel-level semantic class annotations into 5 classes: ground, building, vehicle, pedestrian, sky. Dense disparity maps are provided as a reference, however these are not manually annotated but computed using semi-global matching.;http://www.6d-vision.com/scene-labeling;stereo,labelling,detection,road,maps,city
+nuScenes dataset;The nuScenes dataset is a public large-scale dataset for autonomous driving developed by Aptiv Autonomous Mobility. By releasing a subset of our data to the public, Aptiv aims to support public research into computer vision and autonomous driving.;https://www.nuscenes.org/;labelling,detection,road,maps,city
diff --git a/module2/exo4/exercice.ipynb b/module2/exo4/exercice.ipynb
index 04cada5..d3d6e0f 100644
--- a/module2/exo4/exercice.ipynb
+++ b/module2/exo4/exercice.ipynb
@@ -8,7 +8,7 @@
     "\n",
     "- [x] Extraire un résumé, des informations vers un fichier CSV (datasets.csv)\n",
     "- [x] Lire et afficher les données du fichier CSV pour vérification\n",
-    "- [ ] Extraire des mots-clés, des étiquettes décrivant ces datasets (colonne tags)\n",
+    "- [x] Extraire des mots-clés, des étiquettes décrivant ces datasets (colonne tags)\n",
     "- [ ] Créer quelques statistiques de base de ces datasets\n",
     "- [ ] Créer une représentation graphique de ces datasets\n"
    ]
@@ -22,7 +22,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
    "metadata": {
     "scrolled": false
    },
@@ -120,7 +120,7 @@
        "5                          https://www.nuscenes.org/  "
       ]
      },
-     "execution_count": 4,
+     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -130,9 +130,13 @@
     "# Structure de l'entête/données : name;description;website;tags (avec des points virgules pour les champs)\n",
     "\n",
     "# https://pandas.pydata.org\n",
-    "# Version 0.22.0 sur ce Jupyter\n",
+    "# Version 0.22.0 (December 29, 2017) sur ce Jupyter !\n",
     "import pandas as pd\n",
     "\n",
+    "# print(pd.__version__) \n",
+    "# pd.show_versions() # Toutes les extensions installées\n",
+    "\n",
+    "# Afficher les colonnes principales\n",
     "datasets = pd.read_csv('datasets.csv', delimiter = ';', usecols=[0,1,2])\n",
     "datasets"
    ]
@@ -144,6 +148,199 @@
     "#### Extraire des mots-clés, des étiquettes décrivant ces datasets"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tags</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>stereo,flow,odometry,tracking,detection,road,m...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>semantic,cloud,segmentation,detection,road,map...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>stereo,flow,semantic,cloud,segmentation,detect...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>detection,images,city</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>stereo,labelling,detection,road,maps,city</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>labelling,detection,road,maps,city</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                tags\n",
+       "0  stereo,flow,odometry,tracking,detection,road,m...\n",
+       "1  semantic,cloud,segmentation,detection,road,map...\n",
+       "2  stereo,flow,semantic,cloud,segmentation,detect...\n",
+       "3                              detection,images,city\n",
+       "4          stereo,labelling,detection,road,maps,city\n",
+       "5                 labelling,detection,road,maps,city"
+      ]
+     },
+     "execution_count": 72,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Vérifier les mots-clés\n",
+    "tags = pd.read_csv('datasets.csv', delimiter = ';', usecols=[3])\n",
+    "tags"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Créer quelques statistiques de base de ces datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tags</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>unique</th>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>top</th>\n",
+       "      <td>stereo,labelling,detection,road,maps,city</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>freq</th>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                             tags\n",
+       "count                                           6\n",
+       "unique                                          6\n",
+       "top     stereo,labelling,detection,road,maps,city\n",
+       "freq                                            1"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Statistiques descriptives de la colonne tags (count, unique, top, freq)\n",
+    "tags = pd.read_csv('datasets.csv', delimiter = ';', usecols=[3])\n",
+    "tags.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['stereo,flow,odometry,tracking,detection,road,maps,city']\n",
+      "['semantic,cloud,segmentation,detection,road,maps,city']\n",
+      "['stereo,flow,semantic,cloud,segmentation,detection,road,maps,city']\n",
+      "['detection,images,city']\n",
+      "['stereo,labelling,detection,road,maps,city']\n",
+      "['labelling,detection,road,maps,city']\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "# import re\n",
+    "\n",
+    "# Afficher les mots-clés bruts de chaque dataset (une ligne par dataset)\n",
+    "tags = pd.read_csv('datasets.csv', delimiter = ';', usecols=[3])\n",
+    "\n",
+    "for t in tags.values:\n",
+    "    print(t)\n",
+    "    \n",
+    "    \n",
+    "#     array = re.split('\\,',t)\n",
+    "#     str = np.array_split(t,1)\n",
+    "#     print(str)\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-- 
2.18.1