diff --git a/module3/exo3/exercice_R_en.org b/module3/exo3/exercice_R_en.org index 2b73d64e82dad485ab562f8bda6434ffa7b68d27..3b7dccfde1a6aae47145df6227e4a7ce8e030a0e 100644 --- a/module3/exo3/exercice_R_en.org +++ b/module3/exo3/exercice_R_en.org @@ -1,81 +1,754 @@ -#+TITLE: Your title -#+AUTHOR: Your name -#+DATE: Today's date +#+TITLE: Latency and capacity estimation for a network connection from asymmetric measurements +#+AUTHOR: Alexandre D. Jesus +#+DATE: 2020-05-30 #+LANGUAGE: en -# #+PROPERTY: header-args :eval never-export +#+OPTIONS: toc:nil +#+PROPERTY: header-args:R :session *R* +#+LATEX_CLASS: article +#+LATEX_CLASS_OPTIONS: [a4paper] +#+LATEX_HEADER: \usepackage{geometry} +#+LATEX_HEADER: \linespread{1.2} +#+LATEX_HEADER: \setminted{breaklines} +#+LATEX_HEADER: \usepackage[parfill]{parskip} -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: +* Introduction -* Some explanations +A simple and commonly used model for the performance of a network +connection is given by +\[ +T(S) = L + S/C +\] +where $S$ denotes the size of a message in bytes, $L$ denotes the +latency of the connection in seconds, and $C$ denotes the capacity of +the connection. This model $T(S)$ denotes the time required to send a +message of size $S$. -This is an org-mode document with code examples in R. Once opened in -Emacs, this document can easily be exported to HTML, PDF, and Office -formats. For more information on org-mode, see -https://orgmode.org/guide/. +In this work, we are interested in estimating the values $L$ and $C$ +for a network connection given sample values of $S$ and $T(S)$. -When you type the shortcut =C-c C-e h o=, this document will be -exported as HTML. All the code in it will be re-executed, and the -results will be retrieved and included into the exported document. If -you do not want to re-execute all code each time, you can delete the # -and the space before ~#+PROPERTY:~ in the header of this document. 
+* Reading the data

-Like we showed in the video, R code is included as follows (and is
-exxecuted by typing ~C-c C-c~):
+** Dataset format
+In this work we consider two datasets for two connections:
+
+- A short on-campus connection: file://./liglab2.log.gz
+- A connection to =stackoverflow.com=: file://./stackoverflow.log.gz
+
+These files contain the raw output of a =ping= command. Each line is
+of the form
+#+begin_example
+[TIMESTAMP] SIZE bytes from DOMAIN (IP): icmp_seq=ICMP_SEQ ttl=TTL time=TIME ms
+#+end_example
+where:
+- =TIMESTAMP= denotes the date of the measurement expressed in seconds
+  since January 1, 1970
+- =SIZE= denotes the size of the message expressed in bytes, which
+  corresponds to the variable $S$ of our model
+- =DOMAIN= and =IP= denote the domain and IP address of the target
+  machine, which should be fixed for each file
+- =ICMP_SEQ= and =TTL= are not relevant for our analysis
+- =TIME= denotes the round-trip time of the message in milliseconds
+
+An example line looks like this
+
+#+begin_example
+[1421761682.052172] 665 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1
+ttl=60 time=22.5 ms
+#+end_example
+
+** Reading the data
+
+To process these files we devise a function that takes a file as input
+and returns a dataframe with the relevant data for our analysis
+#+begin_src R :results none :noweb yes
+read_data <- function(path) {
+  <<read-data>>
+}
+#+end_src
+
+In this function we start by checking whether the file exists
+
+#+begin_src R :noweb-ref read-data :eval never
+if(!file.exists(path)) {
+  stop("File does not exist")
+}
+#+end_src
+
+Then we define regexps that look for the timestamp, size and time data
+in a line
+
+#+begin_src R :noweb-ref read-data :eval never
+timestamp_r <- "\\[[0-9]+(\\.[0-9]+)?\\]"
+size_r <- "[0-9]+[[:space:]]bytes"
+time_r <- "time=[0-9]+(\\.[0-9]+)?"
+all_r <- sprintf("%s.*%s.*%s", timestamp_r, size_r, time_r) +#+end_src + +Then, we find all the lines with no missing data, and gather all +the data + +#+begin_src R :noweb-ref read-data :eval never +lines <- readLines(path) +ind <- grep(all_r, lines) + +timestamps <- sapply( + regmatches(lines[ind], regexpr(timestamp_r, lines[ind])), + function(s) as.numeric(substr(s, 2, nchar(s)-1)), + USE.NAMES = F +) + +sizes <- sapply( + regmatches(lines[ind], regexpr(size_r, lines[ind])), + function(s) as.numeric(substr(s, 1, nchar(s)-6)), + USE.NAMES = F +) + +times <- sapply( + regmatches(lines[ind], regexpr(time_r, lines[ind])), + function(s) as.numeric(substr(s, 6, nchar(s))), + USE.NAMES = F +) +#+end_src + +Finally we aggregate this data into a data frame + +#+begin_src R :noweb-ref read-data :eval never +df <- data.frame( + timestamp = timestamps, + size = sizes, + time = times +) + +df$timestamp <- as.POSIXct( + df$timestamp, + origin = "1970-01-01" +) + +df +#+end_src + +and quickly check if the function is working correctly #+begin_src R :results output :exports both -print("Hello world!") +head(read_data("./stackoverflow.log.gz")) #+end_src #+RESULTS: -: [1] "Hello world!" +: timestamp size time +: 1 2015-01-20 16:26:43 1257 120 +: 2 2015-01-20 16:26:43 454 120 +: 3 2015-01-20 16:26:43 775 126 +: 4 2015-01-20 16:26:44 1334 112 +: 5 2015-01-20 16:26:44 83 111 +: 6 2015-01-20 16:26:44 694 111 -And now the same but in an R session. This is the most frequent -situation, because R is really an interactive language. With a -session, R's state, i.e. the values of all the variables, remains -persistent from one code block to the next. The code is still executed -using ~C-c C-c~. +* Analysis of the dataset =liglab2= -#+begin_src R :results output :session *R* :exports both -summary(cars) +** Effect of timestamp on time + +We start by looking at the effect of timestamp into time by plotting +all samples from our dataset. 
+ +#+begin_src R :exports code :results none +dat <- read_data("./liglab2.log.gz") +#+end_src + +#+begin_src R :exports results :results file graphics :file ./liglab1.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = timestamp, y = time)) + + geom_point() + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./liglab1.pdf]] + +This plot does not show an impact of the timestamp on time. Another +way to look at this data is through a bar plot that shows the frequency +of time over certain timestamp intervals. + +#+begin_src R :exports results :results file graphics :file ./liglab2.pdf :width 6 :height 4 +steps <- 16 +dif <- (max(dat$timestamp) - min(dat$timestamp)) / (steps) +lower_ts <- c() +mean_ts <- c() +upper_ts <- c() +counts <- c() +types <- c() + +for(i in 1:steps) { + mi <- min(dat$timestamp)+dif*(i-1) + me <- mi + dif / 2 + ma <- min(dat$timestamp)+dif*i + if(i == steps) { ma = ma + 1; } + + lower_ts <- c(lower_ts, mi, mi, mi, mi, mi) + mean_ts <- c(mean_ts, me, me, me, me, me) + upper_ts <- c(upper_ts, ma, ma, ma, ma, ma) + count_times <- function(tl, ts, tsl, tsu) { + sum(dat$time >= tl & + dat$time < ts & + dat$timestamp >= tsl & + dat$timestamp < tsu) + } + types <- c( + types, + "0 <= t < 10", + "10 <= t < 20", + "20 <= t < 50", + "50 <= t < 100", + "100 <= t" + ) + counts <- c( + counts, + count_times(0, 10, mi, ma), + count_times(10, 20, mi, ma), + count_times(20, 50, mi, ma), + count_times(50, 100, mi, ma), + count_times(100, max(dat$time)+1, mi, ma) + ) +} + +aux <- data.frame( + lower_ts = lower_ts, + upper_ts = upper_ts, + mean_ts = mean_ts, + type = types, + count = counts +) +aux$lower_ts <- as.POSIXct(aux$lower_ts, origin = "1970-01-01") +aux$mean_ts <- as.POSIXct(aux$mean_ts, origin = "1970-01-01") +aux$upper_ts <- as.POSIXct(aux$upper_ts, origin = "1970-01-01") + +ggplot(aux, aes(x=mean_ts, y = count, fill = type)) + + geom_bar(stat="identity") + + xlab("Timestamp") + + ylab("Count") + + theme_bw() #+end_src #+RESULTS: -: speed dist 
-: Min. : 4.0 Min. : 2.00 -: 1st Qu.:12.0 1st Qu.: 26.00 -: Median :15.0 Median : 36.00 -: Mean :15.4 Mean : 42.98 -: 3rd Qu.:19.0 3rd Qu.: 56.00 -: Max. :25.0 Max. :120.00 +[[file:./liglab2.pdf]] + +This plot also shows no significant impact of the timestamp on +time. There are a couple of intervals where the time seems to increase +slightly but nothing too significant. Also we note that the number of +samples per timestamp period is consistent. + +** Effect of size on time + +We start by plotting time over size + +#+begin_src R :exports results :results file graphics :file ./liglab3.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point() + + theme_bw() +#+end_src -Finally, an example for graphical output: -#+begin_src R :results output graphics :file "./cars.png" :exports results :width 600 :height 400 :session *R* -plot(cars) +#+RESULTS[b9c033ff60ca4c488d122b684442d2c8304b9702]: +[[file:./liglab3.pdf]] + +This plot shows that there is an increase in time after a certain +size. To better determine at what size this increase happens we zoom +around that point. + +#+begin_src R :exports results :results file graphics :file ./liglab4.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat[dat$size >= 1450 & dat$size <= 1500,], aes(x = size, y = time)) + + geom_point() + + theme_bw() #+end_src #+RESULTS: -[[file:./cars.png]] +[[file:./liglab4.pdf]] + +From this plot the increase in time seems to be after size 1480. As +such, to be able to deal with these two cases separately, we introduce +a class column into the dataset. + +#+begin_src R :exports code :results none +dat$class <- NA +dat[dat$size <= 1480,]$class <- "small" +dat[dat$size > 1480,]$class <- "large" +dat$class <- as.factor(dat$class) +#+end_src + +** Estimating values of L and C + +We will estimate the values of L and C for each of the two datasets +previously defined. 
+
+*** Linear regression
+
+To estimate the parameters of our network model we start by
+considering two linear regression models (one for each class). For the
+class "small" we have the linear regression model
+
+#+begin_src R :exports both :results output
+lmMod.small <- lm(time ~ size, data = dat[dat$class == "small",])
+lmMod.small
+#+end_src
+
+#+RESULTS:
+: 
+: Call:
+: lm(formula = time ~ size, data = dat[dat$class == "small", ])
+: 
+: Coefficients:
+: (Intercept)         size
+:   3.2756742    0.0003263
+
+From this model, we can estimate the latency and capacity values for
+the network model. In particular the linear regression model gives us
+the equation
+\[
+T(S) = 3.2756742 + 0.0003263 \times S
+\]
+Then, since our network model is given by
+\[
+T(S) = L + S / C
+\]
+we can estimate a latency $L = 3.2756742$ and a capacity $C = 1 /
+0.0003263 = 3064.664$.
+
+For class "large" we have the linear regression model
+
+#+begin_src R :exports both :results output
+lmMod.large <- lm(time ~ size, data = dat[dat$class == "large",])
+lmMod.large
+#+end_src
+
+#+RESULTS:
+: 
+: Call:
+: lm(formula = time ~ size, data = dat[dat$class == "large", ])
+: 
+: Coefficients:
+: (Intercept)         size
+:    5.289833     0.002579
+
+From this we estimate a latency $L = 5.289833$ and a capacity $C = 1 /
+0.002579 = 387.7472$.
+
+Finally, we plot both the regression models against our samples
+
+#+begin_src R :exports results :results file graphics :file ./liglab5-lm.pdf :width 6 :height 4
+library("ggplot2")
+ggplot(dat, aes(x = size, y = time)) +
+  geom_point(color = "gray") +
+  geom_smooth(method="lm", formula = y ~ x, se = F, color = "black") +
+  facet_wrap(vars(class), scales="free") +
+  theme_bw()
+#+end_src
+
+#+RESULTS:
+[[file:./liglab5-lm.pdf]]
+
+Note that these seem to be overestimating most of the data. In the
+next section we will look into how we can improve this.
+ +*** Quantile regression + +If we look at a box plot of our data +#+begin_src R :exports results :results file graphics :file ./liglab5.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(class, time)) + + geom_boxplot() + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./liglab5.pdf]] + +it indicates that a first look at the points means that we mostly see +outliers. We can zoom in further to take a better look at the box +plot. + +#+begin_src R :exports results :results file graphics :file ./liglab6.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(class, time)) + + geom_boxplot() + + coord_cartesian(ylim = c(0,3)) + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./liglab6.pdf]] + +As a result of this asymmetry in the data, its probably better to use +quantile regression with respect to the median. + +#+begin_src R :results none +library(quantreg) +#+end_src + +#+begin_src R :exports both :results output +rqMod.small <- rq(time ~ size, tau = 0.5, data = dat[dat$class == "small",]) +rqMod.small +#+end_src + +#+RESULTS: +#+begin_example + +Call: +rq(formula = time ~ size, tau = 0.5, data = dat[dat$class == + "small", ]) + +Coefficients: + (Intercept) size +1.1390594059 0.0002475248 + +Degrees of freedom: 32667 total; 32665 residual +#+end_example + +For the "small" class we have an estimated latency $L = 1.139$ and +capacity $C = 1 / 0.0002475248 = 4039.999$. + +#+begin_src R :exports both :results output +rqMod.large <- rq(time ~ size, tau = 0.5, data = dat[dat$class == "large",]) +rqMod.large +#+end_src + +#+RESULTS: +#+begin_example + +Call: +rq(formula = time ~ size, tau = 0.5, data = dat[dat$class == + "large", ]) + +Coefficients: + (Intercept) size +1.8853521127 0.0002464789 + +Degrees of freedom: 11369 total; 11367 residual +#+end_example -Note the parameter ~:exports results~, which indicates that the code -will not appear in the exported document. 
We recommend that in the -context of this MOOC, you always leave this parameter setting as -~:exports both~, because we want your analyses to be perfectly -transparent and reproducible. +For the "large" class we have an estimated latency $L = 1.885$ and +capacity $C = 1 / 0.0002464789 = 4057.142$. -Watch out: the figure generated by the code block is /not/ stored in -the org document. It's a plain file, here named ~cars.png~. You have -to commit it explicitly if you want your analysis to be legible and -understandable on GitLab. +We can see that the quantile regression gives a different estimate, +and that the latency estimate is significantly lower as expected. + +We can plot our models + +#+begin_src R :exports results :results file graphics :file ./liglab7.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point(color = "gray") + + geom_smooth(method="rq", formula = y ~ x, se = F, method.args = list(tau = 0.5), color = "black") + + facet_wrap(vars(class), scales="free") + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./liglab7.pdf]] + +And zoom in to take a better look + +#+begin_src R :exports results :results file graphics :file ./liglab8.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point(color = "gray") + + geom_smooth(method="rq", formula = y ~ x, se = F, method.args = list(tau = 0.5), color = "black") + + facet_wrap(vars(class), scales="free") + + coord_cartesian(ylim = c(0, 3)) + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./liglab8.pdf]] + +Indeed there seems to be a better fit than with the linear regression which +we plot below on a similar scale. 
+
+#+begin_src R :exports results :results file graphics :file ./liglab9.pdf :width 6 :height 4
+library("ggplot2")
+ggplot(dat, aes(x = size, y = time)) +
+  geom_point(color = "gray") +
+  geom_smooth(method="lm", formula = y ~ x, se = F, color = "black") +
+  facet_wrap(vars(class), scales="free") +
+  coord_cartesian(ylim = c(0, 10)) +
+  theme_bw()
+#+end_src
+
+#+RESULTS:
+[[file:./liglab9.pdf]]
+
+* Analysis of the dataset =stackoverflow=
+
+Now we will perform a similar analysis on the =stackoverflow= dataset.
+
+** Effect of timestamp on time
+
+We start by looking at the effect of the timestamp on the time.
+
+#+begin_src R :exports code :results none
+dat <- read_data("./stackoverflow.log.gz")
+#+end_src
+
+#+begin_src R :exports results :results file graphics :file ./stackoverflow1.pdf :width 6 :height 4
+library("ggplot2")
+ggplot(dat, aes(x = timestamp, y = time)) +
+  geom_point() +
+  theme_bw()
+#+end_src
+
+#+RESULTS:
+[[file:./stackoverflow1.pdf]]
+
+Like in the previous dataset, there seems to be no significant impact
+of timestamp on time. We can also look at frequency bar plots.
+ +#+begin_src R :exports results :results file graphics :file ./stackoverflow2.pdf :width 6 :height 4 +steps <- 16 +dif <- (max(dat$timestamp) - min(dat$timestamp)) / (steps) +lower_ts <- c() +mean_ts <- c() +upper_ts <- c() +counts <- c() +types <- c() + +for(i in 1:steps) { + mi <- min(dat$timestamp)+dif*(i-1) + me <- mi + dif / 2 + ma <- min(dat$timestamp)+dif*i + if(i == steps) { ma = ma + 1; } + + lower_ts <- c(lower_ts, mi, mi, mi) + mean_ts <- c(mean_ts, me, me, me) + upper_ts <- c(upper_ts, ma, ma, ma) + count_times <- function(tl, ts, tsl, tsu) { + sum(dat$time >= tl & + dat$time < ts & + dat$timestamp >= tsl & + dat$timestamp < tsu) + } + types <- c( + types, + "0 <= t < 115", + "115 <= t < 140", + "140 <= t" + ) + counts <- c( + counts, + count_times(0, 115, mi, ma), + count_times(115, 140, mi, ma), + count_times(140, max(dat$time)+1, mi, ma) + ) +} + +aux <- data.frame( + lower_ts = lower_ts, + upper_ts = upper_ts, + mean_ts = mean_ts, + type = types, + count = counts +) +aux$lower_ts <- as.POSIXct(aux$lower_ts, origin = "1970-01-01") +aux$mean_ts <- as.POSIXct(aux$mean_ts, origin = "1970-01-01") +aux$upper_ts <- as.POSIXct(aux$upper_ts, origin = "1970-01-01") + +ggplot(aux, aes(x=mean_ts, y = count, fill = type)) + + geom_bar(stat="identity") + + xlab("Timestamp") + + ylab("Count") + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./stackoverflow2.pdf]] + +This plot shows that there are a couple of intervals with a slight +increase in time (e.g. around 16:40). However, this does not seem to +warrant a separation of the data. + +** Effect of size on time + +We start by plotting the points of time over size + +#+begin_src R :exports results :results file graphics :file ./stackoverflow3.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point() + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./stackoverflow3.pdf]] + +This plot shows that the size seems to have an impact on time. 
If we +zoom in + +#+begin_src R :exports results :results file graphics :file ./stackoverflow4.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point() + + coord_cartesian(xlim = c(1440, 1520)) + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./stackoverflow4.pdf]] + +we see that this difference is after size 1480. As such we split the +data into two classes. + +#+begin_src R :exports code :results none +dat$class <- NA +dat[dat$size <= 1480,]$class <- "small" +dat[dat$size > 1480,]$class <- "large" +dat$class <- as.factor(dat$class) +#+end_src + +** Estimating values of L and C + +*** Linear regression + +We again start by estimating the parameters of our network model with two +linear regression models (one for each class). + +#+begin_src R :exports both :results output +lmMod.small <- lm(time ~ size, data = dat[dat$class == "small",]) +lmMod.small +#+end_src + +#+RESULTS: +: +: Call: +: lm(formula = time ~ size, data = dat[dat$class == "small", ]) +: +: Coefficients: +: (Intercept) size +: 1.132e+02 4.521e-05 + +For the class "small" (size less than or equal to 1480) the linear +model gives us the equation +\[ +T(S) = 113.2 + 0.00004521 \times S +\] +meaning that we can estimate a latency $L = 113.2$ and capacity $C = 1 +/ 0.00004521 = 22119$. + +#+begin_src R :exports both :results output +lmMod.large <- lm(time ~ size, data = dat[dat$class == "large",]) +lmMod.large +#+end_src + +#+RESULTS: +: +: Call: +: lm(formula = time ~ size, data = dat[dat$class == "large", ]) +: +: Coefficients: +: (Intercept) size +: 120.053588 -0.001803 + +For class "large" we estimate a latency $L = 120.053588$ and capacity +$C = 1 / -0.001803 = -554.63$. Note that a negative capacity does not +make sense in terms of the network model. However, we may assume that +this is an artifact of our dataset. + +A plot of the regression model against the data seems to indicate that +both the models seem to be overestimating the latency due to +outliers. 
The same as it happened for the previous dataset. + +#+begin_src R :exports results :results file graphics :file ./stackoverflow5.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point(color = "gray") + + geom_smooth(method="lm", formula = y ~ x, color = "black", se = F) + + facet_wrap(vars(class), scales="free") + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./stackoverflow5.pdf]] + +*** Quantile regression + +As in the previous case we notice an asymmetry in the data, and the +linear regression model seems to influenced by the outliers. +#+begin_src R :exports results :results file graphics :file ./stackoverflow6.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(class, time)) + + geom_boxplot() + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./stackoverflow6.pdf]] + +As such, we will once again look into quantile regression with respect +to the median. + +#+begin_src R :results none +library(quantreg) +#+end_src + +#+begin_src R :exports both :results output +rqMod.small <- rq(time ~ size, tau = 0.5, data = dat[dat$class == "small",]) +rqMod.small +#+end_src + +#+RESULTS: +#+begin_example + +Call: +rq(formula = time ~ size, tau = 0.5, data = dat[dat$class == + "small", ]) + +Coefficients: + (Intercept) size +1.110000e+02 7.013151e-18 + +Degrees of freedom: 5015 total; 5013 residual +#+end_example + +For the "small" class we estimate a latency $L = 111$ and a capacity +$C = 1 / 7.013151e-18 = 1.425893e+17$. Note that the capacity seems to +have no impact. 
+ +#+begin_src R :exports both :results output +rqMod.large <- rq(time ~ size, tau = 0.5, data = dat[dat$class == "large",]) +rqMod.large +#+end_src + +#+RESULTS: +#+begin_example + +Call: +rq(formula = time ~ size, tau = 0.5, data = dat[dat$class == + "large", ]) + +Coefficients: + (Intercept) size + 1.120000e+02 -4.405647e-19 + +Degrees of freedom: 1809 total; 1807 residual +#+end_example + +In the "large" class we estimate a latency $L = 112$ and a capacity +$C = 1 / -4.405647e-19 = -2.269814e+18$. Note that once again we have +a negative capacity but that the slope is very close to zero. As such, +this seems to be an artifact of the data, and we could change the sign +of the slope to be positive with no significant detriment to our model. + +Finally, we plot the model along with the data + +#+begin_src R :exports results :results file graphics :file ./stackoverflow7.pdf :width 6 :height 4 +library("ggplot2") +ggplot(dat, aes(x = size, y = time)) + + geom_point(color = "gray") + + geom_smooth(method="rq", formula = y ~ x, se = F, method.args = list(tau = 0.5), color = "black") + + facet_wrap(vars(class), scales="free") + + theme_bw() +#+end_src + +#+RESULTS: +[[file:./stackoverflow7.pdf]] -Finally, don't forget that we provide in the resource section of this -MOOC a configuration with a few keyboard shortcuts that allow you to -quickly create code blocks in R by typing ~