diff --git a/journal/journal.org b/journal/journal.org index 5cdb4a0415058d68d5cdbe9d25256eb716b7d09b..5943549a3f2b8fa2f09e8a0898f34a7edc4dd09c 100644 --- a/journal/journal.org +++ b/journal/journal.org @@ -1,7 +1,7 @@ # -*- mode: org -*- # -*- coding: utf-8 -*- #+STARTUP: overview indent inlineimages logdrawer -#+TITLE: Journal +#+TITLE: Journal MOOC #+AUTHOR: Victor Martins Gomes #+LANGUAGE: en #+TAGS: LIG(L) HOME(H) Europe(E) Blog(B) noexport(n) Stats(S) diff --git a/module2/exo1/toy_document_orgmode_python_en.org b/module2/exo1/toy_document_orgmode_python_en.org index 6a68177a1fab4b46d56743dac7aa4d9bd3c476f2..d97e106af516be424042a7af367620c9b0c37e06 100644 --- a/module2/exo1/toy_document_orgmode_python_en.org +++ b/module2/exo1/toy_document_orgmode_python_en.org @@ -50,6 +50,8 @@ print(matplot_lib_filename) #+RESULTS: [[file:figure_pi_mc2.png]] + fig, ax = plt.subplots(1) + figure_pi_mc2.png]] It is then straightforward to obtain a (not really good) approximation to $\pi$ by counting how many times, on average, $X^2 + Y^2$ is smaller than 1: diff --git a/module2/exo4/data_hist.png b/module2/exo4/data_hist.png new file mode 100644 index 0000000000000000000000000000000000000000..cfaddb27b422c0a3162eb375585dce4b5684d240 Binary files /dev/null and b/module2/exo4/data_hist.png differ diff --git a/module2/exo4/data_plot.png b/module2/exo4/data_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..a9ca617be2c9252660eda23f68f791ae1973d494 Binary files /dev/null and b/module2/exo4/data_plot.png differ diff --git a/module2/exo4/exercice_python_en.html b/module2/exo4/exercice_python_en.html new file mode 100644 index 0000000000000000000000000000000000000000..88860b5466993c8e87a542f3231aacad5af905aa --- /dev/null +++ b/module2/exo4/exercice_python_en.html @@ -0,0 +1,505 @@ + + + + + + + + +Reading and Analyzing some experimental hdf5 data + + + + + + + + +
+

Reading and Analyzing some experimental hdf5 data

+
+

Table of Contents

+ +
+

+I will read and get the statistics of some data I usually acquire in the lab. I +will use this log as a future reference to when I need to read hdf5 files +contained my data. +

+ +
+

1 Data reading using h5py package

+
+

+To read the data we make use of the h5py package which contains most of what +might be necessary to read hdf5 files. +

+ +
+
import h5py as h5
+f = h5.File('testdata.h5','r')
+print(f.keys())
+
+
+ +
+<KeysViewHDF5 ['FileType', 'Frame', 'Waveforms']>
+
+ + +

+Now that we know what is inside, let’s try to see which waveforms we have. +

+ +
+
print(f['Waveforms'].keys())
+
+
+ +
+<KeysViewHDF5 ['Channel 1', 'Channel 2']>
+
+ + +

+We have two channels, thus another subgroup. Gotta investigate deeper: +

+
+
print(f['Waveforms/Channel 1'].keys())
+print('---')
+print(f['Waveforms/Channel 2'].keys())
+
+
+ +
+<KeysViewHDF5 ['Channel 1Data']>
+---
+<KeysViewHDF5 ['Channel 2Data']>
+
+ + +

+Now we can get the attributes of the recorded channels. They are going to be +necessary to properly map the data to a numpy array. +

+
+
print(f['Waveforms/Channel 1'].attrs.keys())
+
+
+ +
+<KeysViewHDF5 ['Count', 'DispInterpFactor', 'InterpSetting', 'MaxBandwidth', 'MinBandwidth', 'NumPoints', 'NumSegments', 'SavedInterpFactor', 'Start', 'WavAttr', 'WaveformType', 'XDispOrigin', 'XDispRange', 'XInc', 'XOrg', 'XUnits', 'YDispOrigin', 'YDispRange', 'YInc', 'YOrg', 'YUnits']>
+
+ + +

+Besides the channels we’ve read, a time series should be created. Each of +its elements should correspond to a data sample. To do that we will use the +information stored in one of the channels (supposing they were acquired using +the same oscilloscope config). +

+
+
import numpy as np
+import matplotlib.pyplot as plt
+
+idata1 = f['Waveforms/Channel 1'].attrs.get('XDispRange')
+# XDispRange and YDispRange are the total range covered in the oscilloscope display.
+idata2 = f['Waveforms/Channel 1'].attrs.get('XOrg')
+# XOrg is the x-origin of the data and will be the same as XDispOrigin if the saved
+# data is constrained to the displayed area.
+idata3 = f['Waveforms/Channel 1'].attrs.get('XInc')
+# Xinc is the incremental value of X, or the sampling period (i.e. XDispRange/NumPoints).
+idata4 = f['Waveforms/Channel 1'].attrs.get('NumPoints')
+temp1 = idata3*(np.arange(1,idata4+1))+idata2
+
+plt.figure()
+plt.plot(temp1,'b')
+plt.grid(linestyle='dotted')
+plt.axis('tight')
+plt.title('Times series')
+
+plt.savefig(filename)
+print(filename)
+
+
+ + +
+

time_plot.png +

+
+ +

+Once the time array has been created we can move to reading the data. +

+
+
yinc = f['Waveforms/Channel 1'].attrs.get('YInc') # Vertical sampling period
+yorg = f['Waveforms/Channel 1'].attrs.get('YOrg') # Closest value to origin (y=0)
+data1 = yorg + yinc*f['Waveforms/Channel 1/Channel 1Data']
+yinc = f['Waveforms/Channel 2'].attrs.get('YInc')
+yorg = f['Waveforms/Channel 2'].attrs.get('YOrg')
+data2 = yorg + yinc*f['Waveforms/Channel 2/Channel 2Data']
+print('Data type data:', data1.dtype)
+
+
+ +
+Data type data: float64
+
+ + +

+Now we can plot the data to see it. +

+ +
+
plt.close('all')
+plt.figure()
+plt.plot(temp1*1e6,data1,'b')
+plt.plot(temp1*1e6,data2,'r')
+plt.grid(linestyle='dotted')
+plt.axis('tight')
+plt.xlabel('Time ($\mu$s)')
+plt.ylabel('Amplitude (Volts)')
+plt.title('Trigger (blue) and Data (red)')
+
+plt.savefig(filename)
+print(filename)
+
+
+ + +
+

data_plot.png +

+
+ +

+Data looks nice and amplitudes agree with what was observed in the oscilloscope +display. Therefore out reading method works. +

+ +

+Some statistics from the data can now be computed, once we can trust the data +was correctly read. +

+
+
avg_data = np.mean(data2)
+med_data = np.median(data2)
+std_data = np.std(data2,ddof=1)
+min_data = np.amin(data2)
+max_data = np.amax(data2)
+#------------------------
+print('Data average (Volts): ',avg_data)
+print('Data median (Volts): ',med_data)
+print('Data standard deviation (Volts): ',std_data)
+print('Data minimum (Volts): ',min_data)
+print('Data maximum (Volts): ',max_data)
+
+
+ +
+Data average (Volts):  0.0019089614972975556
+Data median (Volts):  0.004743201433087485
+Data standard deviation (Volts):  0.039513536115368106
+Data minimum (Volts):  -0.1612688487249741
+Data maximum (Volts):  0.15652564729188667
+
+ + +

+The data was recorded when the input channel was turned off, thus from that we +can estimated the signature “noise” of the system when the input will be on. +For that we will look at the histogram and see if looks like a Normal/Gaussian +continuous probability distribution. +

+
+
from scipy.stats import norm
+
+plt.figure()
+plt.hist(data2,color='b',histtype='bar',ec='black',density=True)
+
+x = np.linspace(min_data,max_data,idata4)
+p = norm.pdf(x,avg_data,std_data)
+
+plt.plot(x,p,'k',linewidth=2)
+
+plt.grid(linestyle='dotted')
+plt.axis('tight')
+
+title = "Fitting Gaussian to histogram: $\mu$ = %.5e, $\sigma$ = %.5e" % (avg_data,std_data)
+plt.title(title)
+
+plt.savefig(filename)
+print(filename)
+
+
+ + +
+

data_hist.png +

+
+ +

+At last we can use the D’Agostino’s K\(^2\) test to infer goodness-of-fit or if +the assumption of a Normal distribution really holds. +

+
+
from scipy.stats import normaltest
+alpha = 1e-3
+k2, p = normaltest(data2)
+print("p = {:.10e}".format(p))
+
+
+ +

+p = 0.0000000000e+00 +

+ +

+Since the p-value is (way) smaller than the alpha value we can conclude the null +hypothesis can be rejected, that is, a Gaussian distribution does not represent +the data. +

+
+
+
+
+

Date: 2020-04-28 Tuesday

+

Author: Victor Martins Gomes

+

Created: 2020-04-29 mer. 15:24

+
+ + diff --git a/module2/exo4/exercice_python_en.org b/module2/exo4/exercice_python_en.org index 5782f493934678ba782fb65634a4d86e5f3adefc..dd317b26c277296e06194118f692b85056b62d75 100644 --- a/module2/exo4/exercice_python_en.org +++ b/module2/exo4/exercice_python_en.org @@ -1,94 +1,187 @@ -#+TITLE: Your title -#+AUTHOR: Your name -#+DATE: Today's date +#+TITLE: Reading and Analyzing some experimental hdf5 data +#+AUTHOR: Victor Martins Gomes +#+DATE: 2020-04-28 Tuesday #+LANGUAGE: en -# #+PROPERTY: header-args :eval never-export +#+PROPERTY: header-args :session :exports both :eval never-export -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: -#+HTML_HEAD: +I will read and get the statistics of some data I usually acquire in the lab. I +will use this log as a future reference to when I need to read hdf5 files +contained my data. -* Some explanations +* Data reading using h5py package +To read the data we make use of the h5py package which contains most of what +might be necessary to read hdf5 files. -This is an org-mode document with code examples in R. Once opened in -Emacs, this document can easily be exported to HTML, PDF, and Office -formats. For more information on org-mode, see -https://orgmode.org/guide/. +#+begin_src python :results output :session *python* :exports both +import h5py as h5 +f = h5.File('testdata.h5','r') +print(f.keys()) +#+end_src -When you type the shortcut =C-c C-e h o=, this document will be -exported as HTML. All the code in it will be re-executed, and the -results will be retrieved and included into the exported document. If -you do not want to re-execute all code each time, you can delete the # -and the space before ~#+PROPERTY:~ in the header of this document. +#+RESULTS: +: -Like we showed in the video, Python code is included as follows (and -is exxecuted by typing ~C-c C-c~): +Now that we know what is inside, let's try to see which waveforms we have. -#+begin_src python :results output :exports both -print("Hello world!") +#+begin_src python :results output :session *python* :exports both +print(f['Waveforms'].keys()) #+end_src #+RESULTS: -: Hello world! +: -And now the same but in an Python session. With a session, Python's -state, i.e. the values of all the variables, remains persistent from -one code block to the next. The code is still executed using ~C-c -C-c~. +We have two channels, thus another subgroup. Gotta investigate deeper: +#+begin_src python :results output :session *python* :exports both +print(f['Waveforms/Channel 1'].keys()) +print('---') +print(f['Waveforms/Channel 2'].keys()) +#+end_src -#+begin_src python :results output :session :exports both -import numpy -x=numpy.linspace(-15,15) -print(x) +#+RESULTS: +: +: --- +: + +Now we can get the attributes of the recorded channels. They are going to be +necessary to properly map the data to a =numpy= array. +#+begin_src python :results output :session *python* :exports both +print(f['Waveforms/Channel 1'].attrs.keys()) #+end_src #+RESULTS: -#+begin_example -[-15. -14.3877551 -13.7755102 -13.16326531 -12.55102041 - -11.93877551 -11.32653061 -10.71428571 -10.10204082 -9.48979592 - -8.87755102 -8.26530612 -7.65306122 -7.04081633 -6.42857143 - -5.81632653 -5.20408163 -4.59183673 -3.97959184 -3.36734694 - -2.75510204 -2.14285714 -1.53061224 -0.91836735 -0.30612245 - 0.30612245 0.91836735 1.53061224 2.14285714 2.75510204 - 3.36734694 3.97959184 4.59183673 5.20408163 5.81632653 - 6.42857143 7.04081633 7.65306122 8.26530612 8.87755102 - 9.48979592 10.10204082 10.71428571 11.32653061 11.93877551 - 12.55102041 13.16326531 13.7755102 14.3877551 15. ] -#+end_example - -Finally, an example for graphical output: -#+begin_src python :results output file :session :var matplot_lib_filename="./cosxsx.png" :exports results +: + +Besides the channels we've read, a time series should be created. Each of +its elements should correspond to a data sample. To do that we will use the +information stored in one of the channels (supposing they were acquired using +the same oscilloscope config). +#+begin_src python :results output file :var filename="time_plot.png" :exports both :session *python* +import numpy as np import matplotlib.pyplot as plt -plt.figure(figsize=(10,5)) -plt.plot(x,numpy.cos(x)/x) -plt.tight_layout() +idata1 = f['Waveforms/Channel 1'].attrs.get('XDispRange') +# XDispRange and YDispRange are the total range covered in the oscilloscope display. +idata2 = f['Waveforms/Channel 1'].attrs.get('XOrg') +# XOrg is the x-origin of the data and will be the same as XDispOrigin if the saved +# data is constrained to the displayed area. +idata3 = f['Waveforms/Channel 1'].attrs.get('XInc') +# Xinc is the incremental value of X, or the sampling period (i.e. XDispRange/NumPoints). +idata4 = f['Waveforms/Channel 1'].attrs.get('NumPoints') +temp1 = idata3*(np.arange(1,idata4+1))+idata2 + +plt.figure() +plt.plot(temp1,'b') +plt.grid(linestyle='dotted') +plt.axis('tight') +plt.title('Times series') + +plt.savefig(filename) +print(filename) +#+end_src + +#+RESULTS: +[[file:time_plot.png]] + +Once the time array has been created we can move to reading the data. +#+begin_src python :results output :session *python* :exports both +yinc = f['Waveforms/Channel 1'].attrs.get('YInc') # Vertical sampling period +yorg = f['Waveforms/Channel 1'].attrs.get('YOrg') # Closest value to origin (y=0) +data1 = yorg + yinc*f['Waveforms/Channel 1/Channel 1Data'] +yinc = f['Waveforms/Channel 2'].attrs.get('YInc') +yorg = f['Waveforms/Channel 2'].attrs.get('YOrg') +data2 = yorg + yinc*f['Waveforms/Channel 2/Channel 2Data'] +print('Data type data:', data1.dtype) +#+end_src + +#+RESULTS: +: Data type data: float64 + +Now we can plot the data to see it. + +#+begin_src python :results output file :var filename="data_plot.png" :exports both :session *python* +plt.close('all') +plt.figure() +plt.plot(temp1*1e6,data1,'b') +plt.plot(temp1*1e6,data2,'r') +plt.grid(linestyle='dotted') +plt.axis('tight') +plt.xlabel('Time ($\mu$s)') +plt.ylabel('Amplitude (Volts)') +plt.title('Trigger (blue) and Data (red)') + +plt.savefig(filename) +print(filename) +#+end_src + +#+RESULTS: +[[file:data_plot.png]] + +Data looks nice and amplitudes agree with what was observed in the oscilloscope +display. Therefore out reading method works. + +Some statistics from the data can now be computed, once we can trust the data +was correctly read. +#+begin_src python :results output :session *python* :exports both +avg_data = np.mean(data2) +med_data = np.median(data2) +std_data = np.std(data2,ddof=1) +min_data = np.amin(data2) +max_data = np.amax(data2) +#------------------------ +print('Data average (Volts): ',avg_data) +print('Data median (Volts): ',med_data) +print('Data standard deviation (Volts): ',std_data) +print('Data minimum (Volts): ',min_data) +print('Data maximum (Volts): ',max_data) +#+end_src + +#+RESULTS: +: Data average (Volts): 0.0019089614972975556 +: Data median (Volts): 0.004743201433087485 +: Data standard deviation (Volts): 0.039513536115368106 +: Data minimum (Volts): -0.1612688487249741 +: Data maximum (Volts): 0.15652564729188667 + +The data was recorded when the input channel was turned off, thus from that we +can estimated the signature "noise" of the system when the input will be on. +For that we will look at the histogram and see if looks like a Normal/Gaussian +continuous probability distribution. +#+begin_src python :results output file :var filename="data_hist.png" :exports both :session *python* +from scipy.stats import norm + +plt.figure() +plt.hist(data2,color='b',histtype='bar',ec='black',density=True) + +x = np.linspace(min_data,max_data,idata4) +p = norm.pdf(x,avg_data,std_data) + +plt.plot(x,p,'k',linewidth=2) -plt.savefig(matplot_lib_filename) -print(matplot_lib_filename) +plt.grid(linestyle='dotted') +plt.axis('tight') + +title = "Fitting Gaussian to histogram: $\mu$ = %.5e, $\sigma$ = %.5e" % (avg_data,std_data) +plt.title(title) + +plt.savefig(filename) +print(filename) +#+end_src + +#+RESULTS: +[[file:data_hist.png]] + +At last we can use the D'Agostino's K$^2$ test to infer goodness-of-fit or if +the assumption of a Normal distribution really holds. +#+begin_src python :results output file :var filename="data_plot.png" :exports both :session *python* +from scipy.stats import normaltest +alpha = 1e-3 +k2, p = normaltest(data2) +print("p = {:.10e}".format(p)) #+end_src #+RESULTS: -[[file:./cosxsx.png]] - -Note the parameter ~:exports results~, which indicates that the code -will not appear in the exported document. We recommend that in the -context of this MOOC, you always leave this parameter setting as -~:exports both~, because we want your analyses to be perfectly -transparent and reproducible. - -Watch out: the figure generated by the code block is /not/ stored in -the org document. It's a plain file, here named ~cosxsx.png~. You have -to commit it explicitly if you want your analysis to be legible and -understandable on GitLab. - -Finally, don't forget that we provide in the resource section of this -MOOC a configuration with a few keyboard shortcuts that allow you to -quickly create code blocks in Python by typing ~