The Vitruvian man

The Vitruvian man#

This Jupyter Notebook is made for the TPT article The Vitruvian man: an introduction to measurement and data-analysis. The data is collected via google forms with three questions: Gender; Length in cm; Arm span in cm.

To save the CSV file: Open forms -> as spreadsheet -> download as CSV Note that the CSV file is stored with timestamp (first column).

An initial dataset in dataset.csv is provided to illustrate how the data-analysis can be performed.

# Import the required libaries
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from scipy import stats
# Loading the data from the CSV file, column 0 with timestap is skipped
data = np.genfromtxt('dataset.csv',skip_header=1, delimiter=",",usecols=range(1,4),dtype=str)

# Show the first row and some cells to check the output
print(data[1,:])
print(data[1,0])
print(data[1,2])

#seperate data based on gender and convert str to float. 
female = data[data[:,2]=="Female",0:2].astype(float)
male = data[data[:,2]=="Male",0:2].astype(float)
unknown = data[data[:,2]=="",0:2].astype(float)
full_dataset = np.concatenate((female,male,unknown))

# Show the first row of the new array female
print(female[0,:])
['168' '168' 'Male']
168
Male
[185. 176.]
# A first glimpse at the data
plt.figure()
plt.xlabel('Body height (cm)')
plt.ylabel('Arm span (cm)')

plt.plot(male[:,0],male[:,1],'r+',label='male')
plt.plot(female[:,0],female[:,1],'b_',label='female')
plt.plot(unknown[:,0],unknown[:,1],'k.',label='not specified')

# setting the axis limits 
plt.xlim(0.95*np.min(full_dataset[:,0]),1.05*np.max(full_dataset[:,0]))
plt.ylim(0.95*np.min(full_dataset[:,1]),1.05*np.max(full_dataset[:,1]))
         
plt.legend()
plt.show()
../../_images/62674e2e82dc1dd6744aedbb220a1e054a535858247f626f95258e7e1fd98242.png
# Fitting using a linear fit and direct proportional relation using entire dataset

def linear(x,a,b):
    return a*x+b

def direct_prop(x,a):
    return a*x

lin_var, lin_cov = curve_fit(linear,full_dataset[:,0],full_dataset[:,1])
prop_var, prop_cov = curve_fit(direct_prop,full_dataset[:,0],full_dataset[:,1])

length_dummy = np.linspace(0,1.2*max(full_dataset[:,1]),1000)
fit_lin = linear(length_dummy,*lin_var)
fit_prop = direct_prop(length_dummy,*prop_var)

residuals = full_dataset[:,1]-direct_prop(full_dataset[:,0],*prop_var)
# Plotting the data and both fits and the residuals

fig, (ax1, ax2) = plt.subplots(2, sharex=True, gridspec_kw={'height_ratios': [3, 1]})

plt.xlabel('Body height (cm)')
ax1.set_ylabel('Arm span (cm)')
ax2.set_ylabel('Residuals')

ax1.plot(male[:,0],male[:,1],'r+',label='male')
ax1.plot(female[:,0],female[:,1],'b_',label='female')
ax1.plot(unknown[:,0],unknown[:,1],'k.',label='not specified')

ax1.plot(length_dummy, fit_lin, 'r--', label='linear: AS = {:.3f} * BH + {:.3f}'.format(lin_var[0], lin_var[1]))
ax1.plot(length_dummy,fit_prop,'b--',label='direct propotional: AS = %1.3f*BH ' %prop_var[0])
ax1.legend()

ax2.plot(full_dataset[:,0],residuals,'k.')

plt.xlim(0.95*np.min(full_dataset[:,0]),1.05*np.max(full_dataset[:,0]))
ax1.set_ylim(0.95*np.min(full_dataset[:,1]),1.05*np.max(full_dataset[:,1]))
ax2.set_ylim(-15,15)

plt.show()
../../_images/1680b8eb77f4e4e65c0aeb37e621e9fcd01dd82390b7b5d35104d1ac3e7812d4.png
# Residual analysis

x_gauss = np.arange(-20, 20, 0.01)
y_gauss = stats.norm.pdf(x_gauss, np.mean(residuals), np.std(residuals))
  
print("average value:", np.mean(residuals),"standard deviation", np.std(residuals))

## plot data
plt.figure()

plt.xlabel("residual (cm)")
plt.ylabel("percentage")

plt.hist(residuals,density=True)
plt.plot(x_gauss, y_gauss,'r--')

plt.show()
average value: -0.0004005400346063269 standard deviation 4.931398098381245
../../_images/c71fa755b17baa0ed63c90fb08665a5903df56cd7764160f811a375bf2c04583.png
plt.figure()

plt.xlabel('Body height (cm)')
plt.ylabel('Arm span (cm)')

plt.hist2d(full_dataset[:,0], full_dataset[:,1],bins=10)

plt.show()
plt.figure()

plt.xlabel('Body height (cm)')
plt.ylabel('Arm span (cm)')

plt.hist2d(full_dataset[:,0], full_dataset[:,1],bins=100)

plt.show()