# Import all the packages
import numpy as np
import scipy as sc
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Force matplotlib to produce figures inline
%matplotlib inline
# Read the pottery data from the local file 'pottery.txt' (it must already be
# downloaded). Columns 0, 4 and 5 are kept and stored in the fields 'Al', 'Na'
# and 'site' (two floats and a 2-byte string); the first line is skipped.
pottery_data = np.genfromtxt(
    'pottery.txt',
    usecols=(0, 4, 5),
    dtype=[('Al', 'f8'), ('Na', 'f8'), ('site', 'S2')],
    # skip_header is genfromtxt's keyword for skipping leading lines; the old
    # skiprows spelling is not accepted by current NumPy releases.
    skip_header=1,
)
# Find unique labels in site (np.unique returns them sorted)
labels = np.unique(pottery_data['site'])
# Create, for each field, a list holding one array of values per unique site,
# in the same order as `labels`
xdAl = []
xdNa = []
for label in labels:
    at_site = pottery_data['site'] == label  # boolean mask of this site's rows
    xdAl.append(pottery_data['Al'][at_site])
    xdNa.append(pottery_data['Na'][at_site])
# Plot the data for Al and Na oxides as two side-by-side box plots, one box
# per site.
# The site codes are stored as byte strings; decode them so the tick labels
# show the plain code rather than a bytes repr under Python 3.
site_names = [
    site.decode() if isinstance(site, bytes) else str(site) for site in labels
]
for position, (oxide, per_site) in enumerate((('Al', xdAl), ('Na', xdNa)), start=1):
    plt.subplot(1, 2, position)
    # NOTE: newer Matplotlib releases name this keyword `tick_labels`;
    # `labels` is kept here for compatibility with older installations.
    plt.boxplot(per_site, labels=site_names, sym='ro', whis=1.5)
    plt.title(oxide + ' oxide concentration')
    plt.xlabel('Sites')
    plt.ylabel('Concentration (in %)')
# Keep the two panels and their axis labels from overlapping
plt.tight_layout()
# Fetch the "delivery" data set of the R package "robustbase" via statsmodels
delivery_time = sm.datasets.get_rdataset("delivery", "robustbase")
# Scatter delivery time against distance; each marker's size is 15 times the
# value in the 'n.prod' column
deliveries = delivery_time.data
marker_sizes = 15 * deliveries['n.prod']
plt.scatter(
    deliveries['distance'],
    deliveries['delTime'],
    c='c',
    s=marker_sizes,
)
plt.title('Time to Service')
plt.xlabel('Distance')
plt.ylabel('Delivery Time')
plt.xlim([0, 1600])
# The central limit theorem states that the distribution of the sum (or mean)
# of a large number of independent, identically distributed random variables
# with finite variance is approximately normal, regardless of the underlying
# distribution.
# Draw 10 rows of 11,555,000 samples each, uniformly distributed on [0, 1)
ur = np.random.rand(10, 11555000)
# Histogram (75 bins) of the first row, normalised to a probability density.
# `density=True` is the supported spelling: the older `normed` keyword has
# been removed from Matplotlib, and the flag must be a boolean, not the
# string 'True'.
plt.hist(ur[0, :], 75, color='c', density=True)
# Column-wise means of the first n rows of `ur`, for n = 2, 4, 8 and 10.
# Row k of Xmu1 holds the means for sample_sizes[k]; the number of columns is
# taken from `ur` so the two arrays cannot fall out of step.
sample_sizes = (2, 4, 8, 10)
Xmu1 = np.zeros((len(sample_sizes), ur.shape[1]))
for row, n in enumerate(sample_sizes):
    # ur[:n] selects exactly the first n rows, so the n = 10 case averages
    # all ten rows instead of repeating the n = 8 result.
    Xmu1[row, :] = np.mean(ur[:n], axis=0)
# 2x2 grid of density histograms (75 bins each): panel k shows row k of Xmu1
# and is titled with the number of samples averaged for that row.
for row, n in enumerate((2, 4, 8, 10)):
    plt.subplot(2, 2, row + 1)
    # Each panel reads its own row (the 'n = 10' panel uses row 3, not row 2).
    # density=True normalises the histogram; the older `normed` keyword is
    # no longer accepted by Matplotlib.
    plt.hist(Xmu1[row, :], 75, color='k', density=True)
    plt.title('n = {}'.format(n))
plt.tight_layout()
plt.show()