# Here we explore the age-guessing data that we collected in class.

# This vector (called A) contains the actual reported ages of our 10 subjects in years.

A <- c(35, 44, 48, 42, 23, 44, 54, 22, 54, 26)

# This matrix contains the guesses made by our 9 in-class groups.  The rows
# are the samples (subjects) and the columns are the groups, so G[i, j] is
# person i's age as guessed by group j.
#
# The guesses are listed one group at a time.  matrix() fills column by
# column, so each line of numbers below becomes one column of G.
G <- matrix(
  c(
    36, 30, 37, 32, 25, 31, 62, 23,   43, 24,  # group 1
    27, 32, 43, 37, 24, 35, 56, 22,   43, 23,  # group 2
    29, 38, 35, 36, 22, 30, 55, 24,   43, 22,  # group 3
    26, 27, 36, 32, 28, 35, 58, 24,   43, 27,  # group 4
    29, 25, 41, 22, 19, 33, 58, 24.5, 38, 22,  # group 5
    32, 30, 37, 28, 28, 34, 56, 26,   42, 25,  # group 6
    35, 40, 55, 36, 30, 35, 60, 27,   41, 22,  # group 7
    26, 30, 43, 21, 21, 33, 58, 18,   42, 17,  # group 8
    35, 28, 41, 33, 29, 32, 60, 27,   43, 24   # group 9
  ),
  nrow = length(A)
)

# Here is the matrix of the errors (guess minus actual age), ordered as above.

# A has one entry per row of G, and R recycles it down each column, so
# subject i's actual age is subtracted from every guess in row i.
G - A

# Pool every error into one ascending vector, then report the summary
# statistics, the mean, and the standard deviation of the pooled errors.

Errors <- sort(as.vector(G - A))
summary(Errors)
mean(as.vector(G - A))
sd(as.vector(G - A))

# We can plot a version of the summary called the box plot. 

# Errors is already a plain sorted vector, so it can be plotted directly.
boxplot(Errors)

# We can graph a cumulative distribution function, which tells us the
# percentile associated with each error value: the k-th smallest of the n
# pooled errors sits at percentile 100 * k / n.

Percentile <- 100 * seq_along(Errors) / length(Errors)
plot(Errors, Percentile, main = "The Percentile of Every Error Value")


# Another way to view the data is the histogram (the "derivative" of the
# cumulative distribution function).

# breaks = 6 is a suggested number of cells; `nclass` is only an
# S-compatibility alias for the same thing.
hist(c(G - A), breaks = 6, main = "A Histogram of the Error Values")

# Question: This  histogram looks bimodal.  Why might this be? 

# One answer: The process of evaluating male and female ages might be very different.

# Here is a histogram of our Female Errors

# LW holds the row indices (subjects) used for the female histogram.  Their
# errors are the matching rows of G minus the matching entries of A.
LW <- c(2, 3, 5, 6, 9)
Women <- G[LW, ] - A[LW]
hist(Women, breaks = 6, main = "Our Histogram Conditioned on Female Samples")

mean(Women)

# Here is a histogram of our Male Errors

# Open a second graphics device so the female histogram is not overwritten.
# dev.new() opens the platform's default device; quartz() exists only on
# macOS and fails everywhere else.
dev.new()

# LM holds the row indices (subjects) used for the male histogram.
LM <- c(1, 4, 7, 8, 10)
Men <- G[LM, ] - A[LM]
hist(Men, breaks = 6, main = "Our Histogram Conditioned on Male Samples")

mean(Men)


#  Question: Do the distributions we see help explain the bi-modality?

# Question: Redo the above exploration, this time considering age as a possible cause of the bi-modality.  Can age be used to help us come to terms with the skewness and tails of our Female and Male distributions?

########

# Suppose I were instead interested not in the difference between your guesses and the true ages, but rather in the differences between your perceptions of the picture.  How might I compare your answers for different samples?


#Answer: normalize.

 
# Standardize the guesses subject by subject: for each row of G, subtract
# that row's mean and divide by that row's sample standard deviation.
#
# Each subject (row) has one guess per group, so the number of observations
# behind every mean and standard deviation is ncol(G), and the n - 1
# denominator of the sample standard deviation is ncol(G) - 1.

M1 <- rowSums(G)          # total of the guesses for each subject
M <- M1 / ncol(G)         # mean guess for each subject
Centered <- G - M         # M is recycled down the columns: row i loses M[i]
# NOTE: the variable `sd` shares its name with stats::sd().  Calls such as
# sd(x) still reach the function, because R skips non-function objects when
# looking up a name used as a function.
sd <- sqrt(rowSums(Centered^2) / (ncol(G) - 1))
Normalized <- Centered / sd
sort(Normalized)
hist(Normalized, breaks = 6)


# We should compare this to the normal.  


# Evaluation grid for the standard normal density: -3 to 3 in steps of 1/6.
X <- seq(-3, 3, by = 1 / 6)
# freq = FALSE puts the histogram on the density scale, so the normal curve
# can be drawn over it on the same axes.
hist(Normalized, freq = FALSE)
lines(X, dnorm(X), col = "blue")


###########


# Question: How should we rank which group did best?  (Absolute value of the errors, mean or median, a price for bad decisions -- notice that the choice changes the results!)


#Numerical

# Absolute errors with one row per group and one column per subject:
# AErrors[j, i] is how far group j's guess was from subject i's actual age.
# abs(G - A) has subjects in rows and groups in columns, so transposing it
# gives the group-per-row layout directly, with no need to grow a matrix
# row by row in a loop.
AErrors <- t(abs(G - A))
AErrors

#Summary for groups 

# summary() on a matrix summarizes each column separately, and the columns
# of abs(G - A) are the groups.
summary(abs(G - A))

# Summary for samples: transpose first so the samples become the columns.

summary(t(abs(G - A)))


# Pictorial.  With 10 samples a box plot is pretty weak but we can see...

# One list element per group, each holding that group's absolute errors over
# all subjects.  boxplot() draws one box per list element, so the plot shows
# the groups side by side.  seq_len(ncol(G)) stays correct however many
# columns G has (a 2:ncol(G) loop would count down for a single column).
MakeList <- lapply(seq_len(ncol(G)), function(j) abs(G[, j] - A))
MakeList

boxplot(MakeList)