# Here we use our age data to explore the "danger of averaging".

# This vector (called A) contains the actual reported ages of our 10 subjects,
# in years.
A <- c(35, 44, 48, 42, 23, 44, 54, 22, 54, 26)

# This array contains the guesses made by our 9 in-class groups. The rows are
# the subjects and the columns are the groups, so G[i, j] is person i's age as
# guessed by group j.
G <- cbind(
  c(36, 30, 37, 32, 25, 31, 62, 23, 43, 24),
  c(27, 32, 43, 37, 24, 35, 56, 22, 43, 23),
  c(29, 38, 35, 36, 22, 30, 55, 24, 43, 22),
  c(26, 27, 36, 32, 28, 35, 58, 24, 43, 27),
  c(29, 25, 41, 22, 19, 33, 58, 24.5, 38, 22),
  c(32, 30, 37, 28, 28, 34, 56, 26, 42, 25),
  c(35, 40, 55, 36, 30, 35, 60, 27, 41, 22),
  c(26, 30, 43, 21, 21, 33, 58, 18, 42, 17),
  c(35, 28, 41, 33, 29, 32, 60, 27, 43, 24)
)

# First let us compare, with a scatter plot, our model of perceived ages with
# the average of our group guesses -- basically the guess we would make as a
# class. We let MG denote the mean guess for a given picture over all 9 of our
# groups.
MG <- rowMeans(G)

# PA is the script's "model of perceived ages": a quadratic function of the
# actual ages in A.
PA <- (2 / 51) * A^2 - (7 / 3) * A + 1000 / 17

plot(
  PA, MG,
  main = "Comparing Guessed Ages with Perceived Ages",
  xlab = " Our Model of Perceived Ages",
  ylab = "Class's Consensus Estimated Age"
)

# Here they are again, typed out as literal vectors (rounded copies of MG and
# PA) so individual values can be edited. Play with outliers. How does changing
# 58.11111 affect the slope and the correlation coefficient? Put in some
# outliers and explore.
MGf <- c(
  30.55556, 31.11111, 40.88889, 30.77778, 25.11111,
  33.11111, 58.11111, 23.94444, 42.00000, 22.88889
)
PAf <- c(
  25.19608, 32.07843, 37.17647, 30.00000, 25.90196,
  32.07843, 47.17647, 26.47059, 47.17647, 24.66667
)

plot(PAf, MGf)
cor(PAf, MGf)

# The scatter plot has PAf on the x axis and MGf on the y axis, so the line
# added by abline() must come from regressing MGf on PAf. (Fitting PAf ~ MGf
# and drawing it on these axes would show the wrong line.)
fit_MGf <- lm(MGf ~ PAf)
fit_MGf
abline(fit_MGf, col = "red")

# Open a fresh graphics device for the plots that follow. dev.new() is used
# instead of quartz() so the script is not tied to macOS.
dev.new()

######
# We now look at all the data. How should this affect the correlation? How
# should this affect our interpretation of the results?
# Pool every individual guess instead of averaging over groups.
# G is flattened column by column (group 1's guesses for every subject, then
# group 2's, ...), so the actual ages in A are repeated once per group to line
# up with it. A and G must already be defined earlier in the script.
Aall <- rep(A, times = ncol(G))

# Keep Gall a plain vector. Reshaping it into a one-column matrix makes cor()
# return a 1x1 matrix, which then turns b1 and b0 into 1x1 matrices and forces
# the fitted-value arithmetic below to recycle a length-1 array against a
# longer vector (deprecated in R).
Gall <- as.vector(G)

# The x axis here is the actual ages, so the title says so.
plot(
  Aall, Gall,
  main = "Comparing Guessed Ages with Actual Ages",
  xlab = " Actual Ages",
  ylab = "Each Group's Estimated Age"
)
cor(Aall, Gall)

# Same quadratic "model of perceived ages" as used for the averaged data,
# applied to the repeated actual ages.
PAall <- (2 / 51) * Aall^2 - (7 / 3) * Aall + 1000 / 17

plot(
  PAall, Gall,
  main = "Comparing Guessed Ages with Perceived Ages",
  xlab = " Our Model of Perceived Ages",
  ylab = "Each Group's Estimated Age"
)

# Correlation between perceived ages and the individual guesses; printed and
# kept for the hand-computed regression below.
rAPAall <- cor(PAall, Gall)
rAPAall

# Least-squares line computed by hand from the summary statistics:
#   slope     b1 = r * sd(y) / sd(x)
#   intercept b0 = mean(y) - b1 * mean(x)
mPAall <- mean(PAall)
sPAall <- sd(PAall)
mGall <- mean(Gall)
sGall <- sd(Gall)
b1 <- (sGall / sPAall) * rAPAall
b0 <- mGall - b1 * mPAall
hatGall <- b1 * PAall + b0

# Redraw the scatter plot and overlay the fitted values in red.
plot(
  PAall, Gall,
  main = "Comparing Guessed Ages with Perceived Ages",
  xlab = " Our Model of Perceived Ages",
  ylab = "Each Group's Estimated Age"
)
points(PAall, hatGall, col = "red")

# Question 1. lm(Gall ~ PAall) should be read as "please produce a linear
# model to predict Guesses from FofAges" (FofAges presumably being the
# perceived-age transform PAall). Explain what it actually does.
lm(Gall ~ PAall)

# Residuals of the hand-computed fit: their distribution, and how they vary
# with the predictor.
ResAll <- Gall - hatGall
hist(ResAll)
plot(PAall, ResAll)