> "Here we explore an example of regression analysis in the context of our Big Tests":




with(stats):
with(stats[statplots]):
# Convert(data): normalise the second component of every pair by the total of
# all second components.
#   data - list of pairs [x, w]
# Returns [[x1, w1/T], [x2, w2/T], ...] where T = w1 + w2 + ... .
# The total is summed directly.  The earlier version obtained it as
# nops(data)*describe[mean](...) while declaring a local called `mean`, so the
# index handed to `describe` was the local symbol rather than the global name.
Convert:=proc(data)
local total,j;
total:=add(data[j][2],j=1..nops(data));
[seq([data[j][1],data[j][2]/total],j=1..nops(data))];
end:
# `Bargraph/outline`(data,xmin,xmax,k,byarea): shared worker for Bargraph and
# Areabargraph.
#   data      - list of numbers
#   xmin,xmax - ends of the interval that is divided into bins
#   k         - number of equal-width bins
#   byarea    - false: bar height = count/nops(data)
#               true : bar height = count/(nops(data)*dx), dx = (xmax-xmin)/k
# Prints a note when some value lies outside [xmin,xmax]; such values are not
# counted in any bin but still contribute to nops(data) in the denominator.
# Returns the list of points tracing the outline of the k bars.
`Bargraph/outline`:=proc(data,xmin,xmax,k,byarea)
local sorteddata,dx,total,inrange,numb,idx,upper,cnt,counts,denom,i,j;
sorteddata:=sort(data);
dx:=(xmax-xmin)/k;
if ((op(1,sorteddata)<xmin) or (op(nops(sorteddata),sorteddata)>xmax))
then
lprint(`Note: some data values lie outside the user-defined interval.`);
fi;
total:=nops(sorteddata);
inrange:=select((v,lo,hi) -> (v>=lo and v<=hi),sorteddata,xmin,xmax);
numb:=nops(inrange);
idx:=1;
counts:=[];
for i from 1 to k do
# Bin i holds the not-yet-counted values <= xmin+i*dx.  The last bin is closed
# with xmax itself so rounding in the bin limit cannot drop in-range values.
if i=k then upper:=xmax else upper:=xmin+i*dx fi;
cnt:=0;
while ((idx<=numb) and (inrange[idx]<=upper)) do
cnt:=cnt+1;
idx:=idx+1;
od;
counts:=[op(counts),cnt];
od;
if byarea then denom:=total*dx else denom:=total fi;
# Four points per bar: up the left edge, across the top, down the right edge.
[seq(op([[xmin+(j-1)*dx,0],
[xmin+(j-1)*dx,counts[j]/denom],
[xmin+j*dx,counts[j]/denom],
[xmin+j*dx,0]]),j=1..k)];
end:

# Bargraph(data,xmin,xmax,k): line plot of a k-bin histogram of data over
# [xmin,xmax]; the height of each bar is the fraction of all data values that
# fall in the bin.
Bargraph:=proc(data,xmin,xmax,k)
plot(`Bargraph/outline`(data,xmin,xmax,k,false),style=LINE);
end:

# Areabargraph(data,xmin,xmax,k): as Bargraph, but each height is additionally
# divided by the bin width dx, i.e. count/(nops(data)*dx).
Areabargraph:=proc(data,xmin,xmax,k)
plot(`Bargraph/outline`(data,xmin,xmax,k,true),style=LINE);
end:




# Mine(L,n): take the n-th component of every entry of L and pair it with 1.
# Returns [[L[1][n],1], [L[2][n],1], ...].
Mine:=proc(L,n)
local idx;
[seq([L[idx][n],1],idx=1..nops(L))];
end:


# Mine2(L,n): list of the n-th components of the entries of L.
# Returns [L[1][n], L[2][n], ...].
Mine2:=proc(L,n)
local idx;
[seq(L[idx][n],idx=1..nops(L))];
end:





# AveIt(L,K): element-wise average of two lists.
# Walks the indices of L, so K must supply at least nops(L) entries.
AveIt:=proc(L,K)
local idx;
[seq(1/2*(L[idx]+K[idx]),idx=1..nops(L))];
end:

# Fuzz(L,H,E1): add random noise to two lists.
# Each entry receives its own draw E1*u with u taken from
# stats[random, uniform[0,1]]; every entry of L is processed before any entry
# of H.  Both passes run over the indices 1..nops(L).
# Returns [noisy L, noisy H].
Fuzz:=proc(L,H,E1)
local n,idx,noisyL,noisyH;
n:=nops(L);
noisyL:=[seq(L[idx]+E1*stats[random, uniform[0,1]](1),idx=1..n)];
noisyH:=[seq(H[idx]+E1*stats[random, uniform[0,1]](1),idx=1..n)];
[noisyL,noisyH];
end:

# Analysis(L): summary statistics of a list of numbers.
# Returns [mean, variance, standard deviation] as floats.  The variance is the
# average squared deviation from the mean (divisor nops(L)).
Analysis:=proc(L)
local n,x,total,avg,sq,var;
n:=nops(L);
total:=0;
for x in L do total:=total+x od;
avg:=evalf(total/n);
sq:=0;
for x in L do sq:=sq+(x-avg)^2 od;
var:=evalf(sq/n);
[avg,var,evalf(sqrt(var))];
end:


# Analysis2(L,H): joint statistics of two paired lists.
# Uses Analysis for the means and standard deviations of L and H, then returns
# [covariance, correlation, correlation^2], where the covariance is the average
# of (L[i]-mean L)*(H[i]-mean H) over i = 1..nops(L).
Analysis2:=proc(L,H)
local statsL,statsH,n,idx,acc,covar,corr;
statsL:=Analysis(L);
statsH:=Analysis(H);
n:=nops(L);
acc:=0;
for idx to n do
acc:=acc+(L[idx]-statsL[1])*(H[idx]-statsH[1]);
od;
covar:=acc/n;
corr:=covar/(statsL[3]*statsH[3]);
[covar,corr,corr^2];
end:


"Here is a program to analyze pairs of potentially correlated data":

# Scatter(B,type): report and plot the relationship between paired data.
#   B    - list of pairs [first value, second value]
#   type - pass the name Whole to plot the points with Fuzz(...,.5) noise added;
#          any other value plots the raw points
# Prints the mean and standard deviation of each coordinate, the correlation
# and 100*correlation^2, then returns a plots[display] of the scatter plot with
#   black line: slope correlation*(sd second/sd first) through the point of means
#   blue line : slope +/-(sd second/sd first) through the point of means, with
#               the minus sign chosen when the correlation is <= 0.
# The plot variable x is declared local so a value assigned to the global x
# cannot break the plot calls.  Message strings are kept exactly as before.
Scatter:=proc(B,type)
local sl,Al,Ah,A,F,L,H,x,xrange;
L:=Mine2(B,1):
H:=Mine2(B,2):
Al:=Analysis(L);
Ah:=Analysis(H);
A:=Analysis2(L,H);
if A[2] <= 0 then
sl:= - (Ah[3]/Al[3]);
else
sl:= (Ah[3]/Al[3]);
fi;
lprint("the first set of data has mean",Al[1],"and standard deviation",Al[3]);
lprint("the second set of dat has mean",Ah[1],"and standard deviation",Ah[3]);
lprint("the correlation of the second on the first is",A[2]):
lprint("while the strength of this correlation is",evalf(100*A[3]),"percent"):
if type=Whole
then
F:=Fuzz(L,H,.5):
else
F:=[L,H]:
fi;
lprint("Here is the scatter plot, where the black line is the is the regression line of the second set of data on the first and the blue line is the
standard deviation line.");
# Horizontal window shared by both lines and the view: one unit beyond the data.
xrange:=min(op(L))-1..max(op(L))+1;
plots[display](
{scatterplot(F[1],F[2],color=red),
plot((A[2]*(sl))*(x-Al[1])+Ah[1], x=xrange,color=black),
plot((sl)*(x-Al[1])+Ah[1], x=xrange,color=blue)
}, view = [xrange, min(op(H))-1..max(op(H))+1], axes=frame,color=red
);
end:

Warning, the name transform has been redefined

> Scores:=[ # each entry is [label, [first score, second score]]; labels are presumably subject ids - TODO confirm
[1,[6,5]],
[2,[5,6]],
[3,[10,9]],
[4,[10,7]],
[5,[7,4]],
[6,[4,5]],
[7,[9,4]],
[8,[5,2]],
[9,[10,5]],
[10,[6,3]],
[11,[1,3]],
[13,[9,3]],
[14,[1,6]],
[15,[6,3]],
[16,[10,6]],
[17,[8,9]],
[18,[10,10]],
[23,[8,4]],
[24,[4,6]],
[25,[9,6]],
[26,[2,3]],
[27,[10,4]],
[28,[10,1]]
];

Scores := [[1, [6, 5]], [2, [5, 6]], [3, [10, 9]], ...
Scores := [[1, [6, 5]], [2, [5, 6]], [3, [10, 9]], ...
Scores := [[1, [6, 5]], [2, [5, 6]], [3, [10, 9]], ...

> Iso:=proc(L)
# Iso(L): drop the leading label of every entry of L.
#   L - list of entries [label, payload]
# Returns [payload1, payload2, ...].  The list is now the explicit value of the
# procedure; before, the result was whatever the last assignment inside the
# loop produced, which is undefined by the code itself when L is empty.
local i:
[seq(L[i][2],i=1..nops(L))];
end:

> CorScore:=Iso(Scores);

CorScore := [[6, 5], [5, 6], [10, 9], [10, 7], [7, ...
CorScore := [[6, 5], [5, 6], [10, 9], [10, 7], [7, ...

> Scatter(CorScore,Whole);

"the first set of data has mean", 6.956521739, "and standard deviation", 2.970554930

"the second set of dat has mean", 4.956521739, "and standard deviation", 2.235645241

"the correlation of the second on the first is", .2681357700

"while the strength of this correlation is", 7.189679115, "percent"

"Here is the scatter plot, where the black line is the is the regression line of the second set of data on the first and the blue line is the\nstandard deviation line."

[Maple Plot]

> Regres:=proc(H)
# Regres(H): conditional count on labelled score pairs.
#   H - list of entries [label, [first score, second score]]
# Let L be the entries whose first score is < 7 and K the entries of L whose
# second score is > 4.  Prints L and K with lprint, then returns
#   [nops(H), nops(L), nops(K), evalf(nops(K)/nops(L))].
# When no entry has a first score < 7 the ratio is reported as `undefined`
# instead of raising a division-by-zero error.  The former intermediate
# statement [evalf(t/nops(L)),s] had no effect and has been removed.
local L,K,t,s,ratio:
L:=select(r -> evalb(r[2][1] < 7),H);
K:=select(r -> evalb(r[2][2] > 4),L);
t:=nops(L);
s:=nops(K);
lprint(L);
lprint(K);
if t = 0 then
ratio:=undefined;
else
ratio:=evalf(s/t);
fi;
[nops(H),t,s,ratio];
end:

> Regres(Scores);

[[1, [6, 5]], [2, [5, 6]], [6, [4, 5]], [8, [5, 2]], [10, [6, 3]], [11, [1, 3]], [14, [1, 6]], [15, [6, 3]], [24, [4, 6]], [26, [2, 3]]]

[[1, [6, 5]], [2, [5, 6]], [6, [4, 5]], [14, [1, 6]], [24, [4, 6]]]

[23, 10, 5, .5000000000]

>