# «University of Nebraska - Lincoln DigitalCommons of Nebraska - Lincoln Dissertations and Theses in Statistics Statistics, Department of 8-2010 ...»

put 'F D4D1_Cov 9 * 1';

put 'F D4D2_Cov 10 * 1';

put 'F D4D3_Cov 11 * 1';

put 'F D4_Var 12 * 1';

Run;

/* File name of the ASReml .pin file; substituted below as &pinfile. */
%Let pinfile=tch_student_scores.pin;

/* Shell out to the operating system to run ASReml on &pinfile.          */
/* NOTE(review): the cd is issued through CALL SYSTEM while asreml.exe   */
/* is launched through the X statement; whether the directory change is  */
/* still in effect for the X command is not visible from here - confirm  */
/* that ASReml resolves &pinfile relative to the Analyze folder.         */
Data _null_;

Call System('cd C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Analyze');

/* Double quotes are required here so that &pinfile is resolved. */
X "'C:\Program Files\ASREML3\BIN\asreml.exe' -NP &pinfile";

Run;

/* Fileref for the .pvc file that holds the variance-component estimates. */
Filename tmp3 "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Analyze\tch_student_scores.pvc";

/* Read file lines 16-27 (12 records) into ONE observation: each "/"     */
/* advances to the next record, the estimate is taken from columns       */
/* 31-37 and its standard error from columns 45-55.                      */
/* Fix: the D2D1_Cov column range had been fused with the following      */
/* variable name ("31D2D1_SE") and "31- 37" had been split; both are     */
/* restored so that all 12 components use the same 31-37 / 45-55 layout. */
/* The DATA step is intentionally left open here; it is completed after  */
/* the macro %End that follows.                                          */
Data Variance_Temp;

Infile tmp3 firstobs = 16 obs = 27 lrecl=55;

Input Error_Var 31-37 Error_SE 45-55
    / Teacher_Var 31-37 Teacher_SE 45-55
    / D1_Var 31-37 D1_SE 45-55
    / D2D1_Cov 31-37 D2D1_SE 45-55
    / D2_Var 31-37 D2_SE 45-55
    / D3D1_Cov 31-37 D3D1_SE 45-55
    / D3D2_Cov 31-37 D3D2_SE 45-55
    / D3_Var 31-37 D3_SE 45-55
    / D4D1_Cov 31-37 D4D1_SE 45-55
    / D4D2_Cov 31-37 D4D2_SE 45-55
    / D4D3_Cov 31-37 D4D3_SE 45-55
    / D4_Var 31-37 D4_SE 45-55 ;

%End;

%Else %do;

/* Create ASReml .pin file for ZCRT and ZMAT Variance Components */

Filename tmp "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Analyze\tch_student_scores.pin";

/* A single PUT writes all eleven lines: every "/" line pointer starts   */
/* a new output record, so the file content is one "F name index * 1"    */
/* line per variance component, in component order 1 through 11.         */
Data _null_;

  File tmp;

  put 'F E1_var 1 * 1'
    / 'F E2E1_cov 2 * 1'
    / 'F E2_Var 3 * 1'
    / 'F E3E1_Cov 4 * 1'
    / 'F E3E2_Cov 5 * 1'
    / 'F E3_Var 6 * 1'
    / 'F E4E1_Cov 7 * 1'
    / 'F E4E2_Cov 8 * 1'
    / 'F E4E3_Cov 9 * 1'
    / 'F E4_Var 10 * 1'
    / 'F Teacher_Var 11 * 1';

Run;

/* File name of the ASReml .pin file; substituted below as &pinfile. */
%Let pinfile=tch_student_scores.pin;

/* Run ASReml on the .pin file from an operating-system shell.           */
/* NOTE(review): CALL SYSTEM performs the cd and the X statement starts  */
/* asreml.exe; it cannot be confirmed from this code that the cd still   */
/* applies when the X command runs - verify the working directory.       */
Data _null_;

Call System('cd C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Analyze');

/* Double quotes so that the macro variable &pinfile is substituted. */
X "'C:\Program Files\ASREML3\BIN\asreml.exe' -NP &pinfile";

Run;

/* Fileref for the .pvc file that holds the variance-component estimates. */
Filename tmp3 "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Analyze\tch_student_scores.pvc";

/* Read file lines 15-25 (11 records) into ONE observation: each "/"     */
/* advances to the next record, the estimate is taken from columns       */
/* 31-37 and its standard error from columns 45-55.                      */
/* Fix: "E2E1_SE 45E2_Var" had lost the end of the column range and the  */
/* "/" record separator, which left only 10 records for 11 components;   */
/* the split ranges "45- 55" and the doubled semicolon on INFILE are     */
/* also cleaned up.                                                      */
/* The DATA step is intentionally left open here; it is completed after  */
/* the macro %End that follows.                                          */
Data Variance_Temp;

Infile tmp3 firstobs = 15 obs = 25 lrecl=55;

Input E1_Var 31-37 E1_SE 45-55
    / E2E1_Cov 31-37 E2E1_SE 45-55
    / E2_Var 31-37 E2_SE 45-55
    / E3E1_Cov 31-37 E3E1_SE 45-55
    / E3E2_Cov 31-37 E3E2_SE 45-55
    / E3_Var 31-37 E3_SE 45-55
    / E4E1_Cov 31-37 E4E1_SE 45-55
    / E4E2_Cov 31-37 E4E2_SE 45-55
    / E4E3_Cov 31-37 E4E3_SE 45-55
    / E4_Var 31-37 E4_SE 45-55
    / Teacher_Var 31-37 Teacher_SE 45-55 ;

%End;

/* Finish the Variance_Temp DATA step that was opened in either branch   */
/* above: stamp the observation with the simulation number and the       */
/* model label so that results from different runs can be told apart.    */
/* Fix: the text "&nsim;" had been turned into a single garbled symbol   */
/* during extraction; the macro-variable reference is restored. &nsim is */
/* the loop counter of the %Do loop in the sim macro.                    */
Exp = &nsim;

/* &model is presumably the second argument of the enclosing macro       */
/* (pp, zc or zm in the calls made from sim) - confirm against the       */
/* macro header, which is outside this excerpt.                          */
Model = "&model";

Run;

/* Accumulate Variance_Temp into Results.Pred_Variance_<model>.          */
/* On the first simulation the permanent data set is rebuilt from        */
/* Variance_Temp alone; on every later simulation the existing rows are  */
/* listed first in the SET statement so the new row is appended.         */
/* The semicolon after the %then text ends the macro statement only;     */
/* the SET statement itself is closed by the semicolon that follows      */
/* Variance_Temp.                                                        */
Data Results.Pred_Variance_&model;

  Set
    %If %eval(&nsim) ne 1 %then Results.Pred_Variance_&model;
    Variance_Temp;

Run;

%mend;

**********************************************************;

* Create Macro to Simulate & Analyze Multiple Data Sets *;

**********************************************************;

/* sim: simulate and analyze a series of data sets.                      */
/*   sims  - number of simulated data sets to generate and analyze       */
/*   oseed - base random-number seed; simulation i uses oseed + i        */
/* Fixes: (1) "&nsim;" in the seed assignment had been replaced by a     */
/* garbled symbol during extraction; (2) the data set name               */
/* Tch_Student_Scores_zcrt had been split by a stray blank, which would  */
/* have changed the argument passed to cof_full.                         */
%Macro sim(sims,oseed);

  /* nsim is not declared %local here; the analysis code references      */
  /* &nsim as well, so the counter has to stay visible to called macros. */
  %Do nsim = 1 %to &sims;

    /* No %eval: seed holds the text "<oseed> + <nsim>", which is        */
    /* evaluated as an arithmetic expression where it is finally used.   */
    %Let seed = &oseed + &nsim;

    /* Run COF Macro to create PP, Zcrt and Zmat Datasets */
    %cof_full(2000,&seed,Tch_Student_Scores_pp,Tch_Student_Scores_zcrt,Tch_Student_Scores_zmat);

    /* Run Analyze Macro to analyze each dataset */
    %analyze(Tch_Student_Scores_pp,pp);

    %analyze(Tch_Student_Scores_zcrt,zc);

    %analyze(Tch_Student_Scores_zmat,zm);

  %End;

%mend;

/* 1000 simulations starting from base seed 20060902. */
%sim(1000,20060902);

********************************************************;

*** Create Datasets for Plots and Summary Statistics ***;

********************************************************;

/* Merge True and Predicted Teacher Effects from the 1000 Simulations    */
/* and add the prediction error columns used by the RMSE and SEPE plots  */
/* (Results.Stats_Plot).                                                 */

Proc Sort Data = Results.True_Teacher_Effects;
  By Exp Teacher;
Run;

Proc Sort Data = Results.Pred_Teacher_Effects;
  By Exp Teacher;
Run;

/* The merge and the derived columns are produced in one pass:           */
/*   SPE = squared prediction error, PE = signed prediction error.       */
Data Results.Teacher_Effects;
  Merge Results.Pred_Teacher_Effects Results.True_Teacher_Effects;
  By Exp Teacher;
  SPE = (Pred_Percentile - True_Percentile)**2;
  PE = (Pred_Percentile - True_Percentile);
Run;

/* Order the rows for the BY-group summaries that follow. */
Proc Sort Data = Results.Teacher_Effects;
  By Year True_Percentile Model;
Run;

/* Mean squared prediction error per Year / True_Percentile / Model. */
Proc Means Data = Results.Teacher_Effects noprint Mean;
  By Year True_Percentile Model;
  Var SPE;
  Output Out = Stats1 Mean = MSE;
Run;

/* Variance of the prediction error for the same BY groups. */
Proc Means Data = Results.Teacher_Effects noprint VAR;
  By Year True_Percentile Model;
  Var PE;
  Output Out = Stats2 Var = VPE;
Run;

/* Combine both summaries and take square roots in a single step:        */
/*   RMSE = sqrt(MSE), SEPE = sqrt(VPE).                                 */
/* The automatic _Type_ and _Freq_ columns from PROC MEANS are dropped.  */
Data Results.Stats_Plot;
  Merge Stats1 Stats2;
  By Year True_Percentile Model;
  RMSE = sqrt(MSE);
  SEPE = sqrt(VPE);
  Drop _Type_ _Freq_;
Run;

Proc Sort Data = Results.Stats_Plot;
  By Year Model;
Run;

/* Write Results.Stats_Plot to Stats_Plot.csv with a header row.         */
/* Fix: the folder name was missing the " - " in                         */
/* "Flash Drive - June 2, 2008" that every other path in this program    */
/* uses, so the export pointed at a different directory.                 */
PROC EXPORT DATA= RESULTS.Stats_PLOT OUTFILE= "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Graphs\Stats_Plot.csv" DBMS=CSV REPLACE;

PUTNAMES=YES;

RUN;

/* Create Dataset for Bias Plot */

/* Results.Teacher_Effects is rebuilt here from the true and predicted   */
/* effects before it is summarised.                                      */
Proc Sort Data = Results.True_Teacher_Effects;
  By Exp Teacher;
Run;

Proc Sort Data = Results.Pred_Teacher_Effects;
  By Exp Teacher;
Run;

Data Results.Teacher_Effects;
  Merge Results.Pred_Teacher_Effects Results.True_Teacher_Effects;
  By Exp Teacher;
Run;

Proc Sort Data=Results.Teacher_Effects;
  By Year True_Percentile Model;
Run;

/* Average predicted percentile within each Year / True_Percentile /     */
/* Model group.                                                          */
Proc Means Data = Results.Teacher_Effects noprint Mean;
  By Year True_Percentile Model;
  Var Pred_Percentile;
  Output Out = Stats3 Mean = Mean_Pred;
Run;

/* Bias = mean predicted percentile minus the true percentile. The       */
/* PROC MEANS bookkeeping columns are dropped as the data are read.      */
Data Results.Bias_Plot;
  Set Stats3 (Drop = _Freq_ _Type_);
  Bias = Mean_Pred - True_Percentile;
Run;

/* Write Results.Bias_Plot to Bias_Plot.csv with a header row.           */
/* Fix: the folder name was missing the " - " in                         */
/* "Flash Drive - June 2, 2008" that every other path in this program    */
/* uses, so the export pointed at a different directory.                 */
PROC EXPORT DATA= RESULTS.Bias_Plot OUTFILE= "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Graphs\Bias_Plot.csv" DBMS=CSV REPLACE;

PUTNAMES=YES;

RUN;

/* Create Dataset (Results.Percentile_Plot) for 25th, 50th and 75th      */
/* Percentile Sampling Distribution Plots.                               */
/*                                                                       */
/* Fix: the observed counts (Stats) and the zero-filled grid (Zero) are  */
/* merged BY the percentile values, but the two sides computed them      */
/* with different expressions, (rank*100)/21 versus 100*(rank/21).       */
/* Those can differ in the last floating-point bit, in which case the    */
/* BY values do not match and a combination shows up twice (once with    */
/* its count, once as a spurious zero row). Both sides now use the       */
/* identical expression (rank*100)/21.                                   */

/* Keep only teachers whose true rank is 5, 10 or 16. */
Data Results.Percentile;

  Set Results.Teacher_Effects;

  If Ceil(True_Rank) = 5 or Ceil(True_Rank) = 10 or Ceil(True_Rank) = 16;

  Ceil_True_Rank = Ceil(True_Rank); /* CEIL function adjusts for mean ranks from ties */

  Ceil_Pred_Rank = Ceil(Pred_Rank); /* CEIL function adjusts for mean ranks from ties */

  Ceil_True_Percentile = (Ceil_True_Rank*100)/21;

  Ceil_Pred_Percentile = (Ceil_Pred_Rank*100)/21;

  Dummy = 1; /* one per row, so that summing Dummy counts rows */

Run;

Proc Sort Data = Results.Percentile;

  By Ceil_True_Percentile Ceil_Pred_Percentile Year Model;

Run;

/* Count rows for each true/predicted percentile, year and model. */
Proc Means Data = Results.Percentile Sum noprint;

  By Ceil_True_Percentile Ceil_Pred_Percentile Year Model;

  Var Dummy;

  Output out = Stats Sum = sum;

Run;

/* Full grid of true rank x predicted rank x year x model, used to give  */
/* combinations that never occurred an explicit row.                     */
Data Zero;

  Do Ceil_True_Rank = 1 to 20;

    Ceil_True_Percentile = (Ceil_True_Rank*100)/21;

    Do Ceil_Pred_Rank = 1 to 20;

      Ceil_Pred_Percentile = (Ceil_Pred_Rank*100)/21;

      Do Year = 1 to 4;

        Do Model2 = 1 to 3;

          Output;

        End;

      End;

    End;

  End;

Run;

/* Restrict the grid to the three true ranks of interest and translate   */
/* the numeric model index into the two-character model label.           */
Data Zero;

  Set Zero;

  If Ceil_True_Rank = 5 or Ceil_True_Rank = 10 or Ceil_True_Rank = 16;

  If Model2 = 1 then Model = 'pp';

  If Model2 = 2 then Model = 'zc';

  If Model2 = 3 then Model = 'zm';

Run;

Data Test;

  Merge Stats Zero;

  By Ceil_True_Percentile Ceil_Pred_Percentile Year Model;

Run;

/* Grid rows without a matching count have Sum missing; treat them as 0. */
/* Relative frequency divides by 1000, the number of simulations         */
/* requested in the call to the sim macro.                               */
Data Results.Percentile_Plot;

  Set Test;

  If Sum =. then Sum = 0;

  Relative_Freq = Sum/1000;

  Drop _Freq_ _Type_ Model2;

Run;

/* Write Results.Percentile_Plot to Percentile_Plot.csv with a header.   */
/* Fix: the folder name was missing the " - " in                         */
/* "Flash Drive - June 2, 2008" that every other path in this program    */
/* uses, so the export pointed at a different directory.                 */
PROC EXPORT DATA= RESULTS.Percentile_PLOT OUTFILE= "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Graphs\Percentile_Plot.csv" DBMS=CSV REPLACE;

PUTNAMES=YES;

RUN;

/* Create Dataset (Results.Prob75_Plot) for 75th Percentile              */
/* Classification Probability Plots.                                     */
/*                                                                       */
/* Fix: the test read "Pred_Percentile =75". Greater-than signs were     */
/* lost from this listing during extraction, and the percentile          */
/* formulas visible in this program are rank*100/21, which never equal   */
/* 75 exactly, so the equality test would flag nobody. The comparison    */
/* is restored to "at or above the 75th percentile".                     */
/* NOTE(review): confirm >= (rather than >) against the original source. */

Data Percentile75;

  Set Results.Teacher_Effects;

  If Pred_Percentile >= 75 then Dummy75 = 1;

  Else Dummy75 = 0;

Run;

Proc Sort Data = Percentile75;

  By True_Percentile Year Model;

Run;

/* Kept from the original: the permanent data set is re-sorted too. */
Proc Sort Data = Results.Teacher_Effects;

  By True_Percentile Year Model;

Run;

/* Number of rows flagged, per true percentile, year and model. */
Proc Means Data = Percentile75 Sum noprint;

  By True_Percentile Year Model;

  Var Dummy75;

  Output out = Stats75 Sum = sum;

Run;

/* Relative frequency divides by 1000, the number of simulations         */
/* requested in the call to the sim macro.                               */
Data Results.Prob75_Plot;

  Set Stats75;

  Relative_Freq = Sum/1000;

  Drop _Freq_ _Type_;

Run;

/* Write Results.Prob75_Plot to Prob75_Plot.csv with a header row.       */
/* Fix: the folder name was missing the " - " in                         */
/* "Flash Drive - June 2, 2008" that every other path in this program    */
/* uses, so the export pointed at a different directory.                 */
PROC EXPORT DATA= RESULTS.PROB75_PLOT OUTFILE= "C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\Graphs\Prob75_Plot.csv" DBMS=CSV REPLACE;

PUTNAMES=YES;

RUN;

*Obtain Mean and SD of Teacher Var and Avg Teacher Var SE for each Model;

/* Each step below prints the mean and standard deviation of the         */
/* teacher variance estimate and of its standard error for one model     */
/* (pp, zc, zm in turn). There is no NOPRINT, so the printed output is   */
/* produced for all three models.                                        */
/* NOTE(review): all three steps write to the same data set, VarStats,   */
/* so each step replaces the previous one and only the zm summary is     */
/* left at the end - confirm whether separate output data sets were      */
/* intended.                                                             */

/* Model pp */
Proc Means Data = Results.Pred_Variance_pp Mean Std;

Var Teacher_Var Teacher_SE;

Output Out = VarStats;

Run;

/* Model zc */
Proc Means Data = Results.Pred_Variance_zc Mean Std;

Var Teacher_Var Teacher_SE;

Output Out = VarStats;

Run;

/* Model zm */
Proc Means Data = Results.Pred_Variance_zm Mean Std;

Var Teacher_Var Teacher_SE;

Output Out = VarStats;

Run;

*** Appendix C: Missing Tests SAS Simulation Code and ASReml Analysis Code ***;

********************************************;

*** Create a Macro to Simulate Data Sets ***;

********************************************;

/* Library that receives the results of the missing-tests simulation. */
Libname MResults 'C:\Users\Jenny\Desktop\Flash Drive - June 2, 2008\PhD\Dissertation\Paper 1 - Intro and Model Lit Review\Curve of Factors Methodology\MResults';

/* Delete Old True Teacher Effects File before re-creating data sets:    */
/* the step stops before anything is written, so the data set is         */
/* replaced by an empty one.                                             */
Data MResults.True_Teacher_Effects;
  Stop;
Run;

/* cof_missing: simulate one data set for the missing-tests study.       */
/*   nstudent            - number of students to simulate                */
/*   seed                - base seed; the three derived seeds below are  */
/*                         offsets from it                               */
/*   dataname2-dataname5 - names supplied by the caller (presumably      */
/*                         output data set names - their use is outside  */
/*                         this excerpt)                                 */
/* Fix: the last parameter had been split into "dat aname5" by a stray   */
/* blank, which is not a valid parameter list.                           */
%macro cof_missing(nstudent,seed,dataname2,dataname3,dataname4,dataname5);

/* No %eval: each value is kept as the text "<seed> + offset" and is     */
/* evaluated as an arithmetic expression where it is used.               */
%let seed1 = &seed + 3220;

%let seed2 = &seed + 9086;

%let seed3 = &seed + 3;

***************************************;

*** Make Random Teacher Assignments ***;

***************************************;

/* Teacher assignment works by ranking the random numbers from step 1.   */
/* For year 1, ranks 1 to (n/20) are called teacher 1, ranks (n/20)+1 to */
/* (2n/20) teacher 2, and so on; for year 2, ranks 1 to (n/20) are       */
/* teacher 21, ranks (n/20)+1 to (2n/20) teacher 22, and so on;          */
/* years 3 and 4 follow the same pattern.                                */

/* step 1: generate random numbers for each student each year.           */
/* One row per student with four RANUNI draws (rntyr1-rntyr4), seeded    */
/* from &seed1. The draws are made in year order for each student, so    */
/* reordering these statements would change the simulated values.        */
/* There is no RUN statement; the step ends where PROC RANK begins.      */
data step_1;

seed=&seed1;

do student=1 to &nstudent;

rntyr1=ranuni(seed);

rntyr2=ranuni(seed);

rntyr3=ranuni(seed);

rntyr4=ranuni(seed);

output;

end;

/* step 2: create the ranks using proc rank.                             */
/* The rank of rntyr1-rntyr4 across students is written to tyr1-tyr4 in  */
/* the output data set step_2.                                           */
proc rank data=step_1 out=step_2;

var rntyr1-rntyr4;

ranks tyr1 tyr2 tyr3 tyr4;

run;

/* step 3: use the ranks to make teacher assignments.                    */
/*                                                                       */
/* Each year has 20 teachers: year 1 uses teachers 1-20, year 2 uses     */
/* 21-40, year 3 uses 41-60 and year 4 uses 61-80. Within a year a       */
/* student starts at the first teacher of that year and moves up one     */
/* teacher for every threshold (k/20)*nstudent, k = 1..19, that the      */
/* student rank exceeds. Ranks 1..n/20 therefore get the first teacher,  */
/* ranks n/20+1..2n/20 the second, and so on.                            */
/*                                                                       */
/* Fix: every comparison in the original listing had lost its ">"        */
/* ("if tyr1((1/20)*&nstudent) then ..."), which is not a valid          */
/* comparison. The strict ">" is restored, matching the rule that rank   */
/* n/20 still belongs to the first teacher. The 76 near-identical IF     */
/* statements are replaced by a loop that evaluates the same thresholds  */
/* with the same arithmetic.                                             */
data step_3;

  set step_2;

  real_obs=1;

  array _rank[4] tyr1-tyr4;

  array _tch[4] teacher_yr1-teacher_yr4;

  do _yr = 1 to 4;

    /* first teacher of this year: 1, 21, 41 or 61 */
    _tch[_yr] = 20*(_yr-1) + 1;

    do _k = 1 to 19;

      /* later thresholds overwrite earlier ones, so the highest         */
      /* threshold that is exceeded decides the teacher                  */
      if _rank[_yr] > (_k/20)*&nstudent then _tch[_yr] = 20*(_yr-1) + _k + 1;

    end;

  end;

  /* loop counters are working variables only */
  drop _yr _k;

Run;

/* Transpose Random Teacher Assignments from Step 3:                     */
/* one input row per student becomes four output rows, one per Time      */
/* (1-4), each carrying the teacher assigned in that year.               */
Data Tch_assignments;

  Set Step_3;

  Array yr_teacher[4] teacher_yr1-teacher_yr4;

  Do Time = 1 to dim(yr_teacher);

    Teacher = yr_teacher[Time];

    Output;

  End;

  /* only the long-format columns are written out */
  Keep Student Teacher Time;

Run;

*******************************;

*** Generate Student Scores ***;

*******************************;

/* step 1: generate student scores and output data set */ Proc IML;

Call Randseed(&seed2);

N=1;

G={157.5 116.7 99.8 83.3, 116.7 135 98.6 82.7, 99.8 98.6 112.5 80.5, 83.3 82.7 80.5 90}; *AR(1) structure with 1, 0.8, 0.75, 0.7 correlations;

R=I(4)@{45 0, 0 45}; *20% of total variance attributed to measurement error;

lps_sim=j(&nstudent,9,.);

Do Student=1 to &nstudent;

D = (Randnormal(1, J(4,1,0), G))`;

D2 = D@J(2,1,1);

D = D2`;

E = Randnormal(1, J(8,1,0), R);