/*
 * This is the C program "grademcmc.c" for running MCMC to fit an IRT model
 * on student grade data, as described in the paper "Grades and Incentives:
 * Assessing Competing GPA Measures and Post-Graduate Outcomes" by
 * Michael A. Bailey, Jeffrey S. Rosenthal, and Albert Yoon.
 *
 * This program requires three input files.
 *
 * The first, "counts.txt", consists of just two numbers: the number of
 * students, and the number of courses, for example:
 *
 *     99397 31191
 *
 * The second, "studentinfo.txt", contains a header line and then a list of
 * rows mapping a student identification number with that student's index
 * value (numbered 0,1,2,3,...), for example:
 *
 *     IDLIST STUDIND
 *     86275 0
 *     86832 1
 *     86901 2
 *     87204 3
 *
 * The third and most important input file, "gradedata.txt", contains many
 * lines, each giving a student's index value, a course number (with courses
 * also numbered 0,1,2,3,...), and the student's actual grade in that course
 * (an integer, e.g. between 0 and 100; letter grades should first be
 * converted into equivalent numerical grades), for example:
 *
 *     0 0 83
 *     0 1 77
 *     0 2 36
 *     0 3 80
 *     1 4 84
 *     1 5 83
 *     1 6 91
 *     2 3 66
 *     2 4 71
 *     2 7 75
 *
 * The program then runs MCMC to fit the IRT model, producing two output
 * files: "thetaest.txt" giving the MCMC estimates of the theta values, and
 * "kappaest.txt" giving the MCMC estimates of the kappa values.
 *
 * You may contact Jeffrey Rosenthal (jeff@math.toronto.edu;
 * www.probability.ca) with questions.
 *
 * NOTE(review): this copy of the file is damaged.  Several spans of source
 * text are missing (each place is marked with a NOTE(review) comment below),
 * so the file does not compile as it stands.  Restore the missing text from
 * the original distribution before building; nothing below attempts to
 * guess it.
 */

/*
 * NOTE(review): each of the five #include directives below has lost its
 * header name.  The visible code calls printf/fopen/fscanf/fgets/ungetc,
 * exit, strcmp/strcpy and sqrt, so stdio.h, stdlib.h, string.h and math.h
 * are presumably among them -- confirm against the original file.
 */
#include
#include
#include
#include
#include

#define ALTSTART 1  /* 0 to start at theta=RAG and kappa=0, 1 for alt. */
#define INCLUDESOC101 1  /* 1 to also update kappa for t=455, 0 to omit it */

#define NUMITS 6000
#define BURNIN 5000  /* Draws are accumulated only once itnum >= BURNIN. */

#define verboselevel 0  /* higher for more program output */

/* Compile-time capacities of the global tables declared below. */
#define MAXNUMSTUD 100000
#define MAXNUMCOURSES 33000
#define MAXNUMGRADES 60
#define MAXNUMSTUDINCOURSE 3000

#define FILENAMEIN "gradedata.txt"
#define FILENAMETHETA "thetaest.txt"
#define FILENAMEKAPPA "kappaest.txt"

#define sigma 3.0  /* Standard deviation of the epsilon errors. */

#define PI 3.1415926536

/* Global arrays. */

/* Per-student tables: entry [i][j] belongs to the j-th grade record of
   student index i; numgrades[i] counts the records stored for student i. */
int grade[MAXNUMSTUD][MAXNUMGRADES];
int courseind[MAXNUMSTUD][MAXNUMGRADES];
int numgrades[MAXNUMSTUD];

/* Current theta (per student) and kappa (per course) values, and the
   running sums of the values drawn once itnum >= BURNIN. */
double theta[MAXNUMSTUD];
double kappa[MAXNUMCOURSES];
double thetasum[MAXNUMSTUD];
double kappasum[MAXNUMCOURSES];

/* Per-student RAG value.  NOTE(review): the computation is in a span that
   is missing from this copy; presumably the student's raw average grade --
   confirm. */
double RAG[MAXNUMSTUD];

/* IDlist[studind] is the student identification number read from
   "studentinfo.txt" for that student index. */
int IDlist[MAXNUMSTUD];

/* Per-course tables: entry [t][k] is the k-th (student index, grade) pair
   recorded for course t; numstudincourse[t] counts the pairs stored.
   Assuming 4-byte int, each of the two 2-D tables is about 396 MB of
   static storage (33000 * 3000 * 4 bytes). */
int studincourseind[MAXNUMCOURSES][MAXNUMSTUDINCOURSE];
int gradeincourse[MAXNUMCOURSES][MAXNUMSTUDINCOURSE];
int numstudincourse[MAXNUMCOURSES];

/* NOTE(review): every use of the following variables except the zeroing of
   rawthetasum, rawkappasum and RAGsum lies in a missing span, so their
   exact meaning cannot be confirmed from this copy.  rawkappa and truekappa
   are dimensioned by MAXNUMSTUD rather than MAXNUMCOURSES; that is at least
   large enough, since MAXNUMSTUD exceeds MAXNUMCOURSES. */
double rawtheta[MAXNUMSTUD];
double truetheta[MAXNUMSTUD];
double rawkappa[MAXNUMSTUD];
double truekappa[MAXNUMSTUD];
double rawthetaav, rawthetasum, rawkappaav, rawkappasum, RAGav, RAGsum;

int N;  /* Total number of students considered. */
int T;  /* Total number of courses considered. */


/* BEGIN MAIN PROGRAM. */

/*
 * Program entry point.  The visible portion reads the counts and the
 * student ID list, stores the grade records in the global tables, and runs
 * NUMITS-style MCMC sweeps that update theta and then kappa.  argc and argv
 * are not used in the visible code.
 */
int main(int argc, char **argv)
{

    /* Initial declarations. */
    /* The helper functions are declared here, inside main, old-style;
       eatwhite() is called below without any visible declaration. */
    FILE *fpin, *fptheta, *fpkappa, *fpcounts, *fptmp, *fopenit();
    int i, j, t, inputint, studind, counter, itnum;
    int thecourseind, thestudind, theIDlist, thegrade, Ni, Nt;
    char tmpstring[240];
    double normal(double,double), tmpsum;
    void seedrand();
    long presenttime(), starttime;
    double ellapsedsecs;

    /* Record starting time. */
    starttime = presenttime();

    /* Obtain the N and T counts. */
    /* NOTE(review): the fscanf return values are not checked, and N and T
       are not compared against MAXNUMSTUD / MAXNUMCOURSES here. */
    printf("Reading counts ...");
    fpcounts = fopenit("counts.txt","r");
    fscanf(fpcounts, "%d", &N);
    fscanf(fpcounts, "%d", &T);
    fclose(fpcounts);
    printf(" done. (N=%d, T=%d)\n", N, T);

    /* Read in "IDlist" values. */
    printf("Reading IDlist values ...\n");
    fptmp = fopenit("studentinfo.txt","r");
    fgets(tmpstring, 240, fptmp);  /* Read and ignore header line. */
    /* Each pass reads one "ID index" pair; the loop stops when eatwhite()
       returns EOF.  NOTE(review): thestudind is used as an array index
       without a range check. */
    while ((eatwhite(fptmp)) != EOF) {
        fscanf(fptmp, "%d", &theIDlist);
        fscanf(fptmp, "%d", &thestudind);
        IDlist[thestudind] = theIDlist;
    }
    fclose(fptmp);

    /* Initialise variables. */
    printf("Initialising variables ...\n");
    seedrand();

    /* NOTE(review): the next statement is not valid C as it stands -- the
       text between "i" and "= 2)" is missing.  fpin is closed below but
       never opened in the visible text, so the missing span presumably
       holds the rest of this initialisation loop, the opening of the grade
       data file, and the head of the loop that reads one grade record per
       pass.  Restore from the original file. */
    for (i=0; i= 2) && (thestudind==1))
        printf("thestudind=%d, counter=%d, thegrade=%d\n", thestudind, counter, thegrade);

    /* Store the record under the student, and again under the course.
       NOTE(review): no bounds check against MAXNUMGRADES or
       MAXNUMSTUDINCOURSE is visible here; confirm whether the missing text
       contains one. */
    courseind[thestudind][counter] = thecourseind;
    numgrades[thestudind]++;
    gradeincourse[thecourseind][numstudincourse[thecourseind]] = thegrade;
    studincourseind[thecourseind][numstudincourse[thecourseind]] = thestudind;
    numstudincourse[thecourseind]++;
    /* if (divides(1000,thestudind)) printf("HERE. thestudid=%d\n", thestudind); */
    }
    fclose(fpin);

    /* Compute RAGs. */
    printf("Computing RAGs ...\n");

    /* NOTE(review): two more truncations follow.  Text is missing between
       "i" and " 0) {" and between "j" and " 2))".  The second gap swallows
       the rest of the RAG computation, the start of the iteration loop over
       itnum, and the head of the theta update, including how tmpsum is
       accumulated.  Restore from the original file. */
    for (i=0; i 0) {
        RAG[i] = 0.0;
        for (j=0; j 2))
            printf("i=%d, Ni=%d, t=%d, grade=%d, kappa=%f, tmpsum=%f\n", i, Ni, t, grade[i][j], kappa[t], tmpsum);
        }

        /* Draw the new theta[i] from normal(); the arguments are tmpsum/Ni
           and sigma/sqrt(Ni), presumably a mean and a standard deviation
           (normal() is not defined in the visible text -- confirm). */
        theta[i] = normal( tmpsum/Ni, sigma/sqrt(1.0*Ni) );

        /* Only draws made once itnum >= BURNIN are accumulated. */
        if (itnum >= BURNIN) thetasum[i] = thetasum[i] + theta[i];
        if (verboselevel > 2) printf("tmpsum=%f, Ni=%d, theta[%d]=%f.\n", tmpsum, Ni, i, theta[i]);
        if ((verboselevel >= 1) && (i==0)) printf("itnum=%d, i=%d, thetamean=%.3f\n", itnum, i, tmpsum/Ni);
    }

    /* Update the kappa_t. */
    /* NOTE(review): text is missing between "t" and "= BURNIN)"; the whole
       kappa draw is in that gap.  Restore from the original file. */
    for (t=0; t= BURNIN) kappasum[t] = kappasum[t] + kappa[t];
    }
    }

    printf("END OF ITERATION %d.\n", itnum+1);
    fflush(stdout);  /* Flush so the progress line is written out after each iteration. */
    }

    if (verboselevel > 2) {
        /* Output a few results, for now. */
        for (i=0; i<10; i++) {
            printf("%d %d %.3f %.3f\n", i, numgrades[i], RAG[i], theta[i]);
        }
        for (t=0; t<10; t++) {
            printf("%d %.3f\n", t, kappa[t]);
        }
        /* Course 455 is the one singled out by INCLUDESOC101 above. */
        printf("kappa[455] = %f\n", kappa[455]);
    }

    /* Compute some averages. */
    printf("Computing averages ...\n");
    rawthetasum = rawkappasum = RAGsum = 0.0;

    /* NOTE(review): the largest gap is here.  Text is missing between "i"
       and " 'z')": the rest of main (averages and, presumably, the writing
       of the two output files), the definitions of normal(), seedrand() and
       presenttime(), and the head of eatwhite() are all absent.  What
       follows is only the tail of eatwhite(): the end of a condition that
       tests t against the ranges a-z, A-Z and 0-9, after which t is pushed
       back onto fp and returned.  Restore from the original file. */
    for (i=0; i 'z') && (t < 'A' || t > 'Z') && (t < '0' || t > '9') ) ;
    ungetc(t, fp);
    return(t);
}


/*
 * Open a file, or terminate the program if it cannot be opened.
 *
 *   thefilename  path handed to fopen()
 *   themode      fopen() mode string; "w", "r" and "a" select the verb used
 *                in the error message ("write", "read", "append to"), and
 *                any other mode is reported as "access"
 *
 * Returns the stream returned by fopen().  If fopen() returns NULL, a
 * message is printed on stderr and the program exits with status 1, so a
 * caller never receives NULL.
 */
FILE *fopenit(char *thefilename, char *themode)
{
    FILE *thefp;
    char verb[20];  /* Holds the verb; the longest stored is "append to". */

    if (!strcmp(themode,"w"))
        strcpy(verb,"write");
    else if (!strcmp(themode,"r"))
        strcpy(verb,"read");
    else if (!strcmp(themode,"a"))
        strcpy(verb,"append to");
    else
        strcpy(verb,"access");

    if ((thefp = fopen(thefilename,themode)) == NULL) {
        fprintf(stderr, "\n ** Unable to %s file %s. **\n\n", verb, thefilename);
        exit(1);
    }

    return(thefp);
}


/*
 * Return nonzero when aa divides bb exactly, and zero otherwise, using
 * integer division.  aa must be nonzero, since the test divides by it.
 */
int divides(int aa, int bb)
{
    return( (bb/aa)*aa == bb );
}