*************************************************************;
* Examples of stratified Cox regressions
*
* Read multiple job histories for a variety of individuals.
* For each record:
*   (a) eid = employee ID number (for that employee)
*   (b) status (1 if end of job, 0 if still employed when last seen)
*   (c) education level: 8=grade school, 12=high school, etc.
*   (d) job number: 1=first job, 2=second job, etc
*   (e) duration of job in years
*   (f) prestige level of job (1-100)
*   (g) logarithm(base e) of starting annual salary in thousands
* The data is sorted by eid then by job number.
*************************************************************;

title 'MULTIPLE EVENTS PER INDIVIDUAL - YOURNAME';
title2 'EXAMPLES OF STRATIFIED COX REGRESSIONS - YOURNAME';
options ls=75 ps=60 pageno=1 nocenter;

* Read one record per job. The first 5 lines of the file are skipped;
data jobs;
  infile "aljobmult.dat" firstobs=6;
  input eid status edlevel jj duration prestige logsalary;
  salary=exp(logsalary);   * Back-transform: salary in thousands;
run;

* Small subset (eid 11 or lower) used only to eyeball the input;
data firstfew;
  set jobs;
  if eid le 11;
run;

proc print data=firstfew;
  title3 'THE FIRST FEW RECORDS AS SAS SEES THEM';
run;

* Pooled model: every job record is treated as an independent
* observation, status=0 marks a censored duration;
proc phreg data=jobs;
  title3 'NO STRATIFICATION: IGNORE SUBJECT EFFECTS';
  title4 'BUT DOES THIS LEAD TO BIAS?';
  title5 'IS HIGH HAZARD FOR HIGH JJ AN ARTIFACT?';
  model duration*status(0) = jj edlevel prestige logsalary;
run;

data firstjob;
  set jobs;
  if jj=1;   * Keep only records with jj=job number=1;
run;

* jj is omitted from this model: it equals 1 on every record of
* firstjob, so its coefficient would not be estimable;
proc phreg data=firstjob;
  title3 'FIRST JOB ONLY';
  model duration*status(0) = edlevel prestige logsalary;
run;

* Carry the first-job duration forward onto the second-job record.
* durjob1 is reset at the start of each employee so that a person
* with no job-1 record gets a missing value instead of inheriting
* the previous employee first-job duration;
data secondjob;
  set jobs;
  by eid notsorted;
  retain durjob1;
  if first.eid then durjob1=.;
  if jj=1 then durjob1=duration;   * Save duration if job number=1;
  if jj=2;                         * Keep records with job number=2;
run;

* jj is omitted from this model: it equals 2 on every record of
* secondjob, so its coefficient would not be estimable;
proc phreg data=secondjob;
  title3 'SECOND JOB WITH FIRST-JOB DURATION AS A COVARIATE';
  model duration*status(0) = durjob1 edlevel prestige logsalary;
run;

proc corr data=secondjob;
  title3 'PEARSON CORRELATION COEFFICIENT OF FIRST TWO JOB DURATIONS';
  var durjob1 duration;
run;

*************************************************************;
* Stratification for a Cox regression means that Cox likelihoods
* are calculated for each stratum and then multiplied together.
*
* This means that the risk set for each observed failure is drawn
* from within that stratum only. In other words, an event with an
* observed failure is assumed to have competed only with other
* events in the same stratum.
*
* However, the same coefficients for the covariates are used across
* all strata.
*************************************************************;

* NOTE(review): with strata eid, a covariate that never changes
* across the jobs of one employee has no within-stratum variation
* and cannot be estimated. edlevel may be such a covariate - confirm
* against the data;
proc phreg data=jobs;
  title3 'STRATIFIED COX REGRESSION BY INDIVIDUAL';
  strata eid;
  model duration*status(0) = jj edlevel prestige logsalary;
run;

* jj defines the strata here, so it is not a covariate in the model;
proc phreg data=jobs;
  title3 'STRATIFIED COX REGRESSION BY JOB NUMBER';
  strata jj;
  model duration*status(0) = edlevel prestige logsalary;
run;

* NOSUMMARY suppresses the event/censored summary, which with
* strata eid would list one row per employee. The original option
* NOSIMPLE belongs to PROC CORR and is not accepted by PROC PHREG;
proc phreg data=jobs nosummary;
  title3 'STRATIFIED COX REGRESSION BY INDIVIDUAL';
  title4 'TEST FOR PRESTIGE TIME DEPENDENCE (PH ASSUMPTION)';
  strata eid;
  model duration*status(0) = edlevel prestige logsalary time_prestige;
  * Time-dependent covariate built by a PHREG programming statement.
  * A nonzero coefficient indicates that the prestige effect changes
  * with time, that is, a departure from proportional hazards;
  time_prestige = duration*prestige;
run;

proc phreg data=jobs nosummary;
  title3 'STRATIFIED COX REGRESSION BY INDIVIDUAL';
  title4 'TEST FOR LOGSALARY TIME DEPENDENCE (PH ASSUMPTION)';
  strata eid;
  model duration*status(0) = edlevel prestige logsalary time_logsal;
  * Same proportional-hazards check as above, applied to logsalary;
  time_logsal = duration*logsalary;
run;