| yujie__ |
/* Task 1: Read in the class gss survey data */
libname datapool 'M:\sas\';
filename classgss 'M:\sas\gss-survey.csv';

/* Read lines 2 through 59 of the comma-delimited class survey file.      */
/* The INFORMAT statement declares type and length for every variable     */
/* except recordid, which the INPUT statement reads as a plain numeric.   */
data datapool.classgss;
    infile classgss dlm="," dsd missover firstobs=2 obs=59 lrecl=4000;

    informat
        student  $3.
        stud_id  9.
        relation $10.
        sex      $1.
        age      $10.
        lfegenes lfesocty lfehrdwk lfechnce
        impfinan impmar impkids impself
        setwage setprice cutgovt lessreg hlphitec savejobs cuthours
        marhappy marfree marfin markids marnomar
        marlegit marmakid marpakid mardiv marhomo
                 $30.
    ;

    input
        recordid student stud_id relation sex age
        lfegenes lfesocty lfehrdwk lfechnce                          /* Q1.1-Q1.4  */
        impfinan impmar impkids impself                              /* Q2.1-Q2.4  */
        setwage setprice cutgovt lessreg hlphitec savejobs cuthours  /* Q3.1-Q3.7  */
        marhappy marfree marfin markids marnomar
        marlegit marmakid marpakid mardiv marhomo                    /* Q4.1-Q4.10 */
    ;

    /* Discard rows with no relation value, and rows marked as a non-student 'self'. */
    if relation='' or (student='No' and relation='self') then delete;
run;

title 'classgss';
proc print data=datapool.classgss;
run;
/* Task 2: Generate summary statistics with the class gss survey */
/* Cross-tabulate the first item of each question group (Q1.1, Q2.1, Q3.1, Q4.1) */
/* against student and against relation.  The grouped request expands to the     */
/* eight two-way tables in this order: item*student, item*relation, per item.    */
proc freq data=datapool.classgss;
    tables (lfegenes impfinan setwage marhappy) * (student relation);
run;
/* Derive 0/1 agreement indicators from the text answers to marhappy and   */
/* stamp every class-survey row with year 2001.  Any answer outside the    */
/* listed values (including blank) yields 0 for the corresponding flag.    */
data gsshapy1;
    set datapool.classgss;

    /* A comparison expression evaluates to 1 when true and 0 otherwise. */
    yeshappy = (marhappy in ('strongly agree', 'agree'));
    nohappy  = (marhappy in ('disagree', 'strongly disagree'));
    year     = 2001;

    keep year stud_id student relation sex yeshappy nohappy;
run;

title 'gsshapy1';
proc print data=gsshapy1;
run;
/* Count and sum the two indicators by sex for the class survey; results */
/* go to the classhpy data set only (NOPRINT suppresses the listing).    */
proc means data=gsshapy1 noprint;
    class sex;
    var yeshappy nohappy;
    output out=classhpy
        n(yeshappy nohappy)   = nyeshpy nnohpy
        sum(yeshappy nohappy) = syeshpy snohpy;
run;

title 'classhpy';
proc print data=classhpy;
run;
/* Task 3: Read in the national gss survey data */
libname datapool 'M:\sas\';
filename natnlgss 'M:\sas\natnlgss.csv';

/* Read the comma-delimited national file from line 2 onward.  Every       */
/* variable is numeric; the INFORMAT statement lists them in file order.   */
data datapool.natnlgss;
    infile natnlgss dlm="," dsd missover firstobs=2 lrecl=4566;

    informat
        year id  4.
        age      2.
        sex
        lfegenes lfesocty lfehrdwk lfechnce
        impfinan impmar impkids impself
        setwage setprice cutgovt lessreg hlphitec savejobs cuthours
        marhappy marfree marfin markids marnomar
        marlegit marmakid marpakid mardiv marhomo
                 1.
    ;

    input
        year id age sex
        lfegenes lfesocty lfehrdwk lfechnce
        impfinan impmar impkids impself
        setwage setprice cutgovt lessreg hlphitec savejobs cuthours
        marhappy marfree marfin markids marnomar
        marlegit marmakid marpakid mardiv marhomo
    ;
run;

/* List only the first 100 observations. */
title 'natnlgss';
proc print data=datapool.natnlgss (obs=100);
run;
/* Task 4: Generate nation-wide summary statistics */
/* Value labels for the numeric answer codes; a missing value is labelled */
/* 'not applicable' in both formats.                                      */
proc format;
    value imprtfmt
        1 = 'very important'
        2 = 'important'
        3 = 'somewhat important'
        4 = 'not at all important'
        8 = 'dont know'
        9 = 'no answer'
        . = 'not applicable'
    ;
    value agreefmt
        1 = 'strongly agree'
        2 = 'agree'
        3 = 'neither agree nor disagree'
        4 = 'disagree'
        5 = 'strongly disagree'
        8 = 'cannot choose'
        9 = 'no answer'
        . = 'not applicable'
    ;
run;
/* Frequency of observations per survey year in the national data. */
title 'Proc freq by year - natnlgss';
proc freq data=datapool.natnlgss;
    tables year;
run;

/* Keep only the 1993 observations in a separate permanent data set. */
data datapool.natgss93;
    set datapool.natnlgss (where=(year = 1993));
run;

/* List the first 100 observations of the 1993 subset. */
title 'natgss93';
proc print data=datapool.natgss93 (obs=100);
run;
/* Cross-tabulate lfegenes and impfinan by year and by sex for the 1993    */
/* subset, labelling the codes with imprtfmt.                              */
/* In PROC FREQ, MISSING is an option of the TABLES statement, not a       */
/* statement of its own.  Specifying it on TABLES makes missing values a   */
/* regular table level, so the '.'='not applicable' label of imprtfmt      */
/* shows up in the tables and the percentages include those rows.          */
proc freq data=datapool.natgss93;
    tables lfegenes*year
           impfinan*year
           lfegenes*sex
           impfinan*sex / missing;
    format lfegenes impfinan imprtfmt.;
run;
/* Cross-tabulate setwage and marhappy by year over the full national */
/* data, labelling the answer codes with agreefmt.                    */
proc freq data=datapool.natnlgss;
    format setwage marhappy agreefmt.;
    tables (setwage marhappy) * year;
run;
/* Build the national counterpart of gsshapy1 in a single pass:            */
/*   - keep only the 1988 and 1994 survey years;                           */
/*   - yeshappy = 1 for marhappy codes 1 or 2, otherwise 0;                */
/*   - nohappy  = 1 for marhappy codes 4 or 5, otherwise 0;                */
/*   - recode numeric sex (1/2) into the character codes 'M'/'F'.          */
/* The numeric sex column is renamed on input so the character variable    */
/* can take the name sex directly.  This replaces the earlier two-step     */
/* version, whose second step mixed DROP and KEEP statements.              */
/* NOTE(review): a missing marhappy, or codes 3/8/9, give 0 for both       */
/* flags rather than missing - confirm that this is the intended coding.   */
data gsshapy2;
    set datapool.natnlgss (keep=year age sex marhappy rename=(sex=sexnum));
    where year in (1988, 1994);

    if marhappy=1 | marhappy=2 then yeshappy=1;
    else yeshappy=0;
    if marhappy=4 | marhappy=5 then nohappy=1;
    else nohappy=0;

    /* Any other sexnum value leaves sex blank. */
    length sex $1;
    if sexnum=1 then sex='M';
    else if sexnum=2 then sex='F';

    keep year age sex yeshappy nohappy;
run;

proc print data=gsshapy2 (obs=100);
    title 'gsshapy2';
run;
/* Count and sum the two indicators by sex for the national subset; results */
/* go to the natnlhpy data set only (NOPRINT suppresses the listing).       */
proc means data=gsshapy2 noprint;
    class sex;
    var yeshappy nohappy;
    output out=natnlhpy
        n(yeshappy nohappy)   = nyeshpy nnohpy
        sum(yeshappy nohappy) = syeshpy snohpy;
run;

title 'natnlhpy';
proc print data=natnlhpy;
run;
/* Task 5: Pool gsshapy1 and gsshapy2 */
/* Tag each source with a class indicator before stacking them: */
/* class = 1 for gsshapy1 rows, class = 0 for gsshapy2 rows.    */
data gsshapy1;
    set gsshapy1;
    class = 1;
run;

data gsshapy2;
    set gsshapy2;
    class = 0;
run;

/* Stack the two tagged data sets, gsshapy1 rows first. */
data both;
    set gsshapy1 gsshapy2;
run;

/* Cross-tabulate each indicator against the class flag. */
proc freq data=both;
    tables (yeshappy nohappy) * class;
run;
/* Task 6: Generate summary statistics over pooled data */
/* Task 6.1: Compare means in the pooled data - class vs. national - unbalanced sample */
/* One-way model of yeshappy on the class flag, with the group means printed. */
proc glm data=both;
    class class;
    model yeshappy = class;
    means class;
run;
/* Task 6.2: Compare means in the pooled data - class vs. national - male vs. female - unbalanced sample */
/* Three-way table: yeshappy by class, one layer per value of the first variable. */
proc freq data=both;
    tables yeshappy*class*sex;
run;

/* Combine the class flag and sex into one four-level grouping variable.  */
/* The LENGTH statement is required: without it the length is taken from  */
/* the first assignment ('Male-class', 10 characters) and 'Female-class'  */
/* would be stored truncated as 'Female-cla'.                             */
/* Rows with any other class/sex combination keep a blank classsex.       */
data both;
    set both;
    length classsex $12;
    if class=1 and sex='M' then classsex='Male-class';
    else if class=1 and sex='F' then classsex='Female-class';
    else if class=0 and sex='M' then classsex='Male-nat';
    else if class=0 and sex='F' then classsex='Female-nat';
run;

/* One-way model on the combined grouping, with Waller-Duncan and LSD */
/* (confidence limits for differences) comparisons of the group means. */
proc glm data=both;
    class classsex;
    model yeshappy=classsex;
    means classsex / waller;
    means classsex / lsd cldiff;
run;
/* Task 6.3: focus on the class survey and compare student vs. non-student, */
/* male vs. female - still an unbalanced sample.                            */
/* Only class = 1 rows are analysed; the model contains the student*sex     */
/* interaction term alone.                                                  */
proc glm data=both (where=(class = 1));
    class student sex;
    model yeshappy = student*sex;
    means student*sex / waller;
run;
/* Combine student status and sex into one four-level grouping variable.  */
/* The second test of each condition must be on sex (coded 'M'/'F'), not  */
/* on student: a condition such as student='Yes' and student='Male' can   */
/* never be true, which left studsex blank on every row.                  */
/* The LENGTH statement keeps the longest label ('nonstud female', 14     */
/* characters) from being truncated to the length of the first one.       */
/* Rows with any other student/sex combination keep a blank studsex.      */
data both;
    set both;
    length studsex $14;
    if student='Yes' and sex='M' then studsex='Studmale';
    else if student='Yes' and sex='F' then studsex='Studfemale';
    else if student='No' and sex='M' then studsex='nonstud male';
    else if student='No' and sex='F' then studsex='nonstud female';
run;

/* One-way model on the combined grouping for the class survey rows only, */
/* with Waller-Duncan and LSD (confidence limits for differences)         */
/* comparisons of the group means.                                        */
proc glm data=both;
    where class=1;
    class studsex;
    model yeshappy=studsex;
    means studsex/waller;
    means studsex/lsd cldiff;
run;
/* One-way model of yeshappy on relation for the class survey rows only, */
/* with Waller-Duncan and LSD (confidence limits for differences)        */
/* comparisons of the group means.                                       */
proc glm data=both (where=(class = 1));
    class relation;
    model yeshappy = relation;
    means relation / waller;
    means relation / lsd cldiff;
run;
/* Reduce both to the class = 0 rows and bin age into ten-year ranges.   */
/* Note that this overwrites both, so the class = 1 rows are no longer   */
/* available from it afterwards.  Ages below 18 or missing leave         */
/* agerange blank.                                                       */
data both;
    set both;
    if class ne 0 then delete;

    select;
        when (18 <= age < 25) agerange = '18-24';
        when (25 <= age < 35) agerange = '25-34';
        when (35 <= age < 45) agerange = '35-44';
        when (45 <= age < 55) agerange = '45-54';
        when (55 <= age < 65) agerange = '55-64';
        when (65 <= age < 75) agerange = '65-74';
        when (age >= 75)      agerange = '75 +';
        otherwise;
    end;
run;

/* One-way model of yeshappy on the age range, with Waller-Duncan and LSD */
/* (confidence limits for differences) comparisons of the group means.    */
proc glm data=both (where=(class = 0));
    class agerange;
    model yeshappy = agerange;
    means agerange / waller;
    means agerange / lsd cldiff;
run;
/* Task 6.4: generate the average response from the non-students attached */
/* to each student, pool it with the students' own responses, and compare */
/* student vs. non-student.                                               */

/* One row per stud_id holding the mean yeshappy of that student's        */
/* non-student respondents.                                               */
/*   - MEAN=yeshappy is required: an OUTPUT statement without a statistic */
/*     keyword writes five rows per group (N, MIN, MAX, MEAN, STD,        */
/*     identified by _STAT_), all of which would otherwise be pooled as   */
/*     if they were responses.                                            */
/*   - NWAY drops the overall (_TYPE_=0) row so only per-stud_id rows     */
/*     reach the output data set.                                         */
proc means data=gsshapy1 nway;
    where student='No';
    class stud_id;
    var yeshappy;
    output out=nonstud mean=yeshappy;
run;

/* The students' own responses. */
data studgss;
    set gsshapy1;
    if student='Yes';
run;

/* Label the averaged rows as non-student responses. */
data nonstud;
    set nonstud;
    student='No';
run;

/* Stack student rows and per-student non-student averages. */
data balanced;
    set studgss nonstud;
run;
/* One-way analysis of variance of yeshappy by student status on the      */
/* pooled data set, with the group means printed.                         */
/* PROC ANOVA is an interactive procedure: RUN executes the statements    */
/* but leaves the procedure active.  As this is the last step in the      */
/* program, QUIT is issued to end it explicitly.                          */
proc anova data=balanced;
    class student;
    model yeshappy=student;
    means student;
run;
quit; |
|
|