# Exploratory analysis of the dataset-reuse scores in ReuseDatasetsSnap.csv.
# Fits ANOVA, Poisson and ordered-logit models of ResolvableScore against
# DatasetType, then runs chi-squared tests on cross-tabulations of the
# Resolvable*/Attribution* columns against DatasetType, BroaderDatatypes and
# Journal. Recorded console output from an earlier run is kept at the bottom.

a = read.csv("ReuseDatasetsSnap.csv")
help(read.csv)  # interactive help lookup
a
summary(a)

aov = aov(ResolvableScore ~ DatasetType, data = a)
aov
summary(aov)  # whoops! not categorical vs. continuous
boxplot(ResolvableScore ~ DatasetType, data = a)  # ignore (see above)

# Loglinear model (cat vs. cat).
# The family is given as a call, poisson(), rather than the bare name: the
# assignment below rebinds `poisson` to the fitted model, so re-running this
# line with `family = poisson` would hand glm() the old fit instead of the
# family function. Function-call lookup skips the non-function binding.
poisson = glm(ResolvableScore ~ DatasetType, data = a, family = poisson())
summary(poisson)

library(MASS)
library(help = MASS)  # interactive package index
help(polr)            # interactive help lookup
ordlogit = polr(as.ordered(ResolvableScore) ~ DatasetType, data = a)
summary(ordlogit)

# Columns of `a` are referenced by bare name from here on.
attach(a)

b = table(ResolvableYN, DatasetType)
b
chisq.test(table(DatasetType, ResolvableYN))

cc = table(ResolvableScore, DatasetType)
cc
chisq.test(table(ResolvableScore, DatasetType))  # but why not just chi sq?

bb = table(ResolvableYN, BroaderDatatypes)
bb
chisq.test(table(BroaderDatatypes, ResolvableYN))

ccc = table(ResolvableScore, BroaderDatatypes)
ccc
chisq.test(table(ResolvableScore, BroaderDatatypes))

d = table(ResolvableYN, Journal)
d
chisq.test(table(ResolvableYN, Journal))

e = table(ResolvableScore, Journal)
e
chisq.test(table(ResolvableScore, Journal))

f = table(AttributionYN, Journal)
f
chisq.test(table(AttributionYN, Journal))

g = table(AttributionScore, Journal)
g
chisq.test(table(AttributionScore, Journal))

hh = table(AttributionYN, DatasetType)
hh
chisq.test(table(DatasetType, AttributionYN))

i = table(AttributionScore, DatasetType)
i
chisq.test(table(AttributionScore, DatasetType))

#Outputs to send to Heather
# (Recorded console output from an earlier run; column alignment below is
#  reconstructed, the values are as recorded.)

##Tables
#> b=table(ResolvableYN,DatasetType);b
#            DatasetType
#ResolvableYN Bio Ea Eco GA GIS GO GS PA PT XY
#           0  22 35  10  5  10  5 33 19  8  5
#           1   0  0   0  0   0  0 17  0  1  0

#> bb=table(ResolvableYN,BroaderDatatypes);bb
#            BroaderDatatypes
#ResolvableYN EA Eco  G  O PT  S
#           0 35  10 43 41  8 15
#           1  0   0 17  0  1  0

#> cc=table(ResolvableScore,DatasetType);cc
#               DatasetType
#ResolvableScore Bio Ea Eco GA GIS GO GS PA PT XY
#              0   6 15   3  0   4  0  3  4  1  3
#              1  13 10   6  2   2  4 12 10  5  0
#              2   1  8   0  2   2  1  6  3  0  0
#              3   0  0   0  0   0  0  4  0  0  0
#              4   2  2   1  1   2  0  8  2  2  2
#              5   0  0   0  0   0  0 17  0  1  0

#> ccc=table(ResolvableScore,BroaderDatatypes);ccc
#               BroaderDatatypes
#ResolvableScore EA Eco  G  O PT  S
#              0 15   3  3 10  1  7
#              1 10   6 18 23  5  2
#              2  8   0  9  4  0  2
#              3  0   0  4  0  0  0
#              4  2   1  9  4  2  4
#              5  0   0 17  0  1  0

##Chi-Squared
#> chisq.test(table (ResolvableScore,DatasetType))
#        Pearson's Chi-squared test
#data:  table(ResolvableScore, DatasetType)
#X-squared = 98.1825, df = 45, p-value = 7.922e-06
#Warning message:
#In chisq.test(table(ResolvableScore, DatasetType)) :
#  Chi-squared approximation may be incorrect

##Linear Model-Poisson (alternative = binomial or zero inflated for "Resolvable YN")
#> poisson = glm(ResolvableScore~DatasetType,data=a,family=poisson)
#> summary(poisson)
#Call:
#glm(formula = ResolvableScore ~ DatasetType, family = poisson,
#    data = a)
#Deviance Residuals:
#     Min        1Q    Median        3Q       Max
#-2.47386  -1.37229  -0.04478   0.60369   2.29458
#Coefficients:
#               Estimate Std. Error z value Pr(>|z|)
#(Intercept)     0.04445    0.20851   0.213   0.8312
#DatasetTypeEa  -0.07344    0.26998  -0.272   0.7856
#DatasetTypeEco -0.04445    0.37878  -0.117   0.9066
#DatasetTypeGA   0.64870    0.37879   1.713   0.0868 .
#DatasetTypeGIS  0.29202    0.33898   0.861   0.3890
#DatasetTypeGO   0.13787    0.45842   0.301   0.7636
#DatasetTypeGS   1.07396    0.22364   4.802 1.57e-06 ***
#DatasetTypePA   0.18916    0.29180   0.648   0.5168
#DatasetTypePT   0.64870    0.31470   2.061   0.0393 *
#DatasetTypeXY   0.42555    0.41042   1.037   0.2998
#---
#Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#(Dispersion parameter for poisson family taken to be 1)
#    Null deviance: 283.03  on 169  degrees of freedom
#Residual deviance: 212.36  on 160  degrees of freedom
#AIC: 566.94
#Number of Fisher Scoring iterations: 5

##Ordered Logit Model (library MASS, function polr)
#polr(formula = as.ordered(ResolvableScore) ~ DatasetType, data = a)
#Coefficients:
#                    Value Std. Error    t value
#DatasetTypeEa  -0.1696435  0.4978595 -0.3407457
#DatasetTypeEco -0.1249959  0.6788473 -0.1841296
#DatasetTypeGA   1.3932769  0.8160008  1.7074454
#DatasetTypeGIS  0.2656731  0.7333286  0.3622839
#DatasetTypeGO   0.6023778  0.8112429  0.7425369
#DatasetTypeGS   2.3786340  0.4917395  4.8371833
#DatasetTypePA   0.3831136  0.5557081  0.6894152
#DatasetTypePT   1.0288666  0.7284316  1.4124410
#DatasetTypeXY  -0.2736481  1.1134912 -0.2457569
#Intercepts:
#    Value   Std. Error t value
#0|1 -0.6708  0.3879    -1.7294
#1|2  1.2789  0.4015     3.1855
#2|3  2.0552  0.4237     4.8511
#3|4  2.2097  0.4285     5.1573
#4|5  3.3495  0.4775     7.0144
#Residual Deviance: 483.3118
#AIC: 511.3118