Holcombe:CheatSheets

From OpenWetWare

(Difference between revisions)
Jump to: navigation, search

Sarah McIntyre (Talk | contribs)
(New page: ===Alex's stackoverflow favourites=== http://stackoverflow.com/users/302378/alex-holcombe?tab=favorites ===Sarah's R cheat sheet=== <code> setwd('~/Dropbox/Experiment/') #set the working...)
Next diff →

Revision as of 00:56, 29 August 2012

Alex's stackoverflow favourites

http://stackoverflow.com/users/302378/alex-holcombe?tab=favorites

Sarah's R cheat sheet

# ---- Data frame basics ----
# NOTE: setwd() and rm(list = ls()) change global session state. They are
# handy interactively, but avoid them in scripts that other people will source.
setwd('~/Dropbox/Experiment/') # set the working directory
rm(list = ls(all.names = TRUE)) # delete all variables
df <- read.table(file = "filename.txt", header = TRUE) # read in a text file of data
# save a data frame as a text file, tab delimited
write.table(df, file = "filename.txt", sep = "\t", row.names = FALSE)
df <- na.omit(df) # remove all records with NA values
df.subset <- subset(df, condition == "baseline") # make a subset of a data frame
df.subset <- subset(df, select = c(var1, var2)) # make a subset, by variable
df.subset <- subset(df, select = -c(var1, var2)) # make a subset by removing variables
# rename columns, written as c("old name" = "new name"); rename() is not base R
# (it comes from the plyr / reshape packages)
df <- rename(df, c("V1" = "mean", "V2" = "slope", "V3" = "lapseRate"))
df <- cbind(df, newcolumn) # add a vector as a new column to a data frame
df <- rbind(df, newrow) # add new rows to a data frame
df <- df[with(df, order(subject, time)), ] # sort by subject then by time
head(df) # see the first six rows of a data frame
# find the mean of all numeric variables in the data frame by condition;
# can sub in other functions e.g. 'sd' (ddply / numcolwise are from plyr)
ddply(df, .(condition), numcolwise(mean))
ddply(df, c('subject'), myFunction) # apply function separately by subject
# Getting things to appear in a graph the way you want may require reordering
# the 'levels' of a factor, like this to put DL as the last subject to be graphed
df$subject <- factor(df$subject, levels = c("AH", "SM", "SYL", "WYC", "DL"))
# ---- Within-subjects (repeated-measures) ANOVA ----
# Model force by drum, condition and their interaction, with an Error() term
# nesting the same within-subject factors inside subject.
anmod <- aov(
  force ~ drum * condition + Error(subject / (drum * condition)),
  df
)
summary(anmod)
# Tables of means, then the default table type, each with standard errors
# and printed to three significant digits.
print(model.tables(anmod, "means", se = TRUE), digits = 3)
print(model.tables(anmod, se = TRUE), digits = 3)
# Standard deviation of every numeric column, split by condition.
ddply(df, .(condition), numcolwise(sd))
#Within-Subjects (repeated measures) contrasts
#Put in wide form: subject, dv.ivl1, dv.ivl2 .. etc.
#(one row per subject, one column of the dependent variable per level of "condition")
#NOTE(review): the reshaped result is not assigned here, yet the next step reads
#`pse.wide` - presumably the wide table should be stored under that name; confirm.
reshape(data=df,direction="wide",idvar="subject",timevar="condition")
#and multiply by contrast coefficients
#NOTE(review): each contrast below has three weights, so pse.mat must have exactly
#three numeric columns for %*% to work - presumably the subject column has to be
#dropped from pse.wide before as.matrix(); confirm.
pse.mat <- as.matrix(pse.wide)
#each product gives one contrast score per row, which t.test() then tests against zero
t.test(pse.mat %*% c(1, -0.5, -0.5)) #baseline vs opp_dir, same_dir
t.test(pse.mat %*% c(0, 1, -1)) #same_dir vs opp_dir
#Put in long form: subject, dv, iv
library(reshape)
#`...etc` is a placeholder: list the remaining wide-format column names in measure.vars
melt.data.frame(data=df,id.vars=c("subject"),measure.vars=c("dv.ivl1","dv.ivl2",...etc))
# ---- Progress bar using the tcltk package ----
# tcltk must be attached first; tkProgressBar() and setTkProgressBar() live there.
library(tcltk)
total <- 20 # number of iterations the bar represents
# create progress bar
pb <- tkProgressBar(title = "progress bar", min = 0, max = total, width = 300)
# seq_len() rather than 1:total so a total of 0 runs the loop zero times
for (i in seq_len(total)) {
  Sys.sleep(0.1) # stand-in for the real work of one iteration
  setTkProgressBar(pb, i, label = paste(round(i / total * 100, 0), "% done"))
}
close(pb) # dismiss the progress bar window
#GGPLOT
# Set the appearance of all subsequent figures.
# theme_text() and passing a whole theme to theme_update() belong to the old
# (pre-0.9.2) ggplot2 API and fail on current releases; theme_set() installs
# the base theme and element_text() describes the text elements.
theme_set(theme_bw())
theme_update(
  legend.position = "bottom",
  axis.title.x = element_text(size = 14),
  axis.title.y = element_text(size = 14, angle = 90),
  axis.text.x = element_text(size = 12),
  axis.text.y = element_text(size = 12),
  strip.text.x = element_text(size = 12),
  legend.text = element_text(size = 12),
  legend.title = element_text(size = 14)
)
#widths for publishing
#each call opens a new quartz() graphics device of the given width
#(presumably inches, and quartz() is presumably macOS-only - confirm for your platform)
quartz(width=3.27) # half page
quartz(width=4.86) # 3/4 page
quartz(width=6.83) # full page
#The lines below that start with '+' are fragments, not standalone statements:
#append them to an existing ggplot(...) call.
#NOTE(review): `fun.y` is reported as deprecated in favour of `fun` in recent
#ggplot2 releases - confirm against the installed version.
+stat_summary(fun.y=mean,geom="point") #calculate mean of y and plot as a point
+stat_summary(fun.data=mean_cl_boot,geom='errorbar',width=4) #bootstrapped CI plotted as error bar
+facet_grid(variable ~ ., scales = "free") #'free_y' / 'free_x'
+scale_y_continuous(trans = "log10", breaks = 10^seq(-4, 4, 1), labels = 10^seq(-4, 4, 1)) #manually specify breaks and labels to show untransformed units when plotting on a log scale
+geom_rect(aes(xmin=5,xmax=35,ymin=-Inf,ymax=Inf),fill="#FFFFB2") #shaded rectangle
+coord_cartesian(xlim=c(100,500)) #zoom in on the plot
#dodge individual data points around error bar
#skeleton only: `data`, `x`, `y` and `subject` are placeholders for your own data,
#and geom_bar()/geom_errorbar() are shown without the arguments they will need
ggplot(data,aes(x=x,y=y)) +geom_bar() +geom_errorbar()+geom_point(aes(shape=subject),position=position_dodge(width=0.3)) +scale_shape_manual(values=(1:9)) #unfilled shapes (use this for more values than with "fill=FALSE")
# ---- Colours ----
# If you have the RGB components of a colour you want to use in R (each on a
# 0-255 scale), this returns the colour code that R uses. `r`, `g` and `b` are
# placeholders for your own values. The argument is spelled out in full
# (maxColorValue) instead of relying on partial matching of `max`.
rgb(r, g, b, maxColorValue = 255)
# e.g. Brewer palette Set 1:
# red: #E41A1C
# blue: #377EB8
# green: #4DAF4A
# display all the Brewer palettes
RColorBrewer::display.brewer.all()
# get colour codes from Brewer colours (here 9 colours from "Blues")
RColorBrewer::brewer.pal(9, "Blues")
##FUNCTIONS
# Proportion of each distinct response in a vector.
#
# Arguments:
#   x      vector of responses (anything as.factor() accepts).
#   na.rm  if TRUE (the default) missing values are dropped before counting;
#          if FALSE they are not counted as a response but still contribute
#          to the denominator.
# Returns:
#   data.frame with one row per response level: `v`, the level as a factor in
#   level order, and `p`, its share of length(x). Empty input yields a
#   zero-row data frame that still has both columns.
proportion <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  x <- as.factor(x)
  value <- levels(x)
  # tabulate() counts the factor codes in one pass and ignores NA, replacing a
  # per-level loop that grew the result vector on every iteration.
  counts <- tabulate(x, nbins = length(value))
  data.frame(v = factor(value, levels = value), p = counts / length(x))
}
# Calculate the direction index: proportion of "distal" responses minus the
# proportion of "proximal" responses (distal positive, proximal negative).
#
# Arguments:
#   df    data frame holding the responses.
#   resp  unquoted name of the response column in `df`.
# Returns:
#   one-row data.frame with a single column `DI`. Missing responses are
#   ignored. A response level that never occurs counts as proportion 0, so
#   the result is always one row. DI is NaN when there are no non-missing
#   responses at all.
calcDI <- function(df, resp) {
  resp <- eval(substitute(resp), df, parent.frame()) # to pass a column name to the function
  resp <- resp[!is.na(resp)]
  # The mean of a logical vector is the proportion of TRUEs. Computing both
  # proportions directly avoids an empty result when a level is absent.
  di <- mean(resp == "distal") - mean(resp == "proximal")
  data.frame(DI = di)
}
# Standard error of the mean of `x`, computed over its non-missing values:
# the square root of (sample variance / number of non-missing values).
stder <- function(x) {
  observed <- x[!is.na(x)] # drop the missing values
  n_obs <- length(observed)
  sqrt(var(as.vector(observed)) / n_obs)
}

Personal tools