#############
#clear space
rm(list=ls())
#load packages
require(vegan)
require(ecodist)
require(simba)
require(lme4)
#load data
records <- read.table(file="species_data.csv", header=TRUE, sep="\t")
DIST <- as.dist(read.table(file="distance_data.csv", header=TRUE, sep="\t", row.names=1))
variables <- read.table(file="environmental_data.csv", header=TRUE, sep="\t", row.names=1)
#transform aspect in degrees to 'eastness' and'northness':
aspect2 <- variables$aspect
variables <- cbind(variables, aspect2)
variables$aspect <- cos((variables$aspect*pi)/180)
variables$aspect2 <- sin((variables$aspect2*pi)/180)
#generate spatial eigenvectors
PCNM <- pcnm(DIST)
SC2 <- as.data.frame(scores(PCNM))
variables <- cbind(variables, SC2[,1:10]) #add first 10 PCNM axes to variables set
#generate species PA matrix
species <- mama(records)
##################
#matrix functions and look at data
species2 <- species
species2[species2>0] = 1 #convert df 'species2' to binary
freq <- colSums(species2) #calculate frequencies
freq <- sort(freq, decreasing=TRUE) #sort them
diversity <- rowSums(species2) #no spp. in each plot
#plot histograms figure
par(mfrow=c(2,2),mar=c(4,5,2,1))
hist(variables$bio12, breaks=c(0:75,150,225,300, 375, 450, 525, 600, 675, 750, 825, 900, 975, 1050, 1125), density=50, col="lightgray", border="black", xlab="Mean annual rainfall (mm)", ylab="Number of plots", cex.axis=1.5, cex.lab=1.5, main="", freq=TRUE, ylim=c(0,20), xlim=c(200,1200))
mtext("a",3, adj=0, cex=2, font=2)
hist(variables$sand, 10, density=50, col="lightgray", border="black", xlab="%Sand", ylab="Number of plots", cex.axis=1.5, cex.lab=1.5, main="", freq=TRUE, ylim=c(0,25), xlim=c(40,100))
mtext("b",3, adj=0, cex=2, font=2)
hist(variables$pH_CaCl2, 10, density=50, col="lightgray", border="black", xlab="pH", ylab="Number of plots", cex.axis=1.5, cex.lab=1.5, main="", freq=TRUE, ylim=c(0,20), xlim=c(3.5,6.5))
mtext("c",3, adj=0, cex=2, font=2)
DISTkm <- DIST/1000
plot(bio1D~DISTkm, xlab="Geographic distance (km)", xlim=c(0,600), ylim=c(0,5),ylab="Difference in mean temperature", bty="n", cex.axis=1.5, cex.lab=1.5, cex=0.8, pch=19) #shows near sites different and distance sites similar (in addition to usual decay with distance)
mtext("d",3, adj=0, cex=2, font=2)
plot(BRAY~DISTkm, ylim=c(0,1.1), xlim=c(0,600), xlab="Geographic distance (km)", ylab="Bray-Curtis dissimilarity", bty="n", cex.axis=1.5, cex.lab=1.5, cex=0.8, pch=19, yaxs="i")
mtext("a",3, adj=0, cex=2, font=2)
hist(freq, 20, density=50, col="lightgray", border="black", xlab="Number of records", ylab="Number of species", cex.axis=1.5, cex.lab=1.5, main="", xlim=c(0,50)) # plot frequency histogram
mtext("b",3, adj=0, cex=2, font=2)
hist(records$VCA, breaks=c(0:1,4:5,9:10,14:15,19:20,24:25,29:30,34:35,39:40,44:45,49:50,54:55,59:60,64:65,69:70,74:75,79:80,84:85,89:90,94:95,99:100), density=50, col="lightgray", border="black", xlab="Cover-abundance", ylab="Number of records", cex.axis=1.5, cex.lab=1.5, main="", xlim=c(0,100), freq=TRUE) # plot frequency his
mtext("c",3, adj=0, cex=2, font=2)
hist(diversity, 10, density=50, col="lightgray", border="black", xlab="Number of species", ylab="Number of plots", cex.axis=1.5, cex.lab=1.5, main="", freq=TRUE, ylim=c(0,20), xlim=c(10,60))
mtext("d",3, adj=0, cex=2, font=2)
##############
#non-metric multidimensional scaling
BRAY <- vegdist(species)
NMDS <- metaMDS(BRAY, pc=TRUE)
SCORES3 <- as.data.frame(scores(NMDS))
#plot linear correlations of variables with NMDS plot
linears <- with(variables, cbind(bio12, bio1, sand, pH_CaCl2))
colnames(linears) <- c("bio12", "bio1", "%sand", "pH")
VF <- vf(SCORES3, linears, nperm=1000)
print(VF)
par(mar=c(5,5,1,1))
plot(SCORES3, col="white", xlab="NMDS axis-1", ylab="NMDS axis-2", cex.axis=1.5, cex.lab=1.5, bty="l", las=0)
plot(VF, col="black", cex=2, lwd=2)
plot(SCORES3, col="white", xlab="NMDS axis-1", ylab="NMDS axis-2", cex.axis=1.5, cex.lab=1.5, bty="l", las=0)
text(SCORES3, labels=variables$site, cex=0.7, font=2)
arrows(0.06, -0.22, -0.42, -0.02, lty=2, angle=20) #NB re-runs of the NMDS will produce different configurations/orientations and so this arrow won't point to the same plots
####################
#distance based analysis
#create distances for each variable
bio1D <- vegdist(variables$bio1/10, method='euclidean')
bio12D <- vegdist(variables$bio12, method='euclidean')
aspectD <- vegdist(variables$aspect, method='euclidean')
aspect2D <- vegdist(variables$aspect2, method='euclidean')
slopeD <- vegdist(variables$slope, method='euclidean')
sandD <- vegdist(variables$sand, method='euclidean')
outcropD <- vegdist(variables$outcrop, method='euclidean')
surfaceD <- vegdist(variables$surface, method='euclidean')
PD <- vegdist(variables$P, method='euclidean')
NO3D <- vegdist(variables$NO3, method='euclidean')
NH4D <- vegdist(variables$NH4, method='euclidean')
KD <- vegdist(variables$K, method='euclidean')
conductD <- vegdist(variables$conduct, method='euclidean')
pHD <- vegdist(variables$pH_CaCl2, method='euclidean')
##model exponential distance decay:
InverseBRAY <- (1-BRAY) + 1/DIST
BRAYtransform <- -log(InverseBRAY)
multi <- MRM(BRAYtransform ~ DIST + bio1D + NH4D + conductD + aspectD, nperm=10000)
COEF <- multi$coef[,1]
PREDICT1 <- 1-exp(-(COEF[2])*max(DIST))
PREDICT2 <- 1-exp(-(COEF[3])*max(bio1D))
PREDICT3 <- 1-exp(-(COEF[4])*max(NH4D))
PREDICT4 <- 1-exp(-(COEF[5])*max(conductD))
PREDICT5 <- 1-exp(-(COEF[6])*max(aspectD))
PREDICTIONS <- rbind(PREDICT1, PREDICT2, PREDICT3, PREDICT4, PREDICT5)
rownames(PREDICTIONS) <- c("Geographic distance", "Mean temperature", "Ammonium nitrate", "Electrical conductivity", "Aspect")
nothing <- c(1, 2, 3, 4, 5)
par(mar=c(20,15,1,1)+0.5)
plot(nothing ~ PREDICTIONS, yaxt='n', xlim=c(0.5,1), bty="l", las=1, ylab="", cex.axis=1, cex.lab=1.4, xlab="Bray-Curtis dissimilarity", pch=16, cex=2)
axis(2, at=c(1, 2, 3, 4, 5), labels=rownames(PREDICTIONS), cex.axis=1.4, las=2)
##############
#GLMM - multivariate generalised mixed-model of species occurrences
#subset a multivariate binary dataset to include species with a min. number of records
species3 = species2[,colSums(species2) > 4] #i.e. 5 or more records included only
STACK2 <- stack(species3) #stacks P/A of each spp. in each plot into one column
var2 <- do.call("rbind", replicate(ncol(species3), variables, simplify = FALSE)) #number of replicates = number of species with 5 or more records (number of columns in 'species3')
dataset2 <- cbind(STACK2, var2)
#below, site random factor means the slopes of the response don't change btw sites but 'intercepts' can change - accounting for how freq may be biased due to autocorrelation within reps. The random spp. term allows intercept and slope to vary by species, so frequency and response of spp. to gradients can differ:
##run-forward selection on selected terms, having excluded those that increased AIC and/or have high p-values for their addition etc, or have no significant effect individually
#forward selection
#NOTE the following analysis may take several hours on a standard computer
M1 <- lmer(values ~ (1|site), family=binomial(link="logit"), dataset2)
M1
M2 <- update(M1,. ~ . + (1|ind))
M2
M3 <- update(M2,. ~ . - (1|ind) + PCNM1 + (1 + PCNM1|ind))
M3
M4 <- update(M3,. ~ . - (1 + PCNM1|ind) + bio1 + (1 + PCNM1 + bio1|ind))
M4
M5 <- update(M4,. ~ . - (1 + PCNM1 + bio1|ind) + NH4 + (1 + PCNM1 + bio1 + NH4|ind))
M5
M6 <- update(M5,. ~ . - (1 + PCNM1 + bio1 + NH4|ind) + conduct + (1 + PCNM1 + bio1 + NH4 + conduct|ind))
M6
M7 <- update(M6,. ~ . - (1 + PCNM1 + bio1 + NH4 + conduct|ind) + P + (1 + PCNM1 + bio1 + NH4 + conduct + P|ind))
M7
anova(M1, M2, M3, M4, M5, M6, M7)
scor1 <- AIC(M1)
scor2 <- AIC(M2)
scor3 <- AIC(M3)
scor4 <- AIC(M4)
scor5 <- AIC(M5)
scor6 <- AIC(M6)
scor7 <- AIC(M7)
deltaAIC2 <- c(scor2-scor1, scor3-scor2, scor4-scor3,scor5-scor4, scor6-scor5, scor7-scor6)
print(deltaAIC2)
#plot random effects
RANDOM <- ranef(M7, postVar=TRUE, whichel="ind")
colnames(RANDOM$ind) <- c("(Intercept)", "Spatial eigenvector", "Mean temperature", "Ammonium N", "Electrical conductivity", "Colwell P")
qqmath(RANDOM) #plot of species 'random' intercepts & slopes against standard normal distribution
RANDOM2 <- ranef(M7, postVar=TRUE, whichel="site")
dotplot(RANDOM2) #plot of site 'random' intercepts
###############
#correlate phylogenetic shifts with variables
#load packages
require(picante)
require(ppcor)
#generate matching phylogeny and community datasets
PHYLOmine <- match.phylo.comm(phyloTree, species)
#calculate phylogenetic distances
PCDscores <- pcd(PHYLOmine$comm, PHYLOmine$phy)
PCD <- as.numeric(PCDscores$PCD)
PCDp <- as.numeric(PCDscores$PCDp)
PCDc <- as.numeric(PCDscores$PCDc)
UNIFRAC <- unifrac(PHYLOmine$comm, PHYLOmine$phy)
phylouni <- as.numeric(UNIFRAC)
linear <- as.data.frame(cbind(BRAYtransform, DIST, bio1D, bio12D, surfaceD, sandD, outcropD, aspectD, aspect2D, slopeD, PD, NO3D, NH4D, KD, conductD, pHD, phylouni, PCD, PCDp, PCDc))
#Unifrac
subset1 <- with(linear, cbind(DIST, bio1D, sandD, aspectD, outcropD, aspect2D, slopeD, KD, conductD, pHD, phylouni))
#PCD
subset2 <- with(linear, cbind(DIST, bio1D, sandD, aspectD, slopeD, KD, conductD, pHD, PCD))
#PCDp
subset3 <- with(linear, cbind(bio1D, sandD, aspectD, PD, conductD, pHD, PCDp))
#PCDc
subset4 <- with(linear, cbind(DIST, bio1D, surfaceD, sandD, aspectD, aspect2D, slopeD, NH4D, KD, conductD, PCDc))
########
##FUNCTION 'parCorPerm' developed here to run permutational significance test of multivariate pairwise partial correlations by permuting variables and testing correlation against null model. This function has been adapted from a related function for permutational significance testing of 2 variable correlations 'corPerm' by Pierre Legendre, see http://adn.biol.umontreal.ca/~numericalecology/Rcode/, and package 'ppcor' must be loaded
#parCorPerm only tests the partial interactions of multiple variables against 1 response and it assumes the response variable of interest is represented by the final (right hand) row of the input data matrix
#set up functions
sample_func <- function(x)
{
sample(x, length(x))
}
PFUNCTION <- function(x, y, z)
{
if(x > y) z <- z+1
return(z)
}
parCorPerm <- function(x,nperm) #where x is a data.frame with variables in columns and the response of interest in the final (RH) column; and nperm is the desired number of permutations
{
r.ref <- spcor(x) #calculate reference partial correlation on raw data
temp.ref <- as.data.frame(r.ref$estimate) #extract the coefficients
nGT <- as.data.frame(as.vector(rep(1, ncol(x)))) # a vector of 1s same length as number of variables
colnames(nGT) <- "value"
response <- as.data.frame(x[,ncol(x)]) #isolate the response column which must be the last one
number <- as.numeric(ncol(x)-1) #calcuate number of columns without the response
x <- as.data.frame(x[,1:number]) #extract just the variables, exclude the response
for(i in 1:nperm)
{
y.perm <- as.data.frame(apply(x, 2, sample_func)) #permute variables
test_data <- cbind(y.perm, response) #join permuted with unpermuted response
r.perm <- spcor(test_data) #calculate correlation coefficient with permuted variables
temp.perm <- as.data.frame(r.perm$estimate) #extract coefficients
nGT$value <- mapply(PFUNCTION, temp.perm[, ncol(temp.perm)], temp.ref[, ncol(temp.ref)], nGT$value)
}
P <- nGT$value/(nperm+1)
cat('\nPearson partial correlation of response variable to each explanatory variable (i.e. given the others)','\n')
cat('Prob (',nperm,'permutations) =',P,'\n','\n')
return(list(Variables=c(colnames(x), "Response"), Correlation=temp.ref[, ncol(temp.ref)], No.perm=nperm, P.perm=P ))
}
#use function parCorPerm to calculate partial correlations for each metric of phylogenetic distance
parCorPerm(subset1, 1000)
parCorPerm(subset2, 1000)
parCorPerm(subset3, 1000)
parCorPerm(subset4, 1000)
#end
No comments:
Post a Comment