Note: this is a post to hold the R script that is referred to in this post. It’s even more awfully technical than the post that refers to it; readers not interested or familiar with R code or this type of analysis may want to skip it!

You’d probably want to copy this all in to an R script (and perhaps view in r studio) to make sense of it.

# Disturbance by patch to determine landscape predictors of disturbance —-

# Housekeeping —-

setwd(“C:/Users/ke/Dropbox/PhD/Chapter 3 – spatial analysis/R”)

library(lme4); library(ggplot2); library(plyr); library(lattice); library(car); require(lmerTest); library(nlme)

source(“HighstatLibV6.R”)

dp

# Start of data exploration —-

names(dp)

str(dp)

summary(dp)

attach(dp)

head(dp)

sum(is.na(dp)) # no missing values

# 4.2 looking for outliers

vars1 “greenstone” , “esa” , “cons_estate” , “iron_formation”)

Mydotplot(dp[, vars1])

vars2 “mining_activity”,”commodity”,”wheatbelt_dist”,”town_dist”,”patch_area”,”disturbed.area”, “dist_perc”)

Mydotplot(dp[, vars2])

vars3 Mydotplot(dp[, vars3])

# no outliers once the tenement years issue was fixed (in raw data), but some vars need transformation

# Transforming variables —-

# mini-tutorial on how scaling works:

x s.x s.x # the resultant scaled variable stored the scaling attributes: the centre (mean) and scale (multiplier)

attr(s.x,”scaled:center”) # 5.5; extracting the centre, the mean of the original vector, from which each value was subtracted to do the scaling

attr(s.x,”scaled:scale”) # 3.02765; extracting the scale (the multiplier)

x.recreated = s.x*attr(s.x,’scaled:scale’) + attr(s.x,’scaled:center’) # to unscale just multiply the scaled vector by the ‘scale’ and add the ‘centre’

# transforming disturbance percentage:

plot(dp$polyID~dp$dist_perc)

hist(dp$dist_perc) # too skewed

dist_perc1 = dist_perc + 1

dp$logdisp = log(dist_perc1)

range(dp$logdisp)

hist(dp$logdisp)

plot(dp$polyID~dp$logdisp) # still too squashed. try log10 transformation?

dp$logdisp = log10(dist_perc1)

attach(dp)

range(dp$logdisp)

hist(dp$logdisp)

plot(dp$polyID~dp$logdisp) # still too skewed. Let’s try a logit transformation:

qlogis(0.9) # Getting familiar w logit transformations… this returns 2.197. qlogis does a logit transformation on (a set of) values.

plogis(2.197225) # returns 0.9. plogis ‘undoes’ the logit transformation.

dp$ldistp = (qlogis(dp$dist_perc+0.001)) # ldistp is logit transformation of the percent disturbed plus 0.001

plot(dp$ldistp) # Looks good.

range(dp$ldistp)

hist(dp$ldistp)

dp$s.dist = scale(dp$ldistp) # s.dist is the logit-transformed, scaled disturbed percentage

hist(dp$s.dist)

# OK I need to either do a logistic regression here, or simply logit-transform the response before modelling linearly

# doing a logistic transformation and then modelling linearly – advised by Michael Renton

# transforming and scaling ten_yrs:

range(ten_yrs) # the 177 is a valid value.

plot(ten_yrs) # needs some transformation

dp$lntyrs = log((ten_yrs+1))

plot(dp$lntyrs) # yep the natural log looks good.

range(dp$lntyrs)

dp$s.lntyrs = scale(dp$lntyrs, center = TRUE, scale = TRUE)

plot(dp$lntyrs)

plot(dp$s.lntyrs)

# transforming patch area:

range(dp$patch_area)

dp$patchkm2 = dp$patch_area/1000000 # make kms instead of m2

range(dp$patchkm2)

hist(dp$patchkm2)

dp$ln.pkm = log(dp$patchkm2) # natural log of patch area

hist(dp$ln.pkm) # centred and normalish

# transforming minedex_count:

plot(minedex_count)

table(dp$minedex_count)

plot(log(minedex_count+1)) # somewhat better

logmx = log(minedex_count+1)

range(logmx)

dp$s.mx = scale(logmx)

plot(dp$s.mx)

# transforming distance to wheatbelt:

range(wheatbelt_dist)

dp$wbkm = wheatbelt_dist/1000

plot(dp$wbkm)

range(dp$wbkm)

dp$s.wb = scale(dp$wbkm)

plot(dp$s.wb)

# transforming distance to town:

dp$tnkm = town_dist/1000

plot(dp$tnkm)

range(dp$tnkm)

dp$s.tn = scale(dp$tnkm)

plot(dp$s.tn)

# not much can be done with the commodity variable:

summary(dp$commodity) # not sure if having the “no commodity stated” value is useful…

# dp$commodity[dp$commodity==”no commodity stated”] <- “none” # creates NAs

# adding second-order polynomials of the numerical variables

dp$tenyrs2 = dp$s.lntyrs^2

dp$mx2 = dp$s.mx^2

dp$wb2 = dp$s.wb^2

dp$tn2 = dp$s.tn^2

range(dp$s.lntyrs)

range(dp$tenyrs2)

# combining greenstone and ironstone lithology data into one categorical variable

dp$lithology = ifelse(greenstone == “greenstone”,”greenstone”,”other.lithology”)

dp$iron_formation

dp$lithology = ifelse(iron_formation == “iron-form”,”iron.formation”,dp$lithology)

dp$lithology

table(dp$lithology) # clever duck

# Looking for collinearity —-

pairs(dp[,vars1], lower.panel = panel.cor)

#i.e. SA number, SA_mines and category all related – use SA_mines

pairs(dp[,vars2], lower.panel = panel.cor)

miningvars = c(“SA_mines”,”greenstone”,”iron_formation”,”minedex_count”,”ten_yrs”,”mining_activity”,”commodity”)

pairs(dp[,miningvars], lower.panel = panel.cor)

# also strong collinearity between minedex count and dt-count, mined and

# note mining activity replaces expl_ten and mined.

# low 0.4 collinearity between minedex count and ten_yrs, and 0.5 between SA_mines and mining activity

summary(dp$dist_perc ~ dp$veg_form)

summary(dp$cons_estate)

dpvars = c(“pastoral”,”greenstone”,”esa”,”cons_estate”,”iron_formation”,”sched_1″,”veg_form”,

“mining_activity”,”commodity”,”s.mx”,”s.lntyrs”,”s.wb”,”s.tn”,”s.dist”)

Mydotplot(dp[, dpvars])

pairs(dp[,dpvars], lower.panel = panel.cor)

# ok, have dealt with collinearity. There’s nothing above 0.5, and even the 0.5 is not important

# looking at possible X Y relationships —-

# first the numeric variables: plot Y against the Xes and calculate pearson coefficients:

dpnumvars = c(“s.mx”,”s.lntyrs”,”s.wb”,”s.tn”,”s.dist”)

pairs(dp[,dpnumvars], lower.panel = panel.cor)

# there appears to be a positive relationship between the area disturbed and both the

# number of mining projects and the total duration of tenements in an area.

# No relation with distance from wheatbelt, and slight negative relationship with distance from town,

# which makes sense – areas near towns are more likely to get more disturbed and to stop tracks from growing over etc.

plot(dp$dist_perc~dp$lntyrs) #remember that dp$lntyrs = log((ten_yrs+1))

plot(dp$ldistp~dp$lntyrs)

#then categorical variables:

boxplot(dp$s.dist~dp$pastoral) # no real effect of pastoral activity

boxplot(dp$s.dist~dp$greenstone) # more disturbance on greenstone

boxplot(dp$s.dist~dp$esa) # more disturbance on ESAs

boxplot(dp$s.dist~dp$cons_estate) # greatest disturbance on former leasehold then class As!

# UCL has same mean distrubance as gazetted conservation estate; though higher variance.

boxplot(dp$s.dist~dp$iron_formation) # more disturbance on BIFs

boxplot(dp$s.dist~dp$sched_1) # more disturbance on schedule 1 areas

boxplot(dp$s.dist~dp$veg_form) # less disturbance in bare areas and succulent steppe

boxplot(dp$dist_perc~dp$veg_form)

boxplot(dp$s.dist~dp$mining_activity) # more disturbance where there’s been mining tenement.

# strangely similar between exploration and no mining tenement.

boxplot(dp$s.dist~dp$commodity) # most disturbance around gold mining projects, maybe not significant

# Looking for interactions —-

coplot(s.dist ~ s.mx | factor(commodity), data = dp)

# R displays from lower left, across, to upper right.

coplot(s.dist ~ s.mx | factor(commodity), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# there could be an interaction between the commodity and the number of mining projects

coplot(s.dist ~ s.lntyrs | factor(pastoral), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# There could be an interaction between tenement years and pastoral, same as found in regional-scale analysis.

coplot(s.dist ~ factor(greenstone) | factor(pastoral), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# maybe nothing between greenstone and pastoral

coplot(s.dist ~ factor(veg_form) | factor(greenstone), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# maybe nothing between greenstone and pastoral

coplot(s.dist ~ s.tn | factor(veg_form), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# well there doesn’t seem to be much mallee near towns; looks like there could be an interaction

coplot(s.dist ~ s.tn | factor(esa), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# definitely looks like an interaction between esas and dist to town

coplot(s.dist ~ s.wb | factor(esa), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

# same again – interaction between esas and distance to wheatbelt, although esas tend to be much closer to wheatbelt than non-esas.

coplot(s.dist ~ factor(veg_form) | factor(esa), data = dp,

panel = function(x, y, …) {

tmp abline(tmp)

points(x, y) })

coplot(s.dist ~ factor(mining_activity) | factor(veg_form), data = dp )

# most of the mining and exploration appears to be in woodland

# Fitting and selecting models —-

# 4.7.1 using gaussian family and logit link with lme4

# glmer example: model1 dp1 = glmer(dist_perc ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+

(1|SA_number),

data=dp, family = gaussian (link = ‘logit’))

summary(dp1)

E1 F1 plot(x = F1,

y = E1,

xlab = “Fitted values”,

ylab = “Residuals”,

main = “Homogeneity?”)

abline(h = 0, v = 0, lty = 2)

plot(dp1)

plot(F1)

# 4.7.2 Fitting a linear mixed model with a logit-transformed response in lmerTest

dp2 = lmer(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+mx2+wb2+tn2+

(1|SA_number), REML=TRUE,

data=dp)

summary(dp2)

plot(dp2)

summary(dp$commodity)

# dp$commodity[is.na(dp$commodity)] <- “none”

# compare with model that doesn’t have random effects to see if they are important

dp2.gls = gls(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+mx2+wb2+tn2,

data=dp)

summary(dp2.gls)

plot(dp2.gls)

anova(dp2.gls,dp2)

anova(dp2)

plot(dp2)

E2 F2 plot(x = F2,

y = E2,

xlab = “Fitted values”,

ylab = “Residuals”,

main = “Homogeneity?”)

abline(h = 0, v = 0, lty = 2)

table(dp$SA_number)

length(E2)

dp3 = lmer(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+mx2+wb2+tn2+

(1|SA_number),

data=dp, REML = FALSE)

summary(dp3)

anova(dp3)

dp4 = lmer(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+

(1|SA_number),

data=dp, REML = FALSE)

summary(dp4)

anova(dp4)

dp5 = lmer(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+

(1|SA_number),

data=dp, REML = FALSE)

summary(dp5)

anova(dp5)

dp6 = lmer(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+

(1|SA_number),

data=dp, REML = FALSE)

summary(dp6)

anova(dp6)

# 4.7.3 Fitting a linear mixed model with a logit-transformed response in nlme

library(nlme)

# first check the significance of the random effect structure:

dp1 = lme(s.dist ~ factor(pastoral)+factor(iron_formation)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(sched_1)+factor(veg_form) +factor(commodity)+factor(mining_activity)+

s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+mx2+wb2+tn2+ln.pkm,

random = ~1|SA_number,method = “REML”,

data=dp)

summary(dp1)

plot(dp1)

# compare with model that doesn’t have random effects to see if they are important

dp1.gls = gls(s.dist ~ factor(pastoral) +factor(iron_formation) +factor(greenstone) +factor(esa) +

factor(cons_estate) +factor(sched_1) +factor(veg_form) +factor(commodity) +

factor(mining_activity) +s.lntyrs +s.mx +s.wb +s.tn +tenyrs2 +mx2 +wb2 +tn2 +ln.pkm,

data=dp)

dp$s.dist

summary(dp1.gls)

plot(dp1.gls)

anova(dp1.gls,dpn1)

# The p-value of the Maximum Likelihood ratio test comparison between the mixed and fixed methods

# (both calculated using REML, which allows us to apply the likelihood ratio test) is 0.0154,

# indicating that the random factor is significant.

# check the model:

E2 F2 op MyYlab <- “Residuals”

plot(x = F2, y = E2, xlab = “Fitted values”, ylab = MyYlab, main=”Residuals versus fitted”)

boxplot(E2 ~ pastoral, data = dp,

main = “Pastoral tenure”, ylab = MyYlab)

boxplot(E2 ~ greenstone, data = dp,

main = “Greenstone lithology”, ylab = MyYlab)

plot(x = dp$s.lntyrs, y = E2, ylab = MyYlab,

main = “Tenement duration”, xlab = “scaled log of total tenement duration”)

par(op)

#haven’t checked against all variables but there’s too many, moving on for now and will check when model is smaller.

# Next, I’m going to fit the ‘beyond optimal’ model with ML, then reduce it manually

# then validate it,

dp2 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+mx2+wb2+tn2+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp2)

# remove tn2

dp3 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+mx2+wb2+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

anova(dp2,dp3)

# remove mx2

dp4 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+wb2+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

anova(dp4,dp3) #yes, wasn’t significant

summary(dp4)

# remove wb2

dp5 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+tenyrs2+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp5)

#remove tenyrs2

dp6 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.mx+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp6)

# remove s.mx

dp7 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(sched_1)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp7)

# remove schedule 1 areas

dp8 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(iron_formation)+factor(veg_form)+factor(commodity)+

factor(mining_activity)+s.lntyrs+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp8)

# remove iron_form

dp9 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+factor(mining_activity)+s.lntyrs+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp9)

# remove either distance to wheatbelt or mining activity

dp9.no.wb = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+factor(mining_activity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

dp9.no.ma = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

anova(dp9,dp9.no.wb) # p = 0.1054

anova(dp9,dp9.no.ma) # p = 0.2048

# so remove mining activity

dp10 = lme(s.dist ~ factor(pastoral)+factor(greenstone)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp10)

# not significant are: greenstone, cons estate, wheatbelt. try remove each, one at a time.

dp10.no.green dp10.no.cons dp10.no.wb

anova(dp10,dp10.no.green) # p = 0.0539

anova(dp10,dp10.no.cons) # p < 0.0001

anova(dp10,dp10.no.wb) # p = 0.0395

# got to remove greenstone

dp11 = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.wb+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp11)

# pastoral is significant

# esa is significant

# cons estate need to be checked but is probably highly significant

# veg form looks significant

# commodity looks significant

# tenement years is highly significant

# s.wb – what is that still doing here? the p values from the model are so different from anova comparing models!

dp11.no.wb = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

anova(dp11,dp11.no.wb)

# yep get rid of wheatbelt

dp12 = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp12)

# I think we’re there so just double checking the significance of every variable:

dp12.no.pastoral = lme(s.dist ~ factor(esa)+factor(cons_estate)+factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.esa = lme(s.dist ~ factor(pastoral)+factor(cons_estate)+factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.cons = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.veg = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.comm = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.lntyrs = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+factor(commodity)+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.tn = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+factor(commodity)+s.lntyrs+ln.pkm,

random = ~1|SA_number,method = “ML”, data=dp)

dp12.no.pkm = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+factor(commodity)+s.lntyrs+s.tn,

random = ~1|SA_number,method = “ML”, data=dp)

anova(dp12,dp12.no.pastoral) # p = 0.0046

anova(dp12,dp12.no.esa) # p = 0.0001

anova(dp12,dp12.no.cons) # p = 0.0001

anova(dp12,dp12.no.veg) # p = 0.0206

anova(dp12,dp12.no.comm) # p = 0.0001

anova(dp12,dp12.no.lntyrs) # p = 0.0001

anova(dp12,dp12.no.tn) # p = 0.000001

anova(dp12,dp12.no.pkm) # p = 0.0348

# all significant, and lower AIC values!

# what if i try to reintroduce greenstone?

dp12.plus.green = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(greenstone)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

anova(dp12,dp12.plus.green)

# greenstone is marginally significant with p = 0.0562 and a slightly lower AIC value (but within 2 AIC). Leave out.

#what about interaction between mining and pastoralism?

dp13 = lme(s.dist ~ factor(pastoral)+factor(esa)+factor(cons_estate)+

factor(veg_form)+factor(commodity)+s.lntyrs+s.tn+ln.pkm+s.lntyrs:factor(pastoral),

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp13)

# interaction not significant

# I’ll also just try to use non-scaled versions of the variables for ease of interpretation if possible:

dp14 = lme(ldistp ~ factor(pastoral)+factor(esa)+factor(cons_estate)+

factor(veg_form) +factor(commodity) +lntyrs +tnkm +ln.pkm,

random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp14)

# excellent! unscaled it is…

# what about removing commodity as it’s not helpful as is, and seeing if this

# and allows for reinstatement of greenstone too, and removal of patch area:

dp15 = lme(ldistp ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+

lntyrs+tnkm+factor(greenstone),random = ~1|SA_number,method = “ML”,

data=dp)

summary(dp15)

#yes, lets just recheck the significance of all variables:

dp15.no.pastoral = update(dp15, .~. – factor(pastoral))

dp15.no.esa = update(dp15, .~. – factor(esa))

dp15.no.cons = update(dp15, .~. – factor(cons_estate))

dp15.no.veg = update(dp15, .~. – factor(veg_form))

dp15.no.green = update(dp15, .~. – factor(greenstone))

dp15.no.lntyrs = update(dp15, .~. – lntyrs)

dp15.no.tn = update(dp15, .~. – tnkm)

dp15.w.pkm = lme(ldistp ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+lntyrs+

tnkm+ln.pkm+factor(greenstone)+ln.pkm,random = ~1|SA_number,method = “ML”,data=dp)

anova(dp15,dp15.no.pastoral) # p = 0.0031

anova(dp15,dp15.no.esa) # p = 0.0004

anova(dp15,dp15.no.cons) # p < 0.0001

anova(dp15,dp15.no.veg) # p = 0.005

anova(dp15,dp15.no.green) # p = 0.006

anova(dp15,dp15.no.lntyrs) # p = 0.0001

anova(dp15,dp15.no.tn) # p = 0.0002

anova(dp15,dp15.w.pkm) # p = 0.778 # excluding patch area is justified.

# otherwise all significant, and lower AIC values!

# Final dp model, using REML and with unscaled response variable

finaldp = lme(ldistp ~ factor(pastoral)+factor(esa)+factor(cons_estate)+factor(veg_form)+

factor(greenstone)+lntyrs+tnkm,random = ~1|SA_number,method = “REML”, data=dp)

summary(finaldp)

# Validate model

par(mfrow=c(2,2))

plot(finaldp)

E1 F1

par(mfrow = c(1, 1))

plot(x = F1,

y = E1,

xlab = “Fitted values”,

ylab = “Residuals”,

main = “Homogeneity?”)

abline(h = 0, v = 0, lty = 2)

# according to Zuur 2009, looks ok

boxplot(E1 ~ factor(dp$greenstone)) # compare variance across factor levels

boxplot(E1 ~ factor(dp$esa))

boxplot(E1 ~ factor(dp$cons_estate))

boxplot(E1 ~ factor(dp$veg_form))

boxplot(E1 ~ factor(dp$commodity))

boxplot(E1 ~ factor(dp$mining_activity))

tapply(E1, INDEX = dp$pastoral, FUN = var)

tapply(E1, INDEX = dp$cons_estate, FUN = var)

# variance varies only by a factor of ~4; Zuur says that it’s a problem if greater than 10. So not a problem.

# could consider removing the outlier

# test for independence:

plot(x = dp$patch_area,

y = E1,

xlab = “patch area”,

ylab = “Residuals”)

abline(h = 0, lty = 2)

cor.test(E1, dp$patch_area) # not significant

# i.e. no clusters above or below; looks ok.

#Normality

hist(E1, main = “Normality”, breaks=10)

#Or qq-plot

qqnorm(E1)

qqline(E1)

# i.e. not so normal

hist(dp$s.dist ) # very skewed no worries

#But plot residuals also versus covariates NOT in the model!

boxplot(E1 ~ s.wb) ; abline(h = 0, lty = 2) # confirms no effect of distance to wheatbelt

boxplot(E1 ~ s.tn) ; abline(h = 0, lty = 2) # no residual effect of distance to town

# Final model and sketch with predictions —-

finaldp = lme(ldistp ~ factor(pastoral) +factor(esa) +factor(cons_estate) +factor(veg_form) +

factor(greenstone)+lntyrs+tnkm,random = ~1|SA_number,method = “REML”, data=dp)

summary(finaldp)

summary(finaldp)$coef

anova.lme(finaldp)

# I tried many different ways to predict from the model using a newdata dataframe but couldn’t get it to work.

# here i use the values i calculated manually in excel using the model outputs:

# remember how to un-logit the response variable:

plogis(2.197225) # returns 0.9. plogis undoes the logit transformation.

ldistp = (qlogis(dist_perc+0.001))

# predicting effect of pastoral status:

pastoral.pred.logit = data.frame(pastoral = c(“pastoral”,”pastoral”,”pastoral”,”not.pastoral”,”not.pastoral”,”not.pastoral”),

logit.percent.disturbed = c(-5.48660829,

-5.29961149,

-5.67360509,

-4.94659229,

-4.75959549,

-5.13358909))

pastoral.pred.logit$percent.disturbed = plogis(pastoral.pred.logit$logit.percent.disturbed)-0.001

pastoral.pred.logit$percent.disturbed

pastoral.pred2 fit = (c(0.0031247841,0.0060574270)*100))

pastoral.pred2$se.low = (c((0.003124784-0.002423696),(0.006057427-0.004860812))*100)

pastoral.pred2$se.high = (c((0.003968722-0.003124784),(0.007496270-0.006057427))*100)

k k + geom_pointrange(size=1,aes(colour=Pastoral.status)) + theme_bw()+ylab(“Percent of area disturbed”)

ggsave(“dp figure 1 – pastoral status.png”, width=4, height=3, dpi=300)

# using intervals function to calculate predictions and 95% intervals:

conf.ints = intervals(finaldp)

# predicting effect of pastoral status:

pastoral.pred1 = data.frame(pastoral = c(“pastoral”,”pastoral”,”pastoral”,”not.pastoral”,”not.pastoral”,”not.pastoral”),

logit.percent.disturbed = c(-5.40035707,

-3.81570717,

-6.98500697,

-4.86034100,

-3.30078471,

-6.41989730))

pastoral.pred1$percent.disturbed = plogis(pastoral.pred1$logit.percent.disturbed)-0.001

attach(pastoral.pred1)

fit1=percent.disturbed[1];low1 = percent.disturbed[3];high1=percent.disturbed[2]

fit2=percent.disturbed[4];low2 = percent.disturbed[6];high2=percent.disturbed[5]

pastoral.pred2 fit = c(fit1,fit2))

pastoral.pred2$se.low = c((fit1-low1),(fit2-low2))

pastoral.pred2$se.high = c((high1-fit1),(high2-fit2))

k k + geom_pointrange(size=1,aes(colour=Pastoral.status)) + theme_bw()+ylab(“Percent of area disturbed”)

ggsave(“dp figure 1 – pastoral status(full uncertainty).png”, width=4, height=3, dpi=300)

# predicting effect of esa status:

esa.pred1 = data.frame(esa = c(“esa”,”esa”,”esa”,”not.esa”,”not.esa”,”not.esa”),

logit.percent.disturbed = c(-3.83612729,

-3.64913049,

-4.02312409,

-4.94659229,

-4.75959549,

-5.13358909))

esa.pred1$percent.disturbed = (plogis(esa.pred1$logit.percent.disturbed)-0.001)*100

attach(esa.pred1)

fit1=percent.disturbed[1];low1 = percent.disturbed[3];high1=percent.disturbed[2]

fit2=percent.disturbed[4];low2 = percent.disturbed[6];high2=percent.disturbed[5]

esa.pred2 fit = c(fit1,fit2))

esa.pred2$se.low = c((fit1-low1),(fit2-low2))

esa.pred2$se.high = c((high1-fit1),(high2-fit2))

k k + geom_pointrange(size=1,aes(colour=Clearing.regs)) + theme_bw()+ylab(“Percent of area disturbed”)

ggsave(“dp figure 2 – esa.png”, width=4, height=3, dpi=300)

# predicting effect of conservation estate:

table(dp$cons_estate)

cons.pred1 = data.frame(cons = c(rep(c(“class-A”,”ex.leasehold”,”gazetted”,”not-cons”,”unofficial”),each=3)),

logit.percent.disturbed = c(-5.70267029, -4.91130149, -6.49403909,

-4.34291529, -3.60996819, -5.07586239,

-6.59664929, -5.88199979, -7.31129879,

-4.94659229, -4.30305129, -5.59013329,

-5.45534029, -4.75924019, -6.15144039))

cons.pred1$percent.disturbed = plogis(cons.pred1$logit.percent.disturbed)-0.001

attach(cons.pred1)

fit1= percent.disturbed[1]; low1 = percent.disturbed[3]; high1=percent.disturbed[2]

fit2= percent.disturbed[4]; low2 = percent.disturbed[6]; high2=percent.disturbed[5]

fit3= percent.disturbed[7]; low3 = percent.disturbed[9]; high3=percent.disturbed[8]

fit4= percent.disturbed[10];low4 = percent.disturbed[12];high4=percent.disturbed[11]

fit5= percent.disturbed[13];low5 = percent.disturbed[15];high5=percent.disturbed[14]

cons.pred2 fit = c(fit1,fit2,fit3,fit4,fit5))

cons.pred2$se.low = c((fit1-low1),(fit2-low2),(fit3-low3),(fit4-low4),(fit5-low5))

cons.pred2$se.high = c((high1-fit1),(high2-fit2),(high3-fit3),(high4-fit4),(high5-fit5))

k k + geom_pointrange(size=1,aes(colour=Conservation.tenure)) + theme_bw()+ylab(“Percent of area disturbed”)

ggsave(“dp figure 3 – cons.png”, width=6, height=3, dpi=300) # note colouring is different to combined plot (r2 code) but content is same

# predicting effect of vegetation form:

table(dp$veg_form)

veg.pred1 = data.frame(veg = c(rep(c(“bare”,”broombush”,”mallee”,”mulga”,”shrubland”,”succulent”,”woodland”),each=3)),

logit.percent.disturbed = c(-4.56203229, -5.35340109, -3.77066349,

-4.12225829, -4.35071239, -3.89380419,

-4.33060829, -4.71315739, -3.94805919,

-3.40862529, -3.71443229, -3.10281829,

-4.10232729, -4.37808419, -3.82657039,

-4.00632829, -4.30158499, -3.71107159,

-3.88034929, -4.08146089, -3.67923769))

veg.pred1$percent.disturbed = plogis(veg.pred1$logit.percent.disturbed)-0.001

attach(veg.pred1)

fit1=percent.disturbed[1];low1 = percent.disturbed[2];high1=percent.disturbed[3]

fit2=percent.disturbed[4];low2 = percent.disturbed[5];high2=percent.disturbed[6]

fit3=percent.disturbed[7];low3 = percent.disturbed[8];high3=percent.disturbed[9]

fit4=percent.disturbed[10];low4 = percent.disturbed[11];high4=percent.disturbed[12]

fit5=percent.disturbed[13];low5 = percent.disturbed[14];high5=percent.disturbed[15]

fit6=percent.disturbed[16];low6 = percent.disturbed[17];high6=percent.disturbed[18]

fit7=percent.disturbed[19];low7 = percent.disturbed[20];high7=percent.disturbed[21]

veg.pred2 fit = c(fit1,fit2,fit3,fit4,fit5,fit6,fit7))

veg.pred2$se.low = c((fit1-low1),(fit2-low2),(fit3-low3),(fit4-low4),(fit5-low5),(fit6-low6),(fit7-low7))

veg.pred2$se.high = c((high1-fit1),(high2-fit2),(high3-fit3),(high4-fit4),(high5-fit5),(high6-fit6),(high7-fit7))

k k + geom_pointrange(size=1,aes(colour=Vegetation.form)) + theme_bw()+ylab(“Percent of area disturbed”)+xlab(“Vegetation formation”)

ggsave(“dp figure 4 – veg.png”, width=6, height=3, dpi=300)

summary(dp$dist_perc~dp$veg_form)

summary(veg.pred$percent.disturbed~ veg.pred$veg)

# predicting effect of greenstone lithology:

summary(dp$dist_perc~dp$greenstone)

green.pred1 = data.frame(green = c(rep(c(“greenstone”,”not_greenstone”),each=3)),

logit.percent.disturbed = c(-4.55671829, -5.34808709, -3.76534949,

-4.94659229, -5.09247909, -4.80070549))

green.pred1$percent.disturbed = plogis(green.pred1$logit.percent.disturbed)-0.001

attach(green.pred1)

fit1=percent.disturbed[1];low1 = percent.disturbed[2];high1=percent.disturbed[3]

fit2=percent.disturbed[4];low2 = percent.disturbed[5];high2=percent.disturbed[6]

green.pred2 fit = c(fit1,fit2))

green.pred2$se.low = c((fit1-low1),(fit2-low2))

green.pred2$se.high = c((high1-fit1),(high2-fit2))

k k + geom_pointrange(size=1,aes(colour=Lithology)) + theme_bw()+ylab(“Percent of area disturbed”)+xlab(“Lithology”)

ggsave(“dp figure 5 – greenstone.png”, width=4, height=3, dpi=300)

# predicting effect of tenement years:

range(lntyrs) # 0.000000 to 5.181784

tenyrs.pred = data.frame(lntyrs = seq(from=0, to= 5.181784, by= 0.01))

tenyrs.pred$logit.percent.disturbed = -5.107206754 + 0.440025 *tenyrs.pred$lntyrs

tenyrs.pred$logit.percent.disturbed.upper = -5.107206754 + (0.440025+0.0756683) *tenyrs.pred$lntyrs

tenyrs.pred$logit.percent.disturbed.lower = -5.107206754 + (0.440025-0.0756683) *tenyrs.pred$lntyrs

tenyrs.pred$tenyrs = exp(tenyrs.pred$lntyrs)-1

tenyrs.pred$percent.disturbed = (plogis(tenyrs.pred$logit.percent.disturbed)-0.001)*100

tenyrs.pred$percent.disturbed.upper = (plogis(tenyrs.pred$logit.percent.disturbed.upper)-0.001)*100

tenyrs.pred$percent.disturbed.lower = (plogis(tenyrs.pred$logit.percent.disturbed.lower)-0.001)*100

plot(percent.disturbed~lntyrs, data = tenyrs.pred, xlab = “log of tenement years”, ylab=”Percentage of area disturbed”)

head(tenyrs.pred) # variables of use are: tenyrs,percent.disturbed,percent.disturbed.upper,percent.disturbed.lower

#start of plot

p = ggplot(tenyrs.pred, aes(tenyrs,percent.disturbed))

p + geom_line(colour=I(“red”),size=1.1)+

geom_ribbon(aes(ymin=percent.disturbed.lower, ymax=percent.disturbed.upper),

fill=I(“red”),alpha=0.2)+

theme_bw() +

labs(x=”Total tenement duration (years)”,y=”Area disturbed (%)”)

ggsave(“dp figure 6 – tenement years.png”, width=4, height=3, dpi=300)

# end of plot

# predicting effect of distance to town:

range(tnkm) # 0.74 100.00

tnkm.pred = data.frame(tnkm = seq(from=0.74, to= 100, by= 0.1))

tnkm.pred$logit.percent.disturbed = -2.629818536 + -0.019183 *tnkm.pred$tnkm

tnkm.pred$logit.percent.disturbed.upper = -2.629818536 + (-0.019183+0.0051545) *tnkm.pred$tnkm

tnkm.pred$logit.percent.disturbed.lower = -2.629818536 + (-0.019183-0.0051545) *tnkm.pred$tnkm

tnkm.pred$percent.disturbed = (plogis(tnkm.pred$logit.percent.disturbed)-0.001)*100

tnkm.pred$percent.disturbed.upper = (plogis(tnkm.pred$logit.percent.disturbed.upper)-0.001)*100

tnkm.pred$percent.disturbed.lower = (plogis(tnkm.pred$logit.percent.disturbed.lower)-0.001)*100

plot(percent.disturbed~tnkm, data = tnkm.pred, xlab = “Distance to town (km)”, ylab=”Percentage of area disturbed”)

#start of plot

p = ggplot(tnkm.pred, aes(tnkm,percent.disturbed))

p + geom_line(colour=I(“blue”),size=1.1)+

geom_ribbon(aes(ymin=percent.disturbed.lower, ymax=percent.disturbed.upper),

fill=I(“blue”),alpha=0.2)+

theme_bw() +

labs(x=”Distance from town (kms)”,y=”Area disturbed (%)”)

ggsave(“dp figure 7 – distance from town.png”, width=4, height=3, dpi=300)

# end of plot

###########################################################################################################

# an afterthought: it would be good to combine pastoral and cons_estate as they both refer to tenure.

# I’ve resolved a few conflicts and created a new variable called ‘tenure’; the AIC for this model is slightly better,

dptenure = lme(ldistp ~ factor(tenure) + factor(esa) + factor(veg_form) + factor(greenstone) +

lntyrs + tnkm, random = ~1|SA_number,method = “REML”, data=dp)

summary(dptenure)

summary(dp$tenure)

# Maybe this would be better but I’d have to do all the predictions over again in excel and that’s too hard

# but for now I’ll have a sneak peak at the result of it:

# Class-A is the default,

# former leasehold has more disturbance

# gazetted cons has less disturbance

# not-cons has more disturbance but less than former leasehold

# pastoral has more disturbance but less than not-cons

# unofficial cons has more than pastoral

# so in order of most to least disturbance:

# 1. former leasehold

# 2. not-cons-estate

# 3. unofficial cons

# 4. pastoral

# 5. Class-A

# 6. gazetted conservation

# It’s wierd, you’d expect pastoral and former-pastoral to match up a bit more.

# ..and perhaps unofficial cons to match up more with gazetted cons.

# it looks as if the process of converting a pastoral lease to a conservation-ex-lease is the most disturbing thing!

# doesn’t really make sense, unless it’s all focued on Credo and they cleared a lot more area?

# I think i need to actually just combine the pastoral and ex-pastoral categories and that might increase the sample size and solve the issue.