Libraries sourced from an additional script
Ecological data was obtained from ALA and AWAP data bases through additional scripts (available in the scripts folder of this same project)
# Data sourced from additional scripts:
# ALA Data
source(here::here("Scripts/1_ALA_variables.R"))
# AWAP Data
source(here::here("Scripts/4_AWAPDailyQueries_2022.R"))
We extracted the climate variables correspondent to the activity period of each species (see additional scripts for more details)
vegeta <- read_csv(here::here("Data/6_VegetationVariables.csv"))
climat <- read_csv(here::here("Data/7_ClimateVariables.csv"))
locat <- read_csv(here::here("Data/5_Locations.csv"))
The climate data we extracted contained many additional variables, but we decided to use only the most informative ones
climat <-
climat %>%
filter(species != "xyle3" &
species != "oliv3") %>% # removed. vegetation data not available
arrange(species) %>%
dplyr::select(c(1, 5:8, 11:19)) %>%
dplyr::rename(
over35 = avg_temp_over_35, # days over 35deg averaged through 10 years
MaxT = avg_max_temp, # daily -> mean in one month -> mean over 10 y
MinT = avg_min_temp, # daily -> mean in one month -> mean over 10 y
Solar = avg_sol, # daily radiation -> mean in month -> mean over 10 y
Vapour = avg_vpr, # vapor pressure -> mean in month -> mean over 10 y
Clouds = cloud_cover, # cloud cover per year
Rain = avg_rr
) # rainfall per day -> mean in one month -> mean over 10 years
names(climat)
## [1] "species" "over35" "MaxT" "MinT" "Solar" "Clouds" "Rain"
## [8] "Vapour" "avg_temp_over_35_Coll" "avg_max_temp_Coll" "avg_min_temp_Coll" "avg_sol_Coll" "avg_rr_Coll" "avg_vpr_Coll"
# Suffix Coll = value for the parameter in the collection month.
# No suffix = average value for this variable across the activity period
vegeta <-
vegeta %>%
filter(spp != "vrdi1" &
spp != "rose05") %>% # removed. climate data not available
arrange(spp) %>%
dplyr::select(c(1, 11:18)) # Keep only the vegetation variables
names(vegeta) <- (c(
"beetle", "NPP", "BareSoil", "LeafArea",
"FPAR", "Aridity", "C3Macro", "C3Meso", "C4Mega"
)) # rename
EcoVar <-
data.frame(vegeta, climat) %>%
dplyr::select(-species) %>%
dplyr::select(c(1, 6:16)) %>% # Keep only variables for the period of activity
mutate(AridityN = Aridity * -1) %>% # Because arid = closer to 0
dplyr::select(-Aridity)
Some beetles were removed from the analysis because their position on the phylogeny is uncertain.
EcoVar <-
EcoVar %>%
mutate(spp = substr(beetle, 1, 4)) %>%
filter(
spp != "ambl" & # Amblyterus cicatricosus
spp != "psqz" & # Pseudoschizongnatus schoenfeldti
spp != "saul" & # Saulostomus villosus
spp != "sqzb" & # Schizognathus burmeisteri
spp != "sqzc" & # Schizognathus compressicornis
spp != "sqzm" # Schizognathus mesosternalis
) %>% # These species were removed. No phylogenetic info
dplyr::select(-spp)
# Group location data by species
LocatAgg <- locat %>%
dplyr::mutate(Spp1 = substr(Spp, 1, 4)) %>%
dplyr::select(Spp1, Latitude, Longitude) %>%
dplyr::group_by(Spp1) %>%
dplyr::summarise(
meanLat = mean(Latitude),
meanLon = mean(Longitude),
minLat = min(Latitude),
maxLat = max(Latitude),
minLon = min(Longitude),
maxLon = max(Longitude)
)
# Subset with only species with white underlay
LocatAggWU <- LocatAgg %>%
dplyr::filter(
Spp1 %in% c("xyls", "prsi", "xyle", "pczo", "pczp", "pczv")
)
# Subset with only species that cluster
LocatAggC <- LocatAgg %>%
dplyr::filter(
Spp1 %in% c("aurs", "pvul", "rose", "lats", "opal", "ecry")
)
Finally we obtained the data frame EcoVar
which contains
all the relevant ecological variables for the relevant species.
But before starting the analysis, we studied the correlations between the variables.
We obtained the \(R^2\) and correspondent p values for all combinations between the ecological variables.
EVarP <- EcoVar[, c(2:length(EcoVar))] # remove the categorical variables
EVarMatP <- Hmisc::rcorr(as.matrix(EVarP))
EVarR2P <- round(EVarMatP$r, 2) # R2 values
EvarCpP <- round(EVarMatP$P, 5) # p values
The following are the results for all variables including Vegetation and climate for activity period:
C3Macro | C3Meso | C4Mega | over35 | MaxT | MinT | Solar | Clouds | Rain | Vapour | AridityN | |
---|---|---|---|---|---|---|---|---|---|---|---|
C3Macro | 1.00 | 0.57 | 0.99 | -0.09 | 0.68 | 0.87 | 0.47 | 0.55 | 0.84 | 0.95 | -0.36 |
C3Meso | 0.57 | 1.00 | 0.48 | -0.47 | -0.03 | 0.26 | -0.23 | 0.73 | 0.64 | 0.42 | -0.61 |
C4Mega | 0.99 | 0.48 | 1.00 | -0.02 | 0.73 | 0.90 | 0.53 | 0.49 | 0.81 | 0.96 | -0.32 |
over35 | -0.09 | -0.47 | -0.02 | 1.00 | 0.49 | 0.22 | 0.47 | -0.55 | -0.20 | -0.01 | 0.40 |
MaxT | 0.68 | -0.03 | 0.73 | 0.49 | 1.00 | 0.91 | 0.83 | -0.10 | 0.38 | 0.73 | 0.23 |
MinT | 0.87 | 0.26 | 0.90 | 0.22 | 0.91 | 1.00 | 0.72 | 0.21 | 0.64 | 0.92 | -0.06 |
Solar | 0.47 | -0.23 | 0.53 | 0.47 | 0.83 | 0.72 | 1.00 | -0.26 | 0.23 | 0.53 | 0.28 |
Clouds | 0.55 | 0.73 | 0.49 | -0.55 | -0.10 | 0.21 | -0.26 | 1.00 | 0.61 | 0.52 | -0.59 |
Rain | 0.84 | 0.64 | 0.81 | -0.20 | 0.38 | 0.64 | 0.23 | 0.61 | 1.00 | 0.77 | -0.67 |
Vapour | 0.95 | 0.42 | 0.96 | -0.01 | 0.73 | 0.92 | 0.53 | 0.52 | 0.77 | 1.00 | -0.27 |
AridityN | -0.36 | -0.61 | -0.32 | 0.40 | 0.23 | -0.06 | 0.28 | -0.59 | -0.67 | -0.27 | 1.00 |
C3Macro | C3Meso | C4Mega | over35 | MaxT | MinT | Solar | Clouds | Rain | Vapour | AridityN | |
---|---|---|---|---|---|---|---|---|---|---|---|
C3Macro | NA | 0.00000 | 0.00000 | 0.17021 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 |
C3Meso | 0.00000 | NA | 0.00000 | 0.00000 | 0.65740 | 0.00002 | 0.00023 | 0.00000 | 0.00000 | 0.00000 | 0.00000 |
C4Mega | 0.00000 | 0.00000 | NA | 0.72154 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 |
over35 | 0.17021 | 0.00000 | 0.72154 | NA | 0.00000 | 0.00025 | 0.00000 | 0.00000 | 0.00154 | 0.87588 | 0.00000 |
MaxT | 0.00000 | 0.65740 | 0.00000 | 0.00000 | NA | 0.00000 | 0.00000 | 0.10765 | 0.00000 | 0.00000 | 0.00019 |
MinT | 0.00000 | 0.00002 | 0.00000 | 0.00025 | 0.00000 | NA | 0.00000 | 0.00050 | 0.00000 | 0.00000 | 0.34075 |
Solar | 0.00000 | 0.00023 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | NA | 0.00002 | 0.00020 | 0.00000 | 0.00000 |
Clouds | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.10765 | 0.00050 | 0.00002 | NA | 0.00000 | 0.00000 | 0.00000 |
Rain | 0.00000 | 0.00000 | 0.00000 | 0.00154 | 0.00000 | 0.00000 | 0.00020 | 0.00000 | NA | 0.00000 | 0.00000 |
Vapour | 0.00000 | 0.00000 | 0.00000 | 0.87588 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | NA | 0.00001 |
AridityN | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 0.00019 | 0.34075 | 0.00000 | 0.00000 | 0.00000 | 0.00001 | NA |
Afterwards we removed the variables with correlations of R > 0.8
EcolVar <-
EcoVar %>%
dplyr::select(-C3Macro, -C4Mega, -C3Meso, -MinT)
EVar <- EcolVar[, c(2:length(EcolVar))] # remove the categorical variables
EVarMat <- Hmisc::rcorr(as.matrix(EVar))
EVarR2 <- round(EVarMat$r, 2) # R2 values
EvarCp <- round(EVarMat$P, 5) # p values
Finally, we considered only these variables:
over35 | MaxT | Solar | Clouds | Rain | Vapour | AridityN | |
---|---|---|---|---|---|---|---|
over35 | 1.00 | 0.49 | 0.47 | -0.55 | -0.20 | -0.01 | 0.40 |
MaxT | 0.49 | 1.00 | 0.83 | -0.10 | 0.38 | 0.73 | 0.23 |
Solar | 0.47 | 0.83 | 1.00 | -0.26 | 0.23 | 0.53 | 0.28 |
Clouds | -0.55 | -0.10 | -0.26 | 1.00 | 0.61 | 0.52 | -0.59 |
Rain | -0.20 | 0.38 | 0.23 | 0.61 | 1.00 | 0.77 | -0.67 |
Vapour | -0.01 | 0.73 | 0.53 | 0.52 | 0.77 | 1.00 | -0.27 |
AridityN | 0.40 | 0.23 | 0.28 | -0.59 | -0.67 | -0.27 | 1.00 |
over35 | MaxT | Solar | Clouds | Rain | Vapour | AridityN | |
---|---|---|---|---|---|---|---|
over35 | NA | 0.00000 | 0.00000 | 0.00000 | 0.00154 | 0.87588 | 0.00000 |
MaxT | 0.00000 | NA | 0.00000 | 0.10765 | 0.00000 | 0.00000 | 0.00019 |
Solar | 0.00000 | 0.00000 | NA | 0.00002 | 0.00020 | 0.00000 | 0.00000 |
Clouds | 0.00000 | 0.10765 | 0.00002 | NA | 0.00000 | 0.00000 | 0.00000 |
Rain | 0.00154 | 0.00000 | 0.00020 | 0.00000 | NA | 0.00000 | 0.00000 |
Vapour | 0.87588 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | NA | 0.00001 |
AridityN | 0.00000 | 0.00019 | 0.00000 | 0.00000 | 0.00000 | 0.00001 | NA |
Testing correlations
Running a PCA is only possible if variables can be correlated:
cord <- cor(EVar) # default: pearsons correlation coeficient
N <- dim(EVar)[1] # number of observations
cortest.bartlett(cord, n = N) # Are the variables correlated? Yes p < 0.05
## $chisq
## [1] 1693.035
##
## $p.value
## [1] 0
##
## $df
## [1] 21
Yes, they can be summarised in the new variables called components
Extract components
Here we extracted the eigen values, components and loadings matrix
eigVal <- eigen(cord) # Extract eigen values
vectorev <- eigVal$vectors # vector with the eigen values
compv <- prcomp(EVar, retx = TRUE, center = TRUE, scale. = TRUE) # components
load <- compv$rotation # Loadings matrix
Scree Plot
Represents the eigen values associated to each component. We used only the ones with eigen value > 1.
{
plot(c(1:length(eigVal$values)), eigVal$values,
type = "b", cex = 1.4,
ylab = "Eigen Values", xlab = "Principal components",
pch = 21, bg = "#648fff"
)
abline(h = 1, col = "#ff2c85", lwd = 2, lty = 2)
}
Variance Explained
The following table shows the proportion of variance explained by each component.
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7206 1.6629 0.77800 0.59788 0.40371 0.31950 0.21572
## Proportion of Variance 0.4229 0.3950 0.08647 0.05107 0.02328 0.01458 0.00665
## Cumulative Proportion 0.4229 0.8179 0.90442 0.95549 0.97877 0.99335 1.00000
PC1 and PC2 are able to explain 82% of all the variance in ecological habitats for our samples.
Loadings Matrix
The colours in the following figure represent the correlations of each variable is to each PC
par(mar = c(4, 8, 4, 4))
plot(load[, 1:2],
cex = 0.8, breaks = c(-1, -0.5, -0.4, -0.3, 0.3, 0.4, 0.5, 1),
las = 2, ylab = " ", xlab = " ",
digits = 2,
col = c(
"#ff2c85", "#ff85ac",
"#ffc4d5", "#F7F7F7",
"#d3d8ff", "#a2b2ff",
"#648fff"
),
main = "Loadings matrix "
)
Lower PC1 = higher humidity (vapour, rain and clouds) lower aridity.
Lower PC2 = higher solar radiation, higher max temp, more days above 35 and more aridity
# brewer.pal(n = 7, name = "Accent")
# Extract PC axes for plotting
PCAvalues <- data.frame(
Species = EcolVar$beetle,
compv$x
)
# Extract loadings of the variables
PCAloadings <- data.frame(Variables = rownames(compv$rotation), compv$rotation)
# PC1 vs PC2
ggplot(PCAvalues, aes(x = PC1, y = PC2)) +
geom_point(fill = "#648fff", col = "black", pch = 21, size = 2, alpha = 0.4) +
theme_bw() +
geom_text(
data = subset(PCAvalues, PC1 > 2 | PC1 < -2.5 | PC2 < -2.5),
alpha = .3, size = 1, aes(label = Species), col = "black"
) +
annotate("text",
size = 3,
x = (PCAloadings$PC1 * 11.5),
y = (PCAloadings$PC2 * 11.5),
label = (PCAloadings$Variables)
) +
geom_segment(
data = PCAloadings[c(1, 2, 3), ],
aes(
x = 0, y = 0,
xend = (PC1 * 11), yend = (PC2 * 11)
),
arrow = arrow(
length = unit(1 / 2, "picas")
),
color = "#ff2c85"
) +
geom_segment(
data = PCAloadings[c(4, 5, 6, 7), ],
aes(
x = 0, y = 0,
xend = (PC1 * 11), yend = (PC2 * 11)
),
arrow = arrow(
length = unit(1 / 2, "picas")
),
color = "#ffb000"
) +
geom_hline(yintercept = 0, linetype = "dashed", colour = "gray") +
geom_vline(xintercept = 0, linetype = "dashed", colour = "gray") +
ylim(-10, 5) +
xlim(-7, 5)
PC values by species
We obtained a data frame with the PC values that summarise the ecological variables considered in our analysis.
We need one PC value per species.
# Import species names
SppNames <- read.csv(here::here("Data/9_CodesAndSpecies.csv"))
SppNames <-
SppNames %>%
mutate(spp = substr(Ind, 1, 4)) %>%
filter(
spp != "ambl" & # Amblyterus cicatricosus
spp != "psqz" & # Pseudoschizongnatus schoenfeldti
spp != "saul" & # Saulostomus villosus
spp != "sqzb" & # Schizognathus burmeisteri
spp != "sqzc" & # Schizognathus compressicornis
spp != "sqzm" # Schizognathus mesosternalis
) %>% # These species were removed. No phylogenetic info
dplyr::select(-spp) %>%
arrange(Ind)
# Add specie snames to the PC values data frame
PC_Values$phylogeny_name <- SppNames$phylogeny_name
# Find the mean per species:
FinalPCs <-
PC_Values %>%
dplyr::select(-Species) %>%
dplyr::group_by(phylogeny_name) %>% # group
dplyr::summarise(across(everything(), list(mean))) # mean
# Print results
write.csv(FinalPCs, here::here("Data/FromCode/PCsbySpp.csv"))
Reflectivity by
individual
Cons1oo <- read.csv(here::here("Data/FromCode/ConsolidatedReflectivityInd.csv"))[-1]
PC_Values2 <- PC_Values
colnames(PC_Values2) <- c("ind", "PC1", "PC2", "phylogeny_name")
Cons1 <- merge(Cons1oo, PC_Values2, by = "ind") %>%
dplyr::rename("phylogeny_name" = phylogeny_name.x) %>%
dplyr::select(-phylogeny_name.y)
write.csv(Cons1, here::here("Data/FromCode/ConsReflEcolInd.csv"))
Absorptivity by
individual
Cons2oo <- read.csv(here::here("Data/FromCode/ConsolidatedAbsoptivityInd.csv"))[-1]
Cons2 <-
PC_Values %>%
filter(Species %in% (Cons2oo$ind)) %>% # Subset the PCA values
arrange(Species) %>%
bind_cols(., Cons2oo) %>% # join with the 'Optical' Data frame
dplyr::select(ind, everything(), -Species, -phylogeny_name...7) %>% # rearrange the column order
dplyr::rename("phylogeny_name" = phylogeny_name...4)
## New names:
## • `phylogeny_name` -> `phylogeny_name...4`
## • `phylogeny_name` -> `phylogeny_name...7`
Lower PC1 = higher humidity: vapour, rain and clouds, which in turn correlates with more plant coverage.
Lower PC2 = higher solar radiation and max temp, more days above 35 and more aridity.
Check the distribution of the size at the species level:
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
SizeSppDf <-
Cons1 %>%
dplyr::select(size, phylogeny_name) %>%
dplyr::group_by(phylogeny_name) %>%
dplyr::summarise(
meanSize = mean(size)
)
hist(SizeSppDf$meanSize)
mean(SizeSppDf$meanSize)
## [1] 2.25245
## [1] 1.766
Evaluate at the species level:
PCSZ <-
Cons1 %>%
dplyr::select(size, phylogeny_name, PC1, PC2) %>%
dplyr::group_by(phylogeny_name) %>% # group
dplyr::summarise(
meanPC1 = mean(PC1),
meanPC2 = mean(PC2),
meanSize = mean(size),
sdPC1 = sd(PC1),
sdPC2 = sd(PC2),
sdSize = sd(size)
)
Checking for correlations between PCs and Size at the species level
##
## Pearson's product-moment correlation
##
## data: PCSZ$meanSize and PCSZ$meanPC1
## t = 3.221, df = 45, p-value = 0.002375
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1663555 0.6404094
## sample estimates:
## cor
## 0.4328445
##
## Pearson's product-moment correlation
##
## data: PCSZ$meanSize and PCSZ$meanPC2
## t = -1.1883, df = 45, p-value = 0.2409
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4395748 0.1186857
## sample estimates:
## cor
## -0.174426
Plot
szp1 <- ggplot(PCSZ, aes(x = -meanPC1, y = meanSize)) +
geom_errorbar(aes(
ymin = meanSize - sdSize,
ymax = meanSize + sdSize
),
col = "#cecec2"
) +
geom_errorbarh(aes(
xmin = -meanPC1 - sdPC1,
xmax = -meanPC1 + sdPC1
),
col = "#cecec2"
) +
geom_point(
size = 2, pch = 21, fill = "#648fff",
colour = "black", alpha = 0.9
) +
theme_minimal() +
theme(legend.position = "none") +
geom_smooth(
method = lm, color = "#648fff",
fill = "#D3D3D3", size = 0.6
) +
xlab("Humidity (-PC1)") +
ylab("Size (cm)")
szp2 <- ggplot(PCSZ, aes(x = -meanPC2, y = meanSize)) +
geom_errorbar(aes(
ymin = meanSize - sdSize,
ymax = meanSize + sdSize
),
col = "#cecec2"
) +
geom_errorbarh(aes(
xmin = -meanPC2 - sdPC2,
xmax = -meanPC2 + sdPC2
),
col = "#cecec2"
) +
geom_point(
size = 2, pch = 21, fill = "#ffb000",
colour = "black", alpha = 0.9
) +
theme_minimal() +
theme(legend.position = "none") +
# geom_smooth(method=lm, color="#ffb000", fill="#D3D3D3")+ # Not different than 0
xlab("Temperature (-PC1)") +
ylab("Size (cm)")
grid.arrange(szp1, szp2, nrow = 1)
oz_states <- ozmaps::ozmap_states # load Australian maps
#st_crs(oz_states) <- 4326 # transform, have to make due to package problem
ggplot(oz_states) +
geom_sf() +
coord_sf(x = c(105.5507, 167.9969), y = c(-43.63203, -9.229287)) +
geom_point(
data = LocatAgg, # here the coords data has to be the original format, not spatial format
aes(x = meanLon, y = meanLat), # if want to change size as well, size = PC2
alpha = 0.5, color = "black", size = 2
) +
geom_errorbarh(data = LocatAgg, aes(
xmin = minLon,
xmax = maxLon,
y = meanLat
), alpha = 0.5) +
geom_errorbar(data = LocatAgg, aes(
ymin = minLat,
ymax = maxLat,
x = meanLon
), alpha = 0.5) +
#scale_colour_viridis_c() +
#ggtitle("Total Reflectance") +
theme_minimal() +
theme(axis.title.x = element_blank(), axis.title.y = element_blank())
Special cases (white underlay)
ggplot(oz_states) +
geom_sf() +
coord_sf(x = c(105.5507, 167.9969), y = c(-43.63203, -9.229287)) +
geom_point(
data = LocatAggWU, # here the coords data has to be the original format, not spatial format
aes(x = meanLon, y = meanLat), # if want to change size as well, size = PC2
alpha = 0.9, color = "black", size = 2.8,
fill = "#648FFF", pch = 21
) +
geom_errorbarh(data = LocatAggWU, aes(
xmin = minLon,
xmax = maxLon,
y = meanLat
), alpha = 0.5) +
geom_errorbar(data = LocatAggWU, aes(
ymin = minLat,
ymax = maxLat,
x = meanLon
), alpha = 0.5) +
#scale_colour_viridis_c() +
#ggtitle("Total Reflectance") +
theme_minimal() +
theme(axis.title.x = element_blank(), axis.title.y = element_blank())
Special cases (cluster)
ggplot(oz_states) +
geom_sf() +
coord_sf(x = c(105.5507, 167.9969), y = c(-43.63203, -9.229287)) +
geom_point(
data = LocatAggC, # here the coords data has to be the original format, not spatial format
aes(x = meanLon, y = meanLat), # if want to change size as well, size = PC2
alpha = 0.9, color = "black", size = 2.8,
fill = "#648FFF", pch = 21
) +
geom_errorbarh(data = LocatAggC, aes(
xmin = minLon,
xmax = maxLon,
y = meanLat
), alpha = 0.5) +
geom_errorbar(data = LocatAggC, aes(
ymin = minLat,
ymax = maxLat,
x = meanLon
), alpha = 0.5) +
#scale_colour_viridis_c() +
#ggtitle("Total Reflectance") +
theme_minimal() +
theme(axis.title.x = element_blank(), axis.title.y = element_blank())