1 Univ. Grenoble Alpes, CEA, CNRS, INRAE, IRIG, Laboratoire de Physiologie Cellulaire & Végétale, 38000 Grenoble.
2 Univ. Grenoble-Alpes, CNRS, Jardin du Lautaret, 38000, Grenoble, France.
3 Univ. Grenoble-Alpes, Univ. Savoie Mont Blanc, CNRS, LECA, 38000, Grenoble, France.
@ Correspondence: Eric Maréchal <eric.marechal@cea.fr>, Eric Coissac <eric.coissac@metabarcoding.org>
Go back to the global description of the project
ROBITools
package is used to read result files produced by OBITools.
ROBITaxonomy
package provides function allowing to query OBITools formated taxonomy.
if (!require(ROBITools)) {
# ROBITools are not available on CRAN and have to be installed
# from http://git.metabarcoding.org using devtools
if (!require(devtools)) {
install.packages("devtools")
}
metabarcoding_git <- "https://git.metabarcoding.org/obitools"
devtools::install_git(paste(metabarcoding_git,
"ROBIUtils.git",
sep="/"))
devtools::install_git(paste(metabarcoding_git,
"ROBITaxonomy.git",
sep="/"))
devtools::install_git(paste(metabarcoding_git,
"ROBITools.git",
sep="/"))
library(ROBITools)
}
library(ROBITaxonomy)
tidyverse
(Wickham et al., 2019) provides various method for efficient data manipulation and plotting via ggplot2
(Wickham, 2016)if (!require(tidyverse)) {
install.packages("tidyverse")
library(tidyverse)
}
vegan
is loaded for its decostand
function (Oksanen et al., 2015)if (!require(vegan)) {
install.packages("vegan")
library(vegan)
}
ade4
provides multivariate analysis functions (Thioulouse et al., 2018)if (!require(ade4)) {
install.packages("ade4")
library(ade4)
}
factoextra
is loaded for its fviz_pca_var
function (Kassambara et al., 2017)if (!require(factoextra)) {
install.packages("factoextra")
library(factoextra)
}
matrixStats
is loaded for its weightedMedian
function (Bengtsson, 2021)if (!require(matrixStats)) {
install.packages("matrixStats")
library(matrixStats)
}
sfsmisc
is loaded for its pretty10exp
function (Maechler et al., 2021)if (!require(sfsmisc)) {
install.packages("sfsmisc")
library(sfsmisc)
}
GGally
is loaded for its ggpairs
function (Emerson et al., 2013)if (!require(GGally)) {
install.packages("GGally")
library(GGally)
}
ggrepel
is loaded for its geom_text_repel
function (Slowikowski et al., 2018)if (!require(ggrepel)) {
install.packages("ggrepel")
library(ggrepel)
}
gridExtra
is loaded for its grid.arrange
function (Auguie et al., 2017)if (!require(gridExtra)) {
install.packages("gridExtra")
library(gridExtra)
}
robustreg
is loaded for its robustRegBS
function (Johnson, 2012)if (!require(robustreg)) {
install.packages("robustreg")
library(robustreg)
}
ggpubr
is loaded for its ggarrange
function (Kassambara and Kassambara, 2020)if (!require(ggpubr)) {
install.packages("ggpubr")
library(ggpubr)
}
gg.gap
is loaded for its gg.gap
functionif (!require(gg.gap)) {
install.packages("gg.gap")
library(gg.gap)
}
source("utils._func.R")
site_factor <- function(x) {
levels1 <- names(tapply(
euka03@samples$Elevation,
euka03@samples$Site_short,
median
) %>% sort())
xf <- factor(x, levels = levels1)
xf
}
milieu_factor <- function(x) {
f <- as.character(x) %>%
factor(levels = c(
"Milieu forestier",
"Milieu ouvert"
))
levels(f) <- c("Forest", "Open area")
f
}
horizon_factor <- function(x) {
f <- as.character(x) %>%
factor(levels = c("LIT", "SOL"))
levels(f) <- c("Litter", "Topsoil")
f
}
variable_factor <- function(x) {
all_levels <- unique(as.character(x))
main_levels <- c(
"Elevation", "pH",
"Organic_matter_2mm",
"Carbon", "Nitrogen",
"cn_ratio"
)
f <- factor(x, levels = c(main_levels,
setdiff(all_levels, main_levels)))
levels(f) <- c(
"Elevation", "pH",
"Organic matter",
"Carbon", "Nitrogen",
"C/N Ratio", setdiff(all_levels, main_levels)
)
f
}
theme_paper
defines the ggplot theme for the figurestheme_paper <- function() {
theme_classic(base_size = 9) +
theme(
strip.background = element_rect(linetype = "blank"),
panel.spacing = unit(1, "lines")
)
}
Functions wrapping the ggplot ggsave
function to produce TIFF
and PDF
files according to the Frontiers recommendations
write_figure_1_col
: for the one column figureswrite_figure_2_cols
: for the two columns figureswrite_figure <- function(name,
width = 180,
height = 90,
dpi = 300) {
dir.create("Figures", showWarnings = FALSE)
ggsave(sprintf("Figures/%s.pdf", name),
units = "mm", width = width,
height = height, dpi = dpi
)
ggsave(sprintf("Figures/%s.tiff", name),
units = "mm", width = width,
height = height, dpi = dpi
)
}
write_figure_1_col <- function(name,
height = 90,
dpi = 300) {
write_figure(name, 85, height, dpi)
}
write_figure_2_cols <- function(name,
height = 90,
dpi = 300) {
write_figure(name, 180, height, dpi)
}
vif
estimates the Variance Inflation Factor.Function estimating the Variance Inflation Factor (VIF). The function estimates VIF for every columns of a data.frame
VIF estimates how much a explanatory variable can be expressed as a linear combination of the others. It is commonly admited to remove variables with a $ VIF>5$.
\[ VIF = \frac{1}{1-R^2} \]
with \(R^2\) the adjusted \(R^2\) of the linear regression of one explanatory variable by the others.
vif <- function(data) {
n <- ncol(data)
r2 <- sapply(
1:n,
function(i) {
summary(lm(as.formula(data[, c(i, (1:n)[-i])]),
data = data
))$adj.r.squared
}
)
v <- 1 / (1 - r2)
names(v) <- colnames(data)
sort(v, decreasing = TRUE) %>%
tibble(variable = colnames(data), vif = .)
}
rtest_niche
estimates the specialisation of a niche.The niche is defined following the OMI model implemented in the ade4::niche
function (Dolédec et al., 2000).
rtest_niche
implements a double permutation test on :
The test is implemented following the same permutation procedure used to test marginality in the ade4::rtest
function.
rtest_niche <- function(xtest, nrepet = 99, ...) {
if (!inherits(xtest, "dudi")) {
stop("Object of class dudi expected")
}
if (!inherits(xtest, "niche")) {
stop("Type 'niche' expected")
}
appel <- as.list(xtest$call)
X <- eval.parent(appel$dudiX)$tab
Y <- eval.parent(appel$Y)
w1 <- colSums(Y)
if (any(w1 <= 0)) {
stop(paste("Column sum <=0 in Y"))
}
Y <- sweep(Y, 2, w1, "/")
calcul_niche <- function(freq, mil) {
m <- colSums(freq * mil)
mil <- t(t(mil) - m)
tolt <- sum(freq * mil * mil)
u <- m / sqrt(sum(m^2))
z <- mil %*% u
c(sum(m^2), sum(freq * z * z))
}
obs_niche <- apply(Y, 2, calcul_niche, mil = X)
# obs <- c(obs, Tolm.mean = mean(obs))
sim_niche <- lapply(
1:nrepet,
function(x) {
apply(apply(Y, 2, sample),
2,
calcul_niche,
mil = X
)
}
)
better_mid <- rowSums(sapply(
1:length(sim_niche),
function(i) sim_niche[[i]][1, ] >= obs_niche[1, ]
))
better_tol <- rowSums(sapply(
1:length(sim_niche),
function(i) sim_niche[[i]][2, ] <= obs_niche[2, ]
))
better <- mapply(min, better_mid, better_tol)
(better + 1) / (nrepet + 1)
}
motus_gradient
estimates the distribution range of MOTUs along a gradient.Considering an environmental gradient that function estimates :
The pic of density of each MOTU along the gradient. That value is estimated as the mean of the gradient weighted by the read relative frequency of the considered MOTU in the samples.
The limits of the niche, centred on the pic of density, and estimated from the tolerance parameter (the variance of the gradient weighted by the read relative frequency of the MOTU).
Estimated \(p_value\) of that niche using the rtest_niche
function.
Parameters :
data
: a metabarcoding.data
instance.gradient
: a numeric vector with one value per sample.returns
A data.frame
of ten columns :
motu
: the MOTU namegradient_weight
: the weigthed median of the gradient value for the MOTUrange_low
: the lower limit of the gradient rangerange_high
: the higher limit of the gradient rangepval
: the p-value as returned by the rtest_niche
function.var
: variance of the gradient.sd
: square root of var
.niche_motus_gradient <- function(data, gradient, nrepet = 999) {
relfreq <- decostand(data@reads, method = "total")
motus_normed <- decostand(relfreq, method = "total", MARGIN = 2)
mean_gradient <- colSums(sweep(motus_normed,
MARGIN = 1,
STATS = gradient, FUN = "*"
))
var_gradient <- colSums(outer(
gradient,
mean_gradient,
"-"
)^2 * motus_normed)
data.frame(
motu = colnames(data),
gradient_weight = mean_gradient,
range_low = mean_gradient - 1.96 * sqrt(var_gradient),
range_high = mean_gradient + 1.96 * sqrt(var_gradient),
pval = rtest_niche(niche(dudi.pca(data.frame(gradient),
scannf = FALSE
),
as.data.frame(relfreq),
scannf = FALSE
), nrepet = 999),
var = var_gradient,
sd = sqrt(var_gradient)
)
}
plot_motus_gradient
plots the result of the motus_gradient
functionThat function draws a map of the MOTUs distribution along a gradient. It use the result of the motus_gradient
function as input.
Parameters :
data
: a metabarcoding.data
instance.gradient
: a numeric vector with one value per sample.motus_distribution
:motus_order
:alpha_risk
:jitter
:plot_motus_gradient <- function(
data, gradient,
motus_distribution,
motus_order = motus_distribution$gradient_weight,
alpha_risk = 0.05,
only_h1 = FALSE,
jitter = 0.01) {
jitter <- jitter * sd(gradient)
motus_distribution$raw_pval <- motus_distribution$pval
motus_distribution$pval <- p.adjust(motus_distribution$pval,
method = "fdr"
)
relfreq <- decostand(data@reads, method = "total")
if (only_h1) {
relfreq <- relfreq[, motus_distribution$pval < alpha_risk]
}
urank <- function(data) {
r <- rank(data)
ru <- unique(r)
rur <- rank(ru)
names(rur) <- ru
rep <- rur[as.character(r)]
names(rep) <- NULL
rep
}
motus_distribution <- motus_distribution[order(motus_order), ]
if (only_h1) {
motus_distribution <-
motus_distribution[motus_distribution$pval < alpha_risk, ]
}
motu_labels <- mapply(
function(m, pval, p) {
if (round(pval, 2) <= alpha_risk) {
bquote(atop(
textstyle(bolditalic(.(m))),
textstyle(P[value] == .(p))
))
} else {
bquote(atop(
textstyle(italic(.(m))),
textstyle(P[value] == .(p))
))
}
},
motus_distribution$motu,
motus_distribution$pval,
pretty10exp(motus_distribution$pval, drop.1 = TRUE, digits = 2)
)
cbind(
data.frame(
gradient = gradient,
pcr = rownames(data),
Site = site_factor(data@samples$Site_short)
),
as.data.frame(relfreq)
) %>%
pivot_longer(
data = .,
cols = names(.)[-c(1:3)],
names_to = "motu"
) %>%
group_by(gradient, Site, pcr) %>%
mutate(g = cur_group_id()) %>%
group_by(gradient) %>%
mutate(g = urank(g) * jitter) %>%
left_join(motus_distribution, by = "motu") %>%
mutate(motu = factor(motu,
levels = motus_distribution$motu[motus_distribution$gradient_weight %>%
order()
]
)) %>%
arrange(gradient) %>%
ggplot(aes(
x = gradient - g,
xend = gradient - g,
y = 0,
yend = value * 100,
col = Site
)) +
facet_wrap(vars(motu),
switch = "y",
labeller = function(variable, value) motu_labels[value],
ncol = 2, dir = "v"
) +
geom_rect(aes(
xmin = range_low, xmax = range_high,
ymin = 0, ymax = 100 * as.numeric(pval < alpha_risk),
alpha = 0.03
),
fill = grey(0.8), col = 0, show.legend = FALSE
) +
geom_segment(aes(
x = gradient_weight,
xend = gradient_weight,
y = 30, yend = 100
),
col = rgb(0, 0, 0, 0.2),
arrow = arrow(
length = unit(3, "mm"),
ends = "first", type = "closed"
)
) +
geom_segment() +
geom_point(aes(y = 0.1), cex = 0.3) +
theme_paper() +
theme(
strip.text.y.left = element_text(
angle = 0,
size = 6,
lineheight = 1,
vjust = 0.5
),
axis.text = element_text(size = 6),
panel.spacing = unit(.4, "lines")
) +
scale_y_log10(limits = c(0.1, 100)) +
xlim(
min(motus_distribution$range_low),
max(motus_distribution$range_high)
) +
ylab("Relative read frequencies (%)")
}
Processed data files cand be produced using the code present in the following notebooks :
Euka03
cleaned dataseteuka03_motus <- read.csv2("cleaned_datasets/Euka03.cleaned.motus.csv",
header = TRUE, row.names = 1
)
euka03_samples <-
read.csv2("cleaned_datasets/Euka03.cleaned.samples.csv",
header = TRUE, row.names = 1
) %>%
rename(Elevation = Altitude, Elev_groups = Alt_groups)
euka03_reads <- read.csv2("cleaned_datasets/Euka03.cleaned.reads.csv",
header = TRUE, row.names = 1
) %>%
as.matrix()
euka03 <- metabarcoding.data(
reads = euka03_reads,
samples = euka03_samples,
motus = euka03_motus
)
rm(euka03_motus, euka03_samples, euka03_reads)
Chlo01
cleaned datasetchlo01_motus <- read.csv2("cleaned_datasets/Chlo01.cleaned.motus.csv",
header = TRUE, row.names = 1
)
chlo01_samples <-
read.csv2("cleaned_datasets/Chlo01.cleaned.samples.csv",
header = TRUE, row.names = 1
) %>% rename(Elevation = Altitude, Elev_groups = Alt_groups)
chlo01_reads <- read.csv2("cleaned_datasets/Chlo01.cleaned.reads.csv",
header = TRUE, row.names = 1
) %>% as.matrix()
chlo01 <- metabarcoding.data(
reads = chlo01_reads,
samples = chlo01_samples,
motus = chlo01_motus
)
rm(chlo01_motus, chlo01_samples, chlo01_reads)
Chlo02
cleaned datasetchlo02_motus <- read.csv2("cleaned_datasets/Chlo02.cleaned.motus.csv",
header = TRUE, row.names = 1
)
chlo02_samples <-
read.csv2("cleaned_datasets/Chlo02.cleaned.samples.csv",
header = TRUE, row.names = 1
) %>% rename(Elevation = Altitude, Elev_groups = Alt_groups)
chlo02_reads <- read.csv2("cleaned_datasets/Chlo02.cleaned.reads.csv",
header = TRUE, row.names = 1
) %>% as.matrix()
chlo02 <- metabarcoding.data(
reads = chlo02_reads,
samples = chlo02_samples,
motus = chlo02_motus
)
rm(chlo02_motus, chlo02_samples, chlo02_reads)
The ncbi taxonomy have to be previously formated using the obitaxonomy
command from OBITools.
ncbi <- read.taxonomy("embl-140/ncbi20190930")
We’ll consider a set of 6 environmental variables assessed during the soil sampling.
Nitrogen
: total Nitrogen concentrationCarbon
: total Carbon concentrationOrganic_matter_2mm
: Organic matter larger than 2mmpH
: soil pHElevation
: Elevation above sea levelC/N ratio
: ratio of total Carbon concentration over total Nitrogen concentrationAnd 10 climatic variable retreived from climatic databases.
GDD_1cm.sum.mean
: Annual sum of average daily degrees above zero at 1cm below soil surface, averaged over 1988–2018. (Choler, 2018)GDD_10cm.sum.mean
: Annual sum of average daily degrees above zero at 10cm below soil surface, averaged over 1988–2018.CWD.sum.mean
: Sum of daily climatic water deficit: the sum of the negative daily climatic water balance, over the growing season, averaged over 1988–2018.FDD_1cm.sum.mean
: Annual sum of average daily degrees below zero at 1cm below soil surface, averaged over 1988–2018.FDD_10cm.sum.mean
: Annual sum of average daily degrees below zero at 10cm below soil surface, averaged over 1988–2018.solar.radiation.sum.mean
: Sum of daily solar radiation accumulated over the growing season, averaged over 1988–2018.DSN_T_ISBA.mean
: total snow depth and temperatureTG1.degres.mean
: Mean of the annual temperature at 1cm below soil surfaceTG4.degres.mean
: Mean of the annual temperature at 10cm below soil surfaceDRT.air.mean
: Diurnal temperature range, the difference between the higest and the lowest temperatures that occurs during the same day.These variables were calculated from the SAFRAN-SURFEX/ISBA-Crocus-MEPRA reanalysis (Durand et al., 2009; Vannier and Braud, 2012) For details please consult (Martinez-Almoyna et al., 2020)
chlo01@samples$D_1 <- apply(chlo01@reads, MARGIN = 1, D_q)
load("climatic_only2016.Rdata")
chlo01@samples %>%
mutate(SSHT = Site_short %>%
str_replace("CHA", "CHAM") %>%
str_replace("LOR", "LORI") %>%
str_replace("VCH", "VCHA")) %>%
unite(col = "codeplot", SSHT, Elevation, remove = FALSE) %>%
left_join(climatic_2016, by = "codeplot") -> chlo01@samples
environmental <- chlo01@samples %>%
select(which(sapply(., is.numeric)),
Horizon, Milieu,
Site = Site_short
) %>%
rename(Environment = Milieu) %>%
mutate(
Site = site_factor(Site),
Environment = milieu_factor(Environment),
Horizon = horizon_factor(Horizon)
) %>%
# select_if(is.numeric) %>%
select(
-rep_pcr, -X, -Elev_groups,
-dist_barycenter, -chlorophyta_part, -D_1,
-idplot
) %>%
select(-starts_with("EEA_")) %>%
mutate(cn_ratio = Carbon / Nitrogen)
Search for trivial correlations between numerical and categorical data.
environmental %>%
pivot_longer(
cols = c("Horizon", "Environment", "Site"),
names_to = "Category",
values_to = "Modality"
) %>%
pivot_longer(.,
cols = which(sapply(., is.numeric)),
names_to = "Variable",
values_to = "Value"
) %>%
mutate(Variable = variable_factor(Variable)) %>%
ggplot(aes(x = Modality, y = Value)) +
geom_boxplot() +
facet_grid(Variable ~ Category, scales = "free") +
theme(axis.text.x = element_text(
angle = 20,
hjust = 0.8
))
Keeps an renames only numerical environmental variables
environmental <- environmental %>%
select(`Elevation`,
`pH`,
`Organic matter` = Organic_matter_2mm,
`Carbon`,
`Nitrogen`,
`C/N Ratio` = cn_ratio,
everything()
) %>%
select_if(is.numeric)
The code below produces the supplementary Figure S1
environmental %>% ggpairs(
progress = FALSE,
aes(col = site_factor(chlo01@samples$Site_short)),
legend = 1,
alpha = 0.5,
cex = 0.7,
upper = list(continuous = wrap(ggally_cor, size = 2)),
diag = list(
continuous = wrap("densityDiag", alpha = 0.5),
combo = "box"
),
lower = list(continuous = wrap(ggally_points, size = 1))
) + labs(fill = "Site") +
theme(
axis.text = element_text(size = 6),
panel.spacing = unit(0.7, "lines"),
axis.title.x = element_text(angle = 180,
vjust = 1,
color = "black",
size = 5)
)
write_figure("Figure_S1", height = 700, width = 700)
Chlorophyta DNA represents a small part of the extracted environmental DNA. It can be explained by a low capacity to extract algeae DNA from soil samples, and/or by a low biomass of algeae in soil. It as to be noticed that the used DNA extraction method target extracellular DNA.
Based on the Euka03
marker we can estimate the relative aboundance of fungi, plants and Chlorophyta in soil extracellular DNA.
Produces the Figure 2
.
euka03@samples %>%
select(Site = Site_short, ends_with("_part")) %>%
rownames_to_column(var = "sample") %>%
filter(chlorophyta_part > 0) %>%
pivot_longer(
cols = ends_with("_part"),
names_to = "Taxonomic group", values_to = "fraction"
) %>%
mutate(
`Taxonomic group` = str_replace(
`Taxonomic group`,
"_part", ""
) %>%
str_to_title(),
Site = site_factor(Site)
) %>%
filter(`Taxonomic group` %in% c(
"Fungi",
"Streptophyta",
"Chlorophyta"
)) %>%
mutate(`Taxonomic group` = factor(`Taxonomic group`,
levels = c(
"Chlorophyta",
"Streptophyta",
"Fungi"
)
)) %>%
ggplot(mapping = aes(
x = Site, y = fraction * 100,
col = `Taxonomic group`
)) +
# geom_violin() +
geom_boxplot() +
scale_y_log10() +
xlab("Sampling site") +
ylab("Euka03 Relative read frequency (%)") +
theme_paper() +
theme(
legend.title = element_blank(),
legend.text = element_text(size = 6),
legend.position = "bottom",
legend.box = "horizontal", legend.margin = margin()
) +
guides(col = guide_legend(
title.position = NULL,
title.hjust = 0.5,
nrow = 1
))
write_figure_1_col("Figure_2")
Moreover only 18.7% of the PCR with the Euka03
marker exhibit some Chlorophyta reads.
With the Chlo01
marker, we observed that many sequences was not annotated as Chlorophyta, despite that this marker is supposed to be highly specific to that clade. We can put in relation fraction of Chlorophyta by both the markers Euka03
and Chlo01.
euka03@samples %>%
select(EXTRACTION.CODE, chlorophyta_part) %>%
group_by(EXTRACTION.CODE) %>%
summarise(
euka03_algeae_part = ifelse(mean(chlorophyta_part),
mean(chlorophyta_part[chlorophyta_part > 0]),
0
),
euka03_algeae_positive = sum(chlorophyta_part > 0)
) %>%
inner_join(chlo01@samples %>%
select(EXTRACTION.CODE, chlorophyta_part) %>%
group_by(EXTRACTION.CODE) %>%
summarise(
chlo01_algeae_part = mean(chlorophyta_part[chlorophyta_part > 0])
),
by = "EXTRACTION.CODE"
) %>%
rename(sample = EXTRACTION.CODE) -> algeae_parts
Because of the low amount of Chlorophyta DNA results are noisy. To tackle that problem we use iteratively reweighted least squares linear regression. In log scale the determination coefficient of the relation is \(R^2 = 25.2\%\)
algeae_parts %>%
filter(euka03_algeae_positive > 0) %>%
na.omit() %>%
as.data.frame() %>%
robustRegBS(log10(chlo01_algeae_part) ~ log10(euka03_algeae_part),
data = .,
anova.table = TRUE
) -> chlo01_euka03_algeae_part_lm
##
## Robust Regression with Bisquare Function
## Convergence achieved after: 8 iterations
## source SS df MS F
## model 0.04 1 0.04 10.94
## error 0.13 54
## tot 0.17 55
## rsquared 0.252
## mse 0.002346813
##
## Coefficients:
## estimate std error t value p value
## (Intercept) -0.01119624 0.05663185 -0.20 0.84402
## log10(euka03_algeae_part) 0.06728985 0.02034279 3.31 0.00168
chlo01_euka03_algeae_part_lm %>% summary()
## Length Class Mode
## coefficients 2 -none- numeric
## weights 56 -none- numeric
## mse 1 -none- numeric
The code below produces the Figure 3
algeae_parts %>%
filter(euka03_algeae_positive > 0) %>%
mutate(
weight = chlo01_euka03_algeae_part_lm$weights,
euka03_algeae_part = log10(euka03_algeae_part),
chlo01_algeae_part = log10(chlo01_algeae_part),
part = chlo01_algeae_part < -1
) %>%
ggplot(aes(
x = euka03_algeae_part,
y = chlo01_algeae_part,
col = weight
)) +
geom_point(cex = 0.7) +
geom_abline(
slope = chlo01_euka03_algeae_part_lm$coefficients[2],
intercept = chlo01_euka03_algeae_part_lm$coefficients[1],
lty = 2
) +
facet_grid(part ~ ., scales = "free_y", space = "free") +
scale_y_continuous(
labels = scales::number_format(accuracy = 0.1),
breaks = seq(-1.5, 0, by = 0.1)
) +
ylab(expression(paste(log[10], "(Chlo01 fraction)"))) +
xlab(expression(paste(log[10], "(Euka03 fraction)"))) +
theme_paper() +
theme(
strip.background = element_blank(),
strip.text.y = element_blank()
)
write_figure_1_col("Figure_3", height = 70)
No relationship can be established between Chlorophyta eDNA aboundances in the samples and any environmental variable measured.
We have to keep in mind that we are dealing with very low amount od DNA and therefore the sampling strategy is perhaps not the best. Consequently we are under sampling the Algeae diversity.
According to the same idea, the more abundant a taxon is in the overall experiment, the more ubiquitous it is.
The code below produces Figure 4
bind_rows(
tibble(
banality = ((chlo02@reads %>%
decostand("total") %>%
aggregate(
by = list(chlo02@samples$Site_short),
mean
))[, -1] > 0) %>%
colSums(),
frequency = (chlo02@reads %>%
decostand("total") %>%
aggregate(
by = list(chlo02@samples$Site_short),
mean
))[, -1] %>%
apply(MARGIN = 2, FUN = max),
Marker = "Chlo02"
),
tibble(
banality = ((chlo01@reads %>%
decostand("total") %>%
aggregate(
by = list(chlo01@samples$Site_short),
mean
))[, -1] > 0) %>%
colSums(),
frequency = (chlo01@reads %>%
decostand("total") %>%
aggregate(
by = list(chlo01@samples$Site_short),
mean
))[, -1] %>%
apply(MARGIN = 2, FUN = max),
Marker = "Chlo01"
)
) %>%
ggplot(aes(
x = factor(banality), y = frequency * 100,
col = Marker
)) +
geom_boxplot() +
scale_y_log10() +
xlab("Number of gradient where a MOTUs occurs") +
ylab("Relative MOTU frequencies (%)") +
theme_paper()
write_figure_1_col("Figure_4")
The following code splits the pH, Nitrogen, C/N ratio, and elevation gradiants into seven discret levels.
chlo01@samples %>%
filter(Site_short != "CHA") %>%
mutate(
Site = site_factor(Site_short),
Horizon = horizon_factor(Horizon)
) %>%
select(D_1, pH, Nitrogen,
cn_ratio = Carbon / Nitrogen,
Elevation,
CWD = CWD.sum.mean,
FDD = FDD_1cm.sum.mean,
DRT = DRT.air.mean,
Site, Horizon
) %>%
mutate(
pH_bin = cut(pH, breaks = 7),
Nitrogen_bin = cut(Nitrogen, breaks = 7),
cn_ratio_bin = cut(cn_ratio, breaks = 7),
Elevation_bin = cut(Elevation, breaks = 7),
CWD_bin = cut(CWD, breaks = 7),
FDD_bin = cut(FDD, breaks = 7),
DRT_bin = cut(DRT, breaks = 7),
) %>%
mutate(tmp = str_replace_all(pH_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("pH_low", "pH_high"),
sep = ","
) %>%
mutate(
pH_low = as.numeric(pH_low),
pH_high = as.numeric(pH_high)
) %>%
mutate(tmp = str_replace_all(Nitrogen_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("Nitrogen_low", "Nitrogen_high"),
sep = ","
) %>%
mutate(
Nitrogen_low = as.numeric(Nitrogen_low),
Nitrogen_high = as.numeric(Nitrogen_high)
) %>%
mutate(tmp = str_replace_all(cn_ratio_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("cn_ratio_low", "cn_ratio_high"),
sep = ","
) %>%
mutate(
cn_ratio_low = as.numeric(cn_ratio_low),
cn_ratio_high = as.numeric(cn_ratio_high)
) %>%
mutate(tmp = str_replace_all(Elevation_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("Elevation_low", "Elevation_high"),
sep = ","
) %>%
mutate(
Elevation_low = as.numeric(Elevation_low),
Elevation_high = as.numeric(Elevation_high)
) %>%
mutate(tmp = str_replace_all(CWD_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("CWD_low", "CWD_high"),
sep = ","
) %>%
mutate(
CWD_low = as.numeric(CWD_low),
CWD_high = as.numeric(CWD_high)
) %>%
mutate(tmp = str_replace_all(FDD_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("FDD_low", "FDD_high"),
sep = ","
) %>%
mutate(
FDD_low = as.numeric(FDD_low),
FDD_high = as.numeric(FDD_high)
) %>%
mutate(tmp = str_replace_all(DRT_bin,
pattern = "[()\\[\\]]",
replacement = ""
)) %>%
separate(
col = tmp,
into = c("DRT_low", "DRT_high"),
sep = ","
) %>%
mutate(
DRT_low = as.numeric(DRT_low),
DRT_high = as.numeric(DRT_high)
) -> data_diversity
The effect of these discretized gradients on Chlorophyta alpha diversity is tested using the Kruskall Wallis test.
p.adjust(c(
kruskal.test(D_1 ~ pH_bin, data = data_diversity)$p.value,
kruskal.test(D_1 ~ Elevation_bin, data = data_diversity)$p.value,
kruskal.test(D_1 ~ Nitrogen_bin, data = data_diversity)$p.value,
kruskal.test(D_1 ~ cn_ratio_bin, data = data_diversity)$p.value,
kruskal.test(D_1 ~ CWD_bin, data = data_diversity)$p.value,
kruskal.test(D_1 ~ FDD_bin, data = data_diversity)$p.value,
kruskal.test(D_1 ~ DRT_bin, data = data_diversity)$p.value
),
method = "fdr"
) %>%
pretty10exp(drop.1 = TRUE, digits = 2) %>%
as.list() -> pval
And part of the Chlorophyta alpha diversity variance explained by these gradients is estimated using ANOVA.
c(
lm(D_1 ~ pH_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))(),
lm(D_1 ~ Elevation_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))(),
lm(D_1 ~ Nitrogen_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))(),
lm(D_1 ~ cn_ratio_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))(),
lm(D_1 ~ CWD_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))(),
lm(D_1 ~ FDD_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))(),
lm(D_1 ~ DRT_bin * Site, data = data_diversity) %>%
anova() %>% pull(`Sum Sq`) %>%
(function(x) x[1] / sum(x))()
) %>%
round(2) %>%
as.list() -> R2
names(pval) <- c(
"pval_pH", "pval_Elevation",
"pval_Nitrogen", "pval_cn_ratio",
"pval_CWD", "pval_FDD", "pval_DRT"
)
names(R2) <- c(
"R2_pH", "R2_Elevation",
"R2_Nitrogen", "R2_cn_ratio",
"R2_CWD", "R2_FDD", "R2_DRT"
)
pvalR2 <- c(pval, R2)
The following code produces the Figure 5
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = pH,
col = Site, shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, pH_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(pH_low),
high = mean(pH_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat, aes(
y = m, yend = m,
x = low + delta,
xend = high - delta
), size = 1.5) +
geom_segment(
data = stat,
aes(
x = center, xend = center,
y = se_low, yend = se_high
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)
) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("pH" ~ "(" ~ p[value] == .(pval_pH) ~
" ; " ~ R^2 == .(R2_pH) ~ ")",
where = pvalR2
)) +
theme_paper() +
theme(axis.title = element_text(size = 8)) -> plot_pH
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = Elevation, col = Site,
shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, Elevation_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(Elevation_low),
high = mean(Elevation_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat,
aes(y = m,
yend = m,
x = low + delta,
xend = high - delta), size = 1.5) +
geom_segment(data = stat, aes(
x = center,
xend = center,
y = se_low, yend = se_high
)) +
geom_segment(data = stat, aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)) +
geom_segment(data = stat, aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("Elevation" ~ "(" ~ p[value] == .(pval_Elevation) ~
" ; " ~ R^2 == .(R2_Elevation) ~ ")",
where = pvalR2
)) +
theme_paper() +
theme(
axis.title.x = element_text(size = 8),
axis.title.y = element_blank()
) -> plot_Elevation
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = Nitrogen,
col = Site, shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, Nitrogen_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(Nitrogen_low),
high = mean(Nitrogen_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat, aes(
y = m, yend = m,
x = low + delta,
xend = high - delta
), size = 1.5) +
geom_segment(
data = stat,
aes(
x = center, xend = center,
y = se_low, yend = se_high
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)
) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("Nitrogen" ~ "(" ~
p[value] == .(pval_Nitrogen) ~ " ; "
~ R^2 == .(R2_Nitrogen) ~ ")", where = pvalR2)) +
theme_paper() +
theme(axis.title = element_text(size = 8)) -> plot_Nitrogen
p <- pval["cn_ratio"]
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = cn_ratio,
col = Site, shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, cn_ratio_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(cn_ratio_low),
high = mean(cn_ratio_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat, aes(
y = m, yend = m,
x = low + delta,
xend = high - delta
), size = 1.5) +
geom_segment(
data = stat,
aes(
x = center, xend = center,
y = se_low, yend = se_high
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)
) +
geom_segment(data = stat, aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("C/N ratio" ~ "(" ~ p[value] == .(pval_cn_ratio) ~ " ; " ~
R^2 == .(R2_cn_ratio) ~ ")", where = pvalR2)) +
theme_paper() +
theme(
axis.title.x = element_text(size = 8),
axis.title.y = element_blank()
) -> plot_cn_ratio
p <- pval["CWD"]
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = CWD,
col = Site, shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, CWD_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(CWD_low),
high = mean(CWD_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat, aes(
y = m, yend = m,
x = low + delta,
xend = high - delta
), size = 1.5) +
geom_segment(
data = stat,
aes(
x = center, xend = center,
y = se_low, yend = se_high
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)
) +
geom_segment(data = stat, aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("CWD" ~ "(" ~ p[value] == .(pval_CWD) ~ " ; " ~
R^2 == .(R2_CWD) ~ ")", where = pvalR2)) +
theme_paper() +
theme(
axis.title.x = element_text(size = 8)
) -> plot_CWD
p <- pval["FDD"]
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = FDD,
col = Site, shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, FDD_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(FDD_low),
high = mean(FDD_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat, aes(
y = m, yend = m,
x = low + delta,
xend = high - delta
), size = 1.5) +
geom_segment(
data = stat,
aes(
x = center, xend = center,
y = se_low, yend = se_high
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)
) +
geom_segment(data = stat, aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("FDD" ~ "(" ~ p[value] == .(pval_FDD) ~ " ; " ~
R^2 == .(R2_FDD) ~ ")", where = pvalR2)) +
theme_paper() +
theme(
axis.title.x = element_text(size = 8),
axis.title.y = element_blank()
) -> plot_FDD
p <- pval["DRT"]
ggplot(data = data_diversity) +
geom_point(aes(
y = D_1, x = DRT,
col = Site, shape = Horizon
), cex = 0.8) +
geom_segment(data = group_by(data_diversity, DRT_bin) %>%
summarise(
m = mean(D_1),
sd = sd(D_1) / sqrt(length(D_1)),
low = mean(DRT_low),
high = mean(DRT_high),
ddl = length(D_1) - 1,
se_low = m + qt(0.025, ddl) * sd,
se_high = m + qt(0.975, ddl) * sd,
delta = (high - low) * 0.05,
center = (low + high) / 2
) -> stat, aes(
y = m, yend = m,
x = low + delta,
xend = high - delta
), size = 1.5) +
geom_segment(
data = stat,
aes(
x = center, xend = center,
y = se_low, yend = se_high
)
) +
geom_segment(
data = stat,
aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_low, yend = se_low
)
) +
geom_segment(data = stat, aes(
x = center - 2 * delta,
xend = center + 2 * delta,
y = se_high, yend = se_high
)) +
ylab(bquote("Hill's number" ~ q == 1 ~ "(" ~ phantom()^
{
1
} * D ~ ")")) +
xlab(bquote("DRT" ~ "(" ~ p[value] == .(pval_DRT) ~ " ; " ~
R^2 == .(R2_DRT) ~ ")", where = pvalR2)) +
theme_paper() +
theme(
axis.title.x = element_text(size = 8)
) -> plot_DRT
ggarrange(NULL, plot_pH, NULL, plot_Elevation, NULL,
NULL, plot_Nitrogen, NULL, plot_cn_ratio, NULL,
NULL, plot_CWD, NULL, plot_FDD, NULL,
NULL, plot_DRT, NULL, NULL, NULL,
widths = c(0.15, 1, 0.15, 1, 0.08),
ncol = 5, nrow = 4,
common.legend = TRUE, legend = "bottom",
labels = c(
"(A)", "", "(B)", "", "",
"(C)", "", "(D)", "", "",
"(E)", "", "(F)", "", "",
"(G)", "", "", "", ""
)
)
write_figure_2_cols("figure_5", height = 180)
Detected Chlorophyta belongs four taxonomical classes:
Pedinophyceae
Ulvophyceae
Chlorophyceae
Trebouxiophyceae
The relative abundances and diversities of these classes are estimated.
clades <- taxonatrank(ncbi, chlo01@motus$taxid, "class", name = TRUE)
t(apply(chlo01@reads,
MARGIN = 1,
function(x) H_q(x, q = 1, clades = clades) / H_q(x, q = 1) * 100
)) %>%
as.data.frame() %>%
rownames_to_column(var = "sample") %>%
left_join(chlo01@samples, by = "sample") %>%
pivot_longer(
cols = ends_with("ceae"),
names_to = "Class",
values_to = "Relative entropy"
) %>%
left_join(t(apply(chlo01@reads,
MARGIN = 1,
function(x) tapply(x, clades, sum) / sum(x)
) * 100) %>%
as.data.frame() %>%
rownames_to_column(var = "sample") %>%
pivot_longer(
cols = ends_with("ceae"),
names_to = "Class",
values_to = "Read frequency"
),
by = c("sample", "Class")
) %>%
pivot_longer(
cols = c(
"Relative entropy",
"Read frequency"
),
names_to = "Measure",
values_to = "Value"
) %>%
na.omit() -> relative_diversity
The following code produces Figure 6
relative_diversity %>%
filter(Measure == "Read frequency") %>%
mutate(
Class = factor(Class, levels = c(
"Pedinophyceae", "Ulvophyceae",
"Chlorophyceae", "Trebouxiophyceae"
)),
Site = site_factor(Site_short)
) %>%
ggplot(aes(x = Site, y = Value, col = Class)) +
geom_boxplot(outlier.size = 1) +
scale_y_sqrt() +
xlab("Site") +
ylab("Chlo01 Relative read frequency (%)") +
theme_paper() +
theme(
legend.title = element_blank(),
legend.text = element_text(size = 6),
legend.position = "bottom",
legend.box = "vertical", legend.margin = margin()
) +
guides(col = guide_legend(
title.position = NULL,
title.hjust = 0.5,
nrow = 2
))
write_figure_1_col("Figure_6")
The Chlorophyta are mainly composed of Trebouxiophyceae and Chlorophyceae with a clear superiority for Trebouxiophyceae.
The code below produces Figure 7
sub_figure_labeller <- function(labels) {
subfig <- LETTERS[1:length(labels)]
paste0("(", subfig, ") ", labels)
}
relative_diversity %>%
select(`Measure`, Value, Class,
Site = Site_short,
pH, Elevation,
CWD = CWD.sum.mean, FDD = FDD_1cm.sum.mean
) %>%
mutate(Site = site_factor(Site)) %>%
filter(Class %in% c(
"Chlorophyceae",
"Trebouxiophyceae"
) &
Measure == "Read frequency") %>%
filter(`Value` > 0) %>%
pivot_longer(
cols = c("pH", "Elevation", "CWD", "FDD"),
names_to = "env",
values_to = "Gradient"
) %>%
mutate(env = factor(env,
levels = c("pH", "Elevation", "CWD", "FDD"))) %>%
ggplot() +
geom_point(aes(
y = Value, x = Gradient,
shape = Class, col = Site
)) +
geom_smooth(
mapping = aes(
y = Value, x = Gradient,
linetype = Class
),
method = "lm", formula = y ~ x,
se = FALSE, col = "black",
size = 0.7
) +
scale_linetype_manual(values = c("twodash", "solid")) +
facet_grid(. ~ env,
scales = "free_x",
labeller = as_labeller(sub_figure_labeller)
) +
xlab("Environmental gradient") +
ylab("Chlo01 Relative read frequency (%)") +
theme_paper() +
theme(strip.text = element_text(size = 12))
write_figure_2_cols("Figure_7")
relative_diversity %>%
mutate(cn_ratio = Carbon / Nitrogen) %>%
select(sample, Measure, Value,
Class,
Site = Site_short, pH, Elevation, cn_ratio,
CWD = CWD.sum.mean, FDD = FDD_1cm.sum.mean
) %>%
filter(Class == "Trebouxiophyceae") %>%
pivot_wider(
names_from = "Measure",
values_from = "Value"
) %>%
filter(`Read frequency` > 0) %>%
select(-sample) %>%
na.omit() -> data
lm(`Relative entropy` ~ pH + Elevation +
FDD + CWD + Site, data = data) -> ll
anova(ll)
r2_partial <- anova(ll)$`Sum Sq` / sum(anova(ll)$`Sum Sq`)
names(r2_partial) <- rownames(anova(ll))
r2_partial
## pH Elevation FDD CWD Site Residuals
## 0.22015741 0.02563475 0.02423909 0.03074945 0.04282688 0.65639242
A redoundancy analysis (RDA) is done to force alignement of Community in the space of selected variables
Read count are transformed according to Hellinger method (square roots of the relative frequencies)
chlo01_communities <- decostand(chlo01@reads, method = "hellinger")
Environmental variables are centred and scaled.
scaled_env <- selected_env %>%
scale() %>%
as.data.frame()
chlo01_rda <- rda(chlo01_communities ~ . +
Condition(chlo01@samples$Site_short),
data = scaled_env,
scale = FALSE
)
A step forward/backward procedure is then run to select the best model.
ordistep(rda(chlo01_communities ~ 1 +
Condition(chlo01@samples$Site_short),
data = scaled_env,
scale = FALSE
),
scope = formula(chlo01_rda),
permutations = 999, trace = TRUE
) -> chlo01_rda_selected
##
## Start: chlo01_communities ~ 1 + Condition(chlo01@samples$Site_short)
##
## Df AIC F Pr(>F)
## + FDD 1 -140.45 10.4306 0.001 ***
## + `C/N Ratio` 1 -139.83 9.8047 0.001 ***
## + Elevation 1 -138.66 8.6221 0.001 ***
## + CWD 1 -138.15 8.1069 0.001 ***
## + pH 1 -136.94 6.8891 0.001 ***
## + Nitrogen 1 -135.24 5.1871 0.001 ***
## + DRT 1 -134.97 4.9244 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD
##
## Df AIC F Pr(>F)
## - FDD 1 -131.99 10.431 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + CWD 1 -147.16 8.6343 0.001 ***
## + Nitrogen 1 -144.19 5.6606 0.001 ***
## + `C/N Ratio` 1 -143.88 5.3547 0.001 ***
## + pH 1 -142.94 4.4185 0.001 ***
## + DRT 1 -142.44 3.9305 0.001 ***
## + Elevation 1 -141.75 3.2494 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD + CWD
##
## Df AIC F Pr(>F)
## - CWD 1 -140.45 8.6343 0.001 ***
## - FDD 1 -138.15 10.9547 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + Elevation 1 -158.91 13.7050 0.001 ***
## + DRT 1 -150.99 5.7368 0.001 ***
## + `C/N Ratio` 1 -150.62 5.3668 0.001 ***
## + Nitrogen 1 -150.32 5.0774 0.001 ***
## + pH 1 -149.95 4.7111 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD + CWD + Elevation
##
## Df AIC F Pr(>F)
## - FDD 1 -156.86 3.979 0.001 ***
## - Elevation 1 -147.16 13.705 0.001 ***
## - CWD 1 -141.75 19.250 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + Nitrogen 1 -162.85 5.8190 0.001 ***
## + pH 1 -162.80 5.7746 0.001 ***
## + `C/N Ratio` 1 -162.21 5.1928 0.001 ***
## + DRT 1 -159.02 2.0569 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD + CWD + Elevation + Nitrogen
##
## Df AIC F Pr(>F)
## - FDD 1 -160.91 3.8503 0.001 ***
## - Nitrogen 1 -158.91 5.8190 0.001 ***
## - Elevation 1 -150.32 14.4395 0.001 ***
## - CWD 1 -146.38 18.4802 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + pH 1 -165.80 4.8340 0.001 ***
## + `C/N Ratio` 1 -165.06 4.1060 0.001 ***
## + DRT 1 -162.90 1.9933 0.003 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD + CWD + Elevation + Nitrogen + pH
##
## Df AIC F Pr(>F)
## - FDD 1 -164.22 3.4872 0.001 ***
## - pH 1 -162.85 4.8340 0.001 ***
## - Nitrogen 1 -162.80 4.8781 0.001 ***
## - Elevation 1 -152.61 15.0668 0.001 ***
## - CWD 1 -149.16 18.5884 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + `C/N Ratio` 1 -167.50 3.5981 0.001 ***
## + DRT 1 -165.99 2.1236 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD + CWD + Elevation + Nitrogen + pH + `C/N Ratio`
##
## Df AIC F Pr(>F)
## - FDD 1 -165.95 3.4491 0.001 ***
## - `C/N Ratio` 1 -165.80 3.5981 0.001 ***
## - pH 1 -165.06 4.3226 0.001 ***
## - Nitrogen 1 -164.83 4.5475 0.001 ***
## - Elevation 1 -155.43 13.8967 0.001 ***
## - CWD 1 -151.50 17.8811 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Df AIC F Pr(>F)
## + DRT 1 -167.71 2.129 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: chlo01_communities ~ Condition(chlo01@samples$Site_short) + FDD + CWD + Elevation + Nitrogen + pH + `C/N Ratio` + DRT
##
## Df AIC F Pr(>F)
## - DRT 1 -167.50 2.1290 0.001 ***
## - FDD 1 -167.28 2.3442 0.001 ***
## - `C/N Ratio` 1 -165.99 3.5987 0.001 ***
## - pH 1 -165.12 4.4432 0.001 ***
## - Nitrogen 1 -165.10 4.4624 0.001 ***
## - CWD 1 -158.93 10.5534 0.001 ***
## - Elevation 1 -158.49 10.9859 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Every considered variables are significantly used in the model and therefore the complete model is selected.
The biplot of the selected model constitutes the Figure 8
.
r2_rda <- chlo01_rda_selected$CCA$eig / sum(chlo01_rda_selected$CCA$eig)
gscale <- sqrt(attributes(summary(chlo01_rda_selected))$const) / 2
summary(chlo01_rda_selected)$site %>%
as.data.frame() %>%
rownames_to_column(var = "sample") %>%
left_join(chlo01@samples, by = "sample") %>%
mutate(
Environment = milieu_factor(Environment),
Site = site_factor(Site_short)
) %>%
ggplot() +
geom_point(aes(
x = RDA1, y = RDA2,
col = Site, pch = Environment
), cex=0.6) +
geom_segment(
data = summary(chlo01_rda_selected)$biplot %>%
as.data.frame() %>%
add_rownames(var = "variable"),
mapping = aes(x = 0, xend = RDA1 *
gscale, y = 0, yend = RDA2 * gscale),
col = "black",
arrow = arrow(length = unit(0.10, "cm"), type = "closed")
) +
geom_text_repel(
data = summary(chlo01_rda_selected)$biplot %>%
as.data.frame() %>%
add_rownames(var = "variable") %>%
mutate(variable = str_replace_all(variable, "`", "")),
mapping = aes(
x = RDA1 * gscale,
y = RDA2 * gscale,
label = variable
),
hjust = 1, vjust = 0,
nudge_x = 0.1, nudge_y = 0.05,
cex = 2,
segment.size = 0,
size = 4
) +
xlab(sprintf("%s (%2.1f%%)",
names(r2_rda)[1], r2_rda[1] * 100)) +
ylab(sprintf("%s (%2.1f%%)",
names(r2_rda)[2], r2_rda[2] * 100)) +
theme_paper()
write_figure_1_col("Figure_8", height = 90)
Variance partitionning of the community changes by the considered environmental variables is estimated.
The global effect is :
chlo01_rda <- rda(chlo01_communities ~ . +
Condition(chlo01@samples$Site_short),
data = scaled_env,
scale = FALSE
)
chlo01_rda
## Call: rda(formula = chlo01_communities ~ Elevation + pH + Nitrogen +
## `C/N Ratio` + CWD + FDD + DRT + Condition(chlo01@samples$Site_short),
## data = scaled_env, scale = FALSE)
##
## Inertia Proportion Rank
## Total 0.69723 1.00000
## Conditional 0.05269 0.07557 4
## Constrained 0.09247 0.13263 7
## Unconstrained 0.55206 0.79180 300
## Inertia is variance
##
## Eigenvalues for constrained axes:
## RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7
## 0.05039 0.01648 0.00884 0.00611 0.00454 0.00341 0.00270
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## 0.04359 0.03098 0.02834 0.02401 0.02255 0.02063 0.01925 0.01670
## (Showing 8 of 300 unconstrained eigenvalues)
RsquareAdj(chlo01_rda)
## $r.squared
## [1] 0.1326277
##
## $adj.r.squared
## [1] 0.1161423
Pure effects of each variable are estimated declaring other variables as condition to remove their effects
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_pH <- rda(chlo01_communities ~ pH +
Condition(Site + Elevation + Nitrogen +
`C/N Ratio` + CWD + FDD + DRT),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_pH)
## $r.squared
## [1] 0.01138547
##
## $adj.r.squared
## [1] 0.00910763
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_Elev <- rda(chlo01_communities ~ Elevation +
Condition(Site + pH + Nitrogen + `C/N Ratio` +
CWD + FDD + DRT),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_Elev)
## $r.squared
## [1] 0.02815087
##
## $adj.r.squared
## [1] 0.02641385
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_N <- rda(chlo01_communities ~ Nitrogen +
Condition(Site + pH + Elevation + `C/N Ratio` +
CWD + FDD + DRT),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_N)
## $r.squared
## [1] 0.0114348
##
## $adj.r.squared
## [1] 0.009158555
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_CN <- rda(chlo01_communities ~ `C/N Ratio` +
Condition(Site + pH + Elevation + Nitrogen +
CWD + FDD + DRT),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_CN)
## $r.squared
## [1] 0.009221461
##
## $adj.r.squared
## [1] 0.006873813
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_cwd <- rda(chlo01_communities ~ CWD +
Condition(Site + pH + Elevation + Nitrogen +
`C/N Ratio` + FDD + DRT),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_cwd)
## $r.squared
## [1] 0.02704272
##
## $adj.r.squared
## [1] 0.02526995
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_fdd <- rda(chlo01_communities ~ FDD +
Condition(Site + pH + Elevation + Nitrogen +
CWD + `C/N Ratio` + DRT),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_fdd)
## $r.squared
## [1] 0.0060069
##
## $adj.r.squared
## [1] 0.003555556
cov <- cbind(
Site = chlo01@samples$Site_short,
as.data.frame(scaled_env)
)
chlo01_rda_drt <- rda(chlo01_communities ~ DRT +
Condition(Site + pH + Elevation + Nitrogen +
CWD + FDD + `C/N Ratio`),
data = cov,
scale = FALSE
)
RsquareAdj(chlo01_rda_drt)
## $r.squared
## [1] 0.005455353
##
## $adj.r.squared
## [1] 0.002986217
The heterogeneous distribution of MOTUs is investigated only for MOTUs that have been identified taxonomically to the species or genus level.
Seven environmental gradients are studied:
A subset of the MOTUs annotated at the species level is extracted from the global data set and aggregated to merge every MOTUs annotated by the same species.
chlo01_species <- chlo01[, !is.na(chlo01@motus$species)]
chlo01_species <- aggregate(chlo01_species,
MARGIN = "motus",
by = list(chlo01_species@motus$species_name),
FUN = sum
)
A subset of the MOTUs annotated at the genus level is extracted from the global data set and aggregated to merge every MOTUs annotated by the same genus.
chlo01_genus <- chlo01[, !is.na(chlo01@motus$genus)]
chlo01_genus <- aggregate(chlo01_genus,
MARGIN = "motus",
by = list(chlo01_genus@motus$genus_name),
FUN = sum
)
Distribution ranges are estimated using the motus_gradient
defined above.
species_elev <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$Elevation
)
The code below produces the supplentary Figure S3
plot_motus_gradient(
chlo01_species,
chlo01_species@samples$Elevation,
species_elev
) + xlab("Elevation of the sample")
write_figure_2_cols("Figure_S3", height = 270)
genus_elev <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$Elevation
)
The code below produces the Figure 9
plot_motus_gradient(
chlo01_genus,
chlo01_genus@samples$Elevation,
genus_elev
) +
xlab("Elevation of the sample")
write_figure_2_cols("Figure_9", height = 270)
species_ph <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$pH
)
The code below produces the supplentary Figure S4
plot_motus_gradient(chlo01_species,
chlo01_species@samples$pH,
species_ph,
jitter = 0.01
) +
xlab("Soil pH")
write_figure_2_cols("Figure_S4", height = 270)
genus_ph <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$pH
)
The code below produces the supplentary Figure S5
plot_motus_gradient(chlo01_genus,
chlo01_genus@samples$pH,
genus_ph,
jitter = 0.01
) +
xlab("Soil pH")
write_figure_2_cols("Figure_S5", height = 270)
species_Nitrogen <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$Nitrogen
)
The code below produces the supplentary Figure S6
plot_motus_gradient(chlo01_species,
chlo01_species@samples$Nitrogen,
species_Nitrogen,
jitter = 0.01
) +
xlab("Soil Nitrogen")
write_figure_2_cols("Figure_S6", height = 270)
genus_Nitrogen <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$Nitrogen
)
The code below produces the supplentary Figure S7
plot_motus_gradient(chlo01_genus,
chlo01_genus@samples$Nitrogen,
genus_Nitrogen,
jitter = 0.01
) +
xlab("Soil Nitrogen")
write_figure_2_cols("Figure_S7", height = 270)
species_cn_ratio <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$Carbon / chlo01_species@samples$Nitrogen
)
The code below produces the supplentary Figure S8
plot_motus_gradient(chlo01_species,
chlo01_species@samples$Carbon / chlo01_species@samples$Nitrogen,
species_cn_ratio,
jitter = 0.01
) +
xlab("Soil C/N ratio")
write_figure_2_cols("Figure_S8", height = 270)
genus_cn_ratio <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$Carbon / chlo01_genus@samples$Nitrogen
)
The code below produces the supplentary Figure S9
plot_motus_gradient(chlo01_genus,
chlo01_genus@samples$Carbon / chlo01_genus@samples$Nitrogen,
genus_cn_ratio,
jitter = 0.01
) +
xlab("Soil C/N ratio")
write_figure_2_cols("Figure_S9", height = 270)
Distribution ranges are estimated using the motus_gradient
defined above.
species_cwd <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$CWD.sum.mean
)
The code below produces the supplentary Figure S3
plot_motus_gradient(
chlo01_species,
chlo01_species@samples$CWD.sum.mean,
species_cwd
) + xlab("CWD of the sample")
write_figure_2_cols("Figure_S10", height = 270)
genus_cwd <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$CWD.sum.mean
)
The code below produces the Figure 9
plot_motus_gradient(
chlo01_genus,
chlo01_genus@samples$CWD.sum.mean,
genus_cwd
) +
xlab("CWD of the sample")
write_figure_2_cols("Figure_S11", height = 270)
Distribution ranges are estimated using the motus_gradient
defined above.
species_fdd <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$FDD_1cm.sum.mean
)
The code below produces the supplentary Figure S3
plot_motus_gradient(
chlo01_species,
chlo01_species@samples$FDD_1cm.sum.mean,
species_fdd
) + xlab("FDD of the sample")
write_figure_2_cols("Figure_S12", height = 270)
genus_fdd <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$FDD_1cm.sum.mean
)
The code below produces the Figure 9
plot_motus_gradient(
chlo01_genus,
chlo01_genus@samples$FDD_1cm.sum.mean,
genus_fdd
) +
xlab("FDD of the sample")
write_figure_2_cols("Figure_S13", height = 270)
Distribution ranges are estimated using the motus_gradient
defined above.
species_drt <- niche_motus_gradient(
chlo01_species,
chlo01_species@samples$DRT.air.mean
)
The code below produces the supplentary Figure S3
plot_motus_gradient(
chlo01_species,
chlo01_species@samples$DRT.air.mean,
species_drt
) + xlab("DRT of the sample")
write_figure_2_cols("Figure_S14", height = 270)
genus_drt <- niche_motus_gradient(
chlo01_genus,
chlo01_genus@samples$DRT.air.mean
)
The code below produces the Figure 9
plot_motus_gradient(
chlo01_genus,
chlo01_genus@samples$DRT.air.mean,
genus_drt
) +
xlab("DRT of the sample")
write_figure_2_cols("Figure_S15", height = 270)
A delimitation of the niche for the species and genus levels MOTUs, considering the four environmental variables, is computed.
env_var <- tibble(
Elevation = chlo01_genus@samples$Elevation,
pH = chlo01_genus@samples$pH,
Nitrogen = chlo01_genus@samples$Nitrogen,
`CN Ratio` = chlo01_genus@samples$Carbon /
chlo01_genus@samples$Nitrogen,
CWD = chlo01_genus@samples$CWD.sum.mean,
FDD = chlo01_genus@samples$FDD_1cm.sum.mean,
DRT = chlo01_genus@samples$DRT.air.mean
)
env_var_pca <- dudi.pca(env_var, scannf = FALSE)
niche_species <- niche(env_var_pca,
as.data.frame(decostand(chlo01_species@reads, method = "total")),
scannf = FALSE
)
niche_genus <- niche(env_var_pca,
as.data.frame(decostand(chlo01_genus@reads, method = "total")),
scannf = FALSE
)
niche_species_tests <- rtest_niche(niche_species, nrepet = 999)
niche_genus_tests <- rtest_niche(niche_genus, nrepet = 999)
Specialized MOTUs, are selected based on the rtest_niche
function.
niche_barycenter_species <- sweep(sweep(niche_species$tab,
MARGIN = 2,
STATS = apply(env_var, 2, sd), "*"
),
MARGIN = 2,
STATS = apply(env_var, 2, mean), "+"
) %>%
mutate(
name = names(niche_species_tests)[1:nrow(niche_species$tab)],
class = chlo01_species@motus$class,
pval = niche_species_tests,
`taxonomic rank` = "species"
) %>%
filter(p.adjust(pval, method = "fdr") < 0.05)
niche_barycenter_genus <- sweep(sweep(niche_genus$tab,
MARGIN = 2,
STATS = apply(env_var, 2, sd), "*"
),
MARGIN = 2,
STATS = apply(env_var, 2, mean), "+"
) %>%
mutate(
name = names(niche_genus_tests)[1:nrow(niche_genus$tab)],
class = chlo01_genus@motus$class,
pval = niche_genus_tests,
`taxonomic rank` = "genus"
) %>%
filter(p.adjust(pval, method = "fdr") < 0.05)
niche_barycenter_species %>%
bind_rows(niche_barycenter_genus) -> niche_barycenter
niche_barycenter
A principal correspondance analysis on the niche center is realized to compare them.
niche_barycenter %>%
select(Elevation, pH, Nitrogen, `CN Ratio`, CWD, FDD, DRT) %>%
dudi.pca(scannf = FALSE) -> niches_pca
The code below produces the Figure 10
niches_pca %>%
fviz_pca_biplot(
repel = TRUE, labelsize = 2,
pointsize = 0.5, title = "",
col.ind = niche_barycenter$class,
col.var = "black",
addEllipses = FALSE,
) +
theme(
axis.title = element_text(size = 7),
axis.text = element_text(size = 7),
legend.position = "bottom",
legend.text = element_text(size = 8),
legend.title = element_blank()
) + guides(col = guide_legend(
title.position = NULL,
title.hjust = 0.5,
nrow = 2
))
write_figure_1_col("Figure_10", height = 110)
We endly test the non-homogeneous representation of Chlorophyceae and Trebouxiophyceae along of the first axis.
wilcox.test(niches_pca$li[niche_barycenter$class %in%
c("Chlorophyceae", "Trebouxiophyceae"), 1] ~
(niche_barycenter$class[niche_barycenter$clas %in%
c(
"Chlorophyceae",
"Trebouxiophyceae"
)]))
##
## Wilcoxon rank sum exact test
##
## data: niches_pca$li[niche_barycenter$class %in% c("Chlorophyceae", "Trebouxiophyceae"), 1] by niche_barycenter$class[niche_barycenter$clas %in% c("Chlorophyceae", "Trebouxiophyceae")]
## W = 26, p-value = 2.533e-06
## alternative hypothesis: true location shift is not equal to 0