Code
library(targets)
library(withr)
library(here)
library(tidyverse)
library(performance)
library(broom.mixed)
library(gt)
library(lme4)
library(MuMIn)
library(ManyEcoEvo)
library(ggrepel)
set.seed(1234)
library(targets)
library(withr)
library(here)
library(tidyverse)
library(performance)
library(broom.mixed)
library(gt)
library(lme4)
library(MuMIn)
library(ManyEcoEvo)
library(ggrepel)
set.seed(1234)
<-
ManyEcoEvo_results %>%
ManyEcoEvo_results mutate(effects_analysis =
map(effects_analysis,
rename, id_col = study_id)) #%>%
# mutate_at(c("data",
# "diversity_data",
# "diversity_indices",
# "effects_analysis"),
# .funs = ~ map(.x, anonymise_teams,
# TeamIdentifier_lookup))
The figures below (Figure B.1,Figure B.2) shows the fixed effect of categorical review rating on deviation from the meta-analytic mean. There is very little difference in deviation for analyses in any of the review categories. It is worth noting that each analysis features multiple times in these figures corresponding to the multiple reviewers that provided ratings.
In Figure B.3 we display the results of our post-hoc analysis, examining the effect of removing analyses that were reviewed at least once as being ‘unpublishable’, ‘unpublishable’ or requiring ‘major revisions’, as compared with retaining the full set of analyses. Removing these analyses from the blue tit data had little impact on the overall amount of deviation or the distribution of the results. For the Eucalytpus analyses, removing ‘unpublishable’ analyses meant dropping the outlier ‘Brooklyn-2-2-1’ which made a substantial difference to the amount of observerd deviation from the meta-analytic mean.
For each dataset (blue tit, Eucalyptus), we created a second, more conservative version, that excluded effects based on estimates of \(df\) that we considered less reliable (Table B.1). We compared the outcomes of analyses of the primary dataset (constituted according to our registered plan) with the outcomes of analyses of the more conservative version of the dataset. We also compared results from analyses of both of these versions of the dataset to versions with our post-hoc removal of outliers described in the main text. Our more conservative exclusions (based on unreliable estimates of \(df\)) had minimal impact on the meta-analytic mean for both blue tit and Eucalyptus analyses, regardless of whether outliers were excluded (Table B.1).
%>%
ManyEcoEvo_viz ::filter(estimate_type == "Zr",
dplyr== "MA_mod") %>%
model_name hoist(tidy_mod_summary) %>%
unnest(tidy_mod_summary) %>%
filter(publishable_subset == "All", expertise_subset == "All") %>%
select(-publishable_subset, -expertise_subset) %>%
select(dataset,
exclusion_set,
estimate,
std.error,
statistic,
p.value, starts_with("conf")) %>%
mutate(exclusion_set =
case_when(exclusion_set == "complete" ~
"Primary dataset",
== "complete-rm_outliers" ~
exclusion_set "Primary dataset, outliers removed",
== "partial" ~
exclusion_set "Conservative exclusions",
TRUE ~ "Conservative exclusions, outliers removed")) %>%
group_by(exclusion_set) %>%
::gt() %>%
gt::opt_stylize(style = 6, color = "gray") %>%
gt::fmt(columns = "p.value",
gtfns = function(x) gtsummary::style_pvalue(x, prepend_p = FALSE)) %>%
::fmt_number(columns = c(-p.value, -dataset)) %>%
gt::cols_label(estimate = gt::md("$$\\hat\\mu$$"),
gtstd.error = gt::md("$$SE[\\hat\\mu]$$"),
conf.low = gt::md("95\\%CI")) %>%
::cols_merge(columns = starts_with("conf"),
gtpattern = "[{1},{2}]") %>%
::cols_move(columns = conf.low, after = std.error) gt
dataset | $$\hat\mu$$ | $$SE[\hat\mu]$$ | 95%CI | statistic | p.value |
---|---|---|---|---|---|
Primary dataset | |||||
blue tit | −0.35 | 0.03 | [−0.41,−0.28] | −10.49 | <0.001 |
eucalyptus | −0.09 | 0.06 | [−0.22,0.03] | −1.47 | 0.14 |
Conservative exclusions | |||||
blue tit | −0.36 | 0.03 | [−0.42,−0.29] | −10.50 | <0.001 |
eucalyptus | −0.11 | 0.07 | [−0.24,0.03] | −1.55 | 0.12 |
Primary dataset, outliers removed | |||||
blue tit | −0.35 | 0.03 | [−0.42,−0.29] | −10.95 | <0.001 |
eucalyptus | −0.03 | 0.01 | [−0.06,0.00] | −2.23 | 0.026 |
Conservative exclusions, outliers removed | |||||
blue tit | −0.36 | 0.03 | [−0.43,−0.30] | −11.09 | <0.001 |
eucalyptus | −0.04 | 0.02 | [−0.07,−0.01] | −2.52 | 0.012 |
<- function(data, intercept = TRUE, MA_mean = TRUE ){
plot_forest if(MA_mean == FALSE){
<- filter(data, term != "Overall")
data
}
<- ggplot(data, aes(y = term,
p x = estimate,
ymin = conf.low,
ymax = conf.high,
# shape = point_shape,
colour = parameter_type)) +
geom_pointrange() +
::theme_forest() +
ggforestplottheme(axis.line = element_line(size = 0.10, colour = "black"),
axis.line.y = element_blank(),
text = element_text(family = "Helvetica"),
axis.text.y = element_blank()) +
guides(shape = "none", colour = "none") +
coord_flip() +
labs(y = "Standardised Effect Size, Zr",
x = element_blank()) +
scale_x_continuous(breaks = c(-4,-3,-2,-1,0,1),
minor_breaks = seq(from = -4.5, to = 1.5, by = 0.5)) +
::scale_color_natparks_d("Glacier")
NatParksPalettes
if(intercept == TRUE){
<- p + geom_hline(yintercept = 0)
p
}if(MA_mean == TRUE){
<- p + geom_hline(aes(yintercept = meta_analytic_mean),
p data = data,
colour = "#01353D",
linetype = "dashed")
}
return(p)
}
<-
complete_euc_data %>%
ManyEcoEvo_viz filter(exclusion_set == "complete",
== "Zr",
estimate_type == "MA_mod",
model_name == "eucalyptus",
dataset == "All") %>%
publishable_subset select(model) %>%
mutate(plot_data = map(model,
.f = ~ broom::tidy(.x,
conf.int = TRUE,
include_studies = TRUE) %>%
::mutate(point_shape =
dplyrifelse(stringr::str_detect(term, "overall"),
"diamond",
"circle"),
Parameter =
::fct_reorder(term,
forcats%>%
estimate) ::fct_reorder(.,
forcats
point_shape,.desc = TRUE))
),meta_analytic_mean = map_dbl(plot_data,
~ filter(.x, Parameter == "overall") %>%
pull(estimate))) %>%
select(plot_data, meta_analytic_mean) %>%
unnest(cols = c("plot_data")) %>%
mutate(parameter_type = case_when(str_detect(Parameter, "overall") ~ "mean",
TRUE ~ "study"))
# complete_euc_data <-
# complete_euc_data %>%
# rename(id_col = term) %>%
# group_by(type) %>%
# group_split() %>%
# set_names(., complete_euc_data$type %>% unique) %>%
# # map_if(.x = ., names(.) == "study",
# # .f = ~ anonymise_teams(.x, TeamIdentifier_lookup)) %>%
# bind_rows() %>%
# rename(term = id_col)
<- complete_euc_data %>%
min_outlier_euc filter(type == "study") %>%
slice_min(estimate, n = 3) %>%
pull(term)
<- ManyEcoEvo_results %>%
sample_size_euc_Zr filter(exclusion_set == "complete", dataset == "eucalyptus") %>%
pluck("data", 1) %>%
select(id_col, sample_size) %>%
rename(term = id_col) %>%
mutate(sample_size = as.numeric(sample_size))
<- sample_size_euc_Zr %>%
mean_n_euc_Zr drop_na(sample_size) %>%
pull(sample_size) %>%
mean() %>%
round(2)
<- sample_size_euc_Zr %>%
N_outliers_Zr_euc filter(term %in% min_outlier_euc) %>%
arrange(desc(sample_size))
<- function(data, intercept = TRUE, MA_mean = TRUE){
plot_forest if (MA_mean == FALSE){
<- filter(data, Parameter != "overall")
data
}
<- ggplot(data, aes(y = estimate,
p x = term,
ymin = conf.low,
ymax = conf.high,
shape = parameter_type,
colour = parameter_type)) +
geom_pointrange(fatten = 2) +
::theme_forest() +
ggforestplottheme(axis.line = element_line(linewidth = 0.10, colour = "black"),
axis.line.y = element_blank(),
text = element_text(family = "Helvetica")#,
# axis.text.y = element_blank()
+
) guides(shape = guide_legend(title = NULL),
colour = guide_legend(title = NULL)) +
coord_flip() +
ylab(bquote(Standardised~Effect~Size~Z[r])) +
xlab(element_blank()) +
# scale_y_continuous(breaks = c(-4,-3,-2,-1,0,1),
# minor_breaks = seq(from = -4.5, to = 1.5, by = 0.5)) +
::scale_color_natparks_d("Glacier")
NatParksPalettes
if(intercept == TRUE){
<- p + geom_hline(yintercept = 0)
p
}if(MA_mean == TRUE){
<- p + geom_hline(aes(yintercept = meta_analytic_mean),
p data = data,
colour = "#01353D",
linetype = "dashed")
}
return(p)
}
<-
bt_experts_only %>%
ManyEcoEvo_viz filter(exclusion_set == "complete",
== "Zr",
estimate_type == "MA_mod",
model_name == "blue tit",
dataset == "All",
publishable_subset == "expert") %>%
expertise_subset select(model) %>%
mutate(plot_data = map(model,
.f = ~ broom::tidy(.x,
conf.int = TRUE,
include_studies = TRUE)%>%
::mutate(point_shape =
dplyrifelse(stringr::str_detect(term, "overall"),
"diamond",
"circle"),
Parameter =
::fct_reorder(term,
forcats%>%
estimate) ::fct_reorder(.,
forcats
point_shape,.desc = TRUE))
),meta_analytic_mean = map_dbl(plot_data,
~ filter(.x, Parameter == "overall") %>%
pull(estimate))) %>%
select(plot_data, meta_analytic_mean) %>%
unnest(cols = c("plot_data")) %>%
mutate(parameter_type = case_when(str_detect(Parameter, "overall") ~ "mean",
TRUE ~ "study"))
# bt_experts_only <-
# bt_experts_only %>%
# rename(id_col = term) %>%
# group_by(type) %>%
# group_split() %>%
# set_names(., bt_experts_only$type %>% unique) %>%
# # map_if(.x = ., names(.) == "study",
# # .f = ~ anonymise_teams(.x, TeamIdentifier_lookup)) %>%
# bind_rows() %>%
# rename(term = id_col)
<- bt_experts_only %>%
bt_forest_experts arrange(desc(type)) %>%
mutate(type = forcats::as_factor(type)) %>%
group_by(type) %>%
arrange(desc(estimate),.by_group = TRUE) %>%
mutate(term = forcats::as_factor(term),
point_shape = case_when(str_detect(type, "summary") ~ "mean",
TRUE ~ "study")) %>%
plot_forest(intercept = TRUE, MA_mean = TRUE) +
theme(axis.text.x = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.text.y = element_blank()
+
) scale_y_continuous(limits = c(-1.6, 0.65))
<-
euc_experts_only %>%
ManyEcoEvo_viz filter(exclusion_set == "complete",
== "Zr",
estimate_type == "MA_mod",
model_name == "eucalyptus",
dataset == "All",
publishable_subset == "expert") %>%
expertise_subset select(model) %>%
mutate(plot_data = map(model,
.f = ~ broom::tidy(.x,
conf.int = TRUE,
include_studies = TRUE) %>%
::mutate(point_shape =
dplyrifelse(stringr::str_detect(term, "overall"),
"diamond",
"circle"),
Parameter =
::fct_reorder(term,
forcats%>%
estimate) ::fct_reorder(.,
forcats
point_shape,.desc = TRUE))
),meta_analytic_mean = map_dbl(plot_data,
~ filter(.x, Parameter == "overall") %>%
pull(estimate))) %>%
select(plot_data, meta_analytic_mean) %>%
unnest(cols = c("plot_data")) %>%
mutate(parameter_type = case_when(str_detect(Parameter, "overall") ~ "mean",
TRUE ~ "study"))
# euc_experts_only <-
# euc_experts_only %>%
# rename(id_col = term) %>%
# group_by(type) %>%
# group_split() %>%
# set_names(., euc_experts_only$type %>% unique) %>%
# # map_if(.x = ., names(.) == "study",
# # .f = ~ anonymise_teams(.x, TeamIdentifier_lookup)) %>%
# bind_rows() %>%
# rename(term = id_col)
<- euc_experts_only %>%
euc_forest_experts arrange(desc(type)) %>%
mutate(type = forcats::as_factor(type)) %>%
group_by(type) %>%
arrange(desc(estimate),.by_group = TRUE) %>%
mutate(term = forcats::as_factor(term),
point_shape = case_when(str_detect(type, "summary") ~ "mean",
TRUE ~ "study")) %>%
plot_forest(intercept = TRUE, MA_mean = TRUE) +
theme(axis.text.x = element_text(size = 15),
axis.title.x = element_text(size = 15),
axis.text.y = element_blank()
+
) scale_y_continuous(limits = c(-5, 1),
breaks = c(-5, -4, -3, -2, -1, 0, 1) )
# ---- Extract Viz & Summary Stats
bt_forest_experts
euc_forest_experts
Below is the non-truncated version of Figure 3.3 showing a forest plot of the out-of-sample predictions, \(y_{i}\), on the response-scale (stems counts), for Eucalyptus analyses, showing the full error bars of all model estimates.
<- function(data, intercept = TRUE, MA_mean = TRUE, y_zoom = numeric(2L)){
plot_forest_2 if(MA_mean == FALSE){
<- filter(data, study_id != "overall")
data
}
<- data %>%
plot_data group_by(study_id) %>%
group_nest() %>%
hoist(data, "estimate",.remove = FALSE) %>%
hoist(estimate, y50 = 2) %>%
select(-estimate) %>%
unnest(data) %>%
arrange(desc(type)) %>%
mutate(type = forcats::as_factor(type)) %>%
group_by(type) %>%
arrange(desc(y50),.by_group = TRUE) %>%
mutate(study_id = forcats::as_factor(study_id),
point_shape = case_when(str_detect(type, "summary") ~ "diamond",
TRUE ~ "circle"))
<- ggplot(plot_data, aes(y = estimate,
p x = study_id,
ymin = conf.low,
ymax = conf.high,
# shape = type,
shape = point_shape,
colour = estimate_type
+
)) geom_pointrange(position = position_dodge(width = 0.5)) +
::theme_forest() +
ggforestplottheme(axis.line = element_line(linewidth = 0.10, colour = "black"),
axis.line.y = element_blank(),
text = element_text(family = "Helvetica")) +
guides(shape = "none", colour = "none") +
coord_flip(ylim = y_zoom) +
labs(y = "Model estimated out of sample predictions, stem counts",
x = element_blank()) +
scale_y_continuous(breaks = scales::breaks_extended(10)) +
::scale_color_natparks_d("Glacier")
NatParksPalettes
if(intercept == TRUE){
<- p + geom_hline(yintercept = 0)
p
}if(MA_mean == TRUE){
<- p +
p geom_hline(aes(yintercept = plot_data %>%
filter(type == "summary", estimate_type == "y25") %>%
pluck("estimate")),
data = data,
colour = "#01353D",
linetype = "dashed") +
geom_hline(aes(yintercept = plot_data %>%
filter(type == "summary", estimate_type == "y50") %>%
pluck("estimate")),
data = data,
colour = "#088096",
linetype = "dashed") +
geom_hline(aes(yintercept = plot_data %>%
filter(type == "summary", estimate_type == "y75") %>%
pluck("estimate")),
data = data,
colour = "#58B3C7" ,
linetype = "dashed")
}
print(p)
}
# TODO put into R/ and build into package to call!
<- function(effects_analysis, Z_colname, VZ_colname, estimate_type){
fit_MA_mv <- effects_analysis %>% pull({{Z_colname}})
Zr <- effects_analysis %>% pull({{VZ_colname}})
VZr <- ManyEcoEvo::fit_metafor_mv(estimate = Zr,
mod variance = VZr,
estimate_type = estimate_type,
data = effects_analysis)
return(mod)
}
<-
back_transformed_predictions %>%
ManyEcoEvo_yi ::mutate(data =
dplyr::map(data,
purrr~ dplyr::filter(.x,
::str_detect(response_variable_type, "constructed", negate = TRUE)))) %>%
stringrprepare_response_variables_yi(estimate_type = "yi",
param_table = ManyEcoEvo:::analysis_data_param_tables) %>%
generate_yi_subsets()
<-
raw_mod_data_logged %>%
back_transformed_predictions filter(dataset == "eucalyptus") %>%
group_by(estimate_type) %>%
select(estimate_type, data) %>%
unnest(data) %>%
rename(study_id = id_col) %>%
hoist(params, param_mean = list("value", 1), param_sd = list("value", 2)) %>%
rowwise() %>%
mutate(exclusion_threshold = param_mean + 3*param_sd) %>%
filter(fit < exclusion_threshold) %>%
mutate(log_vals = map2(fit, se.fit, log_transform, 1000)) %>%
unnest(log_vals) %>%
select(study_id,
TeamIdentifier,
estimate_type, starts_with("response_"),
-response_id_S2,
ends_with("_log")) %>%
group_by(estimate_type) %>%
nest()
<- raw_mod_data_logged %>%
mod_data_logged mutate(MA_mod =
map(data,
~fit_MA_mv(.x, mean_log, std.error_log, "yi")))
<- mod_data_logged %>%
plot_data_logged mutate(tidy_mod = map(.x = MA_mod,
~broom::tidy(.x,
conf.int = TRUE,
include_studies = TRUE) %>%
rename(study_id = term))) %>%
select(tidy_mod) %>%
unnest(cols = c(tidy_mod))
%>%
plot_data_logged mutate(response_scale = map2(estimate, std.error, log_back, 1000)) %>%
select(estimate_type, study_id, type, response_scale) %>%
unnest(response_scale) %>%
rename(estimate = mean_origin, conf.low = lower, conf.high = upper) %>%
# filter(estimate <1000) %>%
plot_forest_2(MA_mean = T,y_zoom = c(0,140))