Code
library("readxl")

# Search log: one row per database query, with the number of usable hits
notes <- read_excel("database_notes.xlsx")

# total number of applicable search results across all queries
sum(notes$`Applicable Results`)
A systematic literature search was performed in “list” on January 15 and 16, 2024, using search terms to identify eligible articles, with the publication date limited to 01/01/2018–31/12/2023. The keywords used for the search were “Remote sensing” AND “machine learning” AND “sustainable development goals” (see table “database_notes.csv”).
Missing DOIs were identified using the Zotero “no DOI” tag. The DOI was then imported either via the Zotero add-on (“manage DOI”), or the paper was opened and re-added to Zotero via the Zotero Chrome add-on.
These search results were downloaded to a RIS file and imported into Zotero. Zotero’s merge-duplicates function was used, after which a CSV file was exported.
library("readxl")

# Search log: one row per database query, with the number of usable hits
notes <- read_excel("database_notes.xlsx")
sum(notes$`Applicable Results`)

# Zotero export after duplicate merging
all_citations <- read.csv("all_citations_j19.csv",
                          na.strings = c("", "NA")) # make blank cells NA

# difference between the logged hits and the rows in the export
sum(notes$`Applicable Results`) - nrow(all_citations)

# some reports and a call for papers made it in
all_citations <- subset(all_citations,
                        Item.Type == "journalArticle" &
                          Title != "CALL FOR PAPERS")

# same difference again, after dropping the non-article records
sum(notes$`Applicable Results`) - nrow(all_citations)
ref: https://rpubs.com/cschwarz/web_scrape
# missing_index <- which(is.na(all_citations$Abstract.Note))
# not_missing <- all_citations[!is.na(all_citations$Abstract.Note), ]
# retrieve abstract function
#' Fill in a missing abstract for one citation from its DOI landing page
#'
#' Meant to be applied row-wise to the citation table, so `article` is a named
#' vector describing a single record. Relies on `read_html()`, `html_node()`,
#' `html_text()` and `%>%` being available (rvest attached).
#'
#' @param article Named vector with at least the elements "Abstract.Note"
#'   and "DOI".
#' @return `article`. It is returned untouched when "Abstract.Note" is already
#'   present or when there is no DOI to look up; otherwise "Abstract.Note" is
#'   set to the text of the first node matching ".hlFld-Abstract .last", or to
#'   NA when the page cannot be fetched.
grab_info <- function(article) {
  if (is.na(article["Abstract.Note"])) {
    # without a DOI there is no page to fetch; leave the abstract missing
    if (is.na(article["DOI"])) {
      return(article)
    }
    article_link <- paste0("https://doi.org/", article["DOI"])
    # try() keeps one unreachable page from aborting the whole apply run
    a_html <- try(read_html(article_link), silent = TRUE)
    if (inherits(a_html, "try-error")) {
      article["Abstract.Note"] <- NA
    } else {
      abstract <- a_html %>%
        html_node(".hlFld-Abstract .last") %>%
        html_text()
      article["Abstract.Note"] <- abstract
    }
  }
  return(article)
}
library(parallel)
library(pbapply)

# Start one worker per detected core and give each worker grab_info plus the
# packages it calls
cl <- makeCluster(detectCores())
clusterExport(cl, varlist = c("grab_info"))
clusterEvalQ(cl, library(rvest))
clusterEvalQ(cl, library(stringr))

# Apply grab_info to every row; the result comes back with records in columns,
# so it is transposed before being turned into a data frame again
citations <- as.data.frame(t(pbapply(all_citations, 1, grab_info, cl = cl)))

# release the worker processes once scraping is finished
stopCluster(cl)

# abstracts still missing after scraping
sum(is.na(citations$Abstract.Note))
# to make searching for words easier and because metagear is case sensitive
citations$Title <- tolower(citations$Title)
citations$Abstract.Note <- tolower(citations$Abstract.Note)

#write.csv(citations, file = "citations_with_reivew.csv")
TO DO: - manually add the missing 6 using zotero
checking titles first
cit <- read.csv("citations_with_reivew.csv")

# titles containing "review", listed so they can be checked by hand
to_omit <- cit[which(stringr::str_detect(cit$Title, "review")), ]
to_omit$Title

# After reviewing these titles it is safe to omit them before metagear screening
citations <- cit[which(!stringr::str_detect(cit$Title, "review")), ]
write.csv(citations, file = "citations.csv")
citations <- read.csv("citations.csv")

# number of citations removed by the title filter
nrow(cit) - nrow(citations)
library(metagear)
# Terms passed to the abstract screener as highlightKeywords. Short
# abbreviations are padded with spaces (" rf ", " ann ", ...) so they only
# match as whole words.
keywords <- c(
  # general
  "empirical", "result", "predictive",
  "analysis", "sustainable development goal",
  "sustainable development",
  # data related
  "remotely sensed", "remote sensing", "satellite", "earth observation",
  # models
  "deep learning", "machine learning", "classification", "classifier",
  "regression", "supervised", "supervized", "test set", "training set",
  " cart ", "svm", " rf ", " ann ", "random forest", "support vector machine",
  "regression tree", "decision tree", "neural network", "boosting", "bagging",
  "gradient", "bayes",
  # quality metrics
  "overall accuracy", "accuracy", "coefficient of determination", "rmse", "mse",
  "f1", "precision", "auc", " roc ", "recall", "sensitivity", "specificity",
  "mean absolute error", "error", "mae",
  # to omit
  "systematic review", "meta-analysis", "review"
)
citations <- read.csv("citations.csv")

# random sample of 100 citations for the first reviewer comparison;
# no seed was set, so this draw is not reproducible
cite_testset <- citations[sample(nrow(citations), size = 100), ] # set.seed next time!
A random sample of 100 papers was compared by three reviewers using metagear and the
keywords (from line 144).
#' Prepare a reference set for abstract screening by a single reviewer
#'
#' Initialises `data` with metagear's screening bookkeeping and assigns all of
#' it to `reviewer`. Because `save_split = TRUE`, metagear also writes the
#' reviewer's share to disk; the script then opens it as
#' "effort_<reviewer>.csv".
#'
#' @param data Data frame of citations to be screened.
#' @param reviewer Reviewer name, passed to `effort_distribute(reviewers = )`.
#' @return Invisibly, the value returned by `effort_distribute()`.
metagear_abstract_screening <- function(data, reviewer) {
  library(metagear)
  # prime the study-reference dataset
  theRefs <- effort_initialize(data)
  # here one would distribute screening effort to a team; in this case that is
  # one person, who receives 100% of the effort, saved to their own file
  theRefs_unscreened <- effort_distribute(theRefs, reviewers = reviewer,
                                          effort = 100, save_split = TRUE)
}
# the following was run for each reviewer
# example: nina
# NOTE(review): the 100-paper sample drawn earlier is `cite_testset`, but the
# full `citations` table is passed here - confirm which one was intended
metagear_abstract_screening(citations, "nina")
# initialize screener GUI
abstract_screener("effort_nina.csv", aReviewer = "nina",
                  abstractColumnName = "Abstract.Note",
                  titleColumnName = "Title",
                  highlightKeywords = keywords)

# phase 1 decisions of the three reviewers
nina_1 <- read.csv("reviewer_screaning/effort_nina.csv")
jonas_1 <- read.csv("reviewer_screaning/effort_jonas.csv")
joep_1 <- read.csv("reviewer_screaning/effort_joep.csv")

# three-way cross-tabulation of the INCLUDE decisions
ftable(nina_1$INCLUDE, jonas_1$INCLUDE, joep_1$INCLUDE,
       dnn = c("nina", "jonas", "joep"))
            joep maybe NO YES
nina  jonas
maybe maybe          0  1   0
      NO             1  1   0
      YES            2  0   1
NO    maybe          0  3   1
      NO             0 29   0
      YES            1  7   2
YES   maybe          0  0   1
      NO             1  3   1
      YES            5  4  36
All agreed on YES 36% and NO 29%
After reviewing the cases where the reviewers disagreed, points learnt: - not all the papers included use remote sensing data; these were difficult to categorise and might require opening the full paper. Therefore, the next trial will be:
phase 2a: screening for empirical research, rather than papers reviewing or discussing methods.
phase 2b: from the papers that should be included, we will then assess them more carefully: remote sensing data, machine learning.
# new random sample: excluding all reviewed in phase 1
# NOTE(review): `nina` is not defined in the visible code; the phase 1 file is
# read into `nina_1` - confirm which object is meant here
not_in_sample1 <- subset(citations, !(DOI %in% nina$DOI))
set.seed(123)
sample2 <- not_in_sample1[sample(nrow(not_in_sample1), size = 100), ]

# the following was run for each reviewer
# example: nina
metagear_abstract_screening(sample2, "nina_2a")
# initialize screener GUI
abstract_screener("effort_nina_2a.csv", aReviewer = "nina_2a",
                  abstractColumnName = "Abstract.Note",
                  titleColumnName = "Title",
                  highlightKeywords = keywords)
Instructions 1. run keywords line 362
#' Prepare a reference set for abstract screening by a single reviewer
#'
#' Initialises `data` with metagear's screening bookkeeping and assigns all of
#' it to `reviewer`. Because `save_split = TRUE`, metagear also writes the
#' reviewer's share to disk; the script then opens it as
#' "effort_<reviewer>.csv".
#'
#' @param data Data frame of citations to be screened.
#' @param reviewer Reviewer name, passed to `effort_distribute(reviewers = )`.
#' @return Invisibly, the value returned by `effort_distribute()`.
metagear_abstract_screening <- function(data, reviewer) {
  library(metagear)
  # prime the study-reference dataset
  theRefs <- effort_initialize(data)
  # here one would distribute screening effort to a team; in this case that is
  # one person, who receives 100% of the effort, saved to their own file
  theRefs_unscreened <- effort_distribute(theRefs, reviewers = reviewer,
                                          effort = 100, save_split = TRUE)
}
# Stage 1
# Opens the screening GUI on this reviewer's effort file, highlighting the
# general keyword list in titles and abstracts.
# NOTE(review): presumably "effort_jonas_2a.csv" must already exist, created by
# metagear_abstract_screening(<sample>, "jonas_2a") - confirm before running
# initialize screener GUI
abstract_screener("effort_jonas_2a.csv", aReviewer = "jonas_2a",
abstractColumnName = "Abstract.Note",
titleColumnName = "Title",
highlightKeywords = keywords)
# Stage 2 highlight list: the general keywords plus additional terms about
# remote sensing data
keywords_new <- c(
  keywords,
  # data related
  "remotely sensed", "remote sensing", "satellite", "earth observation",
  " rs ", "images", "imagery", "sentinel", "landsat", "openstreetmap",
  "google earth engine", "true color", "true colour", "false color",
  "false colour", "rgb", "resolution"
)
# Stage 2
# after reviewing, run:
s1_review <- read.csv("effort_jonas_2a.csv")

# carry forward everything that was not rejected in stage 1
rev <- subset(s1_review, s1_review$INCLUDE != "NO")

metagear_abstract_screening(rev, "jonas_2b")
# initialize screener GUI
abstract_screener("effort_jonas_2b.csv", aReviewer = "jonas_2b",
                  abstractColumnName = "Abstract.Note",
                  titleColumnName = "Title",
                  highlightKeywords = keywords_new)
# stage 2a decisions of the three reviewers
nina_2a <- read.csv("reviewer_screaning/effort_nina_2a.csv")
jonas_2a <- read.csv("reviewer_screaning/effort_jonas_2a.csv")
joep_2a <- read.csv("reviewer_screaning/effort_joep_2a.csv")

# three-way cross-tabulation of the INCLUDE decisions
ftable(nina_2a$INCLUDE, jonas_2a$INCLUDE, joep_2a$INCLUDE,
       dnn = c("nina", "jonas", "joep"))
           joep NO YES
nina jonas
NO   NO         44   1
     YES         1   1
YES  NO          2   1
     YES         5  45
Stage 1: agreed: 89%
# stage 2b decisions of the three reviewers
jonas_2b <- read.csv("reviewer_screaning/effort_jonas_2b.csv")
nina_2b <- read.csv("reviewer_screaning/effort_nina_2b.csv")
joep_2b <- read.csv("reviewer_screaning/effort_joep_2b.csv")
# Start from the stage 2a sample (joep's file) and attach each reviewer's
# stage 2b decision by DOI
all <- joep_2a |>
  dplyr::select(INCLUDE, Title, Abstract.Note, Publication.Year, DOI)

# both inputs have an INCLUDE column, so the suffixes yield
# INCLUDE_joep_a (stage 2a) and INCLUDE_joep (stage 2b)
all2 <- dplyr::left_join(all, subset(joep_2b, select = c(INCLUDE, DOI)),
                         by = "DOI", suffix = c("_joep_a", "_joep"))

all2 <- dplyr::left_join(all2, subset(jonas_2b, select = c(INCLUDE, DOI)),
                         by = "DOI")
colnames(all2)[colnames(all2) == "INCLUDE"] <- "INCLUDE_jonas"

all2 <- dplyr::left_join(all2, subset(nina_2b, select = c(INCLUDE, DOI)),
                         by = "DOI")
colnames(all2)[colnames(all2) == "INCLUDE"] <- "INCLUDE_nina"

# rows with no stage 2b decision from any reviewer are counted as "NO";
# the three decision columns are addressed by name (they are columns 6:8)
stage2_cols <- c("INCLUDE_joep", "INCLUDE_jonas", "INCLUDE_nina")
undecided <- which(is.na(all2$INCLUDE_joep) &
                     is.na(all2$INCLUDE_jonas) &
                     is.na(all2$INCLUDE_nina))
all2[undecided, stage2_cols] <- "NO"

ftable(all2$INCLUDE_nina, all2$INCLUDE_jonas, all2$INCLUDE_joep,
       dnn = c("nina", "jonas", "joep"))
67% overall agreement, 21% YES. Number of papers for which at least one reviewer said YES:
# papers with at least one YES, then the YES count per reviewer
# (TRUE is spelled out because T can be reassigned)
sum(all2$INCLUDE_joep == "YES" | all2$INCLUDE_nina == "YES" |
      all2$INCLUDE_jonas == "YES", na.rm = TRUE)
sum(all2$INCLUDE_joep == "YES", na.rm = TRUE)
sum(all2$INCLUDE_nina == "YES", na.rm = TRUE)
sum(all2$INCLUDE_jonas == "YES", na.rm = TRUE)
# Phase 1: start from nina's file and attach the other two decisions by DOI
all3 <- nina_1 |>
  dplyr::select(INCLUDE, Title, Abstract.Note, Publication.Year, DOI)

# the default join suffixes give INCLUDE.x (nina) and INCLUDE.y (jonas)
all3 <- dplyr::left_join(all3, subset(jonas_1, select = c(INCLUDE, DOI)),
                         by = "DOI")
colnames(all3)[colnames(all3) == "INCLUDE.x"] <- "INCLUDE_nina"
colnames(all3)[colnames(all3) == "INCLUDE.y"] <- "INCLUDE_jonas"

all3 <- dplyr::left_join(all3, subset(joep_1, select = c(INCLUDE, DOI)),
                         by = "DOI")
colnames(all3)[colnames(all3) == "INCLUDE"] <- "INCLUDE_joep"

# stack phase 1 on top of stage 2; the first column of all2 is dropped so
# both tables carry the same set of columns
all_reviewed <- rbind(all3, all2[, -1])

ftable_result <- ftable(all_reviewed$INCLUDE_nina, all_reviewed$INCLUDE_jonas,
                        all_reviewed$INCLUDE_joep,
                        dnn = c("nina", "jonas", "joep"))
ftable_result
# papers that all three reviewers marked YES
all_agreed_yes <- subset(all_reviewed,
                         subset = (INCLUDE_joep == "YES" &
                                     INCLUDE_nina == "YES" &
                                     INCLUDE_jonas == "YES"))
#write.csv(all_agreed_yes, file = "all_agreed_yes.csv")

# long form of the agreement table, used to total the YES votes per reviewer
ftable_df <- as.data.frame(ftable_result)
sum(ftable_df$Freq[ftable_df$nina == "YES"])
sum(ftable_df$Freq[ftable_df$jonas == "YES"])
sum(ftable_df$Freq[ftable_df$joep == "YES"])
all_agreed_yes <- read.csv("reviewer_screaning/all_agreed_yes.csv")
#metagear_abstract_screening(all_agreed_yes, "clas_v_reg_nina")

# Re-screen the agreed papers with custom buttons instead of the default
# include/exclude choices.
# NOTE(review): the GUI opens "effort_nina.csv" while the result is read from
# "effort_clas_v_reg_nina.csv" below - confirm the intended file
abstract_screener("effort_nina.csv", aReviewer = "clas_v_reg_nina",
                  abstractColumnName = "Abstract.Note",
                  titleColumnName = "Title",
                  theButtons = c("class", "reg", "unknown"),
                  highlightKeywords = keywords)

current <- read.csv("effort_clas_v_reg_nina.csv")

# number of papers labelled "class"
sum(current$INCLUDE == "class")