diff --git a/NAMESPACE b/NAMESPACE
index b4eb55a..447a827 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand
export("%>%")
+export(articles_by_jp)
export(colors_license)
export(colors_license_unpaywall)
export(colors_source_disclosure)
diff --git a/R/data.R b/R/data.R
index c55f19e..9cdce34 100644
--- a/R/data.R
+++ b/R/data.R
@@ -1,10 +1,11 @@
+# crossref ====
#' Hybrid open access articles via [Crossref](https://www.crossref.org)
-#'
+#'
#' Contains information about the overall publication volume, and, if available, cost information from the Open APC Initiative.
-#'
-#' @format
+#'
+#' @format
#' A data frame with the following variables:
-#'
+#'
#' | **Variable** | **Description** |
#' | -------------------------- | ------------------------------------------------------------------ |
#' | `license` | Normalized open content license statement |
@@ -27,14 +28,14 @@
#' | `subdomain` | Email subdomain first or corresponding author |
#' | `domain` | Email domain first or corresponding author |
#' | `suffix` | Email suffix first or corresponding author |
-#'
+#'
#' @source [Crossref](https://www.crossref.org)
-#'
+#'
#' @section License:
#' See Crossref [Terms and Conditions](https://www.crossref.org/requestaccount/termsandconditions.html)
-#'
+#'
#' @family data
-#'
+#'
#' @export
# storing this as a function ensures this is read in only at compile time, not run time
hybrid_publications <- function() {
@@ -62,9 +63,29 @@ hybrid_publications <- function() {
domain = col_character(),
suffix = col_character()
)
- )
+ ) %>%
+ # reorder factor levels for better cosmetic results
+ mutate(
+ license = forcats::fct_infreq(.data$license),
+ journal_title = forcats::fct_relevel(.data$journal_title, sort),
+ publisher = forcats::fct_infreq(.data$publisher),
+ )
+}
+
+#' @describeIn hybrid_publications number of articles by journal and publisher
+#'
+#' @examples
+#' articles_by_jp()
+#' @export
+#' @family data
+articles_by_jp <- function() {
+  # this is smaller by a factor of 100 than the whole thing, useful for shiny UI functions as well as plots
+  by_jp <- group_by(hybrid_publications(), .data$journal_title, .data$publisher)
+  # summarise() only peels off the last grouping level; ungroup so callers get a plain tibble
+  dplyr::ungroup(summarise(by_jp, n = n()))
}
-
+
+# unpaywall ====
#' Unpaywall data
# TODO improve docs
#' @family data
@@ -79,4 +100,4 @@ unpaywall_df <- function() {
articles = col_integer()
)
)
-}
\ No newline at end of file
+}
diff --git a/R/helpers.R b/R/helpers.R
index b3024a0..f302ac7 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -41,3 +41,44 @@ find_all_ints <- function(x) {
x <- as.integer(x)
min(x):max(x)
}
+
+#' Run a shiny module
+#'
+#' Used for testing and development.
+#' Based on [Cole Arendt](https://github.com/colearendt)'s [suggestion](https://community.rstudio.com/t/tips-for-module-development/14510).
+#'
+#' @inheritParams shiny::NS
+#'
+#' @param ui UI function of the module; it is called with `id` and the elements of `ui_params`.
+#'
+#' @inheritParams shiny::callModule
+#'
+#' @param ui_params,module_params list of parameters to be passed to the `ui` and `module` (= server function) of the module, respectively.
+#'
+#' @return A shiny app object, as returned by [shiny::shinyApp()].
+#'
+#' @family CICD
+#'
+#' @examples
+#' if (interactive()) {
+#'   runModule(ui = jpPickerInput, module = jpPicker)
+#' }
+#' @export
+#' @keywords internal
+# TODO this should be factored out to a separate pkg as per https://github.com/subugoe/metaR/issues/94
+runModule <- function(id = "test_module",
+                      ui,
+                      module,
+                      ui_params = list(),
+                      module_params = list()) {
+  # wrap the fixed arguments in list() so the combined arguments are always a list, as do.call() requires
+  ui_args <- c(list(id = id), ui_params)
+  server_args <- c(list(module = module, id = id), module_params)
+
+  actualUI <- do.call(ui, ui_args)
+  actualServer <- function(input, output, session) {
+    do.call(shiny::callModule, server_args)
+  }
+
+  shiny::shinyApp(actualUI, actualServer)
+}
diff --git a/R/modules.R b/R/modules.R
new file mode 100644
index 0000000..462eff0
--- /dev/null
+++ b/R/modules.R
@@ -0,0 +1,53 @@
+# UI part of the journal/publisher picker module; `id` is the module namespace id.
+jpPickerInput <- function(id) {
+  ns <- NS(id)
+  # fetch the summary once instead of once per input
+  jp <- articles_by_jp()
+
+  # use levels(): c("", <factor>) would coerce the factor to its integer codes
+  tagList(
+    selectizeInput(
+      inputId = ns("publishers"), label = "Selected Publishers",
+      choices = c(All = "", levels(jp$publisher)), multiple = TRUE
+    ),
+    selectizeInput(
+      inputId = ns("journals"), label = "Selected Journals",
+      choices = c(All = "", levels(jp$journal_title)), multiple = TRUE
+    )
+  )
+}
+
+# Server part of the journal/publisher picker module. Returns a reactive with the rows of
+# `articles_by_jp()` matching the selection; an empty (NULL) selection means "no restriction".
+jpPicker <- function(input, output, session) {
+  # rows restricted by publisher only; also drives the journal choices below
+  byPublisher <- reactive({
+    res <- articles_by_jp()
+    if (!is.null(input$publishers)) {
+      res <- filter(.data = res, .data$publisher %in% input$publishers)
+    }
+    res
+  })
+
+  jpFiltered <- reactive({
+    res <- byPublisher()
+    if (!is.null(input$journals)) {
+      res <- filter(.data = res, .data$journal_title %in% input$journals)
+    }
+    res
+  })
+
+  # only update journal input when publishers change, not when journals change
+  observeEvent(
+    eventExpr = input$publishers,
+    handlerExpr = {
+      # filter() keeps unused factor levels, so drop them to actually narrow the choices
+      journals <- levels(droplevels(byPublisher()$journal_title))
+      updateSelectizeInput(session = session, inputId = "journals", choices = c(All = "", journals))
+    },
+    ignoreNULL = FALSE # also reset the choices when the publisher selection is cleared
+  )
+
+  # might add additional submit button here with isolation, but seems fast enough for now
+  jpFiltered
+}
diff --git a/inst/app/dashboard.Rmd b/inst/app/dashboard.Rmd
index bc6e64f..0597ba1 100644
--- a/inst/app/dashboard.Rmd
+++ b/inst/app/dashboard.Rmd
@@ -3,16 +3,16 @@ title: "Hybrid OA Dashboard"
output:
flexdashboard::flex_dashboard:
orientation: rows
- vertical_layout: fill
+ vertical_layout: scroll
css: ugoe_bootswatch.css
runtime: shiny
---
```{r setup, include=FALSE}
+library(shiny)
library(flexdashboard)
library(viridis)
library(plotly)
-library(shiny)
library(scales)
library(DT)
library(writexl)
@@ -24,58 +24,27 @@ library(stringr)
```
```{r global}
-# access to data
-hybrid_df <- readr::read_csv(file = path_extdat("hybrid_publications.csv")) %>%
- mutate(license = fct_infreq(license)) %>%
- mutate(publisher = fct_infreq(publisher)) %>%
- mutate(year = factor(issued, levels = c("2013", "2014", "2015","2016", "2017", "2018", "2019", "2020"))) %>%
- mutate(hybrid_type = ifelse(hybrid_type == "SCOAP", "SCOAP3", hybrid_type)) %>%
- mutate(hybrid_type = factor(hybrid_type, levels = c("Open APC (TA)", "Open APC (Hybrid)", "SCOAP3"))) %>%
- mutate(domain = paste(domain, suffix, sep = ".")) %>%
- arrange(desc(yearly_publisher_volume))
-unpaywall_df <- readr::read_csv(file = path_extdat("unpaywall_df.csv")) %>%
- mutate(year = factor(year, levels = c("2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020")))
+
```
Overview
-=================================================================
+========
Inputs {.sidebar}
------------------------------------------------------------------------
+-----------------
+```{r context="render"}
-```{r}
-# from https://stackoverflow.com/questions/42148177/how-can-i-build-multiple-inputs-into-my-shiny-app-with-both-updating-and-multipl
-# TODO factor this out
-selectizeInput(
- inputId = "publishers",
- label = "Selected Publishers",
- choices = c(All = "", levels(hybrid_df$publisher)),
- multiple = TRUE,
-)
-selectizeInput(
- inputId = "journals",
- label = "Selected Journals",
- choices = c(All = "", sort(unique(hybrid_df$journal_title))),
- multiple = TRUE
-)
+```
+
+```{r context="server"}
-# publisher filter
-jn_filtered <- reactive({
- if (is.null(input$publishers)) {
- return(hybrid_df)
- } else {
- return(hybrid_df[hybrid_df$publisher %in% input$publishers, ])
- }
})
-observe(updateSelectizeInput(
- session,
- "journals",
- choices = c(All = "", sort(unique(jn_filtered()$journal_title)))
-))
-# journal_title filter
+```
+
+```{r context="server"}
jn_f <- reactive({
if (is.null(input$journals)) {
return(jn_filtered())
@@ -86,42 +55,24 @@ jn_f <- reactive({
# prepare dataset for plotting
hybrid_sub <- reactive({
-if (length(unique(jn_f()$publisher)) > 1) {
+ if (length(unique(jn_f()$publisher)) > 1) {
hybrid_sub <- jn_f() %>%
- group_by(year, yearly_all, license) %>%
+ group_by(issued, yearly_all, license) %>%
count() %>%
mutate(prop = n / yearly_all)
} else if (length(unique(jn_f()$journal_title)) > 1 &&
- length(unique(jn_f()$publisher)) == 1) {
+ length(unique(jn_f()$publisher)) == 1) {
hybrid_sub <- jn_f() %>%
- group_by(year, license, yearly_publisher_volume) %>%
+ group_by(issued, license, yearly_publisher_volume) %>%
count() %>%
mutate(prop = n / yearly_publisher_volume)
} else {
hybrid_sub <- jn_f() %>%
- group_by(year, license, yearly_jn_volume) %>%
+ group_by(issued, license, yearly_jn_volume) %>%
count() %>%
mutate(prop = n / yearly_jn_volume)
}
})
-# using unpaywall indicators
-# unpaywall <- reactive({
-# hybrid_df %>%
-# filter(journal_title %in% jn_f()$journal_title) %>%
-# group_by(year, journal_title, publisher, jn_y_unpaywall_others) %>%
-# summarise(n = n_distinct(doi_oa)) %>%
-# gather(n, jn_y_unpaywall_others, key = "source", value = "articles") %>%
-# ungroup() %>%
-# group_by(year, source) %>%
-# summarise(articles = sum(articles, na.rm = TRUE)) %>%
-# mutate(
-# source = ifelse(
-# source == "n",
-# "Crossref immediate license",
-# "Other license information\n(Unpaywall)"
-# )
-# )
-# })
```
Notice that only those hybrid open access journals were included where
@@ -146,7 +97,8 @@ renderValueBox({
renderValueBox({
journal_n <- length(unique(jn_f()$journal_title))
valueBox(format(journal_n, big.mark = " ", scientific = FALSE),
- icon = "fa-filter")
+ icon = "fa-filter"
+ )
})
```
@@ -156,7 +108,8 @@ renderValueBox({
renderValueBox({
hybrid_n <- nrow(jn_f())
valueBox(format(hybrid_n, big.mark = " ", scientific = FALSE),
- icon = "fa-creative-commons")
+ icon = "fa-creative-commons"
+ )
})
```
@@ -168,7 +121,7 @@ Column {.tabset .tabset-fade}
```{r}
renderPlotly({
- p <- ggplot(hybrid_sub(), aes(year, prop, fill = license)) +
+ p <- ggplot(hybrid_sub(), aes(issued, prop, fill = license)) +
xlab("Year") +
ylab("Hybrid OA / Articles published") +
scale_fill_manual(
@@ -188,7 +141,7 @@ renderPlotly({
```{r}
renderPlotly({
- p <- ggplot(hybrid_sub(), aes(year, n, fill = license)) +
+ p <- ggplot(hybrid_sub(), aes(issued, n, fill = license)) +
xlab("Year") +
ylab("Hybrid OA Articles") +
scale_fill_manual(
@@ -197,12 +150,13 @@ renderPlotly({
) +
scale_x_discrete(drop = FALSE) +
scale_y_continuous(
- labels = function(x)
+ labels = function(x) {
format(x, big.mark = " ", scientific = FALSE)
+ }
) +
theme_minimal() +
geom_col(position = position_stack(reverse = TRUE))
- # p
+ # p
plotly::ggplotly(p)
})
```
@@ -228,14 +182,15 @@ Row {data-width=400 data-height=350}
renderPlotly({
p <- hybrid_df %>%
filter(journal_title %in% jn_f()$journal_title) %>%
- ggplot(aes(year, ..count.., fill = hybrid_type)) +
+ ggplot(aes(issued, ..count.., fill = hybrid_type)) +
geom_bar(position = position_stack(reverse = TRUE)) +
xlab("Year") +
ylab("Articles") +
scale_x_discrete(drop = FALSE) +
scale_y_continuous(
- labels = function(x)
- format(x, big.mark = " ", scientific = FALSE),
+ labels = function(x) {
+ format(x, big.mark = " ", scientific = FALSE)
+ },
breaks = scales::pretty_breaks()
) +
scale_fill_manual(
@@ -264,8 +219,10 @@ renderEcharts4r({
count(suffix, domain) %>%
filter(!is.na(domain)) %>%
e_charts() %>%
- e_treemap(suffix, domain, n, leafDepth = "1",
- name = "Corresponding Email") %>%
+ e_treemap(suffix, domain, n,
+ leafDepth = "1",
+ name = "Corresponding Email"
+ ) %>%
e_tooltip(trigger = "item")
})
```
@@ -281,18 +238,18 @@ Row {data-height=250}
```{r}
hybrid_all <-
hybrid_df %>%
- count(year) %>%
- mutate(year = gsub("201", "1", year)) %>%
- mutate(year = gsub("2020", "20", year))
+ count(issued) %>%
+ mutate(issued = gsub("201", "1", issued)) %>%
+ mutate(issued = gsub("2020", "20", issued))
hybrid_df %>%
- mutate(year = gsub("201", "1", year)) %>%
- mutate(year = gsub("2020", "20", year)) %>%
+ mutate(issued = gsub("201", "1", issued)) %>%
+ mutate(issued = gsub("2020", "20", issued)) %>%
mutate(publisher_group = fct_lump(publisher, prop = 0.05)) %>%
- group_by(year, publisher_group) %>%
+ group_by(issued, publisher_group) %>%
summarize(n = n()) %>%
mutate(prop = n / sum(n)) %>%
mutate(`Proportion in %` = round(prop * 100, 2)) %>%
- ggplot(aes(year, n)) +
+ ggplot(aes(issued, n)) +
geom_bar(
data = hybrid_all,
aes(fill = "All Hybrid OA Articles"),
@@ -302,10 +259,12 @@ hybrid_df %>%
geom_bar(aes(fill = "by Publisher", label = `Proportion in %`), color = "transparent", stat = "identity") +
facet_wrap(~publisher_group, nrow = 1) +
scale_fill_manual(values = c("#b3b3b3a0", "#153268"), name = "") +
- labs(x = "Year", y = "Hybrid OA Articles") +
+ labs(x = "Year", y = "Hybrid OA Articles") +
theme_minimal() +
- theme(legend.position = "top",
- legend.justification = "right") +
+ theme(
+ legend.position = "top",
+ legend.justification = "right"
+ ) +
scale_y_continuous(labels = scales::number_format(big.mark = " ")) +
theme(panel.grid.minor = element_blank()) +
theme(axis.ticks = element_blank()) +
@@ -328,7 +287,7 @@ fillRow(
selectInput(
inputId = "Year",
label = "Select Years",
- choices = c(rev(levels(hybrid_df$year))),
+ choices = c(rev(levels(hybrid_df$issued))),
selected = c("2017", "2018", "2019"),
multiple = TRUE
),
@@ -339,18 +298,18 @@ fillRow(
output$pubplot <- renderPlotly({
if (input$Year == "All") {
- tt <- hybrid_df %>%
+ tt <- hybrid_df %>%
filter(!is.na(license)) %>%
mutate(publisher = forcats::as_factor(publisher)) %>%
- group_by(year, publisher, journal_title, yearly_jn_volume) %>%
+ group_by(issued, publisher, journal_title, yearly_jn_volume) %>%
summarize(oa = n()) %>%
mutate(prop = oa / yearly_jn_volume)
} else {
- tt <- hybrid_df %>%
+ tt <- hybrid_df %>%
filter(!is.na(license)) %>%
mutate(publisher = forcats::as_factor(publisher)) %>%
- filter(year %in% input$Year) %>%
- group_by(year, publisher, journal_title, yearly_jn_volume) %>%
+ filter(issued %in% input$Year) %>%
+ group_by(issued, publisher, journal_title, yearly_jn_volume) %>%
summarize(oa = n()) %>%
mutate(prop = oa / yearly_jn_volume)
}
@@ -366,8 +325,9 @@ output$pubplot <- renderPlotly({
scale_x_discrete(
drop = FALSE,
limits = rev(levels(tt$publisher)),
- labels = function(x)
+ labels = function(x) {
str_wrap(x, width = 30)
+ }
) +
scale_y_continuous(labels = scales::percent)
if (input$rb == TRUE) {
@@ -404,21 +364,21 @@ fillCol(
output$yearplot <- renderPlotly({
if (input$Publisher == "All") {
- tt <- hybrid_df %>%
+ tt <- hybrid_df %>%
filter(!is.na(license)) %>%
- group_by(year, publisher, journal_title, yearly_jn_volume) %>%
+ group_by(issued, publisher, journal_title, yearly_jn_volume) %>%
summarize(oa = n()) %>%
mutate(prop = oa / yearly_jn_volume)
} else {
- tt <- hybrid_df %>%
+ tt <- hybrid_df %>%
filter(!is.na(license)) %>%
filter(publisher == input$Publisher) %>%
- group_by(year, publisher, journal_title, yearly_jn_volume) %>%
+ group_by(issued, publisher, journal_title, yearly_jn_volume) %>%
summarize(oa = n()) %>%
mutate(prop = oa / yearly_jn_volume)
}
- p <- ggplot(tt, aes(y = prop, x = as.factor(year))) +
+ p <- ggplot(tt, aes(y = prop, x = as.factor(issued))) +
geom_boxplot() +
coord_flip() +
theme_minimal() +
@@ -454,11 +414,11 @@ hybrid_data <- reactive({
filter(publisher == input$Publisher)
}
hybrid_df %>%
- filter(year %in% input$Year) %>%
- group_by(year, publisher, journal_title, yearly_jn_volume) %>%
+ filter(issued %in% input$Year) %>%
+ group_by(issued, publisher, journal_title, yearly_jn_volume) %>%
summarize(oa = n()) %>%
mutate(prop = round((oa / yearly_jn_volume) * 100, 2)) %>%
- arrange(desc(year)) %>%
+ arrange(desc(issued)) %>%
select(
Year = year,
Publisher = publisher,
@@ -471,16 +431,16 @@ hybrid_data <- reactive({
renderDataTable(
expr = {
- hybrid_data()
-},
-rownames = FALSE,
+ hybrid_data()
+ },
+ rownames = FALSE,
filter = "bottom",
-options = list(
+ options = list(
pageLength = 4,
dom = "ftp",
- columnDefs = list(list(
+ columnDefs = list(list(
className = "dt-head-left", targets = "_all"
-))
+ ))
)
)
```
@@ -493,13 +453,13 @@ Institutional View
my_data <- reactive({
if (!is.null(input$plot_clicked_data$name)) {
hybrid_df %>%
- filter(suffix == input$plot_clicked_data$name | domain == input$plot_clicked_data$name) %>%
- mutate(plot_title = ifelse(!input$plot_clicked_data$name %in% suffix,
- domain,
+ filter(suffix == input$plot_clicked_data$name | domain == input$plot_clicked_data$name) %>%
+ mutate(plot_title = ifelse(!input$plot_clicked_data$name %in% suffix,
+ domain,
suffix
))
} else {
- hybrid_df
+ hybrid_df
}
})
```
@@ -512,16 +472,18 @@ Column
```{r}
output$plot <- renderEcharts4r({
hybrid_df %>%
- count(suffix, domain) %>%
- filter(!is.na(domain)) %>%
- e_charts() %>%
- e_treemap(suffix, domain, n, leafDepth = "1",
- name = "Corresponding Email") %>%
- e_tooltip(trigger = "item")
+ count(suffix, domain) %>%
+ filter(!is.na(domain)) %>%
+ e_charts() %>%
+ e_treemap(suffix, domain, n,
+ leafDepth = "1",
+ name = "Corresponding Email"
+ ) %>%
+ e_tooltip(trigger = "item")
})
output$oa_license <- renderPlotly({
- ggplot(my_data(), aes(factor(year), fill = license)) +
+ ggplot(my_data(), aes(factor(issued), fill = license)) +
geom_bar(position = position_stack(reverse = TRUE)) +
ggtitle(unique(my_data()$plot_title)) +
scale_fill_manual(
@@ -530,11 +492,12 @@ output$oa_license <- renderPlotly({
) +
scale_x_discrete("", drop = FALSE) +
scale_y_continuous("Articles",
- labels = function(x)
+ labels = function(x) {
format(x, big.mark = " ", scientific = FALSE)
+ }
) +
theme_minimal() -> p
- tt <- plotly::ggplotly(p, tooltip = c("y"))
+ tt <- plotly::ggplotly(p, tooltip = c("y"))
tt$x$data <- lapply(tt$x$data, function(x) {
x$text <- paste(x$name, x$y, sep = ": ")
x
@@ -544,13 +507,14 @@ output$oa_license <- renderPlotly({
output$oa_source <- renderPlotly({
p <-
- ggplot(my_data(), aes(factor(year), ..count.., fill = hybrid_type)) +
+ ggplot(my_data(), aes(factor(issued), ..count.., fill = hybrid_type)) +
geom_bar(position = position_stack(reverse = TRUE)) +
xlab("Year") +
ylab("Articles") +
scale_y_continuous(
- labels = function(x)
+ labels = function(x) {
format(x, big.mark = " ", scientific = FALSE)
+ }
) +
scale_fill_manual(
"Sources",
@@ -558,7 +522,7 @@ output$oa_source <- renderPlotly({
na.value = "#b3b3b3a0"
) +
theme_minimal()
- tt <- plotly::ggplotly(p, tooltip = c("y"))
+ tt <- plotly::ggplotly(p, tooltip = c("y"))
tt$x$data <- lapply(tt$x$data, function(x) {
x$text <- paste(x$name, x$y, sep = ": ")
x
@@ -584,7 +548,7 @@ Row
```{r}
renderDataTable(
expr = {
- my_data() %>%
+ my_data() %>%
count(publisher) %>%
arrange(desc(n)) %>%
mutate(prop = round(n / sum(n) * 100, 2))
diff --git a/man/hybrid_publications.Rd b/man/hybrid_publications.Rd
index 52dd26a..f710b19 100644
--- a/man/hybrid_publications.Rd
+++ b/man/hybrid_publications.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/data.R
\name{hybrid_publications}
\alias{hybrid_publications}
+\alias{articles_by_jp}
\title{Hybrid open access articles via \href{https://www.crossref.org}{Crossref}}
\format{
A data frame with the following variables:\tabular{ll}{
@@ -33,16 +34,30 @@ A data frame with the following variables:\tabular{ll}{
}
\usage{
hybrid_publications()
+
+articles_by_jp()
}
\description{
Contains information about the overall publication volume, and, if available, cost information from the Open APC Initiative.
}
+\section{Functions}{
+\itemize{
+\item \code{articles_by_jp}: number of articles by journal and publisher
+}}
+
\section{License}{
See Crossref \href{https://www.crossref.org/requestaccount/termsandconditions.html}{Terms and Conditions}
}
+\examples{
+articles_by_jp()
+}
\seealso{
+Other data:
+\code{\link{license_patterns}},
+\code{\link{unpaywall_df}()}
+
Other data:
\code{\link{license_patterns}},
\code{\link{unpaywall_df}()}
diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R
index 361c875..254516d 100644
--- a/tests/testthat/test-data.R
+++ b/tests/testthat/test-data.R
@@ -2,3 +2,11 @@ test_that("hybrid_publications dataset is sane", {
expect_factor(hybrid_publications()$license)
# TODO add more here once that makes sense
})
+test_that("summary by journal and publisher", {
+  n_articles <- articles_by_jp()$n
+
+  # walk() over an empty vector would run no expectation at all, so rule that out first
+  expect_gt(length(n_articles), 0)
+  # every per-group count must be a single non-missing, non-negative integer
+  purrr::walk(.x = n_articles, .f = checkmate::expect_count, na.ok = FALSE, null.ok = FALSE)
+})
diff --git a/vignettes/about.Rmd b/vignettes/about.Rmd
index f4b0d44..7db267a 100644
--- a/vignettes/about.Rmd
+++ b/vignettes/about.Rmd
@@ -47,17 +47,12 @@ jn_facets <- jsonlite::stream_in(
file(path_extdat(x = "jn_facets_df.json")),
verbose = FALSE
)
-# get hybrid journals that have open licensing information in the period 2013-18
-hybrid_cr <- read_csv(path_extdat("hybrid_publications.csv")) %>%
- mutate(license = fct_infreq(license)) %>%
- mutate(year = factor(issued, levels = c("2013", "2014", "2015","2016", "2017", "2018", "2019"))) %>%
- arrange(desc(yearly_publisher_volume))
o_apc_df <- read_csv(path_extdat("oapc_hybrid.csv")) %>%
- mutate(year = factor(period, levels = c("2013", "2014", "2015","2016", "2017", "2018", "2019")))
+ mutate(year = factor(period, levels = c("2013", "2014", "2015", "2016", "2017", "2018", "2019")))
unpaywall_df <- read_csv(path_extdat("unpaywall_df.csv")) %>%
- mutate(year = factor(year, levels = c("2013", "2014", "2015","2016", "2017", "2018", "2019")))
+ mutate(year = factor(year, levels = c("2013", "2014", "2015", "2016", "2017", "2018", "2019")))
```
@@ -278,10 +273,12 @@ cvr_df <- left_join(n_journals_df, n_hoa_df, by = "publisher") %>%
mutate(publisher = forcats::fct_other(publisher, drop = publisher[21:length(publisher)])) %>%
ungroup() %>%
group_by(publisher) %>%
- summarise(n_journals = sum(n_journals),
- n_hoa_journals = sum(n_hoa_journals))
+ summarise(
+ n_journals = sum(n_journals),
+ n_hoa_journals = sum(n_hoa_journals)
+ )
#
-cvr_df_2 <- tidyr::gather(cvr_df, group, value,-publisher)
+cvr_df_2 <- tidyr::gather(cvr_df, group, value, -publisher)
#' plot
gg <- ggplot(cvr_df, aes(y = publisher)) +
@@ -378,52 +375,53 @@ hybrid_all <- hybrid_cr %>%
mutate(prop = n / yearly_all)
prop_p <- ggplot(hybrid_all, aes(year, prop, fill = license)) +
geom_col(position = position_stack(reverse = TRUE)) +
- xlab("Year") +
- ylab("Relative") +
- # palette https://twitter.com/debivort/status/994583058031546369
- scale_fill_manual(
- "License",
+ xlab("Year") +
+ ylab("Relative") +
+ # palette https://twitter.com/debivort/status/994583058031546369
+ scale_fill_manual(
+ "License",
values = colors_license
- ) +
- scale_x_discrete(drop = FALSE) +
- scale_y_continuous(labels = scales::percent) +
- # theme_minimal(base_family="Roboto", base_size = 12) +
- theme(plot.margin=margin(30,30,30,30)) +
- theme(panel.grid.minor=element_blank()) +
- theme(axis.ticks=element_blank()) +
- theme(panel.grid.major.x=element_blank()) +
- theme(panel.border=element_blank()) +
- theme(legend.position="none")
+ ) +
+ scale_x_discrete(drop = FALSE) +
+ scale_y_continuous(labels = scales::percent) +
+ # theme_minimal(base_family="Roboto", base_size = 12) +
+ theme(plot.margin = margin(30, 30, 30, 30)) +
+ theme(panel.grid.minor = element_blank()) +
+ theme(axis.ticks = element_blank()) +
+ theme(panel.grid.major.x = element_blank()) +
+ theme(panel.border = element_blank()) +
+ theme(legend.position = "none")
prop_n <- ggplot(hybrid_all, aes(year, n, fill = license)) +
geom_col(position = position_stack(reverse = TRUE)) +
- xlab("Year") +
- ylab("Absolute") +
- # palette https://twitter.com/debivort/status/994583058031546369
- scale_fill_manual(
- "License",
+ xlab("Year") +
+ ylab("Absolute") +
+ # palette https://twitter.com/debivort/status/994583058031546369
+ scale_fill_manual(
+ "License",
values = colors_license
- ) +
- scale_x_discrete(drop = FALSE) +
- scale_y_continuous() +
- # theme_minimal(base_family="Roboto", base_size = 12) +
- theme(plot.margin=margin(30,30,30,30)) +
- theme(panel.grid.minor=element_blank()) +
- theme(axis.ticks=element_blank()) +
- theme(panel.grid.major.x=element_blank()) +
- theme(panel.border=element_blank())
+ ) +
+ scale_x_discrete(drop = FALSE) +
+ scale_y_continuous() +
+ # theme_minimal(base_family="Roboto", base_size = 12) +
+ theme(plot.margin = margin(30, 30, 30, 30)) +
+ theme(panel.grid.minor = element_blank()) +
+ theme(axis.ticks = element_blank()) +
+ theme(panel.grid.major.x = element_blank()) +
+ theme(panel.border = element_blank())
prow <- cowplot::plot_grid(prop_p, prop_n +
- theme(legend.position="none"),
- align = 'vh',
- labels = c("A", "B"),
- hjust = -1,
- nrow = 1)
+ theme(legend.position = "none"),
+align = "vh",
+labels = c("A", "B"),
+hjust = -1,
+nrow = 1
+)
-legend_b <- get_legend(prop_n + theme(legend.position="bottom"))
+legend_b <- get_legend(prop_n + theme(legend.position = "bottom"))
p_license <- plot_grid(prow, legend_b, ncol = 1, rel_heights = c(1, .2))
title <- ggdraw() +
# draw_label("Hybrid Open Access Uptake 2013-2019", fontfamily = "Roboto")
draw_label("Hybrid Open Access Uptake 2013-2019")
-p <- plot_grid(title, p_license, ncol=1, rel_heights=c(0.1, 1))
+p <- plot_grid(title, p_license, ncol = 1, rel_heights = c(0.1, 1))
p
```
@@ -455,8 +453,10 @@ unpaywall_df() %>%
```{r}
hybrid_overview <- hybrid_cr %>%
group_by(publisher) %>%
- summarize(`Number of journals` = n_distinct(journal_title),
- `Hybrid OA article volume` = n_distinct(doi_oa))
+ summarize(
+ `Number of journals` = n_distinct(journal_title),
+ `Hybrid OA article volume` = n_distinct(doi_oa)
+ )
top3 <- hybrid_overview %>%
arrange(desc(`Number of journals`)) %>%
slice(1:3)
@@ -484,30 +484,33 @@ In total, spending information for `r hybrid_cr %>% filter(!is.na(hybrid_type))
```{r spending, fig.cap="Development of spending disclosure", fig.subcap="Selected for hybrid open access articles across publishers. Sources include expenditures for individual articles ('Open APC (Hybrid)') and articles from transformative agreements ('Open APC (TA)') provided by the Open APC Initiative. Overlap between hybrid open access articles found via Crossref and the SCOAP$^3$ repository is displayed as well. The light gray stack areas represent the number of articles where no information about open access sponsorship was available. Notice that it is very likely that the overall decrease of spending for hybrid open access reported to the Open APC initiative in 2019 is due to a lag between the time payments were made and reporting of payments to the initiative."}
spending_plot <- hybrid_cr %>%
mutate(publisher = fct_other(publisher, keep = c("Elsevier BV", "Springer Nature"))) %>%
- mutate(hybrid_type = ifelse(hybrid_type == "SCOAP","SCOAP³", hybrid_type)) %>%
+ mutate(hybrid_type = ifelse(hybrid_type == "SCOAP", "SCOAP³", hybrid_type)) %>%
ggplot(aes(year, ..count.., fill = hybrid_type)) +
- geom_bar(position = position_stack(reverse = TRUE)) +
- labs(x = "Year", y = "Hybrid OA Articles",
- title = "Availability of Spending Information for Hybrid Open Access Articles") +
- scale_x_discrete(drop = FALSE) +
- scale_y_continuous(
- labels = function(x)
- format(x, big.mark = " ", scientific = FALSE),
- breaks = scales::pretty_breaks()
- ) +
+ geom_bar(position = position_stack(reverse = TRUE)) +
+ labs(
+ x = "Year", y = "Hybrid OA Articles",
+ title = "Availability of Spending Information for Hybrid Open Access Articles"
+ ) +
+ scale_x_discrete(drop = FALSE) +
+ scale_y_continuous(
+ labels = function(x) {
+ format(x, big.mark = " ", scientific = FALSE)
+ },
+ breaks = scales::pretty_breaks()
+ ) +
scale_fill_manual(
- "Sources:",
+ "Sources:",
values = colors_source_disclosure,
na.value = "#E9E1D7"
- ) +
- facet_wrap( ~ publisher) +
+ ) +
+ facet_wrap(~publisher) +
# theme_minimal(base_family="Roboto", base_size = 12) +
- theme(plot.margin=margin(30,30,30,30)) +
- theme(panel.grid.minor=element_blank()) +
- theme(axis.ticks=element_blank()) +
- theme(panel.grid.major.x=element_blank()) +
- theme(panel.border=element_blank()) +
- theme(legend.position="top")
+ theme(plot.margin = margin(30, 30, 30, 30)) +
+ theme(panel.grid.minor = element_blank()) +
+ theme(axis.ticks = element_blank()) +
+ theme(panel.grid.major.x = element_blank()) +
+ theme(panel.border = element_blank()) +
+ theme(legend.position = "top")
spending_plot
```
@@ -541,11 +544,11 @@ email_plot <- ggplot(email_df, aes(suffix, prop)) +
scale_y_continuous("Percentage", labels = scales::percent_format(accuracy = 5L)) +
labs(x = "Email Domain Suffix", title = "Hybrid Open Access Articles: Corresponding Authors Affiliation") +
# theme_minimal(base_family="Roboto", base_size = 12) +
- theme(plot.margin=margin(30,30,30,30)) +
- theme(panel.grid.minor=element_blank()) +
- theme(axis.ticks=element_blank()) +
- theme(panel.grid.major.y=element_blank()) +
- theme(panel.border=element_blank())
+ theme(plot.margin = margin(30, 30, 30, 30)) +
+ theme(panel.grid.minor = element_blank()) +
+ theme(axis.ticks = element_blank()) +
+ theme(panel.grid.major.y = element_blank()) +
+ theme(panel.border = element_blank())
email_plot
```
@@ -579,10 +582,12 @@ hybrid_cr %>%
geom_bar(aes(fill = "per Publisher"), color = "transparent", stat = "identity") +
facet_wrap(~publisher_group, nrow = 1) +
scale_fill_manual(values = c("#b3b3b3a0", "#153268"), name = "") +
- labs(x = "Year", y = "Hybrid OA Articles", title = "Uptake of Hybrid Open Access since 2013") +
+ labs(x = "Year", y = "Hybrid OA Articles", title = "Uptake of Hybrid Open Access since 2013") +
# theme_minimal(base_family = "Roboto") +
- theme(legend.position = "top",
- legend.justification = "right") +
+ theme(
+ legend.position = "top",
+ legend.justification = "right"
+ ) +
scale_y_continuous(labels = scales::number_format(big.mark = " ")) +
theme(plot.margin = margin(30, 30, 30, 30)) +
theme(panel.grid.minor = element_blank()) +
@@ -610,15 +615,17 @@ q <- ggplot(my_df, aes(publisher_group, prop)) +
geom_boxplot() +
scale_x_discrete(limits = rev(levels(my_df$publisher_group))) +
scale_y_continuous(labels = scales::percent_format(accuracy = 5L)) +
- coord_flip(ylim = c(0,0.3)) +
- labs(y = "Percentage", x = "Publishers (Top 3)",
- title = "Journal hybrid open access share during 2017 and 2018") +
+ coord_flip(ylim = c(0, 0.3)) +
+ labs(
+ y = "Percentage", x = "Publishers (Top 3)",
+ title = "Journal hybrid open access share during 2017 and 2018"
+ ) +
# theme_minimal(base_family = "Roboto") +
- theme(plot.margin=margin(30,30,30,30)) +
- theme(panel.grid.minor=element_blank()) +
- theme(axis.ticks=element_blank()) +
- theme(panel.grid.major.y=element_blank()) +
- theme(panel.border=element_blank())
+ theme(plot.margin = margin(30, 30, 30, 30)) +
+ theme(panel.grid.minor = element_blank()) +
+ theme(axis.ticks = element_blank()) +
+ theme(panel.grid.major.y = element_blank()) +
+ theme(panel.border = element_blank())
q
```
@@ -647,28 +654,30 @@ The sponsors of these open access publications were therefore unknow to us.
```{r institutions, fig.cap = "Availability of spending information", fig.subcap="For hybrid open access articles by email top-level domain from first respective corresponding authors"}
hybrid_cr %>%
mutate(year = gsub("20", "", year)) %>%
- mutate(hybrid_type = ifelse(hybrid_type == "SCOAP","SCOAP³", hybrid_type)) %>%
+ mutate(hybrid_type = ifelse(hybrid_type == "SCOAP", "SCOAP³", hybrid_type)) %>%
filter(!is.na(suffix)) %>%
mutate(suffix = fct_lump(suffix, prop = 0.02, other_level = "other")) %>%
count(hybrid_type, year, suffix, sort = TRUE) %>%
ggplot(aes(year, n, fill = hybrid_type)) +
geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
- facet_wrap(~factor(suffix, levels = rev(levels(email_df$suffix)))) +
+ facet_wrap(~ factor(suffix, levels = rev(levels(email_df$suffix)))) +
scale_fill_manual(
- "Sources:",
+ "Sources:",
values = colors_source_disclosure,
- na.value = "#b3b3b3a0"
- ) +
- labs(x = "Year", y = "Hybrid OA Articles",
- title = "Availability of Spending Information for Hybrid Open Access Articles",
- subtitle = "Breakdown by email top-level domain from corresponding authors") +
+ na.value = "#b3b3b3a0"
+ ) +
+ labs(
+ x = "Year", y = "Hybrid OA Articles",
+ title = "Availability of Spending Information for Hybrid Open Access Articles",
+ subtitle = "Breakdown by email top-level domain from corresponding authors"
+ ) +
# theme_minimal(base_family = "Roboto", base_size = 12) +
- theme(plot.margin=margin(30,30,30,30)) +
- theme(panel.grid.minor=element_blank()) +
- theme(axis.ticks=element_blank()) +
- theme(panel.grid.major.x=element_blank()) +
- theme(panel.border=element_blank()) +
- theme(legend.position="top") -> plot_institutions
+ theme(plot.margin = margin(30, 30, 30, 30)) +
+ theme(panel.grid.minor = element_blank()) +
+ theme(axis.ticks = element_blank()) +
+ theme(panel.grid.major.x = element_blank()) +
+ theme(panel.border = element_blank()) +
+ theme(legend.position = "top") -> plot_institutions
plot_institutions
```