CS3DP_mgmt_charts.Rmd

---
title: "CS3DP Management Survey"
output: github_document
---
  
```{r setup, include=FALSE}
# Set the default knitr chunk option `echo = TRUE`; chunks below that pass
# `echo=FALSE` in their own header override this default.
knitr::opts_chunk$set(echo = TRUE)
```

## Summary of Results

These are quick summaries of the initial results of the CS3DP Data Management survey.

1. Raw response data is in the [CS3DP Management group folder](https://drive.google.com/drive/u/0/folders/1fc-wqiG6J3lPyPC7WoWyV0GP1bJ6A2BQ)
2. Intermediate step: Made sure column headers & values were aligned
3. R-Scripts to generate or edit this doc are in [this github repo](https://github.com/magpiedin/CS3DP-management-survey)
    
```{r echo=FALSE, warning=FALSE, message=FALSE}
    
# Prep & Summary stats for CS3DP survey data

library("tidyr")
library("data.table")
library("ggplot2")
library("stringr")
library("stargazer")
library("plotly")
# library("gridExtra")

# Load the raw survey export, keeping text columns as character vectors.
surveyBU <- read.csv(file = "Survey2-Data-20190422-CS3DP.csv", 
                     stringsAsFactors = FALSE)


# Drop the last two columns of the survey data.
# The upper bound must come from the column count (the row count was used
# before, which keeps the wrong set of columns or fails with "undefined
# columns selected"). seq_len() also copes with fewer than two columns, and
# drop = FALSE keeps the result a data frame even if one column remains.
survey <- surveyBU[, seq_len(max(ncol(surveyBU) - 2L, 0L)), drop = FALSE]


# Extra cleanup of the answer text in column 15 onward: cut each value at
# " (please ..." and at ". Please ...", dropping that text and everything
# after it.
# (earlier, narrower pattern: " \\(please explain below\\)\\:")
answer_cols <- 15:ncol(survey)
survey[answer_cols] <- lapply(survey[answer_cols], function(answers) {
  answers <- gsub(" \\(please .+", "", answers)
  gsub("\\. Please .+", "", answers)
})

# Give the columns whose names match Q2 / Q2_4 readable names.
is_group_col <- grepl("Q2$|Q2_4", colnames(survey))
colnames(survey)[is_group_col] <- c("Group","Group_TEXT")

# Shorten the two long group labels.
group_labels <- survey$Group
group_labels <- gsub("Both a Creator and a Repository Manager", "Both",
                     group_labels)
group_labels <- gsub("Repository manager", "Repository",
                     group_labels)
survey$Group <- group_labels

# Rows 1-2 are the explanatory rows; every remaining row is a response.
key_rows <- 1:2
questionKey <- survey[key_rows, ]
responses <- survey[-key_rows, ]


# Prep for summary charts: keep ResponseId, Group and the columns whose names
# end in "Q<number>", then swap columns 2 and 3 so the layout is the same in
# both tables.
main_col_pattern <- "ResponseId|Group$|Q[0-9]+$"

questionKey2 <- questionKey[, grepl(main_col_pattern, colnames(questionKey))]
questionKey2 <- questionKey2[, c(1, 3, 2, 4:ncol(questionKey2))]
# Row 3 = row 1 with the "(Select all that apply) - Selected Choice" suffix
# removed; it is used as the chart subtitle text.
questionKey2[3, ] <- gsub(" \\(Select all that apply\\) \\- Selected Choice", "", questionKey2[1, ])

responses2 <- responses[, grepl(main_col_pattern, colnames(responses))]
responses2 <- responses2[, c(1, 3, 2, 4:ncol(responses2))]


# Order the Group variable so the groups always appear in this sequence.
group_levels <- c("Creator", "Repository", "Both", "Other")
responses2$Group <- factor(responses2$Group,
                           levels = group_levels,
                           ordered = TRUE)

# Response Summaries ungrouped ####
# For every question column of `responses2` (column 3 onward) build a long
# table with columns ResponseId / Group / Resp, one row per selected choice.
# Each table is stored twice: as a global variable named after the question
# column (e.g. Q6, which the chart-setup code below modifies) and as an
# element of `ggSumList`.

# seq_len()[-(1:2)] is empty when there are no question columns, whereas
# 3:ncol() would count backwards.
question_cols <- seq_len(ncol(responses2))[-(1:2)]
ggSumList <- vector("list", length(question_cols))

for (i in question_cols) {
  
  # split out each main response [excluding supplementary "TEXT" columns]
  temp <- responses2[, c(1:2, i)]
  
  # max number of commas in a response; NA answers must not turn the
  # result into NA, which would break the `if` below
  ncomma <- max(str_count(temp[, 3], ","), na.rm = TRUE)
  
  # rename columns and split to new columns by commas
  colnames(temp) <- c("ResponseId", "Group", "Qresponse")
  
  if (ncomma > 0) {
    # n commas delimit n + 1 choices, so n + 1 target columns are needed;
    # with only n columns the last choice of the longest answers is dropped.
    # Shorter answers are padded with NA on the right.
    temp2 <- separate(temp,
                      Qresponse,
                      into = paste0(colnames(responses2)[i],
                                    "_", 
                                    seq_len(ncomma + 1)),
                      sep = ",",
                      fill = "right")
  } else {
    temp2 <- temp
  }
  
  # gather/transform to a long table, then drop the helper "RespNum" column
  if (NCOL(temp2) > 3) {
    temp3 <- gather(temp2, key = "RespNum", value = "Qresp", 
                    3:ncol(temp2), na.rm = TRUE)
    temp3 <- temp3[, -3]
  } else {
    temp3 <- temp2
  }
  
  # keep only rows that actually hold a response (not empty, not NA)
  temp3 <- temp3[nchar(temp3[, 3]) > 0 & !is.na(temp3[, 3]), ]
  
  colnames(temp3) <- c("ResponseId", 
                       "Group",
                       "Resp")
  
  # expose the table under the question's column name for later code
  assign(colnames(responses2)[i], temp3)
  
  ggSumList[[i - 2]] <- temp3
  
}


# # Chart setup ####

# Clean values in responses
Q6$Resp <- gsub("1990 -1994", "1990-1994", Q6$Resp)

# Some range labels arrive as date-like text (e.g. "5-Jan" for "1-5"),
# presumably mangled by a spreadsheet before export -- TODO confirm.
# Map each one back to its range label.
q10_fixes <- c("5-Jan" = "1-5", "10-Jun" = "6-10", "25-Nov" = "11-25")
for (bad_label in names(q10_fixes)) {
  Q10$Resp <- gsub(bad_label, q10_fixes[[bad_label]], Q10$Resp)
}

Q14$Resp <- gsub(" \n", ",", Q14$Resp)

# Q18 and Q19 need the same two replacements.
q18_q19_fixes <- c("3-Jan" = "1-3", "9-Apr" = "4-9")
for (bad_label in names(q18_q19_fixes)) {
  Q18$Resp <- gsub(bad_label, q18_q19_fixes[[bad_label]], Q18$Resp)
  Q19$Resp <- gsub(bad_label, q18_q19_fixes[[bad_label]], Q19$Resp)
}

# Re-order values in responses: ordered factors fix the sequence of the
# categories instead of leaving them in alphabetical order.
Q6$Resp <- ordered(Q6$Resp,
                   levels = c("Before 1990", "1990-1994", "1995-1999",
                              "2000-2004", "2005-2009", "2010-2014",
                              "2015-2019"))

Q10$Resp <- ordered(Q10$Resp,
                    levels = c("0", "1-5", "6-10", "11-25",
                               "26-75", "76-150", "151+"))

# Q18 and Q19 share one set of levels.
q18_q19_levels <- c("<1", "1-3", "4-9", "10+")
Q18$Resp <- ordered(Q18$Resp, levels = q18_q19_levels)
Q19$Resp <- ordered(Q19$Resp, levels = q18_q19_levels)


```

## Charts {#chartsanchor}
```{r echo=FALSE} 
# Generate graphs 
#
# Draw one bar chart per question: counts of each response value, with one
# facet row per respondent Group. Questions with no responses and Q14 are
# skipped.

# #  If need to cut out columns with non-categ. data
# #  https://stackoverflow.com/questions/33962630/knitr-r-grouping-multiple-plots-in-to-html

# NOTE(review): htmlwidgets such as ggplotly() output printed inside a loop
# may not render in knitted output (here a github_document) -- confirm.

# seq_along() yields no iterations for an empty list, whereas 1:NROW() would
# run with i = 1 and i = 0.
for (i in seq_along(ggSumList)) {
  
  question_id <- colnames(responses2)[i + 2]
  
  # Look the table up by name rather than using ggSumList[[i]]: the named
  # copies (Q6, Q10, ...) carry the cleaned and re-ordered values.
  Qresp <- get(question_id)
  
  # Only generate a chart if number of rows > 0, and never for Q14
  if (NROW(Qresp) > 0 && !grepl("Q14", question_id)) {
    
    question_chart <- ggplot(data = Qresp,
                             aes(x = Resp,
                                 fill = Resp)) +
      geom_bar(position = "dodge") +
      # column 2 of every per-question table is Group
      facet_grid(Group ~ .) +
      labs(title = question_id,
           subtitle = str_wrap(questionKey2[3, question_id]),
           x = "", y = "count") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
            plot.title = element_text(size = 20),
            legend.position = "none")
    
    print(ggplotly(question_chart))
    
  }
  
}

```