|
| 1 | +#!/bin/env Rscript |
| 2 | +prevaluation_file <- "survey_start.csv" |
| 3 | +evaluation_file <- "survey_end.csv" |
| 4 | +alpha_value <- 0.05 |
| 5 | + |
| 6 | +testthat::expect_true(file.exists(prevaluation_file)) |
| 7 | +testthat::expect_true(file.exists(evaluation_file)) |
| 8 | + |
| 9 | +t_pre_raw <- readr::read_csv(prevaluation_file, show_col_types = FALSE) |
| 10 | +t_post_raw <- readr::read_csv(evaluation_file, show_col_types = FALSE) |
| 11 | + |
| 12 | +other_feedback_post <- t_post_raw$`Any other feedback?` |
| 13 | + |
| 14 | +t_post_raw$`Any other feedback?` <- NULL |
| 15 | +t_pre_raw$Timestamp <- NULL |
| 16 | +t_post_raw$Timestamp <- NULL |
| 17 | +names(t_pre_raw) <- names(t_post_raw) |
| 18 | +testthat::expect_true(all(names(t_pre_raw) == names(t_post_raw))) |
| 19 | + |
| 20 | +#' Shorten the names of the columns |
| 21 | +shorten_col_names <- function(t) { |
| 22 | + questions <- stringr::str_remove( |
| 23 | + stringr::str_remove( |
| 24 | + names(t), |
| 25 | + "Give you confidence levels of the following statements below:\n \\["), |
| 26 | + "\\]" |
| 27 | + ) |
| 28 | + |
| 29 | + names(t) <- questions |
| 30 | + t |
| 31 | +} |
| 32 | + |
| 33 | +#' Convert the table to tidy format |
| 34 | +#' Add a columns 'i' for the index of an individual |
| 35 | +tidy_table <- function(t = t_pre) { |
| 36 | + t$i <- seq(1, nrow(t)) |
| 37 | + t_tidy <- tidyr::pivot_longer(t, cols = starts_with("I", ignore.case = FALSE)) |
| 38 | + names(t_tidy) <- c("i", "question", "answer") |
| 39 | + t_tidy |
| 40 | +} |
| 41 | + |
| 42 | +t_pre_untidy <- shorten_col_names(t_pre_raw) |
| 43 | +t_post_untidy <- shorten_col_names(t_post_raw) |
| 44 | +testthat::expect_true(all(names(t_pre_untidy) == names(t_post_untidy))) |
| 45 | + |
| 46 | +t_pre <- tidy_table(t_pre_untidy) |
| 47 | +t_post <- tidy_table(t_post_untidy) |
| 48 | +t_pre$when <- "pre" |
| 49 | +t_post$when <- "post" |
| 50 | +t <- dplyr::bind_rows(t_pre, t_post) |
| 51 | +t$when <- as.factor(t$when) |
| 52 | + |
| 53 | +plot_histrogram <- function(t_tidy) { |
| 54 | + |
| 55 | + n_individuals <- length(unique(t_tidy$i)) |
| 56 | + n_ratings <- length(t_tidy$answer[!is.na(t_tidy$answer)]) |
| 57 | + mean_confidence <- mean(t_tidy$answer[!is.na(t_tidy$answer)]) |
| 58 | + |
| 59 | + ggplot2::ggplot(t_tidy, ggplot2::aes(x = answer)) + |
| 60 | + ggplot2::geom_density() + |
| 61 | + ggplot2::labs( |
| 62 | + title = "All confidences", |
| 63 | + caption = paste0( |
| 64 | + "#individuals: ", n_individuals, ". ", |
| 65 | + "#ratings: ", n_ratings, ". ", |
| 66 | + "Mean confidence: ", round(mean_confidence, digits = 2) |
| 67 | + ) |
| 68 | + ) |
| 69 | + |
| 70 | +} |
| 71 | + |
| 72 | +plot_histrogram(t_pre) |
| 73 | +plot_histrogram(t_post) |
| 74 | + |
| 75 | +mean_pre <- mean(t[t$when == "pre", ]$answer, na.rm = TRUE) |
| 76 | +mean_post <- mean(t[t$when == "post", ]$answer, na.rm = TRUE) |
| 77 | +ks_test <- wilcox.test(t[t$when == "pre", ]$answer, t[t$when == "post", ]$answer) |
| 78 | +ks_test_p_value <- ks_test$p.value |
| 79 | +ks_test_is_different <- ks_test$p.value < alpha_value |
| 80 | + |
| 81 | +ggplot2::ggplot(t, ggplot2::aes(x = answer, fill = when)) + |
| 82 | + ggplot2::geom_density(alpha = 0.5) + |
| 83 | + ggplot2::geom_vline(xintercept = mean_pre, color = "skyblue", lty = "dashed") + |
| 84 | + ggplot2::geom_vline(xintercept = mean_post, color = "salmon", lty = "dashed") + |
| 85 | + ggplot2::labs( |
| 86 | + title = "All confidences", |
| 87 | + caption = paste0( |
| 88 | + "Mean pre: ", format(mean_pre, digits = 2), ", ", |
| 89 | + "Mean post: ", format(mean_post, digits = 2), "\n", |
| 90 | + "p value from KS test: ", format(ks_test_p_value, digits = 2), ", ", |
| 91 | + "alpha value: ", alpha_value, ", ", |
| 92 | + "different: ", ks_test_is_different |
| 93 | + ) |
| 94 | + ) |
| 95 | + |
| 96 | +ggplot2::ggsave(filename = "all_confidences_pre_post.png", width = 6, height = 2) |
| 97 | + |
| 98 | +ggplot2::ggplot(t, ggplot2::aes(x = answer, fill = when)) + |
| 99 | + ggplot2::geom_density(alpha = 0.5) + |
| 100 | + ggplot2::facet_grid(rows = "question", scales = "free_y") + |
| 101 | + ggplot2::theme( |
| 102 | + strip.text.y = ggplot2::element_text(angle = 0), |
| 103 | + legend.position = "none" |
| 104 | + ) + |
| 105 | + ggplot2::labs( |
| 106 | + title = "Confidences per question" |
| 107 | + ) |
| 108 | + |
| 109 | +ggplot2::ggsave(filename = "confidences_per_question_pre_post.png", width = 6, height = 7) |
| 110 | + |
| 111 | +names(t) |
| 112 | + |
| 113 | +ggplot2::ggplot( |
| 114 | + t, |
| 115 | + ggplot2::aes(x = question, y = answer, fill = when)) + |
| 116 | + ggplot2::geom_boxplot(position = "dodge") + |
| 117 | + ggplot2::theme( |
| 118 | + axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), |
| 119 | + legend.position = "none" |
| 120 | + ) + |
| 121 | + ggplot2::labs( |
| 122 | + title = "Confidences per question" |
| 123 | + ) |
| 124 | +ggplot2::ggsave(filename = "confidences_per_question_boxplot_pre_post.png", width = 6, height = 7) |
| 125 | + |
| 126 | +# Get the average |
| 127 | +t_averages <- t |> dplyr::group_by(question, when) |> dplyr::summarise(mean = mean(answer)) |
| 128 | +ggplot2::ggplot( |
| 129 | + t_averages, |
| 130 | + ggplot2::aes(x = question, y = mean, fill = when)) + |
| 131 | + ggplot2::geom_col(position = "dodge") + |
| 132 | + ggplot2::theme( |
| 133 | + axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), |
| 134 | + legend.position = "none" |
| 135 | + ) + |
| 136 | + ggplot2::labs( |
| 137 | + title = "Confidences per question" |
| 138 | + ) |
| 139 | +ggplot2::ggsave(filename = "average_confidences_per_question_pre_post.png", width = 6, height = 7) |
| 140 | + |
| 141 | +# Per question, has the distribution changed? |
| 142 | +t_stats <- tibble::tibble(question = unique(t$question), mean_pre = NA, mean_post = NA, p_value = NA, different = NA) |
| 143 | +for (question in unique(t$question)) { |
| 144 | + pre_values <- t[t$question == question & t$when == "pre", ]$answer |
| 145 | + post_values <- t[t$question == question & t$when == "post", ]$answer |
| 146 | + p <- wilcox.test(pre_values, post_values) |
| 147 | + t_stats[t_stats$question == question, ]$mean_pre <- mean(pre_values, na.rm = TRUE) |
| 148 | + t_stats[t_stats$question == question, ]$mean_post <- mean(post_values, na.rm = TRUE) |
| 149 | + t_stats[t_stats$question == question, ]$p_value <- p$p.value |
| 150 | + t_stats[t_stats$question == question, ]$different <- p$p.value < alpha_value |
| 151 | +} |
| 152 | +t_stats |
| 153 | + |
| 154 | +k <- knitr::kable(t_stats) |
| 155 | +readr::write_lines(k, "stats.md") |
0 commit comments