Microdata Aggregation Tool

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 1800

library(shiny)
library(haven)
library(data.table)

max_browser_file_size_mb <- 350
max_browser_file_size_bytes <- max_browser_file_size_mb * 1024^2

options(shiny.maxRequestSize = max_browser_file_size_bytes)

# Work around a Chromium/Shinylive download bug where the browser saves the
# fallback HTML page instead of the generated file.
downloadButton <- function(...) {
  tag <- shiny::downloadButton(...)
  tag$attribs$download <- NULL
  tag
}

allowed_extensions <- c("sav", "zsav", "sas7bdat", "xpt", "dta", "rdata", "rda")
equal_weight_choice <- "__uis_equal_weight__"
equal_weight_column <- "uis_equal_weight"

field_spec <- function(id, label, group, multiple = FALSE, required = FALSE, role = "dimension") {
  list(
    id = id,
    label = label,
    group = group,
    multiple = multiple,
    required = required,
    role = role
  )
}

field_groups <- list(
  list(
    id = "age",
    title = "Age variables",
    fields = list(
      field_spec("age_single_years", "Age in single years", "age", FALSE, TRUE),
      field_spec("age_school_year_start", "Age at the beginning of the school year", "age"),
      field_spec("year_of_birth", "Year of birth", "age"),
      field_spec("month_of_birth", "Month of birth", "age")
    )
  ),
  list(
    id = "attendance",
    title = "Attendance variables",
    fields = list(
      field_spec(
        "attendance_current_grade",
        "Current level and/or grade of attendance",
        "attendance",
        multiple = TRUE
      ),
      field_spec(
        "attendance_nonformal",
        "Participation in non-formal education or training",
        "attendance",
        multiple = TRUE
      )
    )
  ),
  list(
    id = "attainment",
    title = "Attainment variables",
    fields = list(
      field_spec(
        "attainment_highest_level",
        "Highest formal education level and/or grade attained",
        "attainment",
        multiple = TRUE
      ),
      field_spec(
        "attainment_highest_qualification",
        "Highest qualification attained",
        "attainment",
        multiple = TRUE
      )
    )
  ),
  list(
    id = "literacy",
    title = "Literacy",
    fields = list(
      field_spec(
        "literacy_basic",
        "Basic literacy or numeracy",
        "literacy",
        multiple = TRUE
      )
    )
  ),
  list(
    id = "socioeconomic",
    title = "Socioeconomic status",
    fields = list(
      field_spec("sex", "Sex", "socioeconomic"),
      field_spec("rural_urban", "Rural or urban", "socioeconomic"),
      field_spec("wealth_quantile", "Household wealth quantile", "socioeconomic"),
      field_spec("disability", "Disability", "socioeconomic", multiple = TRUE),
      field_spec("migration", "Migration", "socioeconomic", multiple = TRUE)
    )
  ),
  list(
    id = "technical",
    title = "Technical",
    fields = list(
      field_spec(
        "interview_date",
        "Interview date (day, month and year)",
        "technical",
        multiple = TRUE
      ),
      field_spec(
        "sample_weight",
        "Survey or sample weight (required when weighted)",
        "technical",
        required = TRUE,
        role = "weight"
      ),
      field_spec("population_weight", "Population weight", "technical", role = "weight")
    )
  )
)

field_specs <- unlist(lapply(field_groups, function(group) group$fields), recursive = FALSE)
dimension_ids <- vapply(field_specs[vapply(field_specs, function(spec) spec$role == "dimension", logical(1))], `[[`, character(1), "id")
technical_dimension_ids <- vapply(field_specs[vapply(field_specs, function(spec) spec$role == "dimension" && spec$group == "technical", logical(1))], `[[`, character(1), "id")
weight_ids <- vapply(field_specs[vapply(field_specs, function(spec) spec$role == "weight", logical(1))], `[[`, character(1), "id")

is_equal_weight_choice <- function(value) {
  length(value) == 1 && identical(as.character(value), equal_weight_choice)
}

format_file_size <- function(bytes) {
  paste0(format(round(bytes / 1024^2), big.mark = ","), " MB")
}

survey_reader <- function(path, filename) {
  ext <- tolower(tools::file_ext(filename))

  if (!(ext %in% allowed_extensions)) {
    stop(
      "Unsupported file format. Use one of: ",
      paste(allowed_extensions, collapse = ", "),
      call. = FALSE
    )
  }

  info <- list(source_object = NULL, note = NULL)

  data <- switch(
    ext,
    sav = haven::read_sav(path),
    zsav = haven::read_sav(path),
    sas7bdat = haven::read_sas(path),
    xpt = haven::read_xpt(path),
    dta = haven::read_dta(path),
    rdata = {
      loaded <- new.env(parent = emptyenv())
      obj_names <- load(path, envir = loaded)
      is_data <- vapply(
        obj_names,
        function(obj) is.data.frame(get(obj, envir = loaded)),
        logical(1)
      )
      if (!any(is_data)) {
        stop("No data frame object was found in the .RData file.", call. = FALSE)
      }
      chosen <- obj_names[is_data][1]
      if (sum(is_data) > 1) {
        info$note <- paste0(
          "Multiple data frames were found. Using the first one: ",
          chosen,
          "."
        )
      }
      info$source_object <- chosen
      get(chosen, envir = loaded)
    },
    rda = {
      loaded <- new.env(parent = emptyenv())
      obj_names <- load(path, envir = loaded)
      is_data <- vapply(
        obj_names,
        function(obj) is.data.frame(get(obj, envir = loaded)),
        logical(1)
      )
      if (!any(is_data)) {
        stop("No data frame object was found in the .rda file.", call. = FALSE)
      }
      chosen <- obj_names[is_data][1]
      if (sum(is_data) > 1) {
        info$note <- paste0(
          "Multiple data frames were found. Using the first one: ",
          chosen,
          "."
        )
      }
      info$source_object <- chosen
      get(chosen, envir = loaded)
    }
  )

  list(
    data = as.data.frame(data, stringsAsFactors = FALSE),
    ext = ext,
    filename = filename,
    source_object = info$source_object,
    note = info$note
  )
}

selected_field_values <- function(field_map, ids) {
  if (length(ids) == 0) {
    return(character(0))
  }

  values <- unlist(field_map[ids], use.names = FALSE)
  if (is.null(values)) {
    return(character(0))
  }

  values <- values[nzchar(values)]
  values[!values %in% equal_weight_choice]
}

is_labelled_column <- function(x) {
  inherits(x, "haven_labelled") || inherits(x, "labelled")
}

restore_column_metadata <- function(x, prototype) {
  if (inherits(prototype, "factor")) {
    if (is.ordered(prototype)) {
      return(ordered(x, levels = levels(prototype)))
    }
    return(factor(x, levels = levels(prototype)))
  }

  if (is_labelled_column(prototype)) {
    for (attr_name in c("label", "labels", "na_values", "na_range", "format.spss", "display_width")) {
      attr(x, attr_name) <- attr(prototype, attr_name, exact = TRUE)
    }
    class(x) <- class(prototype)
  }

  x
}

restore_group_metadata <- function(result, prototypes, vars) {
  for (var in vars) {
    data.table::set(result, j = var, value = restore_column_metadata(result[[var]], prototypes[[var]]))
  }
  result
}

aggregate_microdata <- function(data, field_map) {
  selected_dimensions <- selected_field_values(field_map, dimension_ids)
  selected_dimensions <- unique(selected_dimensions)
  technical_dimensions <- selected_field_values(field_map, technical_dimension_ids)
  k_dimensions <- setdiff(selected_dimensions, technical_dimensions)

  weight_var <- field_map$sample_weight
  pop_var <- field_map$population_weight
  equal_weight <- is_equal_weight_choice(weight_var)

  if (!nzchar(weight_var)) {
    stop("Select a survey or sample weight variable before aggregating.", call. = FALSE)
  }

  if (!equal_weight && !weight_var %in% names(data)) {
    stop("The selected survey or sample weight variable is not present in the file.", call. = FALSE)
  }

  missing_dimensions <- setdiff(selected_dimensions, names(data))
  if (length(missing_dimensions) > 0) {
    stop(
      "The following selected variables are not present in the file: ",
      paste(missing_dimensions, collapse = ", "),
      call. = FALSE
    )
  }

  group_vars <- unique(c(selected_dimensions, if (equal_weight) character(0) else weight_var, pop_var))
  group_vars <- group_vars[nzchar(group_vars)]
  prototypes <- data[selected_dimensions]
  working <- data[group_vars]
  data.table::setDT(working)

  weight_column <- weight_var
  if (equal_weight) {
    weight_column <- equal_weight_column
    while (weight_column %in% names(working)) {
      weight_column <- paste0(weight_column, "_")
    }
    working[, (weight_column) := 1]
  }

  if (!is.numeric(working[[weight_column]]) || is_labelled_column(working[[weight_column]])) {
    working[, (weight_column) := suppressWarnings(as.numeric(get(weight_column)))]
  }

  if (anyNA(working[[weight_column]])) {
    warning("Some survey or sample weights could not be converted to numeric and were treated as missing.")
  }

  if (nzchar(pop_var)) {
    if (!pop_var %in% names(working)) {
      stop("The selected population weight variable is not present in the file.", call. = FALSE)
    }
    if (!is.numeric(working[[pop_var]]) || is_labelled_column(working[[pop_var]])) {
      working[, (pop_var) := suppressWarnings(as.numeric(get(pop_var)))]
    }
  }

  if (length(selected_dimensions) == 0) {
    total_weight <- sum(working[[weight_column]], na.rm = TRUE)
    total_pop <- if (nzchar(pop_var)) sum(working[[pop_var]], na.rm = TRUE) else NULL
    result <- data.frame(
      k = nrow(working),
      survey_weight_sum = total_weight,
      stringsAsFactors = FALSE
    )
    if (!is.null(total_pop)) {
      result$population_weight_sum <- total_pop
    }
    return(result)
  }

  if (nzchar(pop_var)) {
    result <- working[
      ,
      .(
        survey_weight_sum = sum(get(weight_column), na.rm = TRUE),
        population_weight_sum = sum(get(pop_var), na.rm = TRUE)
      ),
      by = selected_dimensions
    ]
  } else {
    result <- working[
      ,
      .(survey_weight_sum = sum(get(weight_column), na.rm = TRUE)),
      by = selected_dimensions
    ]
  }

  if (length(k_dimensions) == 0) {
    result[, k := nrow(working)]
  } else {
    k_counts <- working[, .(k = .N), by = k_dimensions]
    result <- k_counts[result, on = k_dimensions]
  }

  result <- restore_group_metadata(result, prototypes, selected_dimensions)
  data.table::setcolorder(
    result,
    c(selected_dimensions, "k", "survey_weight_sum", if (nzchar(pop_var)) "population_weight_sum")
  )
  data.table::setDF(result)
  result
}

selected_variable_names <- function(field_map) {
  vars <- unlist(field_map, use.names = FALSE)
  vars <- vars[nzchar(vars)]
  vars <- vars[!vars %in% equal_weight_choice]
  unique(vars)
}

complete_observation_data <- function(data, field_map) {
  selected_vars <- selected_variable_names(field_map)

  if (length(selected_vars) == 0) {
    stop("Select at least one variable before downloading complete observations.", call. = FALSE)
  }

  missing_vars <- setdiff(selected_vars, names(data))
  if (length(missing_vars) > 0) {
    stop(
      "The following selected variables are not present in the file: ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  working <- data[selected_vars]
  keep <- stats::complete.cases(working)
  working[keep, , drop = FALSE]
}

download_extension <- function(ext) {
  if (identical(ext, "sas7bdat")) {
    return("xpt")
  }
  ext
}

write_aggregated_file <- function(data, file, ext) {
  export_object <- data
  target_ext <- download_extension(ext)

  switch(
    target_ext,
    sav = haven::write_sav(export_object, file),
    zsav = haven::write_sav(export_object, file, compress = "zsav"),
    dta = haven::write_dta(export_object, file),
    xpt = haven::write_xpt(export_object, file),
    rdata = {
      aggregated_data <- export_object
      save(aggregated_data, file = file)
    },
    rda = {
      aggregated_data <- export_object
      save(aggregated_data, file = file)
    },
    stop("Unsupported export format requested.", call. = FALSE)
  )
}

selector_ui <- function(spec) {
  input_id <- spec$id
  label <- if (isTRUE(spec$required)) paste0(spec$label, " *") else spec$label

  if (isTRUE(spec$multiple)) {
    selectizeInput(
      inputId = input_id,
      label = label,
      choices = character(0),
      selected = character(0),
      multiple = TRUE,
      options = list(placeholder = "Select one or more variables")
    )
  } else {
    selectizeInput(
      inputId = input_id,
      label = label,
      choices = c("Not available / not selected" = ""),
      selected = "",
      multiple = FALSE,
      options = list(placeholder = "Select a variable")
    )
  }
}

survey_weight_mode_ui <- function() {
  radioButtons(
    inputId = "survey_weight_mode",
    label = "Survey weights",
    choices = c(
      "Weighted" = "weighted",
      "Unweighted (e.g. census)" = "unweighted"
    ),
    selected = "weighted",
    inline = TRUE
  )
}

group_ui <- function(group) {
  div(
    class = "mapping-group",
    h4(group$title),
    if (identical(group$id, "technical")) survey_weight_mode_ui(),
    div(
      class = "selection-grid",
      lapply(group$fields, selector_ui)
    )
  )
}

ui <- fluidPage(
  tags$head(
    tags$style(HTML("
      body { background: #f5f7fb; color: #1d2939; }
      #title-block-header { display: none; }
      .app-shell { max-width: 1200px; margin: 0 auto; }
      .app-hero {
        background: linear-gradient(135deg, #0b5cab 0%, #0e7490 100%);
        color: white;
        padding: 1.35rem 1.5rem;
        border-radius: 18px;
        margin: 1rem 0 1.2rem;
        box-shadow: 0 16px 40px rgba(11, 92, 171, 0.18);
      }
      .app-hero h2 { margin-top: 0; color: white; }
      .app-hero ul {
        margin: .45rem 0 .9rem 1.25rem;
        padding: 0;
      }
      .app-hero li {
        font-size: 1.5rem;
        line-height: 1.55;
        margin-bottom: .4rem;
        color: rgba(255, 255, 255, 0.94);
      }
      .app-hero .small-text {
        color: rgba(255, 255, 255, 0.92);
        font-size: 1.14rem;
      }
      .card-block {
        background: white;
        border: 1px solid #d9e0ea;
        border-radius: 16px;
        padding: 1rem 1.1rem;
        box-shadow: 0 1px 2px rgba(16, 24, 40, 0.04);
        margin-bottom: 1rem;
      }
      .card-block h3, .card-block h4 { margin-top: 0; }
      .muted-note { color: #667085; margin-bottom: .8rem; }
      .mapping-group {
        border-top: 1px solid #e4e7ec;
        padding-top: .95rem;
        margin-top: .95rem;
      }
      .mapping-group:first-of-type {
        border-top: 0;
        padding-top: 0;
        margin-top: 0;
      }
      .summary-chip {
        display: inline-flex;
        align-items: center;
        gap: .4rem;
        border-radius: 999px;
        background: #edf4ff;
        color: #0b5cab;
        font-weight: 700;
        padding: .45rem .8rem;
        margin-right: .55rem;
        margin-bottom: .55rem;
      }
      .selection-grid {
        display: grid !important;
        grid-template-columns: repeat(2, minmax(0, 1fr));
        gap: .8rem 1rem;
      }
      .selection-grid > * {
        min-width: 0;
      }
      .selection-grid .shiny-input-container {
        width: 100%;
        margin-bottom: 0;
      }
      .progress-shell {
        margin-top: .8rem;
      }
      .progress-track {
        width: 100%;
        height: 12px;
        background: #e7edf5;
        border-radius: 999px;
        overflow: hidden;
      }
      .progress-fill {
        height: 100%;
        background: linear-gradient(90deg, #0b5cab 0%, #0e7490 100%);
        transition: width .25s ease;
      }
      .action-row {
        display: flex;
        gap: .8rem;
        align-items: center;
        flex-wrap: wrap;
        margin-top: .7rem;
      }
      .section-divider {
        height: 1px;
        background: #e4e7ec;
        margin: 1rem 0;
      }
      .small-text { color: #667085; font-size: .93rem; }
      .status-good { color: #067647; font-weight: 700; }
      .status-warn { color: #b42318; font-weight: 700; }
      .var-list {
        max-height: 260px;
        overflow: auto;
        border: 1px solid #e4e7ec;
        border-radius: 12px;
        padding: .75rem .9rem;
        background: #fbfcfe;
      }
      .var-list code {
        display: inline-block;
        margin: 0 .4rem .45rem 0;
        padding: .15rem .42rem;
        background: #eef4fb;
        border-radius: 8px;
        color: #0b5cab;
      }
      .table-wrap { overflow-x: auto; }
      @media (max-width: 700px) {
        .selection-grid { grid-template-columns: 1fr; }
      }
    "))
  ),
    div(
      class = "app-shell",
      div(
        class = "app-hero",
        h2("About"),
        tags$ul(
          tags$li("Upload one survey file, map the relevant variables, and generate an aggregated output, or complete extract of selected variables."),
          tags$li("The aggregated file sums survey weights by each unique combination of selected variables."),
          tags$li("This application runs entirely in the client's browser and no uploaded or created files are transfered to the UIS."),
          tags$li(
            paste(
              "Maximum file size for this browser-based aggregation tool:",
              format_file_size(max_browser_file_size_bytes),
              "Files near this size may still fail depending on browser and available memory. For larger files, use alternative statistical software to aggregate, submit the original microdata package, or contact the UIS."
            )
          )
      ),
      tags$div(
        class = "small-text",
        "Accepted file types: .sav, .zsav, .sas7bdat, .xpt, .dta, .RData, .rds"
      )
    ),
    div(
      class = "card-block",
      h3("1. Upload survey file"),
        p(
          class = "muted-note",
          "The file is processed in the browser. No server-side upload is required for this tool."
        ),
        div(id = "survey_file_client_status", class = "small-text"),
        fileInput(
          "survey_file",
          "Survey file",
          multiple = FALSE,
          accept = c(".sav", ".zsav", ".sas7bdat", ".xpt", ".dta", ".RData", ".rds")
        ),
        tags$script(HTML(sprintf("
          document.addEventListener('change', function(evt) {
            var input = evt.target;
            if (!input || input.id !== 'survey_file') return;

            var status = document.getElementById('survey_file_client_status');
            var file = input.files && input.files[0];
            if (!file) {
              if (status) {
                status.textContent = '';
                status.className = 'small-text';
              }
              return;
            }

            var maxBytes = %d;
            var maxLabel = '%s';
            var fileSize = Math.round(file.size / 1024 / 1024).toLocaleString() + ' MB';

            if (file.size > maxBytes) {
              input.value = '';
              evt.preventDefault();
              evt.stopImmediatePropagation();
              if (status) {
                status.textContent = 'This file is ' + fileSize + '. The browser-based aggregation tool supports files up to ' + maxLabel + '. Please use a smaller extract, aggregate with local statistical software, submit the original microdata package, or contact UIS.';
                status.className = 'small-text status-warn';
              }
              return;
            }

            if (status) {
              status.textContent = 'Selected file size: ' + fileSize + '.';
              status.className = 'small-text';
            }
          }, true);
        ", max_browser_file_size_bytes, format_file_size(max_browser_file_size_bytes)))),
        uiOutput("dataset_status"),
        uiOutput("dataset_summary"),
        h4("Variables available"),
        uiOutput("variable_inventory")
    ),
    div(
      class = "card-block",
      h3("2. Map variables"),
      p(
        class = "muted-note",
        "Select the variables that should define the aggregated rows. If a variable is not available in the survey, leave that field blank unless it is marked as required. * indicates a required field."
      ),
      lapply(field_groups, group_ui),
      div(class = "section-divider"),
      div(
        class = "action-row",
        actionButton("run_aggregation", "Aggregate data", class = "btn-primary"),
        actionButton("clear_mapping", "Clear selections")
      ),
      uiOutput("download_format_note")
    ),
    div(
      class = "card-block",
      h3("3. Output"),
      div(
        class = "action-row",
        downloadButton("download_output", "Download aggregated output"),
        downloadButton("download_complete_cases", "Download complete observations for selected variables")
      ),
      uiOutput("aggregation_progress_ui"),
      uiOutput("aggregation_status"),
      uiOutput("aggregation_summary"),
      uiOutput("k_caption"),
      div(class = "table-wrap", tableOutput("aggregation_preview"))
    )
  )
)

server <- function(input, output, session) {
  loaded_data <- reactiveVal(NULL)
  loaded_note <- reactiveVal(NULL)
  aggregation_result <- reactiveVal(NULL)
  output_message <- reactiveVal("No aggregation run yet.")
  aggregation_progress <- reactiveVal(list(value = 0, label = "Aggregation not started."))

  is_duplicate_status <- function(message, progress_label) {
    identical(message, progress_label) || identical(message, "No aggregation run yet.")
  }

  update_choices <- function(vars) {
    for (spec in field_specs) {
      if (isTRUE(spec$multiple)) {
        updateSelectizeInput(
          session,
          spec$id,
          choices = stats::setNames(vars, vars),
          selected = character(0),
          options = list(placeholder = "Select one or more variables"),
          server = FALSE
        )
      } else {
        choices <- c("Not available / not selected" = "", stats::setNames(vars, vars))

        updateSelectizeInput(
          session,
          spec$id,
          choices = choices,
          selected = "",
          options = list(placeholder = "Select a variable"),
          server = FALSE
        )
      }
    }
  }

  observeEvent(input$survey_file, {
    aggregation_result(NULL)
    output_message("No aggregation run yet.")
    aggregation_progress(list(value = 0, label = "Aggregation not started."))

    req(input$survey_file)

    loaded_note(NULL)

    tryCatch(
      {
        withProgress(message = "Processing survey file", value = 0, {
          incProgress(0.2, detail = "Reading the uploaded file")
          parsed <- survey_reader(input$survey_file$datapath, input$survey_file$name)

          incProgress(0.5, detail = "Preparing available variable list")
          loaded_data(parsed)
          loaded_note(parsed$note)
          vars <- names(parsed$data)
          update_choices(vars)

          incProgress(0.3, detail = "Ready")
        })
      },
      error = function(err) {
        loaded_data(NULL)
        update_choices(character(0))
        output_message(paste("File load failed:", conditionMessage(err)))
      }
    )
  })

  observeEvent(input$clear_mapping, {
    vars <- if (is.null(loaded_data())) character(0) else names(loaded_data()$data)
    update_choices(vars)
    updateRadioButtons(session, "survey_weight_mode", selected = "weighted")
    aggregation_result(NULL)
    aggregation_progress(list(value = 0, label = "Selections cleared."))
    output_message("Selections cleared.")
  })

  selected_map <- reactive({
    stats::setNames(
      lapply(field_specs, function(spec) {
        value <- input[[spec$id]]
        if (is.null(value)) {
          if (isTRUE(spec$multiple)) character(0) else ""
        } else {
          value
        }
      }),
      vapply(field_specs, `[[`, character(1), "id")
    )
  })

  current_mapping <- function() {
    mapping <- lapply(selected_map(), function(value) {
      if (length(value) == 0) character(0) else as.character(value)
    })

    if (identical(input$survey_weight_mode, "unweighted")) {
      mapping$sample_weight <- equal_weight_choice
    }

    mapping
  }

  observeEvent(selected_map(), {
    if (is.null(aggregation_result())) {
      return()
    }

    aggregation_result(NULL)
    aggregation_progress(list(value = 0, label = "Selections changed. Run aggregation again."))
    output_message("Selections changed. Run aggregation again to refresh the preview and downloadable files.")
  }, ignoreInit = TRUE)

  observeEvent(input$survey_weight_mode, {
    if (is.null(aggregation_result())) {
      return()
    }

    aggregation_result(NULL)
    aggregation_progress(list(value = 0, label = "Survey weight setting changed. Run aggregation again."))
    output_message("Survey weight setting changed. Run aggregation again to refresh the preview and downloadable files.")
  }, ignoreInit = TRUE)

  observeEvent(input$run_aggregation, {
    parsed <- loaded_data()
    if (is.null(parsed)) {
      output_message("Upload a survey file before running the aggregation.")
      aggregation_result(NULL)
      return()
    }

    mapping <- current_mapping()

    tryCatch(
      {
        withProgress(message = "Aggregating survey", value = 0, {
          warning_text <- NULL
          aggregation_progress(list(value = 15, label = "Validating variable selections"))
          incProgress(0.15, detail = "Validating variable selections")

          aggregated <- withCallingHandlers(
            {
              aggregation_progress(list(value = 55, label = "Summing survey weights"))
              incProgress(0.45, detail = "Summing survey weights")
              aggregate_microdata(parsed$data, mapping)
            },
            warning = function(wrn) {
              warning_text <<- conditionMessage(wrn)
              invokeRestart("muffleWarning")
            }
          )

          aggregation_progress(list(value = 85, label = "Filtering rows where k = 1"))
          incProgress(0.25, detail = "Filtering rows where k = 1")
          low_k_index <- which(aggregated$k == 1)
          low_k_rows <- length(low_k_index)
          low_k_preview <- aggregated[utils::head(low_k_index, 12), , drop = FALSE]

          aggregation_result(
            list(
              full = aggregated,
              low_k_rows = low_k_rows,
              low_k_preview = low_k_preview,
              warning = warning_text
            )
          )

          aggregation_progress(list(value = 100, label = "Aggregation and downloads ready."))
          incProgress(0.25, detail = "Ready")

          if (is.null(warning_text)) {
            output_message("Aggregation complete. Preview shows only rows where k = 1. Download files are prepared when requested.")
          } else {
            output_message(
              paste(
                "Aggregation complete. Preview shows only rows where k = 1. Download files are prepared when requested.",
                "Warning:",
                warning_text
              )
            )
          }
        })
      },
      error = function(err) {
        aggregation_result(NULL)
        aggregation_progress(list(value = 100, label = paste("Aggregation failed:", conditionMessage(err))))
        output_message(paste("Aggregation failed:", conditionMessage(err)))
      }
    )
  })

  output$aggregation_progress_ui <- renderUI({
    state <- aggregation_progress()
    div(
      class = "progress-shell",
      div(class = "small-text", state$label),
      div(
        class = "progress-track",
        div(class = "progress-fill", style = paste0("width:", state$value, "%;"))
      )
    )
  })

  output$dataset_status <- renderUI({
    parsed <- loaded_data()
    if (is.null(parsed)) {
      div(class = "small-text", "No file loaded.")
    } else {
      div(
        class = "status-good",
        paste("Loaded:", parsed$filename)
      )
    }
  })

  output$dataset_summary <- renderUI({
    parsed <- loaded_data()
    if (is.null(parsed)) {
      return(NULL)
    }

    summary_bits <- list(
      div(class = "summary-chip", paste("Rows:", format(nrow(parsed$data), big.mark = ","))),
      div(class = "summary-chip", paste("Variables:", length(names(parsed$data)))),
      div(class = "summary-chip", paste("Format:", parsed$ext))
    )

    if (!is.null(parsed$source_object)) {
      summary_bits <- c(
        summary_bits,
        list(div(class = "summary-chip", paste("R object:", parsed$source_object)))
      )
    }

    do.call(
      tagList,
      c(
        summary_bits,
        if (!is.null(loaded_note())) list(div(class = "small-text", loaded_note()))
      )
    )
  })

  output$variable_inventory <- renderUI({
    parsed <- loaded_data()
    if (is.null(parsed)) {
      return(div(class = "small-text", "Variables will appear after a file is loaded."))
    }

    vars <- names(parsed$data)
    div(class = "var-list", lapply(vars, tags$code))
  })

  output$aggregation_status <- renderUI({
    result <- aggregation_result()
    message <- output_message()
    progress_label <- aggregation_progress()$label

    if (is.null(result) && is_duplicate_status(message, progress_label)) {
      return(NULL)
    }

    if (is.null(result)) {
      div(class = "small-text", message)
    } else {
      div(class = "status-good", message)
    }
  })

  output$aggregation_summary <- renderUI({
    result <- aggregation_result()
    if (is.null(result)) {
      return(NULL)
    }

    total_rows <- nrow(result$full)
    low_k_rows <- result$low_k_rows

    tagList(
      div(class = "summary-chip", paste("Output rows:", format(total_rows, big.mark = ","))),
      div(class = "summary-chip", paste("Rows where k = 1:", format(low_k_rows, big.mark = ",")))
    )
  })

  output$k_caption <- renderUI({
    result <- aggregation_result()
    if (is.null(result)) {
      return(NULL)
    }

    notes <- list(
      div(
        class = "small-text",
        "k shows the number of unique records for the respective combination of variables, excluding technical variables. Rows based on very low numbers of observations, particularly in the case of full census extracts, have a higher risk of identification."
      )
    )

    if (result$low_k_rows == 0) {
      notes <- c(
        notes,
        list(
          div(
            class = "small-text status-good",
            "No aggregated rows are based on a single record."
          )
        )
      )
    }

    do.call(tagList, notes)
  })

  output$download_format_note <- renderUI({
    parsed <- loaded_data()
    if (is.null(parsed)) {
      return(NULL)
    }

    if (identical(parsed$ext, "sas7bdat")) {
      return(
        div(
          class = "small-text",
          "SAS .sas7bdat uploads are downloaded as .xpt because the browser runtime can read SAS7BDAT but cannot write it back directly."
        )
      )
    }

    div(
      class = "small-text",
      paste(
        "The download uses the uploaded format for the full aggregated output:",
        paste0(".", parsed$ext)
      )
    )
  })

  output$aggregation_preview <- renderTable({
    result <- aggregation_result()
    req(result)
    result$low_k_preview
  }, striped = TRUE, bordered = TRUE, hover = TRUE, spacing = "s")

  output$download_output <- downloadHandler(
    filename = function() {
      parsed <- loaded_data()
      stem <- if (is.null(parsed)) "microdata_aggregation" else tools::file_path_sans_ext(parsed$filename)
      export_ext <- if (is.null(parsed)) "csv" else download_extension(parsed$ext)
      paste0(stem, "_aggregated.", export_ext)
    },
    content = function(file) {
      parsed <- loaded_data()
      result <- aggregation_result()
      validate(need(!is.null(parsed) && !is.null(result), "Run the aggregation before downloading the output file."))
      write_aggregated_file(result$full, file, parsed$ext)
    }
  )

  output$download_complete_cases <- downloadHandler(
    filename = function() {
      parsed <- loaded_data()
      stem <- if (is.null(parsed)) "microdata_complete_observations" else tools::file_path_sans_ext(parsed$filename)
      export_ext <- if (is.null(parsed)) "csv" else download_extension(parsed$ext)
      paste0(stem, "_complete_selected_variables.", export_ext)
    },
    content = function(file) {
      parsed <- loaded_data()
      result <- aggregation_result()
      validate(need(!is.null(parsed) && !is.null(result), "Run the aggregation before downloading complete observations."))
      complete_data <- complete_observation_data(parsed$data, current_mapping())
      write_aggregated_file(complete_data, file, parsed$ext)
    }
  )
}

shinyApp(ui, server)