datashield · StuartWheater · Oct 27, 2025 · Oct 27, 2025 · Nov 2, 2025 · Nov 3, 2025
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -22,7 +22,7 @@ jobs:
               sudo apt-get install -y r-base-core cmake
       - run:
           command: |
-              sudo apt-get install -y libxml2-dev
+              sudo apt-get install -y libxml2-dev libuv1-dev
       - run:
           command: |
               echo "options(Ncpus=4)" >> ~/.Rprofile

diff --git a/.github/workflows/dsBase_test_suite.yaml b/.github/workflows/dsBase_test_suite.yaml
@@ -15,7 +15,6 @@ on:
   push:
   schedule:
     - cron: '0 0 * * 0'   # Weekly
-    - cron: '0 1 * * *'   # Nightly
 
 jobs:
   dsBase_test_suite:
@@ -153,28 +152,30 @@ jobs:
           echo "branch:${{ env.BRANCH_NAME }}" > ${{ env.WORKFLOW_ID }}.txt
           echo "os:$(lsb_release -ds)" >> ${{ env.WORKFLOW_ID }}.txt
           echo "R:$(R --version | head -n1)" >> ${{ env.WORKFLOW_ID }}.txt
+          Rscript --vanilla -e 'sessionInfo()' >> session_info_${{ env.WORKFLOW_ID }}.txt
         working-directory: dsBase/logs
 
       - name: Parse results from testthat and covr
         run: |
-          Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/
+          Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/ logs/ https://github.com/datashield/${{ env.PROJECT_NAME }}/blob/${{ env.BRANCH_NAME }} '[^-:.]+' '(?<=::)[^:]+(?=::)'
         working-directory: dsBase
+        env:
+          PROJECT_NAME: ${{ env.PROJECT_NAME }}
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
 
       - name: Render report
         run: |
           cd testStatus
 
           mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
-          mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
           mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
 
           # Copy logs to new logs directory location
           cp -rv ../${{ env.PROJECT_NAME }}/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
           cp -rv ../${{ env.PROJECT_NAME }}/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
 
           R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
-          mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
-          cp -r new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest
+          mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/index.html
 
         env:
           PROJECT_NAME: ${{ env.PROJECT_NAME }}

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -5,7 +5,7 @@ Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a
     been designed to only share non disclosive summary statistics, with built in automated output
     checking based on statistical disclosure control. With data sites setting the threshold values for
     the automated output checks. For more details, see 'citation("dsBase")'.
-Version: 6.3.4
+Version: 6.3.6.9000
 Authors@R: c(person(given = "Paul",
                     family = "Burton",
                     role = c("aut"),
@@ -65,6 +65,9 @@ Imports:
     stringr,
     lme4,
     dplyr,
+    tibble,
+    purrr,
+    tidyselect,
     reshape2,
     polycor (>= 0.8),
     splines,
@@ -75,6 +78,6 @@ Imports:
 Suggests:
     spelling,
     testthat
-RoxygenNote: 7.3.3
+RoxygenNote: 8.0.0
 Encoding: UTF-8
 Language: en-GB
diff --git a/NAMESPACE b/NAMESPACE
@@ -41,7 +41,12 @@ export(dmtC2SDS)
 export(elsplineDS)
 export(extractQuantilesDS1)
 export(extractQuantilesDS2)
+export(fixClassDS)
+export(fixColsDS)
+export(fixLevelsDS)
 export(gamlssDS)
+export(getAllLevelsDS)
+export(getClassAllColsDS)
 export(getWGSRDS)
 export(glmDS1)
 export(glmDS2)
@@ -82,6 +87,7 @@ export(matrixDimnamesDS)
 export(matrixInvertDS)
 export(matrixMultDS)
 export(matrixTransposeDS)
+export(mdPatternDS)
 export(meanDS)
 export(meanSdGpDS)
 export(mergeDS)
@@ -138,5 +144,15 @@ import(dplyr)
 import(gamlss)
 import(gamlss.dist)
 import(mice)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,across)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
 importFrom(gamlss.dist,pST3)
 importFrom(gamlss.dist,qST3)
+importFrom(purrr,imap)
+importFrom(purrr,map)
+importFrom(purrr,set_names)
+importFrom(tibble,as_tibble)
+importFrom(tidyselect,all_of)
+importFrom(tidyselect,peek_vars)
diff --git a/R/glmSLMADS.assign.R b/R/glmSLMADS.assign.R
@@ -18,40 +18,25 @@
 #' @export
 glmSLMADS.assign <- function(formula, family, offsetName, weightsName, dataName){
 
-#############################################################
-#MODULE 1: CAPTURE THE nfilter SETTINGS                     #
-thr <- dsBase::listDisclosureSettingsDS()                   #
-nfilter.tab <- as.numeric(thr$nfilter.tab)                  #
-nfilter.glm <- as.numeric(thr$nfilter.glm)                  #
-#nfilter.subset<-as.numeric(thr$nfilter.subset)             #
-#nfilter.string<-as.numeric(thr$nfilter.string)             #
-#############################################################
+  # Convert transmittable text for special link-variance combinations back to full representation
+  if(family=="quasigamma.link_log")
+  {family<-"quasi(link=log,variance=mu^2)"}
 
-########################################
-############
-#Convert transmitable text for special link variance combinations back to full representation
-if(family=="quasigamma.link_log")
-{family<-"quasi(link=log,variance=mu^2)"}
+  if(family=="Gamma.link_log")
+  {family<-"Gamma(link=log)"}
 
-if(family=="Gamma.link_log")
-{family<-"Gamma(link=log)"}
-#############
+  # Correctly name offset, weights and data objects in function call
+  # (to allow glmPredict to work correctly later)
+  calltext <- paste0("mg<-glm(formula,family=",family,",offset=",
+             offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")
 
-#Activate family object (this may not be necessary as character string may already be OK
-#but just checking
-final.family.object<-eval(parse(text=family))
+  eval(parse(text=calltext))
 
+  # Update the call object so it records the actual formula instead of the symbol 'formula'
+  mg$call$formula <- formula
 
-#Correctly name offset, weights and data objects in function call
-#(to allow glmPredict to work correctly later)
-calltext<-paste0("mg<-glm(formula,family=",family,",offset=",
-           offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")
-
-eval(parse(text=calltext))
-
-return(mg)		
+  return(mg)
 
 }
-
 # ASSIGN FUNCTION
 # glmSLMADS.assign
diff --git a/R/mdPatternDS.R b/R/mdPatternDS.R
@@ -0,0 +1,121 @@
+#'
+#' @title Missing data pattern with disclosure control
+#' @description This function is a serverside aggregate function that computes the 
+#' missing data pattern using mice::md.pattern and applies disclosure control to
+#' prevent revealing small cell counts.
+#' @details This function calls the mice::md.pattern function to generate a matrix
+#' showing the missing data patterns in the input data. To ensure disclosure control,
+#' any pattern counts that are below the threshold (nfilter.tab, default=3) are
+#' suppressed.
+#'
+#' \strong{Suppression Method:}
+#'
+#' When a pattern count is below threshold:
+#' - Row name is changed to "suppressed(<N>)" where N is the threshold
+#' - All pattern values in that row are set to NA
+#' - Summary row is also set to NA (prevents back-calculation)
+#'
+#' \strong{Output Matrix Structure:}
+#'
+#' - Rows represent different missing data patterns (plus a summary row at the bottom)
+#' - Row names contain pattern counts (or "suppressed(<N>)" for invalid patterns)
+#' - Columns show 1 if variable is observed, 0 if missing
+#' - Last column shows total number of missing values per pattern
+#' - Last row shows total number of missing values per variable
+#'
+#' \strong{Note for Pooling:}
+#'
+#' When this function is called from ds.mdPattern with type='combine', suppressed
+#' patterns are excluded from pooling to prevent disclosure through subtraction.
+#' This means pooled counts may underestimate the true total when patterns are
+#' suppressed in some studies.
+#'
+#' @param x a character string specifying the name of a data frame or matrix
+#' containing the data to analyze for missing patterns.
+#' @return A list containing:
+#' \item{pattern}{The missing data pattern matrix with disclosure control applied}
+#' \item{valid}{Logical indicating if all patterns meet disclosure requirements}
+#' \item{message}{A message describing the validity status}
+#' @author Xavier Escribà montagut for DataSHIELD Development Team
+#' @import mice
+#' @export
+#'
+mdPatternDS <- function(x){
+
+  # Capture the disclosure threshold (nfilter.tab) from the server settings
+  thr <- dsBase::listDisclosureSettingsDS()
+  nfilter.tab <- as.numeric(thr$nfilter.tab)
+
+  # Fail closed: with a missing or NA threshold the comparison further down
+  # would select no rows and small counts would be returned unsuppressed
+  if(length(nfilter.tab) != 1 || is.na(nfilter.tab)){
+    stop("The disclosure setting 'nfilter.tab' is not available on the server",
+         call. = FALSE)
+  }
+
+  # Resolve the object named by 'x' in the calling environment.
+  # NOTE(review): eval(parse()) runs client-supplied text; presumably the
+  # DataSHIELD parser restricts what 'x' may contain -- confirm.
+  x.val <- tryCatch(
+    {
+      eval(parse(text = x), envir = parent.frame())
+    },
+    error = function(e) {
+      stop(paste0("Object '", x, "' does not exist on the server"), call. = FALSE)
+    }
+  )
+
+  # Only data frames and matrices are accepted
+  typ <- class(x.val)
+  if(!any(c("data.frame", "matrix") %in% typ)){
+    stop(paste0("The input object must be of type 'data.frame' or 'matrix'. Current type: ",
+                paste(typ, collapse = ", ")), call. = FALSE)
+  }
+
+  # Compute the pattern matrix without drawing the plot. Row names hold the
+  # pattern counts; the last row holds the per-variable missing totals.
+  pattern <- mice::md.pattern(x.val, plot = FALSE)
+
+  n_patterns <- nrow(pattern) - 1  # exclude the summary row
+  invalid_idx <- integer(0)
+
+  if(n_patterns > 0){
+    # Patterns that occur, but fewer than nfilter.tab times, are disclosive
+    pattern_counts <- as.numeric(rownames(pattern)[seq_len(n_patterns)])
+    invalid_idx <- which(pattern_counts > 0 & pattern_counts < nfilter.tab)
+  }
+
+  all_valid <- length(invalid_idx) == 0
+
+  if(!all_valid){
+    # Mask every disclosive pattern in one step: relabel its row name as
+    # "suppressed(<N>)" and set all of its cells to NA
+    rownames(pattern)[invalid_idx] <- paste0("suppressed(<", nfilter.tab, ")")
+    pattern[invalid_idx, ] <- NA
+
+    # Set the summary row to NA as well, so the suppressed counts cannot be
+    # back-calculated from the per-variable totals
+    pattern[nrow(pattern), ] <- NA
+  }
+
+  # Use if/else rather than ifelse(): the condition is a single logical value
+  msg <- if(all_valid){
+    "Valid: all pattern counts meet disclosure requirements"
+  } else {
+    paste0("Invalid: some pattern counts below threshold (",
+           nfilter.tab, ") have been suppressed")
+  }
+
+  # Return the (possibly masked) pattern with its validity information:
+  #   pattern - matrix from mice::md.pattern after suppression
+  #   valid   - TRUE when no pattern count fell below the threshold
+  #   message - human-readable description of the validity status
+  return(list(
+    pattern = pattern,
+    valid = all_valid,
+    message = msg
+  ))
+}
+
+#AGGREGATE FUNCTION
+# mdPatternDS