From 4d5fc18fc94d802952ad230f475d74f2a80f192a Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 8 Jun 2026 10:53:18 +0200 Subject: [PATCH 1/2] ci: speed up dependency install via Posit binary packages + cache --- armadillo_azure-pipelines.yml | 24 +++++++++++++++++++----- opal_azure-pipelines.yml | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/armadillo_azure-pipelines.yml b/armadillo_azure-pipelines.yml index 2fba0a0a..d0a7d096 100644 --- a/armadillo_azure-pipelines.yml +++ b/armadillo_azure-pipelines.yml @@ -122,6 +122,18 @@ jobs: condition: succeeded() + ##################################################################################### + # Restore the R package library from a previous run. Combined with the + # "install only missing packages" logic in the next step, a cache hit makes + # dependency installation near-instant. Bump the 'v1' token to rebuild the cache + # (e.g. when the package list below changes). + - task: Cache@2 + inputs: + key: 'rpkgs | "$(Agent.OS)" | v1' + path: '/usr/local/lib/R/site-library' + displayName: 'Restore cached R packages' + + ##################################################################################### # Install R and all the dependencies dsBaseClient requires. # If previous steps have failed then don't run. @@ -134,11 +146,13 @@ jobs: sudo apt-get install -qq libxml2-dev libcurl4-openssl-dev libssl-dev libgsl-dev libgit2-dev r-base -y sudo apt-get install -qq libharfbuzz-dev libfribidi-dev libmagick++-dev libudunits2-dev libuv1-dev -y - sudo R -q -e "install.packages(c('devtools','covr'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('fields','meta','metafor','ggplot2','gridExtra','data.table'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('DSI','DSOpal','DSLite'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('MolgenisAuth', 'MolgenisArmadillo', 'DSMolgenisArmadillo'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('DescTools','e1071'), dependencies=TRUE, repos='https://cloud.r-project.org')" + + # Use Posit Public Package Manager, which serves precompiled binary packages + # for this Ubuntu release instead of source - so dependencies are downloaded + # rather than compiled (the slow part). The HTTPUserAgent option is what makes + # the manager hand back binaries. The setdiff() only installs packages not + # already present, so a restored (cached) site-library is reused. + sudo R -q -e "options(repos=c(CRAN=paste0('https://packagemanager.posit.co/cran/__linux__/', system('lsb_release -cs', intern=TRUE), '/latest')), HTTPUserAgent=sprintf('R/%s R (%s)', getRversion(), paste(getRversion(), R.version\$platform, R.version\$arch, R.version\$os)), Ncpus=4); pkgs <- c('devtools','covr','fields','meta','metafor','ggplot2','gridExtra','data.table','DSI','DSOpal','DSLite','MolgenisAuth','MolgenisArmadillo','DSMolgenisArmadillo','DescTools','e1071'); to_install <- setdiff(pkgs, rownames(installed.packages())); if (length(to_install)) install.packages(to_install, dependencies=TRUE)" sudo R -q -e "library('devtools'); devtools::install_github(repo='datashield/dsDangerClient', ref='v6.3.4-dev', dependencies = TRUE)" diff --git a/opal_azure-pipelines.yml b/opal_azure-pipelines.yml index b541a390..f1b7dd69 100644 --- a/opal_azure-pipelines.yml +++ b/opal_azure-pipelines.yml @@ -103,6 +103,18 @@ jobs: condition: succeeded() + ##################################################################################### + # Restore the R package library from a previous run. Combined with the + # "install only missing packages" logic in the next step, a cache hit makes + # dependency installation near-instant. Bump the 'v1' token to rebuild the cache + # (e.g. when the package list below changes). + - task: Cache@2 + inputs: + key: 'rpkgs | "$(Agent.OS)" | opal | v1' + path: '/usr/local/lib/R/site-library' + displayName: 'Restore cached R packages' + + ##################################################################################### # Install R and all the dependencies dsBaseClient requires. # If previous steps have failed then don't run. @@ -115,12 +127,13 @@ jobs: sudo apt-get install -qq libxml2-dev libcurl4-openssl-dev libssl-dev libgsl-dev libgit2-dev r-base -y sudo apt-get install -qq libharfbuzz-dev libfribidi-dev libmagick++-dev libudunits2-dev libuv1-dev -y - sudo R -q -e "install.packages(c('curl','httr'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('devtools','covr'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('fields','meta','metafor','ggplot2','gridExtra','data.table'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('DSI','DSOpal','DSLite'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('MolgenisAuth', 'MolgenisArmadillo', 'DSMolgenisArmadillo'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "install.packages(c('DescTools','e1071'), dependencies=TRUE, repos='https://cloud.r-project.org')" + + # Use Posit Public Package Manager, which serves precompiled binary packages + # for this Ubuntu release instead of source - so dependencies are downloaded + # rather than compiled (the slow part). The HTTPUserAgent option is what makes + # the manager hand back binaries. The setdiff() only installs packages not + # already present, so a restored (cached) site-library is reused. + sudo R -q -e "options(repos=c(CRAN=paste0('https://packagemanager.posit.co/cran/__linux__/', system('lsb_release -cs', intern=TRUE), '/latest')), HTTPUserAgent=sprintf('R/%s R (%s)', getRversion(), paste(getRversion(), R.version\$platform, R.version\$arch, R.version\$os)), Ncpus=4); pkgs <- c('curl','httr','devtools','covr','fields','meta','metafor','ggplot2','gridExtra','data.table','DSI','DSOpal','DSLite','MolgenisAuth','MolgenisArmadillo','DSMolgenisArmadillo','DescTools','e1071'); to_install <- setdiff(pkgs, rownames(installed.packages())); if (length(to_install)) install.packages(to_install, dependencies=TRUE)" sudo R -q -e "library('devtools'); devtools::install_github(repo='datashield/dsDangerClient', ref='6.3.4', dependencies = TRUE)" From b900437d91b7a7d59ede8c6324f590ad5f1e0e93 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 8 Jun 2026 12:49:17 +0200 Subject: [PATCH 2/2] test: add stay_logged_in to reuse one login across test files --- .../init_testing_datasets.R | 95 +++++++++++++++++++ .../connection_to_datasets/login_details.R | 12 ++- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/tests/testthat/connection_to_datasets/init_testing_datasets.R b/tests/testthat/connection_to_datasets/init_testing_datasets.R index 52fb7fcf..9035726f 100644 --- a/tests/testthat/connection_to_datasets/init_testing_datasets.R +++ b/tests/testthat/connection_to_datasets/init_testing_datasets.R @@ -214,20 +214,115 @@ init.testing.dataset.factor_levels.1 <- function() log.in.data.server <- function() { # ds.test_env$connections <- datashield.login(logins=ds.test_env$login.data, assign=TRUE,variables=ds.test_env$stats.var, opts = getOption("datashield.opts", list(ssl_verifyhost=0, ssl_verifypeer=0))) + + if (isTRUE(ds.test_env$stay_logged_in) && !is.null(ds.test_env$connections)) + { + # Try to reuse the existing login when the new test file targets the same + # servers. clear.data.server() doubles as a liveness probe: if the cached + # connection is stale (e.g. logged out, or restored dead from a saved + # workspace) the ds.ls() inside it errors, and we drop through to a fresh + # login. A successful clear means the connection is good, so we re-assign the + # data table without re-authenticating. The assign itself is NOT guarded, so a + # genuine assignment error still surfaces. + same.servers <- setequal(names(ds.test_env$connections), as.character(ds.test_env$login.data$server)) + cleared <- same.servers && tryCatch({ clear.data.server(); TRUE }, error = function(e) FALSE) + if (cleared) + { + assign.current.dataset() + return(invisible(NULL)) + } + # Different servers, or a stale connection: close it (best effort) and log in afresh. + try(datashield.logout(ds.test_env$connections), silent = TRUE) + ds.test_env$connections <- NULL + } + ds.test_env$connections <- datashield.login(logins=ds.test_env$login.data, assign=TRUE,variables=ds.test_env$stats.var, opts = getOption("datashield.opts")) + + if (isTRUE(ds.test_env$stay_logged_in)) + { + # Arrange for a single logout once the whole test run has finished. + register.teardown.logout() + } } log.out.data.server <- function() { + if (isTRUE(ds.test_env$stay_logged_in)) + { + # Stay connected for the next test file. Clearing the server-side environment + # happens in the reuse path of log.in.data.server() (where the connection has + # just been confirmed live); the real logout happens once at end of the run. + return(invisible(NULL)) + } + if (!is.null(ds.test_env) && !is.null(ds.test_env$connections)) { datashield.logout(ds.test_env$connections) + # Reflect that the handle is now dead, so a later run cannot inherit and try + # to reuse it (which would fail with an authorization error). + ds.test_env$connections <- NULL } rm(list = ls()) gc() } + +# Remove every object from the server-side environment(s) on the current login. +# ds.ls() returns a per-server list whose $objects.found holds the object names; +# ds.rm() takes a vector of names in one (aggregate) call and ignores ones that +# are already absent, so this is safe across servers that hold different objects. +clear.data.server <- function() +{ + object.names <- unique(unlist(lapply(ds.ls(), function(x) x$objects.found))) + if (length(object.names) > 0) + { + ds.rm(object.names) + } +} + + +# (Re)assign the data table 'D' for the current dataset onto the existing +# connection, without re-authenticating. Mirrors datashield.login(assign=TRUE), +# using the table/variables prepared by the init.* function for this test file. +assign.current.dataset <- function() +{ + tables <- stats::setNames(as.character(ds.test_env$login.data$table), as.character(ds.test_env$login.data$server)) + DSI::datashield.assign.table(ds.test_env$connections, "D", tables, variables = ds.test_env$stats.var) +} + + +# Register a one-off logout to run after all test files complete. Uses the +# testthat teardown environment, so it fires once at the end of the run - and at +# the end of a single-file run too. If we are not inside a testthat run the +# registration is skipped (the session is closed when R exits). +register.teardown.logout <- function() +{ + if (isTRUE(ds.test_env$logout_registered)) + { + return(invisible(NULL)) + } + ds.test_env$logout_registered <- tryCatch( + { + withr::defer(final.logout.data.server(), testthat::teardown_env()) + TRUE + }, + error = function(e) FALSE + ) +} + + +# Unconditional logout, used by the deferred teardown above. +final.logout.data.server <- function() +{ + if (!is.null(ds.test_env) && !is.null(ds.test_env$connections)) + { + datashield.logout(ds.test_env$connections) + ds.test_env$connections <- NULL + } + ds.test_env$logout_registered <- NULL +} + connect.all.datasets <- function() { log.out.data.server() diff --git a/tests/testthat/connection_to_datasets/login_details.R b/tests/testthat/connection_to_datasets/login_details.R index 2ce4ca2d..2e496555 100644 --- a/tests/testthat/connection_to_datasets/login_details.R +++ b/tests/testthat/connection_to_datasets/login_details.R @@ -4,8 +4,12 @@ source("connection_to_datasets/init_local_settings.R") init.ip.address() -# create blank environment of test data -ds.test_env <- new.env() +# create blank environment of test data. +# When staying logged in across test files, keep the existing environment (and the +# live connection it holds) rather than rebuilding it on every connect. +if (! (isTRUE(getOption("stay_logged_in", TRUE)) && exists("ds.test_env", inherits = TRUE) && is.environment(ds.test_env))) { + ds.test_env <- new.env() +} # this option helps DSI to find the connection objects by looking in the right environment options(datashield.env=ds.test_env) @@ -67,6 +71,10 @@ if ((ds.test_env$driver == "DSLiteDriver") || (ds.test_env$driver == "OpalDriver stop("**** Unknown Driver ****", call. = FALSE) } +# Keep a single login across test files (only for real servers - DSLite login is +# in-process and cheap). Read by log.in.data.server() / log.out.data.server(). +ds.test_env$stay_logged_in <- isTRUE(getOption("stay_logged_in", TRUE)) && ds.test_env$driver %in% c("OpalDriver", "ArmadilloDriver") + ds.test_env$high_tolerance <- 10^-7 ds.test_env$medium_tolerance <- 10^-6 ds.test_env$low_tolerance <- 10^-4