Unincorporated module outputs

library(HydroBOT)
library(dplyr)
library(ggplot2)

In some cases, response models may not yet be integrated into HydroBOT (see here for integration), or may not be able to be integrated (e.g. if they are proprietary or unscriptable). In these cases, we can still use read_and_agg() for processing with the Aggregator and then Comparer. All we need is a csv of module outputs. For aggregation along the theme, space, and time dimensions, it needs to have information about those dimensions. It should have a column identified by readr::read_csv() as time, a column that matches a column in the causal network, and a column that matches a column in a spatial dataframe (sf object), as these last two are accomplished with joins. It will typically have a ‘scenario’ column as well. We will demonstrate here using read_and_agg() with the same examples used for multi_aggregate(). The difference here is that read_and_agg() is how most people will interact with HydroBOT’s aggregator and has to read the files from disk, rather than receive ready-prepared dataframes.

Setup

First, we create some dummy ‘module’ data, as in the multi_aggregate example, but here we save it out.

We need to know the spatial units to create the data:

austates <- rnaturalearth::ne_states(country = "australia") |>
  dplyr::select(state = name, geometry)

all_aus <- rnaturalearth::ne_countries(country = "australia") |>
  dplyr::select(geounit)

Then we create the data and save it out

# This is all copied from multi_aggregate except the last saving line.

# add a date column
state_inputs <- austates |>
  dplyr::mutate(date = lubridate::ymd("20000101"))

# add some values
withr::with_seed(
  17,
  state_inputs <- state_inputs |>
    dplyr::mutate(value = runif(nrow(state_inputs)))
)

withr::with_seed(
  17,
  # add some more days, each with different values
  state_inputs <- purrr::map(
    0:31,
    \(x) dplyr::mutate(state_inputs,
      date = date + x,
      value = value * rnorm(nrow(state_inputs),
        mean = x, sd = x / 2
      )
    )
  ) |>
    dplyr::bind_rows()
)

# add a scenario column, each with different values
state_inputs <- purrr::imap(
  letters[1:4],
  \(x, y) dplyr::mutate(state_inputs,
    scenario = x,
    value = value + y
  )
) |>
  dplyr::bind_rows()

# add a theme-relevant column, each with different values
state_inputs <- purrr::imap(
  c("E", "F", "G", "H", "I", "J"),
  \(x, y) dplyr::mutate(state_inputs,
    theme1 = x,
    value = value + y
  )
) |>
  dplyr::bind_rows()


# This bit saves it out so we can use read_and_agg.
purrr::map(
  unique(state_inputs$scenario),
  \(x) dir.create(file.path(tempdir(), "module_output", "fake_module", x),
    recursive = TRUE
  )
)

[[1]]
[1] TRUE

[[2]]
[1] TRUE

[[3]]
[1] TRUE

[[4]]
[1] TRUE

si <- state_inputs |>
  sf::st_drop_geometry() |>
  split(state_inputs$scenario) |>
  purrr::iwalk(
    \(x, y) readr::write_csv(x, file.path(
      tempdir(), "module_output", "fake_module",
      y, paste0("fakeout_", y, ".csv")
    ))
  )

And we need a simple causal network.

# make a simple 'causal' network
state_theme <- tibble::tibble(
  theme1 = c("E", "F", "G", "H", "I", "J"),
  theme2 = c(
    "vowel", "consonant", "consonant",
    "consonant", "vowel", "consonant"
  )
) |>
  list()

Using the Aggregator

First, we set up some aggregation steps. Will just use means throughout.

# This will aggregate into weeks, then to type, and then to the country.
ausseq <- list(
  week = "week",
  theme2 = c("theme1", "theme2"),
  all_aus = all_aus
)

# just use mean, since there are no NA in the data.
ausfuns <- list(
  week = "mean",
  type = "mean",
  all_aus = "mean"
)

Do the aggregation. Note that this warns about some built-in checks for the EWR module that are not relevant here.

Tip

We need to use prepfun = 'identity' here because the data does not require any modifications on read-in. See data preparation functions for more detail.

# Do the aggregation
ausagg <- ausagg <- read_and_agg(
  datpath = file.path(tempdir(), "module_output", "fake_module"),
  type = "everything",
  prepfun = "identity",
  geopath = austates,
  causalpath = state_theme,
  groupers = "scenario",
  aggCols = "value",
  aggsequence = ausseq,
  funsequence = ausfuns,
  saveintermediate = TRUE,
  namehistory = FALSE,
  keepAllPolys = FALSE,
  returnList = TRUE,
  add_max = FALSE,
  savepath = file.path(tempdir(), "aggregator_output", "dummy")
)

Warning in filtergroups(thisdf, fromcol = p[1], tocol = p[2], fromfilter =
fromfilter, : Unable to cross-check gauges and planning units, trusting the
user they work together

Quick plots of a couple levels

ausagg$week |>
  filter(theme1 == "E") |>
  plot_outcomes(
    outcome_col = "value",
    plot_type = "map",
    colorgroups = NULL,
    colorset = "value",
    pal_list = list("scico::bamako"),
    pal_direction = -1,
    facet_col = "scenario",
    facet_row = "date"
  )

ausagg$theme2 |>
  filter(date == lubridate::ymd("2000-01-03")) |>
  plot_outcomes(
    outcome_col = "value",
    plot_type = "map",
    colorgroups = NULL,
    colorset = "value",
    pal_list = list("scico::bamako"),
    pal_direction = -1,
    facet_col = "scenario",
    facet_row = "theme2"
  )

ausagg$all_aus |>
  filter(date == lubridate::ymd("2000-01-03")) |>
  plot_outcomes(
    outcome_col = "value",
    plot_type = "map",
    colorgroups = NULL,
    colorset = "value",
    pal_list = list("scico::bamako"),
    pal_direction = -1,
    facet_col = "scenario",
    facet_row = "theme2"
  )