User-provided causal networks

In most of the demonstrations, we have used the HydroBOT::causal_ewr causal networks. However, the causal network is an argument to multi_aggregate() and read_and_agg(), so the user can pass arbitrary networks. Here, we demonstrate how to pass a causal network that isn’t provided by HydroBOT. We use read_and_agg(), since that is the most common situation, but the same approach also works with multi_aggregate().

Setup

As always, we need to point to the data and set up aggregation sequences.

# Root of the scenario project; every other path hangs off this directory.
project_dir <- "hydrobot_scenarios"

# Where aggregated output is written (passed as `savepath` further down).
agg_results <- file.path(
  project_dir,
  "aggregator_output",
  "demo"
)

# EWR module output that the aggregator reads (passed as `datpath`).
ewr_results <- file.path(
  project_dir,
  "module_output",
  "EWR"
)

# Hydrograph directory for the scenarios.
hydro_dir <- file.path(project_dir, "hydrographs")
# Aggregation sequence, built one step at a time in the order the steps are
# listed. Character pairs name a from/to link; `sdl_units` and `basin` are
# objects that are not defined in this section.
# NOTE(review): presumably `sdl_units` and `basin` are spatial datasets
# shipped with HydroBOT -- confirm.
aggseq <- list()
aggseq[["all_time"]] <- "all_time"
aggseq[["ewr_code"]] <- c("ewr_code_timing", "ewr_code")
aggseq[["env_obj"]] <- c("ewr_code", "env_obj")
aggseq[["sdl_units"]] <- sdl_units
aggseq[["Specific_goal"]] <- c("env_obj", "Specific_goal")
aggseq[["Objective"]] <- c("Specific_goal", "Objective")
aggseq[["basin"]] <- basin
aggseq[["target_5_year_2024"]] <- c("Objective", "target_5_year_2024")

# Aggregation function for each step; the names mirror those used in `aggseq`.
# Every step defaults to the arithmetic mean, and the two steps that differ
# are overridden afterwards.
funseq <- setNames(
  rep(list("ArithmeticMean"), 8L),
  c(
    "all_time", "ewr_code", "env_obj", "sdl_units",
    "Specific_goal", "Objective", "basin", "target_5_year_2024"
  )
)
funseq[["ewr_code"]] <- "CompensatingFactor"
funseq[["basin"]] <- "SpatialWeightedMean"

Network from EWR tool

One use that is likely to be common is to extract the (sometimes newer, but less tested) causal networks from the EWR tool with get_causal_ewr().

# Aggregate the EWR module output using the causal network extracted from the
# EWR tool with get_causal_ewr(), rather than the packaged
# HydroBOT::causal_ewr network.
# The run reports unmatched causal links (env_obj -> Specific_goal and
# Objective -> target_5_year_2024); see the messages printed after the call.
# NOTE(review): the meaning of the individual arguments is inferred from
# their names and values only -- confirm against the read_and_agg() help page.
agg_ewr_causal <- read_and_agg(
  datpath = ewr_results,
  type = "achievement",
  # `bom_basin_gauges` is not defined in this section; presumably a dataset
  # provided by HydroBOT -- confirm.
  geopath = bom_basin_gauges,
  # The user-supplied causal network is passed here.
  causalpath = get_causal_ewr(),
  groupers = "scenario",
  aggCols = "ewr_achieved",
  group_until = list(
    SWSDLName = is_notpoint,
    planning_unit_name = is_notpoint,
    gauge = is_notpoint
  ),
  pseudo_spatial = "sdl_units",
  aggsequence = aggseq,
  funsequence = funseq,
  saveintermediate = TRUE,
  namehistory = FALSE,
  keepAllPolys = FALSE,
  # The result is indexed by element later on (agg_ewr_causal$agg_input).
  returnList = TRUE,
  savepath = agg_results,
  add_max = FALSE
)
! Unmatched links in causal network
• 11 from env_obj to Specific_goal
! Unmatched links in causal network
• 7 from Objective to target_5_year_2024

Arbitrary network

It is also possible to use any arbitrary network, provided it contains the needed links (columns). Here, we make up a very simple one. See causal_ewr for the required structure; the key requirement is that the network is supplied as a list of one or more data frames.

# Labels for the made-up grouping level.
fakegroups <- c("a", "b", "c")

# A single-link causal network: every distinct `ewr_code_timing` found in the
# aggregation input is assigned to one of the fake groups at random.
# No seed is set in this section, so the assignment can differ between runs.
fake_causal <- tibble::tibble(
  ewr_code_timing = unique(agg_ewr_causal$agg_input$ewr_code_timing),
  # tibble() builds columns in order, so the column created just above can be
  # used for the sample size instead of recomputing the unique codes.
  fake_group = sample(fakegroups, length(ewr_code_timing), replace = TRUE)
)
# Aggregation sequence for the fake network, built in the order the steps are
# listed: a step named `all_time`, then the ewr_code_timing -> fake_group
# link, then the `sdl_units` object (not defined in this section).
aggseq_fakecausal <- list()
aggseq_fakecausal[["all_time"]] <- "all_time"
aggseq_fakecausal[["fake_group"]] <- c("ewr_code_timing", "fake_group")
aggseq_fakecausal[["sdl_units"]] <- sdl_units

# Aggregation function for each step of `aggseq_fakecausal`, matched by name.
funseq_fakecausal <- setNames(
  list("ArithmeticMean", "CompensatingFactor", "ArithmeticMean"),
  c("all_time", "fake_group", "sdl_units")
)
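# Alternative using multi_aggregate() directly. Not run here: `ewrdata` is not
# created in this section and would need to be loaded first.
# fake_causal_agg <- multi_aggregate(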
#   dat = ewrdata,
#   causal_edges = list(fake_causal),
#   groupers = "scenario",
#   aggCols = "ewr_achieved",
#   aggsequence = aggseq_fakecausal,
#   funsequence = funseq_fakecausal,
#   auto_ewr_PU = TRUE,
#   namehistory = FALSE)

# Aggregate the same EWR output, this time with the made-up causal network
# and its matching aggregation and function sequences.
# The run emits warnings about cross-checking gauges and planning units and
# about the causal network not containing all groupers; see the messages
# printed after the call.
# NOTE(review): the meaning of the individual arguments is inferred from
# their names and values only -- confirm against the read_and_agg() help page.
fake_causal_agg <- read_and_agg(
  datpath = ewr_results,
  type = "achievement",
  # `bom_basin_gauges` is not defined in this section; presumably a dataset
  # provided by HydroBOT -- confirm.
  geopath = bom_basin_gauges,
  # The network is wrapped in a list, i.e. passed as a list of data frames.
  causalpath = list(fake_causal),
  groupers = "scenario",
  aggCols = "ewr_achieved",
  group_until = list(
    SWSDLName = is_notpoint,
    planning_unit_name = is_notpoint,
    gauge = is_notpoint
  ),
  pseudo_spatial = "sdl_units",
  aggsequence = aggseq_fakecausal,
  funsequence = funseq_fakecausal,
  # NOTE(review): `savepath` is the same directory used by the earlier
  # read_and_agg() call, which also set saveintermediate = TRUE -- confirm
  # whether the earlier saved output is overwritten.
  saveintermediate = TRUE,
  namehistory = FALSE,
  keepAllPolys = FALSE,
  returnList = TRUE,
  savepath = agg_results,
  add_max = FALSE
)
Warning in filtergroups(thisdf, fromcol = p[1], tocol = p[2], fromfilter =
fromfilter, : Unable to cross-check gauges and planning units, trusting the
user they work together
Warning: Causal network does not have all groupers.
• Joining ewr_code_timing to fake_group
• Groupers are scenario, SWSDLName, planning_unit_name, gauge, polyID.
• expect causal network to have SWSDLName, planning_unit_name, gauge; it has ewr_code_timing, fake_group, fromtype, totype, edgeorder.
• Do you need to use `group_until`? Or is your network missing columns?
# Show the returned list: the aggregation input followed by one element per
# step in `aggseq_fakecausal` (all_time, fake_group, sdl_units).
fake_causal_agg
$agg_input
Simple feature collection with 2946 features and 13 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 144.8811 ymin: -33.8695 xmax: 148.6839 ymax: -30.4577
Geodetic CRS:  GDA94
# A tibble: 2,946 × 14
   scenario  year date       gauge  planning_unit_name  state SWSDLName ewr_code
 * <chr>    <dbl> <date>     <chr>  <chr>               <chr> <chr>     <chr>   
 1 base      2014 2014-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 2 base      2015 2015-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 3 base      2016 2016-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 4 base      2017 2017-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 5 base      2018 2018-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 6 base      2019 2019-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 7 base      2014 2014-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 8 base      2015 2015-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
 9 base      2016 2016-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
10 base      2017 2017-07-01 421004 Baroona to Warren … NSW   Macquari… BF1     
# ℹ 2,936 more rows
# ℹ 6 more variables: ewr_code_timing <chr>, event_years <dbl>,
#   frequency_achieved <dbl>, interevent_achieved <dbl>, ewr_achieved <dbl>,
#   geometry <POINT [°]>

$all_time
Simple feature collection with 552 features and 9 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 144.8811 ymin: -33.8695 xmax: 148.6839 ymax: -30.4577
Geodetic CRS:  GDA94
# A tibble: 552 × 10
   scenario SWSDLName planning_unit_name            gauge ewr_code_timing polyID
   <chr>    <chr>     <chr>                         <chr> <chr>           <chr> 
 1 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… BF1_a           r4pdr…
 2 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… BF1_b           r4pdr…
 3 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… BF2_a           r4pdr…
 4 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… BF2_b           r4pdr…
 5 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… BK1_P           r4pdr…
 6 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… BK1_S           r4pdr…
 7 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… CF1_b           r4pdr…
 8 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… CF1_c           r4pdr…
 9 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… LF1_P           r4pdr…
10 MAX      Lachlan   Lachlan River - Lake Cargell… 4120… LF1_S           r4pdr…
# ℹ 542 more rows
# ℹ 4 more variables: geometry <POINT [°]>, ewr_achieved <dbl>, aggfun_1 <chr>,
#   aggLevel_1 <chr>

$fake_group
Simple feature collection with 108 features and 11 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 144.8811 ymin: -33.8695 xmax: 148.6839 ymax: -30.4577
Geodetic CRS:  GDA94
# A tibble: 108 × 12
   scenario SWSDLName planning_unit_name                 gauge polyID fake_group
   <chr>    <chr>     <chr>                              <chr> <chr>  <chr>     
 1 MAX      Lachlan   Lachlan River - Lake Cargelligo t… 4120… r4pdr… a         
 2 MAX      Lachlan   Lachlan River - Lake Cargelligo t… 4120… r4pdr… b         
 3 MAX      Lachlan   Lachlan River - Lake Cargelligo t… 4120… r4pdr… c         
 4 MAX      Lachlan   Merrimajeel Creek                  4120… r1zp2… a         
 5 MAX      Lachlan   Merrimajeel Creek                  4120… r1zp2… b         
 6 MAX      Lachlan   Merrimajeel Creek                  4120… r1zp2… c         
 7 MAX      Lachlan   Merrowie Creek                     4120… r4pdr… a         
 8 MAX      Lachlan   Merrowie Creek                     4120… r4pdr… b         
 9 MAX      Lachlan   Merrowie Creek                     4120… r4pdr… c         
10 MAX      Lachlan   Muggabah Creek                     4120… r1zp2… a         
# ℹ 98 more rows
# ℹ 6 more variables: geometry <POINT [°]>, ewr_achieved <dbl>, aggfun_1 <chr>,
#   aggLevel_1 <chr>, aggfun_2 <chr>, aggLevel_2 <chr>

$sdl_units
Simple feature collection with 24 features and 13 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 143.8092 ymin: -34.98763 xmax: 150.3614 ymax: -29.91875
Geodetic CRS:  GDA94
# A tibble: 24 × 14
   scenario fake_group polyID      SWSDLID StateID SWSDLName            
   <chr>    <chr>      <chr>       <chr>   <chr>   <chr>                
 1 MAX      a          r602ydft049 SS16    NSW     Lachlan              
 2 MAX      a          r6367k2uy6m SS20    NSW     Macquarie-Castlereagh
 3 MAX      b          r602ydft049 SS16    NSW     Lachlan              
 4 MAX      b          r6367k2uy6m SS20    NSW     Macquarie-Castlereagh
 5 MAX      c          r602ydft049 SS16    NSW     Lachlan              
 6 MAX      c          r6367k2uy6m SS20    NSW     Macquarie-Castlereagh
 7 base     a          r602ydft049 SS16    NSW     Lachlan              
 8 base     a          r6367k2uy6m SS20    NSW     Macquarie-Castlereagh
 9 base     b          r602ydft049 SS16    NSW     Lachlan              
10 base     b          r6367k2uy6m SS20    NSW     Macquarie-Castlereagh
# ℹ 14 more rows
# ℹ 8 more variables: geometry <MULTIPOLYGON [°]>, ewr_achieved <dbl>,
#   aggfun_1 <chr>, aggLevel_1 <chr>, aggfun_2 <chr>, aggLevel_2 <chr>,
#   aggfun_3 <chr>, aggLevel_3 <chr>

And a quick plot of the random groupings implied there.

# Map the final (sdl_units) aggregation level, faceted with scenarios as
# columns and the fake groups as rows.
# NOTE(review): the printed results include a scenario called "MAX" that is
# not listed in `sceneorder` -- confirm how plot_outcomes() treats scenarios
# missing from that ordering.
plot_outcomes(
  fake_causal_agg$sdl_units,
  outcome_col = "ewr_achieved",
  plot_type = "map",
  colorgroups = NULL,
  colorset = "ewr_achieved",
  pal_list = list("scico::lapaz"),
  pal_direction = -1,
  facet_col = "scenario",
  facet_row = "fake_group",
  sceneorder = c("down4", "base", "up4")
)