Seasonal CDD development - daily data, spatial and temporal aggregation

Hi @nperez @vagudets @vtorralba @cdelgado ,

As discussed last week, I'm creating a new branch to work on the development of a workflow to provide operational seasonal forecasts of daily indicators. I'm using this script and these two recipes (load daily) (load monthly). This development will ideally be ready for the next production update.

In this case, I'm following these steps:

I'm computing cooling degree days (CDD) from daily data (with the seasonal SEAS5) with SUNSET, this works perfectly fine.
Aggregating the loaded data by country with the shapefile_europe.nc mask I created with ShapeToFile.R, and using the developing SpatialAggregation.
Compute the CDD indicator mannually using daily data:


hdd_threshold <- 15.5
cdd_threshold <- 22

compute_hdd <- function(cube, threshold) {
  out <- cube
  out$data <- pmax(0, threshold - cube$data)
  # restore dimensions
  dim(out$data) <- dim(cube$data)
  return(out)
}

compute_cdd <- function(cube, threshold) {
  out <- cube
  out$data <- pmax(0, cube$data - threshold)
  dim(out$data) <- dim(cube$data)
  return(out)
}

# First test cdd

#hdd_hcst <- compute_hdd(data$hcst, hdd_threshold)
cdd_hcst <- compute_cdd(data$hcst, cdd_threshold)

#hdd_fcst <- compute_hdd(data$fcst, hdd_threshold)
cdd_fcst <- compute_cdd(data$fcst, cdd_threshold)

#hdd_obs  <- compute_hdd(data$obs,  hdd_threshold)
cdd_obs  <- compute_cdd(data$obs,  cdd_threshold)

Aggregate the daily indicator monthly to prepare it to enter crossval_calibration.R

aggregate_monthly <- function(cube) {
  
  dates <- as.POSIXct(cube$attrs$Dates, tz = "UTC")
  months <- month(dates)
  unique_months <- sort(unique(months))
  
  dims <- dim(cube$data)
  time_dim <- which(names(dims) == "time")
  dims[time_dim] <- length(unique_months)
  mon_data <- array(NA, dims)
  
  for (i in seq_along(unique_months)) {
   mon <- unique_months[i]
    idx <- which(months == mon)
    
    mon_data_slice <- apply(cube$data, seq_along(dims)[-time_dim], function(x) {
      sum(x[idx], na.rm = TRUE)
    })
    
    idx_list <- rep(list(TRUE), length(dims))
    idx_list[[time_dim]] <- i
    mon_data <- do.call("[<-", c(list(mon_data), idx_list, list(value = mon_data_slice)))
  }
  
  mon_cube <- cube
  mon_cube$data <- mon_data
  
  month_starts <- tapply(dates, months, function(x) min(as.Date(x)))
  mon_cube$coords$time <- as.Date(month_starts, origin = "1970-01-01")
  mon_cube$attrs$Dates <- mon_cube$coords$time
  
  return(mon_cube)
}

#hdd_fcst_mon <- aggregate_monthly(hdd_fcst)
#hdd_hcst_mon <- aggregate_monthly(hdd_hcst)
#hdd_obs_mon  <- aggregate_monthly(hdd_obs)
cdd_fcst_mon <- aggregate_monthly(cdd_fcst)
cdd_hcst_mon <- aggregate_monthly(cdd_hcst)
cdd_obs_mon  <- aggregate_monthly(cdd_obs)

extract the countries (here I'm showing Spain but I do it with others):

cdd_esp <- NULL

cdd_esp$fcst$data <- cdd_fcst_mon$data[,,,,,,,2, drop = FALSE]
cdd_esp$hcst$data <- cdd_hcst_mon$data[,,,,,,,2, drop = FALSE]
cdd_esp$obs$data <- cdd_obs_mon$data[,,,,,,,2, drop = FALSE]

Now I load monthly data from another recipe (energy_demand2.yml) to then get my indicator data inside, then calibrate:

recipe_file2 <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/energy_demand2.yml"

recipe2 <- prepare_outputs(recipe_file2)

data2 <- Loading(recipe2)
data2 <- Units(recipe2, data2)


# Cooling degree days
data2$fcst$data <- cdd_esp$fcst$data
data2$hcst$data <- cdd_esp$hcst$data
data2$obs$data <- cdd_esp$obs$data

data_fra <- data2
data_uk <- data2
data_ita <- data2
data_nor <- data2
data_por <- data2

data_fra$fcst$data <- cdd_fra$fcst$data
data_fra$hcst$data <- cdd_fra$hcst$data
data_fra$obs$data <- cdd_fra$obs$data

data_uk$fcst$data <- cdd_uk$fcst$data
data_uk$hcst$data <- cdd_uk$hcst$data
data_uk$obs$data <- cdd_uk$obs$data

data_ita$fcst$data <- cdd_ita$fcst$data
data_ita$hcst$data <- cdd_ita$hcst$data
data_ita$obs$data <- cdd_ita$obs$data

data_nor$fcst$data <- cdd_nor$fcst$data
data_nor$hcst$data <- cdd_nor$hcst$data
data_nor$obs$data <- cdd_nor$obs$data

data_por$fcst$data <- cdd_por$fcst$data
data_por$hcst$data <- cdd_por$hcst$data
data_por$obs$data <- cdd_por$obs$data



source("modules/Crossval/Crossval_calibration.R")
cdd_cal_esp <- Crossval_calibration(recipe = recipe2, data = data2)
cdd_cal_fra <- Crossval_calibration(recipe = recipe2, data = data_fra)
cdd_cal_uk <- Crossval_calibration(recipe = recipe2, data = data_uk)
cdd_cal_ita <- Crossval_calibration(recipe = recipe2, data = data_ita)
cdd_cal_nor <- Crossval_calibration(recipe = recipe2, data = data_nor)
cdd_cal_por <- Crossval_calibration(recipe = recipe2, data = data_por)

and then I plot it.

As you can see, there is a lot of room for improvement here with:

Avoiding loading 2 recipes - prepare the daily indicators aggregated monthly to enter crossval_calibration.R.
Aggregate monthly on a more sofisticated way.

Please let me know if you have any suggestion to improve this workflow.

Many thanks,

Paloma