# ==========================================================
# 5-SYSTEM INTEGRATED MAINTENANCE + (OPTIONAL) LCA + OPTIM
# PDF-equivalent workflow, extended to N (=5) systems
# ==========================================================

rm(list = ls())

library(timelineS)
library(lubridate)
library(ggplot2)
library(dplyr)
library(reshape2)
library(rPref)
library(mco)
library(MASS)

# ==========================================================
# 0) GLOBAL SETTINGS
# ==========================================================
start.Date <- "2026-01-01"
lifetime   <- 100

# ----------------------------------------------------------
# SYSTEMS (for reference)
# 1. GRW  : RC Gravity Retaining wall
# 2. CRW  : RC Cantilever Retaining Wall
# 3. RCFS : RC floor slab
# 4. ETICS: External Thermal Insulation Composite System
# 5. PCF  : Precast concrete frame
# ----------------------------------------------------------

# ==========================================================
# 1) CORE FUNCTIONS (PDF-equivalent, fixed + scalable)
# ==========================================================

# Correct timeline plot: uses event_times (years from start), not lifetime
plot.timeline <- function(event_times, events.name, start.Date, plot.Name) {
  dataP <- data.frame(
    Events     = events.name,
    Event_Dates = ymd(start.Date) + years(event_times)
  )
  timelineS(
    dataP,
    main = plot.Name,
    labels = events.name,
    label.direction = "up",
    label.position = 3
  )
}

dist.Events <- function(lifetime, events, start.Date, option.Name, do.plot = FALSE) {
  
  # sort events
  events <- sort(events, decreasing = TRUE)
  
  # distribute: 0, freq, 2*freq ... <= lifetime
  distribution.events <- sapply(events, seq, from = 0, to = lifetime)
  
  # melt
  all.events <- melt(distribution.events)
  colnames(all.events) <- c("frequency", "event")
  all.events$event <- as.character(all.events$event)  # IMPORTANT: avoid factor issues
  
  # sort and keep first event at each time
  all.events <- all.events[order(all.events$frequency), ]
  unique.events <- all.events[!duplicated(all.events$frequency), ]
  
  # --- PDF logic: label time 0 as DC (start of life / construction)
  unique.events$event[unique.events$frequency == 0] <- "DC"
  
  # optional plot
  if (do.plot) {
    plot.timeline(unique.events$frequency, unique.events$event, start.Date, option.Name)
  }
  
  unique.events
}


# Combine timelines for N systems:
# - Names: concatenated with "+"
# - Duration: max() if multiple interventions coincide (aligned work)
combine.lifeTimelines <- function(products, durations) {
  
  all.freq <- sort(unique(unlist(lapply(products, \(p) p$frequency))))
  
  base <- data.frame(
    frequency = all.freq,
    Names     = NA_character_,
    duration  = 0,
    stringsAsFactors = FALSE
  )
  
  base$Names[base$frequency == 0] <- "DC"
  base$Names[base$frequency == max(base$frequency)] <- "END"
  
  for (i in seq_len(nrow(base))) {
    f <- base$frequency[i]
    if (f == 0 || f == max(base$frequency)) next
    
    active.events <- character(0)
    active.durs   <- numeric(0)
    
    for (k in seq_along(products)) {
      prod <- products[[k]]
      durv <- durations[[k]]
      
      evs <- prod$event[prod$frequency == f]
      if (length(evs) == 0) next
      
      for (e in evs) {
        
        # lifecycle markers have no duration
        if (e %in% c("DC", "END") || is.na(e)) {
          active.events <- c(active.events, e)
          next
        }
        
        active.events <- c(active.events, e)
        
        d <- if (e %in% names(durv)) durv[[e]] else NA_real_
        if (!is.na(d)) active.durs <- c(active.durs, d)
      }
    }
    
    if (length(active.events) > 0) {
      base$Names[i] <- paste(active.events, collapse = "+")
      base$duration[i] <- if (length(active.durs) > 0) max(active.durs) else 0
    } else {
      base$duration[i] <- 0
    }
  }
  
  base
}

# Convenience: compute min spacing between successive interventions (excluding END if you want)
min_spacing <- function(freq_vec) {
  diffs <- diff(sort(freq_vec))
  min(diffs)
}

# ==========================================================
# 2) DEFINE ONE BASE STRATEGY (FREQUENCIES IN YEARS; DURATIONS IN DAYS)
#    IMPORTANT: durations are NOT frequencies. They are disruption days.
# ==========================================================

# ---- Strategy 1 ----
events.GRW    <- c(SR = 51,  MR = 63, R = 44, END = lifetime)                    # years
duration.GRW  <- c(SR = 2,  MR = 10, R = 5)                                    # days

events.CRW    <- c(SR = 56,  MR  = 57, R = 49, END = lifetime)            # years
duration.CRW  <- c(SR = 3,  MR = 10,  R = 20)                 # days

events.RCFS   <- c(MR = 17,  R = 47, END = lifetime)                    # years
duration.RCFS <- c(MR = 5,  R = 25)                                    # days

events.ETICS   <- c(SR = 34, R = 66, END = lifetime)           # years
duration.ETICS <- c(SR = 3, R = 5)                           # days

events.PCF    <- c(SR = 21,  MR = 22, END = lifetime)   # years
duration.PCF  <- c(SR = 8, MR = 25)                   # days

# Plot all 5 system timelines (set do.plot=TRUE if you want the individual plots)
par(mfrow = c(3, 2))
maint.GRW   <- dist.Events(lifetime, events.GRW,   start.Date, "GRW",   do.plot = TRUE)
maint.CRW   <- dist.Events(lifetime, events.CRW,   start.Date, "CRW",   do.plot = TRUE)
maint.RCFS  <- dist.Events(lifetime, events.RCFS,  start.Date, "RCFS",  do.plot = TRUE)
maint.ETICS <- dist.Events(lifetime, events.ETICS, start.Date, "ETICS", do.plot = TRUE)
maint.PCF   <- dist.Events(lifetime, events.PCF,   start.Date, "PCF",   do.plot = TRUE)

# Combine all 5
products  <- list(maint.GRW, maint.CRW, maint.RCFS, maint.ETICS, maint.PCF)
durations <- list(duration.GRW, duration.CRW, duration.RCFS, duration.ETICS, duration.PCF)

integrated <- combine.lifeTimelines(products, durations)

# Total interruption duration (days)
total_days <- sum(integrated$duration)
total_days

# Plot integrated timeline
plot.timeline(integrated$frequency, integrated$Names, start.Date, "Integrated Maintenance Timeline (5 systems)")

# ==========================================================
# 3) DESIGN SPACE EXPLORATION (PDF-equivalent idea)
#    Here: random sampling (fast, avoids huge grids)
# ==========================================================

# Generator functions create one scenario (named vector with END = lifetime)
gen.GRW <- function()  c(SR = sample(15:55,  1), MR = sample(55:65, 1), R = sample(40:45,1), END = lifetime)
gen.CRW <- function()  c(SR = sample(15:55,  1), MR  = sample(30:50, 1), R  = sample(45:55, 1), END = lifetime)
gen.RCFS<- function()  c(MR = sample(20:25, 1), R = sample(45:55,1), END = lifetime)
gen.ETI <- function()  c(SR = sample(20:65, 1), R  = sample(55:65, 1), END = lifetime)
gen.PCF <- function()  c(SR = sample(20:25,  1), MR  = sample(25:50,  1), END = lifetime)

# Sampling function: returns dataframe with dur + dist.inter (same metrics as PDF section)
design.sample <- function(event.generators, duration.maps, n.samples = 5000) {
  out <- vector("list", n.samples)
  
  for (i in seq_len(n.samples)) {
    
    prods <- vector("list", length(event.generators))
    durs  <- vector("list", length(event.generators))
    
    for (s in seq_along(event.generators)) {
      ev <- event.generators[[s]]()
      prods[[s]] <- dist.Events(lifetime, ev, start.Date, paste0("System ", s), do.plot = FALSE)
      durs[[s]]  <- duration.maps[[s]]
    }
    
    comb <- combine.lifeTimelines(prods, durs)
    
    out[[i]] <- c(
      dur = sum(comb$duration),
      dist.inter = min_spacing(comb$frequency)
    )
  }
  
  as.data.frame(do.call(rbind, out))
}

event.generators <- list(gen.GRW, gen.CRW, gen.RCFS, gen.ETI, gen.PCF)
duration.maps    <- list(duration.GRW, duration.CRW, duration.RCFS, duration.ETICS, duration.PCF)

response.space <- design.sample(event.generators, duration.maps, n.samples = 5000)

# ==========================================================
# 4) PREFERENCE RANKING + PARETO (same as PDF)
# ==========================================================

# Minimize total interruption duration, maximize spacing between interventions
p <- low(dur) * high(dist.inter)

sky <- psel(response.space, p)
sky

pareto2 <- psel(response.space, p, top = nrow(response.space))

ggplot(response.space, aes(x = dur, y = dist.inter)) +
  geom_point(shape = 21) +
  geom_point(data = pareto2, size = 3, aes(color = factor(pareto2$.level))) +
  labs(title = "Preference-ranked scenarios (rPref)",
       x = "Total interruption duration (days)",
       y = "Min spacing between interventions (years)")

# Pareto frontier visualization helper (like PDF)
show_front <- function(pref) {
  plot(response.space$dur, response.space$dist.inter,
       xlab = "dur (days)", ylab = "dist.inter (years)")
  sky2 <- psel(response.space, pref)
  plot_front(response.space, pref, col = rgb(0, 0, 1))
  points(sky2$dur, sky2$dist.inter, pch = 19, cex = 1.2)
}

par(mfrow = c(1, 1))
show_front(p)

# Example alternative preference (reverse)
p2 <- high(dur) * low(dist.inter)
show_front(p2)

# ==========================================================
# 5) CSV-DRIVEN LCA (EXCLUDES RCFS FROM HERE ON)
# ==========================================================

read_quotedline_csv <- function(path) {
  x <- readLines(path, warn = FALSE, encoding = "UTF-8")
  x <- gsub('^"(.*)"$', "\\1", x)
  read.csv(text = x, sep = ",", header = TRUE,
           stringsAsFactors = FALSE, check.names = FALSE)
}

# ---- set your data directory once
data_dir <- "/Users/nicholas_song/Desktop/Uni/CSE/Whole Life Civil Systems Analysis/Group Assignment"

systems <- read_quotedline_csv(file.path(data_dir, "systems.csv")) %>%
  dplyr::mutate(
    system_quantity = as.numeric(system_quantity),
    system_unit     = as.character(system_unit)
  )

materials <- read_quotedline_csv(file.path(data_dir, "material_composition.csv")) %>%
  dplyr::mutate(
    material_amount_per_unit_system = as.numeric(material_amount_per_unit_system),
    material_unit = as.character(material_unit)
  )

fractions <- read_quotedline_csv(file.path(data_dir, "maintenance_action_fractions.csv")) %>%
  dplyr::mutate(
    affected_fraction  = as.numeric(affected_fraction),
    maintenance_action = as.character(maintenance_action)
  )

impacts <- read_quotedline_csv(file.path(data_dir, "material_impacts.csv"))

# Fix typo if present
if ("Cost_UER_per_unit" %in% names(impacts) && !("Cost_EUR_per_unit" %in% names(impacts))) {
  impacts <- dplyr::rename(impacts, Cost_EUR_per_unit = Cost_UER_per_unit)
}

impacts <- impacts %>%
  dplyr::mutate(
    material_unit = as.character(material_unit),
    CO2_kg_per_unit    = as.numeric(CO2_kg_per_unit),
    NOx_kg_per_unit    = as.numeric(NOx_kg_per_unit),
    SOx_kg_per_unit    = as.numeric(SOx_kg_per_unit),
    Energy_MJ_per_unit = as.numeric(Energy_MJ_per_unit),
    Cost_EUR_per_unit  = as.numeric(Cost_EUR_per_unit)
  )

count_events <- function(schedule, system_id) {
  schedule <- as.data.frame(schedule)
  
  bad <- is.na(names(schedule)) | names(schedule) == ""
  if (any(bad)) schedule <- schedule[, !bad, drop = FALSE]
  
  names(schedule) <- make.names(names(schedule), unique = TRUE)
  
  if (!("event" %in% names(schedule))) {
    stop("count_events(): schedule for system ", system_id,
         " does not have an 'event' column. Names are: ",
         paste(names(schedule), collapse = ", "))
  }
  
  schedule %>%
    dplyr::filter(!(event %in% c("DC", "END"))) %>%
    dplyr::count(event, name = "n_events") %>%
    dplyr::mutate(
      system_id = system_id,
      maintenance_action = as.character(event)
    ) %>%
    dplyr::select(system_id, maintenance_action, n_events)
}

calc_LCA_from_schedules <- function(maint_list, systems, materials, fractions, impacts) {
  
  # A) count events
  events_all <- dplyr::bind_rows(lapply(names(maint_list), function(sid) {
    count_events(maint_list[[sid]], sid)
  }))
  
  # B) join fractions + system quantities
  base_actions <- events_all %>%
    dplyr::left_join(fractions, by = c("system_id","maintenance_action")) %>%
    dplyr::left_join(systems, by = "system_id")
  
  missing_frac <- base_actions %>% dplyr::filter(is.na(affected_fraction))
  if (nrow(missing_frac) > 0) {
    print(missing_frac)
    stop("Missing affected_fraction for some (system_id, maintenance_action). Fix maintenance_action_fractions.csv")
  }
  
  missing_sys <- base_actions %>% dplyr::filter(is.na(system_quantity) | is.na(system_unit))
  if (nrow(missing_sys) > 0) {
    print(missing_sys)
    stop("Missing system_quantity/system_unit for some system_id. Fix systems.csv")
  }
  
  # C) expand to materials
  inventory <- base_actions %>%
    dplyr::left_join(materials, by = "system_id", relationship = "many-to-many") %>%
    dplyr::mutate(
      material_amount_used =
        n_events *
        system_quantity *
        material_amount_per_unit_system *
        affected_fraction
    )
  
  if (any(is.na(inventory$material_amount_per_unit_system))) {
    stop("Some systems have no material composition rows. Fix material_composition.csv")
  }
  
  # D) join impacts + totals
  inventory2 <- inventory %>%
    dplyr::left_join(impacts, by = c("material_id","material_unit"))
  
  missing_imp <- inventory2 %>%
    dplyr::filter(is.na(CO2_kg_per_unit) | is.na(Energy_MJ_per_unit) | is.na(Cost_EUR_per_unit))
  
  if (nrow(missing_imp) > 0) {
    print(missing_imp %>% dplyr::select(system_id, maintenance_action, material_id, material_unit))
    stop("Missing impact factors for some (material_id, material_unit). Fix material_impacts.csv")
  }
  
  detailed <- inventory2 %>%
    dplyr::mutate(
      CO2    = material_amount_used * CO2_kg_per_unit,
      NOx    = material_amount_used * NOx_kg_per_unit,
      SOx    = material_amount_used * SOx_kg_per_unit,
      Energy = material_amount_used * Energy_MJ_per_unit,
      Cost   = material_amount_used * Cost_EUR_per_unit
    )
  
  totals <- detailed %>%
    dplyr::summarise(
      CO2    = sum(CO2, na.rm = TRUE),
      NOx    = sum(NOx, na.rm = TRUE),
      SOx    = sum(SOx, na.rm = TRUE),
      Energy = sum(Energy, na.rm = TRUE),
      Cost   = sum(Cost, na.rm = TRUE)
    )
  
  list(totals = totals, detailed = detailed)
}

# ==========================================================
# 6) NSGA-II OPTIMISATION (EXCLUDES RCFS)
# Uses YOUR 5-system script's action sets:
#   GRW: SR MR R
#   CRW: SR MR R
#   ETICS: SR R
#   PCF: SR MR   (THIS is the key difference vs your 4-system)
# ==========================================================

fitness <- function(x) {
  
  x <- round(x, 0)
  z <- numeric(7)
  
  # Decision vars (idim = 10):
  # 1 GRW_SR, 2 GRW_MR, 3 GRW_R
  # 4 CRW_SR, 5 CRW_MR, 6 CRW_R
  # 7 ETICS_SR, 8 ETICS_R
  # 9 PCF_SR, 10 PCF_MR
  
  ev.GRW   <- c(SR = x[1], MR = x[2], R = x[3], END = lifetime)
  ev.CRW   <- c(SR = x[4], MR = x[5], R = x[6], END = lifetime)
  ev.ETICS <- c(SR = x[7], R  = x[8], END = lifetime)
  ev.PCF   <- c(SR = x[9], MR = x[10], END = lifetime)
  
  m.GRW   <- dist.Events(lifetime, ev.GRW,   start.Date, "GRW",   do.plot = FALSE)
  m.CRW   <- dist.Events(lifetime, ev.CRW,   start.Date, "CRW",   do.plot = FALSE)
  m.ETICS <- dist.Events(lifetime, ev.ETICS, start.Date, "ETICS", do.plot = FALSE)
  m.PCF   <- dist.Events(lifetime, ev.PCF,   start.Date, "PCF",   do.plot = FALSE)
  
  # ---- LCA schedules EXCLUDE RCFS
  maint_list <- list(GRW = m.GRW, CRW = m.CRW, ETICS = m.ETICS, PCF = m.PCF)
  
  # ---- Disruption metrics EXCLUDE RCFS
  comb <- combine.lifeTimelines(
    products  = list(m.GRW, m.CRW, m.ETICS, m.PCF),
    durations = list(duration.GRW, duration.CRW, duration.ETICS, duration.PCF)
  )
  
  z[1] <- sum(comb$duration)
  z[2] <- -min_spacing(comb$frequency)     # maximize spacing
  
  # ---- CSV-driven LCA totals
  lca <- calc_LCA_from_schedules(
    maint_list = maint_list,
    systems    = systems,
    materials  = materials,
    fractions  = fractions,
    impacts    = impacts
  )
  
  z[3] <- lca$totals$Energy
  z[4] <- lca$totals$CO2
  z[5] <- lca$totals$NOx
  z[6] <- lca$totals$SOx
  z[7] <- lca$totals$Cost
  
  z
}

# ---- Bounds using YOUR generator ranges (from Section 3 of your 5-system script)
lower.bounds <- c(
  15, 55, 40,   # GRW: SR, MR, R
  15, 30, 45,   # CRW: SR, MR, R
  20, 55,       # ETICS: SR, R
  20, 25        # PCF: SR, MR
)

upper.bounds <- c(
  55, 65, 45,
  55, 50, 55,
  65, 65,
  25, 50
)

# ---- sanity check: fractions completeness (NOW includes PCF SR + MR; RCFS removed)
needed <- data.frame(
  system_id = c("GRW","GRW","GRW","CRW","CRW","CRW","ETICS","ETICS","PCF","PCF"),
  maintenance_action = c("SR","MR","R","SR","MR","R","SR","R","SR","MR")
)
print(dplyr::anti_join(needed, fractions, by = c("system_id","maintenance_action")))

# ---- Run NSGA-II
r2 <- nsga2(
  fitness,
  idim = 10,
  odim = 7,
  generations = 20,
  popsize = 240,
  lower.bounds = lower.bounds,
  upper.bounds = upper.bounds
)

# ---- Results
r2Results <- as.data.frame(r2$value)
colnames(r2Results) <- c("duration_days", "neg_min_spacing", "energy_mj", "co2_kg", "nox_kg", "sox_kg", "cost_eur")

pareto <- as.data.frame(paretoFront(r2))
colnames(pareto) <- colnames(r2Results)

ggplot(r2Results, aes(x = duration_days, y = cost_eur)) +
  geom_point(shape = 21) +
  geom_point(data = pareto, size = 3, color = "red") +
  geom_line(data = pareto, color = "blue") +
  labs(
    title = "NSGA-II Pareto front (duration vs cost)",
    x = "Total interruption duration (days)",
    y = "Cost (EUR)"
  )

input.params <- round(r2$par, 0)
all.results  <- cbind(input.params, r2Results, pareto = r2$pareto.optimal)
# ---- Rename columns to numeric axis labels (1–16)
colnames(all.results)[1:17] <- as.character(1:17)



cols_to_plot <- c(
  1:10,   # GRW / CRW / ETICS / PCF decision variables
  11,    # duration_days
  12,    # neg_min_spacing
  13,    # energy_mj
  14,    # co2_kg
  15,    # nox_kg
  16,    # sox_kg
  17     # cost_eur
)

# ---- Legend text (order matches columns 1:(9+7) exactly)
axis_legend <- c(
  "1  GRW_SR",
  "2  GRW_MR",
  "3  GRW_R",
  "4  CRW_SR",
  "5  CRW_MR",
  "6  CRW_R",
  "7  ETICS_SR",
  "8  ETICS_R",
  "9  PCF_SR",
  "10 PCF_MR",
  "11 duration_days",
  "12 neg_min_spacing",
  "13 energy_mj",
  "14 co2_kg",
  "15 nox_kg",
  "16 sox_kg",
  "17 cost_eur"
)

# ---- Make room on the right for legend (increase right margin)
op <- par(no.readonly = TRUE)
par(mar = c(5, 4, 4, 22))  # <- right margin is the last number

# ---- Your plot (UNCHANGED)
MASS::parcoord(all.results[, 1:(10+7)], var.label = TRUE,
               col = ifelse(all.results$pareto, "indianred", "skyblue2"),
               lty = ifelse(all.results$pareto, 1, 3),
               lwd = ifelse(all.results$pareto, 3, 1))

# ---- Legend in the right margin (won't overlap plot)
legend("topright",
       inset = c(-0.8, 0),   # push into the right margin
       xpd = TRUE,
       bty = "n",
       cex = 0.9,
       legend = axis_legend)

# Optional: small style legend below it
legend("bottomright",
       inset = c(-0.8, 0),
       xpd = TRUE,
       bty = "n",
       cex = 0.9,
       legend = c("Pareto-optimal", "Non-Pareto"),
       col = c("indianred", "skyblue2"),
       lty = c(1, 3),
       lwd = c(3, 1))

par(op)