# ==========================================================
# 4-SYSTEM (GRW, CRW, ETICS, PCF) INTEGRATED MAINTENANCE + LCA + NSGA-II
# (RCFS is allowed in early plotting if you want, but excluded from LCA + fitness)
# ==========================================================

rm(list = ls())

library(timelineS)
library(lubridate)
library(ggplot2)
library(dplyr)
library(reshape2)
library(rPref)
library(mco)
library(MASS)

# ==========================================================
# 0) GLOBAL SETTINGS
# ==========================================================
start.Date <- "2026-01-01"
lifetime   <- 100

# ==========================================================
# 1) CORE FUNCTIONS
# ==========================================================

plot.timeline <- function(event_times, events.name, start.Date, plot.Name) {
  dataP <- data.frame(
    Events      = events.name,
    Event_Dates = ymd(start.Date) + years(event_times)
  )
  timelineS(
    dataP,
    main = plot.Name,
    labels = events.name,
    label.direction = "up",
    label.position = 3
  )
}

dist.Events <- function(lifetime, events, start.Date, option.Name, do.plot = FALSE) {
  
  events <- sort(events, decreasing = TRUE)
  
  distribution.events <- sapply(events, seq, from = 0, to = lifetime)
  
  all.events <- melt(distribution.events)
  colnames(all.events) <- c("frequency", "event")
  all.events$event <- as.character(all.events$event)
  
  all.events <- all.events[order(all.events$frequency), ]
  unique.events <- all.events[!duplicated(all.events$frequency), ]
  
  unique.events$event[unique.events$frequency == 0] <- "DC"
  
  if (do.plot) {
    plot.timeline(unique.events$frequency, unique.events$event, start.Date, option.Name)
  }
  
  unique.events
}

combine.lifeTimelines <- function(products, durations) {
  
  all.freq <- sort(unique(unlist(lapply(products, \(p) p$frequency))))
  
  base <- data.frame(
    frequency = all.freq,
    Names     = NA_character_,
    duration  = 0,
    stringsAsFactors = FALSE
  )
  
  base$Names[base$frequency == 0] <- "DC"
  base$Names[base$frequency == max(base$frequency)] <- "END"
  
  for (i in seq_len(nrow(base))) {
    f <- base$frequency[i]
    if (f == 0 || f == max(base$frequency)) next
    
    active.events <- character(0)
    active.durs   <- numeric(0)
    
    for (k in seq_along(products)) {
      prod <- products[[k]]
      durv <- durations[[k]]
      
      evs <- prod$event[prod$frequency == f]
      if (length(evs) == 0) next
      
      for (e in evs) {
        
        if (e %in% c("DC","END") || is.na(e)) {
          active.events <- c(active.events, e)
          next
        }
        
        active.events <- c(active.events, e)
        
        d <- if (e %in% names(durv)) durv[[e]] else NA_real_
        if (!is.na(d)) active.durs <- c(active.durs, d)
      }
    }
    
    if (length(active.events) > 0) {
      base$Names[i] <- paste(active.events, collapse = "+")
      base$duration[i] <- if (length(active.durs) > 0) max(active.durs) else 0
    } else {
      base$duration[i] <- 0
    }
  }
  
  base
}

min_spacing <- function(freq_vec) {
  diffs <- diff(sort(freq_vec))
  min(diffs)
}

# ==========================================================
# 2) BASE STRATEGY (for plotting)
#    (RCFS is present here only if you want the 5-plot layout.
#     It is NOT used in LCA nor in NSGA-II fitness below.)
# ==========================================================

events.GRW   <- c(SR = 13, MR = 50, R = 40, END = lifetime)
duration.GRW <- c(SR = 2,  MR = 10, R = 5)

events.CRW   <- c(SR = 30, MR = 61, R = 43, END = lifetime)
duration.CRW <- c(SR = 3,  MR = 10, R = 20)

events.RCFS   <- c(SR = 20, R = 61, END = lifetime)
duration.RCFS <- c(SR = 2,  R = 25)

events.ETICS   <- c(SR = 35, R = 60, END = lifetime)
duration.ETICS <- c(SR = 3,  R = 5)

events.PCF   <- c(MR = 15, END = lifetime)
duration.PCF <- c(MR = 25)

# ---- Plot system timelines
par(mfrow = c(3, 2))
maint.GRW   <- dist.Events(lifetime, events.GRW,   start.Date, "GRW",   do.plot = TRUE)
maint.CRW   <- dist.Events(lifetime, events.CRW,   start.Date, "CRW",   do.plot = TRUE)
maint.RCFS  <- dist.Events(lifetime, events.RCFS,  start.Date, "RCFS",  do.plot = TRUE)
maint.ETICS <- dist.Events(lifetime, events.ETICS, start.Date, "ETICS", do.plot = TRUE)
maint.PCF   <- dist.Events(lifetime, events.PCF,   start.Date, "PCF",   do.plot = TRUE)

# ---- Integrated timeline (all 5 for display)
products_all  <- list(maint.GRW, maint.CRW, maint.RCFS, maint.ETICS, maint.PCF)
durations_all <- list(duration.GRW, duration.CRW, duration.RCFS, duration.ETICS, duration.PCF)

integrated_all <- combine.lifeTimelines(products_all, durations_all)
total_days_all <- sum(integrated_all$duration)
total_days_all

plot.timeline(integrated_all$frequency, integrated_all$Names, start.Date,
              "Integrated Maintenance Timeline (display)")

# ==========================================================
# 3) DESIGN SPACE EXPLORATION (optional, unchanged)
# ==========================================================

gen.GRW <- function() c(SR = sample(15:25, 1), MR = sample(45:55, 1), R = sample(35:45, 1), END = lifetime)
gen.CRW <- function() c(SR = sample(15:30, 1), MR = sample(20:60, 1), R = sample(45:55, 1), END = lifetime)
gen.RCFS<- function() c(SR = sample(20:40, 1), R  = sample(55:65, 1), END = lifetime)
gen.ETI <- function() c(SR = sample(20:65, 1), R  = sample(55:65, 1), END = lifetime)
gen.PCF <- function() c(MR = sample(20:25, 1), END = lifetime)

design.sample <- function(event.generators, duration.maps, n.samples = 5000) {
  out <- vector("list", n.samples)
  
  for (i in seq_len(n.samples)) {
    
    prods <- vector("list", length(event.generators))
    durs  <- vector("list", length(event.generators))
    
    for (s in seq_along(event.generators)) {
      ev <- event.generators[[s]]()
      prods[[s]] <- dist.Events(lifetime, ev, start.Date, paste0("System ", s), do.plot = FALSE)
      durs[[s]]  <- duration.maps[[s]]
    }
    
    comb <- combine.lifeTimelines(prods, durs)
    
    out[[i]] <- c(
      dur = sum(comb$duration),
      dist.inter = min_spacing(comb$frequency)
    )
  }
  
  as.data.frame(do.call(rbind, out))
}

event.generators <- list(gen.GRW, gen.CRW, gen.RCFS, gen.ETI, gen.PCF)
duration.maps    <- list(duration.GRW, duration.CRW, duration.RCFS, duration.ETICS, duration.PCF)

response.space <- design.sample(event.generators, duration.maps, n.samples = 5000)

p <- low(dur) * high(dist.inter)

sky <- psel(response.space, p)
sky

pareto2 <- psel(response.space, p, top = nrow(response.space))

ggplot(response.space, aes(x = dur, y = dist.inter)) +
  geom_point(shape = 21) +
  geom_point(data = pareto2, size = 3, aes(color = factor(pareto2$.level))) +
  labs(title = "Preference-ranked scenarios (rPref)",
       x = "Total interruption duration (days)",
       y = "Min spacing between interventions (years)")

# Pareto frontier visualization helper (like PDF)
show_front <- function(pref) {
  plot(response.space$dur, response.space$dist.inter,
       xlab = "dur (days)", ylab = "dist.inter (years)")
  sky2 <- psel(response.space, pref)
  plot_front(response.space, pref, col = rgb(0, 0, 1))
  points(sky2$dur, sky2$dist.inter, pch = 19, cex = 1.2)
}

par(mfrow = c(1, 1))
show_front(p)

# Example alternative preference (reverse)
p2 <- high(dur) * low(dist.inter)
show_front(p2)


# ==========================================================
# 4) CSV-DRIVEN LCA (PDF logic)
# ==========================================================

read_quotedline_csv <- function(path) {
  x <- readLines(path, warn = FALSE, encoding = "UTF-8")
  x <- gsub('^"(.*)"$', "\\1", x)
  read.csv(text = x, sep = ",", header = TRUE,
           stringsAsFactors = FALSE, check.names = FALSE)
}

# ---- set your data directory once
data_dir <- "/Users/nicholas_song/Desktop/Uni/CSE/Whole Life Civil Systems Analysis/Group Assignment"

systems <- read_quotedline_csv(file.path(data_dir, "systems.csv")) %>%
  dplyr::mutate(
    system_quantity = as.numeric(system_quantity),
    system_unit     = as.character(system_unit)
  )

materials <- read_quotedline_csv(file.path(data_dir, "material_composition.csv")) %>%
  dplyr::mutate(
    material_amount_per_unit_system = as.numeric(material_amount_per_unit_system),
    material_unit = as.character(material_unit)
  )

fractions <- read_quotedline_csv(file.path(data_dir, "maintenance_action_fractions.csv")) %>%
  dplyr::mutate(
    affected_fraction  = as.numeric(affected_fraction),
    maintenance_action = as.character(maintenance_action)
  )

impacts <- read_quotedline_csv(file.path(data_dir, "material_impacts.csv"))


# If your file has the typo Cost_UER_per_unit, fix it; otherwise leave as-is
if ("Cost_UER_per_unit" %in% names(impacts) && !("Cost_EUR_per_unit" %in% names(impacts))) {
  impacts <- dplyr::rename(impacts, Cost_EUR_per_unit = Cost_UER_per_unit)
}


impacts <- impacts %>%
  dplyr::mutate(
    material_unit = as.character(material_unit),
    CO2_kg_per_unit    = as.numeric(CO2_kg_per_unit),
    NOx_kg_per_unit    = as.numeric(NOx_kg_per_unit),
    SOx_kg_per_unit    = as.numeric(SOx_kg_per_unit),
    Energy_MJ_per_unit = as.numeric(Energy_MJ_per_unit),
    Cost_EUR_per_unit  = as.numeric(Cost_EUR_per_unit)
  )

count_events <- function(schedule, system_id) {
  
  # Force plain data.frame
  schedule <- as.data.frame(schedule)
  
  # Drop unnamed columns (NA or "")
  bad <- is.na(names(schedule)) | names(schedule) == ""
  if (any(bad)) schedule <- schedule[, !bad, drop = FALSE]
  
  # Make remaining names safe + unique
  names(schedule) <- make.names(names(schedule), unique = TRUE)
  
  # Ensure event column exists
  if (!("event" %in% names(schedule))) {
    stop("count_events(): schedule for system ", system_id,
         " does not have an 'event' column. Names are: ",
         paste(names(schedule), collapse = ", "))
  }
  
  schedule %>%
    dplyr::filter(!(event %in% c("DC", "END"))) %>%
    dplyr::count(event, name = "n_events") %>%
    dplyr::mutate(
      system_id = system_id,
      maintenance_action = as.character(event)
    ) %>%
    dplyr::select(system_id, maintenance_action, n_events)
}


calc_LCA_from_schedules <- function(maint_list, systems, materials, fractions, impacts) {
  
  # A) count events
  events_all <- dplyr::bind_rows(lapply(names(maint_list), function(sid) {
    count_events(maint_list[[sid]], sid)
  }))
  
  # B) join fractions + system quantities
  base_actions <- events_all %>%
    dplyr::left_join(fractions, by = c("system_id","maintenance_action")) %>%
    dplyr::left_join(systems, by = "system_id")
  
  missing_frac <- base_actions %>% dplyr::filter(is.na(affected_fraction))
  if (nrow(missing_frac) > 0) {
    print(missing_frac)
    stop("Missing affected_fraction for some (system_id, maintenance_action). Fix maintenance_action_fractions.csv")
  }
  
  missing_sys <- base_actions %>% dplyr::filter(is.na(system_quantity) | is.na(system_unit))
  if (nrow(missing_sys) > 0) {
    print(missing_sys)
    stop("Missing system_quantity/system_unit for some system_id. Fix systems.csv")
  }
  
  # C) expand to materials (this is EXPECTED many-to-many: action rows x material rows)
  inventory <- base_actions %>%
    dplyr::left_join(materials, by = "system_id", relationship = "many-to-many") %>%
    dplyr::mutate(
      material_amount_used =
        n_events *
        system_quantity *
        material_amount_per_unit_system *
        affected_fraction
    )
  
  if (any(is.na(inventory$material_amount_per_unit_system))) {
    stop("Some systems have no material composition rows. Fix material_composition.csv")
  }
  
  # D) join impacts and compute totals
  inventory2 <- inventory %>%
    dplyr::left_join(impacts, by = c("material_id","material_unit"))
  
  missing_imp <- inventory2 %>%
    dplyr::filter(is.na(CO2_kg_per_unit) | is.na(Energy_MJ_per_unit) | is.na(Cost_EUR_per_unit))
  
  if (nrow(missing_imp) > 0) {
    print(missing_imp %>% dplyr::select(system_id, maintenance_action, material_id, material_unit))
    stop("Missing impact factors for some (material_id, material_unit). Fix material_impacts.csv")
  }
  
  detailed <- inventory2 %>%
    dplyr::mutate(
      CO2    = material_amount_used * CO2_kg_per_unit,
      NOx    = material_amount_used * NOx_kg_per_unit,
      SOx    = material_amount_used * SOx_kg_per_unit,
      Energy = material_amount_used * Energy_MJ_per_unit,
      Cost   = material_amount_used * Cost_EUR_per_unit
    )
  
  totals <- detailed %>%
    dplyr::summarise(
      CO2    = sum(CO2, na.rm = TRUE),
      NOx    = sum(NOx, na.rm = TRUE),
      SOx    = sum(SOx, na.rm = TRUE),
      Energy = sum(Energy, na.rm = TRUE),
      Cost   = sum(Cost, na.rm = TRUE)
    )
  
  list(totals = totals, detailed = detailed)
}

# ==========================================================
# 5) NSGA-II OPTIMISATION (NO RCFS FROM HERE ON)
# ==========================================================

fitness <- function(x) {
  
  x <- round(x, 0)
  z <- numeric(7)
  
  # Decision vars (idim = 9):
  # 1 GRW_SR, 2 GRW_MR, 3 GRW_R
  # 4 CRW_SR, 5 CRW_MR, 6 CRW_R
  # 7 ETICS_SR, 8 ETICS_R
  # 9 PCF_MR
  
  ev.GRW   <- c(SR = x[1], MR = x[2], R = x[3], END = lifetime)
  ev.CRW   <- c(SR = x[4], MR = x[5], R = x[6], END = lifetime)
  ev.ETICS <- c(SR = x[7], R  = x[8], END = lifetime)
  ev.PCF   <- c(MR = x[9], END = lifetime)
  
  m.GRW   <- dist.Events(lifetime, ev.GRW,   start.Date, "GRW",   do.plot = FALSE)
  m.CRW   <- dist.Events(lifetime, ev.CRW,   start.Date, "CRW",   do.plot = FALSE)
  m.ETICS <- dist.Events(lifetime, ev.ETICS, start.Date, "ETICS", do.plot = FALSE)
  m.PCF   <- dist.Events(lifetime, ev.PCF,   start.Date, "PCF",   do.plot = FALSE)
  
  maint_list <- list(GRW = m.GRW, CRW = m.CRW, ETICS = m.ETICS, PCF = m.PCF)
  
  # Disruption metrics
  comb <- combine.lifeTimelines(
    products  = list(m.GRW, m.CRW, m.ETICS, m.PCF),
    durations = list(duration.GRW, duration.CRW, duration.ETICS, duration.PCF)
  )
  
  z[1] <- sum(comb$duration)
  z[2] <- -min_spacing(comb$frequency)
  
  # LCA totals (CSV-driven)
  lca <- calc_LCA_from_schedules(
    maint_list = maint_list,
    systems    = systems,
    materials  = materials,
    fractions  = fractions,
    impacts    = impacts
  )
  
  z[3] <- lca$totals$Energy
  z[4] <- lca$totals$CO2
  z[5] <- lca$totals$NOx
  z[6] <- lca$totals$SOx
  z[7] <- lca$totals$Cost
  
  z
}

lower.bounds <- c(
  15, 45, 35,   # GRW: SR, MR, R
  15, 20, 45,   # CRW: SR, MR, R
  20, 55,       # ETICS: SR, R
  20            # PCF: MR
)

upper.bounds <- c(
  25, 55, 45,
  30, 60, 55,
  60, 65,
  25
)

# sanity check for fractions completeness
needed <- data.frame(
  system_id = c("GRW","GRW","GRW","CRW","CRW","CRW","ETICS","ETICS","PCF"),
  maintenance_action = c("SR","MR","R","SR","MR","R","SR","R","MR")
)
print(dplyr::anti_join(needed, fractions, by = c("system_id","maintenance_action")))

# Run NSGA-II
r2 <- nsga2(
  fitness,
  idim = 9,
  odim = 7,
  generations = 20,
  popsize = 240,
  lower.bounds = lower.bounds,
  upper.bounds = upper.bounds
)

# Results
r2Results <- as.data.frame(r2$value)
colnames(r2Results) <- c("duration_days", "neg_min_spacing", "energy_mj", "co2_kg", "nox_kg", "sox_kg", "cost_eur")

pareto <- as.data.frame(paretoFront(r2))
colnames(pareto) <- colnames(r2Results)

ggplot(r2Results, aes(x = duration_days, y = cost_eur)) +
  geom_point(shape = 21) +
  geom_point(data = pareto, size = 3, color = "red") +
  geom_line(data = pareto, color = "blue") +
  labs(
    title = "NSGA-II Pareto front (duration vs cost)",
    x = "Total interruption duration (days)",
    y = "Cost (EUR)"
  )

input.params <- round(r2$par, 0)
all.results  <- cbind(input.params, r2Results, pareto = r2$pareto.optimal)
# ---- Rename columns to numeric axis labels (1–16)
colnames(all.results)[1:16] <- as.character(1:16)



cols_to_plot <- c(
  1:9,   # GRW / CRW / ETICS / PCF decision variables
  10,    # duration_days
  11,    # neg_min_spacing
  12,    # energy_mj
  13,    # co2_kg
  14,    # nox_kg
  15,    # sox_kg
  16     # cost_eur
)

# ---- Legend text (order matches columns 1:(9+7) exactly)
axis_legend <- c(
  "1  GRW_SR",
  "2  GRW_MR",
  "3  GRW_R",
  "4  CRW_SR",
  "5  CRW_MR",
  "6  CRW_R",
  "7  ETICS_SR",
  "8  ETICS_R",
  "9  PCF_MR",
  "10 duration_days",
  "11 neg_min_spacing",
  "12 energy_mj",
  "13 co2_kg",
  "14 nox_kg",
  "15 sox_kg",
  "16 cost_eur"
)

# ---- Make room on the right for legend (increase right margin)
op <- par(no.readonly = TRUE)
par(mar = c(5, 4, 4, 22))  # <- right margin is the last number

# ---- Your plot (UNCHANGED)
MASS::parcoord(all.results[, 1:(9+7)], var.label = TRUE,
               col = ifelse(all.results$pareto, "indianred", "skyblue2"),
               lty = ifelse(all.results$pareto, 1, 3),
               lwd = ifelse(all.results$pareto, 3, 1))

# ---- Legend in the right margin (won't overlap plot)
legend("topright",
       inset = c(-0.8, 0),   # push into the right margin
       xpd = TRUE,
       bty = "n",
       cex = 0.9,
       legend = axis_legend)

# Optional: small style legend below it
legend("bottomright",
       inset = c(-0.8, 0),
       xpd = TRUE,
       bty = "n",
       cex = 0.9,
       legend = c("Pareto-optimal", "Non-Pareto"),
       col = c("indianred", "skyblue2"),
       lty = c(1, 3),
       lwd = c(3, 1))

par(op)