CIFS_Storage_Demand_Model/Scrape_Curie_Map_Totals.js
2026-01-22 16:47:37 -07:00

414 lines
14 KiB
JavaScript

// Get the totals for the last year of the map (2082) as a way to check total capacity. This output can be combined with the yearly data.
(async () => {
/**********************
* Utility functions *
**********************/
/** Pause for `ms` milliseconds; the returned promise resolves once the timer fires. */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

/**
 * Report whether `el` currently has a layout box.
 * Null/undefined elements are not visible; otherwise a non-zero offset size
 * or at least one client rect counts as visible.
 */
const visible = (el) => {
  if (!el) return false;
  if (el.offsetWidth || el.offsetHeight) return true;
  return el.getClientRects().length > 0;
};

/**
 * Normalise text: falsy input becomes "", non-breaking spaces become plain
 * spaces, runs of whitespace collapse to one space, and the ends are trimmed.
 */
const norm = (s) => {
  const raw = s || "";
  return raw.replace(/\u00A0/g, " ").replace(/\s+/g, " ").trim();
};

/** Turn a label into its lookup key: normalised, one trailing colon dropped, lower-cased. */
const labelKey = (s) => {
  const withoutColon = norm(s).replace(/:$/, "");
  return withoutColon.toLowerCase();
};
// CSV helpers
/**
 * Format one value as a CSV field.
 *
 * `null` and `undefined` become an empty field; every other value is
 * converted with `String()`. A field is wrapped in double quotes (with
 * embedded quotes doubled) when it contains a quote, a comma, or any line
 * break. A bare carriage return counts as a line break too: left unquoted it
 * can be read as a record separator by CSV parsers.
 *
 * @param {*} val - Value to serialise.
 * @returns {string} The field text, quoted only when required.
 */
const csvEscape = (val) => {
  const s = (val === null || val === undefined) ? "" : String(val);
  // Quote on delimiter, quote character, LF, or CR.
  if (/[",\r\n]/.test(s)) return `"${s.replace(/"/g, '""')}"`;
  return s;
};
/**
 * Serialise `rows` to CSV text and hand it to the browser as a file download.
 *
 * Each row's cells go through `csvEscape` and are joined with commas; rows are
 * joined with "\n". The download is triggered by clicking a temporary anchor
 * that points at an object URL; the anchor and URL are released one second
 * later.
 *
 * @param {Array<Array<*>>} rows - Table rows, header first if one is wanted.
 * @param {string} [filename] - Name suggested for the downloaded file.
 */
const saveCSV = (rows, filename = "Curie_Spent_Fuel_Site_Totals.csv") => {
  const lines = [];
  for (const row of rows) {
    lines.push(row.map(csvEscape).join(","));
  }
  const blob = new Blob([lines.join("\n")], { type: "text/csv;charset=utf-8;" });
  const objectUrl = URL.createObjectURL(blob);

  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = filename;
  document.body.appendChild(link);
  link.click();

  // Clean up after a delay rather than immediately after the click.
  const cleanup = () => {
    document.body.removeChild(link);
    URL.revokeObjectURL(objectUrl);
  };
  setTimeout(cleanup, 1000);
};
/********************************************
* ISO-only date extraction & min/max *
********************************************/
/**
 * Loosely validate the three parts of a YYYY-MM-DD date.
 *
 * Each part is converted with `Number()` and range-checked: year 1800-2200,
 * month 1-12, day 1-31. The day bound is intentionally light and is not
 * checked against the month's actual length.
 *
 * @returns {boolean} True when all three parts are within range.
 */
const isValidISO = (yyyy, mm, dd) => {
  const within = (raw, low, high) => {
    const n = Number(raw);
    return n >= low && n <= high;
  };
  return within(yyyy, 1800, 2200) && within(mm, 1, 12) && within(dd, 1, 31);
};
/**
 * Pull every YYYY-MM-DD date out of a piece of text.
 *
 * The text is normalised first; each word-bounded match is kept only when
 * `isValidISO` accepts its parts. Duplicates are dropped and the order of
 * first appearance is preserved.
 *
 * @param {string} str - Text to scan (falsy input yields no dates).
 * @returns {string[]} Unique ISO date strings.
 */
const extractISODates = (str) => {
  const unique = new Set();
  for (const [, year, month, day] of norm(str).matchAll(/\b(\d{4})-(\d{2})-(\d{2})\b/g)) {
    if (isValidISO(year, month, day)) unique.add(`${year}-${month}-${day}`);
  }
  return [...unique];
};
/**
 * Find the earliest and latest ISO date mentioned in one string or a list of
 * strings.
 *
 * Falsy entries are skipped; dates are collected with `extractISODates`.
 * Plain string comparison is used, which orders YYYY-MM-DD dates correctly.
 *
 * @param {string|string[]} valOrArr - Text, or list of texts, to scan.
 * @returns {{min: string, max: string}} Both "" when no date was found.
 */
const isoMinMax = (valOrArr) => {
  const inputs = Array.isArray(valOrArr) ? valOrArr : [valOrArr];
  const dates = inputs.filter(Boolean).flatMap((text) => extractISODates(text));
  if (dates.length === 0) return { min: "", max: "" };

  let min = dates[0];
  let max = dates[0];
  for (const date of dates) {
    if (date < min) min = date;
    if (date > max) max = date;
  }
  return { min, max };
};
/****************************************
* Locate the "Locate Site" UI control *
****************************************/
/**
 * Find the page element that acts as the "Locate Site" picker.
 *
 * Tried in order:
 *   1. The first <select> associated with the phrase "locate site" via its
 *      <label for>, its aria-label, or its parent's text.
 *   2. Any combobox-like element, button, or div whose text or aria-label
 *      mentions the phrase; the first visible one wins, else the first match.
 *   3. The first element of any kind whose text mentions the phrase.
 *
 * @returns {{type: "select"|"combobox"|"unknown", el: Element}|null}
 *   The control and how it was recognised, or null when nothing matched.
 */
const findLocateSiteControl = () => {
  const locateSite = /locate site/i;
  const textMatches = (text) => locateSite.test(norm(text));

  // 1) Prefer labeled <select>
  const isLocateSiteSelect = (sel) => {
    if (sel.id) {
      const lab = document.querySelector(`label[for="${CSS.escape(sel.id)}"]`);
      if (lab && textMatches(lab.innerText)) return true;
    }
    // The aria-label on a <select> is tested as-is, without normalisation.
    const aria = sel.getAttribute("aria-label");
    if (aria && locateSite.test(aria)) return true;

    const holder = sel.parentElement;
    return textMatches((holder && holder.innerText) || "");
  };
  const labelledSelect = Array.from(document.querySelectorAll("select")).find(isLocateSiteSelect);
  if (labelledSelect) return { type: "select", el: labelledSelect };

  // 2) ARIA combobox/button with text "Locate Site"
  const widgets = Array.from(
    document.querySelectorAll('[role="combobox"],[aria-haspopup="listbox"],button,div')
  ).filter((el) => textMatches(el.innerText) || textMatches(el.getAttribute("aria-label") || ""));
  if (widgets.length > 0) {
    return { type: "combobox", el: widgets.find(visible) || widgets[0] };
  }

  // 3) Fallback
  for (const el of Array.from(document.querySelectorAll("*"))) {
    if (textMatches(el.innerText)) return { type: "unknown", el };
  }
  return null;
};
/*******************************************************
* Read all facility names from the control *
*******************************************************/
/**
 * List the facility names offered by the "Locate Site" control.
 *
 * For a native <select> the option texts are read directly. For any other
 * control the element is clicked, a menu container is looked up, and its
 * entries are collected repeatedly (scrolling to the bottom and back when the
 * container overflows) until three consecutive passes add nothing new; the
 * menu is then dismissed by clicking the document body.
 *
 * Blank entries and placeholder entries (starting with "select", "choose" or
 * "locate site", or containing "pick to zoom") are left out.
 *
 * @param {{type: string, el: Element}} control - Result of findLocateSiteControl.
 * @returns {Promise<string[]>} Normalised names; empty when no menu was found.
 */
const collectAllFacilityNames = async (control) => {
  const isPlaceholder = (t) => {
    const lowered = norm(t).toLowerCase();
    if (!lowered) return true;
    if (/^select|^choose|^locate site/.test(lowered)) return true;
    return lowered.includes("pick to zoom");
  };

  if (control.type === "select") {
    const names = [];
    for (const opt of Array.from(control.el.querySelectorAll("option"))) {
      const label = norm(opt.textContent);
      if (!isPlaceholder(label)) names.push(label);
    }
    return names;
  }

  // For combobox/custom dropdowns: open it and give the menu time to render.
  control.el.click();
  await sleep(300);
  const menu = document.querySelector(
    '[role="listbox"], [role="menu"], ul[role], div[role="listbox"], .dropdown-menu, .menu, ul'
  );
  if (!menu) {
    console.warn("Could not locate the listbox menu for the combobox.");
    document.body.click();
    return [];
  }

  const seen = new Set();
  const harvest = () => {
    const entries = menu.querySelectorAll('[role="option"], li, [data-value], .option, .menu-item');
    Array.from(entries)
      .map((entry) => norm(entry.textContent))
      .filter((label) => !isPlaceholder(label))
      .forEach((label) => seen.add(label));
  };

  let previousCount = -1;
  let idlePasses = 0;
  do {
    harvest();
    if (menu.scrollHeight > menu.clientHeight) {
      // Read again at the bottom, then return to the top.
      menu.scrollTop = menu.scrollHeight;
      await sleep(150);
      harvest();
      menu.scrollTop = 0;
      await sleep(120);
    }
    if (seen.size === previousCount) {
      idlePasses += 1;
    } else {
      previousCount = seen.size;
      idlePasses = 0;
    }
  } while (idlePasses < 3);

  document.body.click();
  return Array.from(seen);
};
/***************************************************************
* Select a facility by name in the control *
***************************************************************/
/**
 * Choose the facility called `name` in the "Locate Site" control.
 *
 * Native <select>: the option whose normalised text equals the normalised
 * name is selected, then bubbling "input" and "change" events are dispatched.
 *
 * Anything else: the control is clicked, and after a short wait the menu
 * entry with exactly matching text (or, failing that, the first entry whose
 * text contains the name, ignoring case) is scrolled into view and clicked.
 *
 * @param {{type: string, el: Element}} control - Result of findLocateSiteControl.
 * @param {string} name - Facility name as listed by the control.
 * @throws {Error} When no option or menu entry matches `name`.
 */
const selectFacility = async (control, name) => {
  const textOf = (node) => norm(node.textContent);

  if (control.type === "select") {
    const { el: dropdown } = control;
    const match = Array.from(dropdown.options).find((opt) => textOf(opt) === norm(name));
    if (!match) throw new Error(`Option not found: ${name}`);
    dropdown.value = match.value;
    // Setting .value does not notify listeners, so fire the events by hand.
    for (const type of ["input", "change"]) {
      dropdown.dispatchEvent(new Event(type, { bubbles: true }));
    }
    return;
  }

  control.el.click();
  await sleep(180);

  const entries = Array.from(
    document.querySelectorAll('[role="option"], li, [data-value], .option, .menu-item')
  );
  const isExact = (entry) => textOf(entry) === norm(name);
  const isPartial = (entry) => textOf(entry).toLowerCase().includes(norm(name).toLowerCase());
  const target = entries.find(isExact) || entries.find(isPartial);
  if (!target) throw new Error(`Could not find menu item for: ${name}`);

  target.scrollIntoView({ block: "nearest" });
  target.click();
  await sleep(220);
};
/*******************************************************
* Wait for popup/details and extract the needed data *
* (scope to most recently visible container) *
*******************************************************/
/**
 * Poll the page until a facility details list is showing, or time runs out.
 *
 * On each pass the visible popup/dialog-style containers are checked from
 * last to first; the first one holding an <li> whose text starts with one of
 * the known labels wins, and all of that container's <li> elements are
 * returned. If no container qualifies but some visible <li> anywhere on the
 * page mentions a known label, every visible <li> is returned instead.
 * Passes are 150 ms apart.
 *
 * @param {number} [timeoutMs=15000] - How long to keep polling.
 * @returns {Promise<Element[]>} The matching <li> elements, or [] on timeout.
 */
const waitForDetailLis = async (timeoutMs = 15000) => {
  const selectorContainers = '.leaflet-popup-content, .leaflet-popup, .popup, .modal, .panel, [role="dialog"], [role="region"]';
  const leadingLabel = /^(?:Number of Assemblies:|Operating Date:|Last Projected Discharge:|Projected License Expiration Year:|Metric Tons of Heavy Metal \(MTHM\):)/i;
  const anyLabel = /Number of Assemblies:|Operating Date:|Last Projected Discharge:|Projected License Expiration Year:|Metric Tons of Heavy Metal \(MTHM\):/i;
  const mentions = (pattern) => (li) => pattern.test(norm(li.textContent));

  const startedAt = performance.now();
  while (performance.now() - startedAt < timeoutMs) {
    // Prefer the last (most recently added) container that contains our labels
    const panels = Array.from(document.querySelectorAll(selectorContainers)).filter(visible);
    for (const panel of panels.reverse()) {
      const items = Array.from(panel.querySelectorAll("li"));
      if (items.some(mentions(leadingLabel))) return items;
    }

    // Fallback: any visible <li> that matches
    const visibleItems = Array.from(document.querySelectorAll("li")).filter(visible);
    if (visibleItems.some(mentions(anyLabel))) return visibleItems;

    await sleep(150);
  }
  return [];
};
/**
 * Turn details <li> elements into a Map of label key -> list of values.
 *
 * Only the five known labels are kept. For each item, the label is the
 * <strong>/<b> text when that text ends with a colon; otherwise the item text
 * is split at its first colon. A known label with an empty value still gets
 * an (empty) entry. Items with no text are ignored.
 *
 * @param {Iterable<Element>} lis - List items from the details popup.
 * @returns {Map<string, string[]>} Lower-cased label (no colon) -> values.
 */
const parseDetailListToMap = (lis) => {
  const wanted = new Set([
    "number of assemblies",
    "operating date",
    "last projected discharge",
    "projected license expiration year",
    "metric tons of heavy metal (mthm)"
  ]);

  // Split "Label: value" at the first colon; the colon stays with the label.
  const splitAtColon = (text) => {
    const at = text.indexOf(":");
    return at === -1 ? ["", ""] : [text.slice(0, at + 1), text.slice(at + 1)];
  };

  const result = new Map();
  for (const li of lis) {
    const text = norm(li.textContent);
    if (!text) continue;

    // Prefer <strong>/<b> at start
    const bold = li.querySelector("strong, b");
    const boldText = bold ? norm(bold.textContent) : "";

    let rawLabel;
    let rawValue;
    if (bold && /:$/.test(boldText)) {
      rawLabel = boldText;
      rawValue = norm(text.replace(boldText, "")).replace(/^:\s*/, "");
    } else {
      [rawLabel, rawValue] = splitAtColon(text);
    }

    const key = labelKey(rawLabel);
    if (!wanted.has(key)) continue;

    const value = norm(rawValue);
    if (!result.has(key)) result.set(key, []);
    if (value) result.get(key).push(value);
  }
  return result;
};
/**
 * Read the details shown for the currently selected facility.
 *
 * Waits up to 15 s for the details list, parses it, and derives:
 *   - numberOfAssemblies: first run of digits/commas in the "Number of
 *     Assemblies" values, commas removed (or the normalised raw text when no
 *     digit is present);
 *   - mthm: first number, optionally with decimals, in the MTHM values,
 *     commas removed (same raw-text fallback);
 *   - opMin / opMax: earliest and latest ISO date in "Operating Date";
 *   - lpdMin / lpdMax: earliest and latest ISO date in "Last Projected
 *     Discharge"; when that yields nothing, ISO dates from "Projected License
 *     Expiration Year"; when that has none either, its four-digit years
 *     (1800-2200) expressed as YYYY-12-31.
 *
 * @returns {Promise<{numberOfAssemblies: string, mthm: string, opMin: string,
 *   opMax: string, lpdMin: string, lpdMax: string}>} Every field is "" when
 *   the corresponding data is missing or the list never appeared.
 */
const getDetailsForCurrentSelection = async () => {
  const lis = await waitForDetailLis(15000);
  if (!lis.length) {
    return { numberOfAssemblies: "", mthm: "", opMin: "", opMax: "", lpdMin: "", lpdMax: "" };
  }

  const byLabel = parseDetailListToMap(lis);
  const valuesOf = (label) => byLabel.get(label) || [];
  const noRange = () => ({ min: "", max: "" });

  // First numeric token in the joined values, commas removed; raw text otherwise.
  const leadingNumber = (values, pattern) => {
    if (!values.length) return "";
    const joined = values.join(" ");
    const hit = joined.match(pattern);
    return hit ? hit[1].replace(/,/g, "") : norm(joined);
  };
  const dateRange = (values) => (values.length ? isoMinMax(values) : noRange());

  // Total_Assemblies
  const numberOfAssemblies = leadingNumber(valuesOf("number of assemblies"), /(\d[\d,]*)/);
  // Total_Tons (MTHM)
  const mthm = leadingNumber(valuesOf("metric tons of heavy metal (mthm)"), /(\d[\d,]*(?:\.\d+)?)/);
  // Operating Date (ISO only)
  const { min: opMin, max: opMax } = dateRange(valuesOf("operating date"));

  // Close date from LPD (ISO-only). Fallback to PLEY (ISO-first, then year -> YYYY-12-31)
  const closeRange = () => {
    const discharge = dateRange(valuesOf("last projected discharge"));
    if (discharge.min && discharge.max) return discharge;

    const expirations = valuesOf("projected license expiration year");
    if (!expirations.length) return noRange();

    const isoFromPley = isoMinMax(expirations);
    if (isoFromPley.min && isoFromPley.max) return isoFromPley;

    // Year-only fallback -> end-of-year
    const years = expirations.flatMap((s) =>
      (norm(s).match(/\b(\d{4})\b/g) || []).map(Number).filter((y) => y >= 1800 && y <= 2200)
    );
    if (!years.length) return noRange();
    return { min: `${Math.min(...years)}-12-31`, max: `${Math.max(...years)}-12-31` };
  };
  const { min: lpdMin, max: lpdMax } = closeRange();

  return { numberOfAssemblies, mthm, opMin, opMax, lpdMin, lpdMax };
};
/*******************************
* Main routine starts here *
*******************************/
console.log("Locating the 'Locate Site' control...");
const control = findLocateSiteControl();
if (!control) {
console.error("Could not find the 'Locate Site' control on this page. Scroll the page to ensure it is visible, then try again.");
return;
}
console.log("Control type:", control.type);
console.log("Collecting facility names (excluding 'Pick to zoom')...");
const facilityNames = await collectAllFacilityNames(control);
if (!facilityNames.length) {
console.error("No facility names discovered in the 'Locate Site' menu.");
return;
}
console.log(`Discovered ${facilityNames.length} facilities.`);
const results = [];
// Simplified headers for R/Python use
results.push([
"Facility",
"Total_Assemblies",
"Total_Tons",
"Op_Date_Min",
"Op_Date_Max",
"Close_Date_Min",
"Close_Date_Max"
]);
let i = 0;
for (const name of facilityNames) {
i++;
console.log(`[${i}/${facilityNames.length}] Selecting: ${name}`);
try {
await selectFacility(control, name);
await sleep(350); // allow UI to render
const details = await getDetailsForCurrentSelection();
results.push([
name,
details.numberOfAssemblies || "",
details.mthm || "",
details.opMin || "",
details.opMax || "",
details.lpdMin || "",
details.lpdMax || ""
]);
} catch (err) {
console.warn(`Failed to collect for "${name}":`, err?.message || err);
results.push([name, "", "", "", "", "", ""]);
}
await sleep(220);
}
console.log("Saving CSV...");
saveCSV(results, "Curie_Spent_Fuel_Site_Totals.csv");
console.log("Done. Check your downloads for Curie_Spent_Fuel_Site_Totals.csv");
})();