// File listing metadata: 424 lines, 15 KiB, JavaScript.
(async () => {
// === Curie "Locate Site" scraper → CSV: year,Facility,Assemblies,Tons (stops at 2082) ===
// - Facility from dropdown selection text
// - Assemblies from: <li>Number of Assemblies: N</li> (missing -> 0)
// - Tons from: <li>Metric Tons of Heavy Metal (MTHM): N</li> (missing -> 0)
// - Debounced popup read (waits for popup content change to avoid offset)
// - Dedupes per (year|Facility)
// - Steps year-by-year to 2082 and saves Curie_Spent_Fuel_Timeline.csv

/** Last timeline year to capture (inclusive); year stepping stops once it is reached. */
const STOP_YEAR = 2082;

/**
 * Pause for a given duration.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Report whether `el` is a rendered DOM element, i.e. an `Element` whose bounding box has
 * non-zero width and height.
 *
 * Note: despite the name, this does not test whether the element intersects the visible
 * viewport; it only rejects missing, non-element and zero-sized values.
 *
 * @param {*} el - Candidate value.
 * @returns {boolean}
 */
const inViewport = (el) => {
  // `instanceof` is already false for null/undefined, so no separate null check is needed.
  if (!(el instanceof Element)) return false;
  const { width, height } = el.getBoundingClientRect();
  return width > 0 && height > 0;
};

// --- Year readout ---

/**
 * Read the year currently shown by the timeline slider.
 * @returns {number|null} The first standalone 4-digit year starting with 18, 19 or 20 found
 *   in the text of `#slider-current-year.slider-display`, or `null` when the element is
 *   missing or its text contains no such year.
 */
const getCurrentYear = () => {
  const display = document.querySelector('#slider-current-year.slider-display');
  if (!display) return null;
  const match = (display.textContent || '').match(/\b(?:18|19|20)\d{2}\b/);
  return match ? Number.parseInt(match[0], 10) : null;
};

// --- Timeline arrows ---

/**
 * Comma-joined CSS selector list describing elements treated as "clickable": native
 * buttons/links, common UI-framework button classes, and elements carrying click or
 * test-hook attributes. The arrow finders pass it to `closest()` to resolve an icon to the
 * control that wraps it.
 */
const clickableSel = [
  'button', '[role="button"]', 'a',
  '.btn', '.button', '.MuiIconButton-root', '.v-btn',
  '.mdc-icon-button', '.ant-btn', '.el-button',
  '.mat-icon-button', '.mat-button', '.bp3-button',
  '[onclick]', '[ng-click]', '[data-action]', '[data-testid]', '[data-cy]', '[data-qa]',
].join(',');

/**
 * Locate the timeline arrow control for one direction.
 *
 * Takes the first rendered Font Awesome `fa-arrow-<direction>` icon and returns its nearest
 * clickable ancestor (per `clickableSel`), or the icon itself when it has no such ancestor.
 *
 * @param {'left'|'right'} direction - Arrow direction to look for.
 * @returns {Element|null} The control to click, or `null` when no rendered icon exists.
 */
const findArrowBtn = (direction) => {
  // `i.fa-arrow-*` already matches the `.fa` and `.fas` variants, so one selector suffices.
  const icon = Array.from(document.querySelectorAll(`i.fa-arrow-${direction}`))
    .find((el) => inViewport(el));
  return icon ? (icon.closest(clickableSel) || icon) : null;
};

/** @returns {Element|null} The right-arrow timeline control, or `null` if none is rendered. */
const findRightBtn = () => findArrowBtn('right');

/** @returns {Element|null} The left-arrow timeline control, or `null` if none is rendered. */
const findLeftBtn = () => findArrowBtn('left');

/**
 * Poll the year readout until it shows an integer year different from `prevYear`.
 *
 * @param {number|null} prevYear - Year displayed before the change was triggered.
 * @param {number} [timeoutMs=4000] - Maximum time to keep polling.
 * @param {number} [pollMs=120] - Delay between polls.
 * @returns {Promise<number|null>} The new year; on timeout, whatever `getCurrentYear()`
 *   reports at that moment (which may still equal `prevYear`, or be `null`).
 */
const waitForYearChangeFrom = async (prevYear, timeoutMs = 4000, pollMs = 120) => {
  const deadline = performance.now() + timeoutMs;
  while (performance.now() < deadline) {
    const shown = getCurrentYear();
    if (Number.isInteger(shown) && shown !== prevYear) return shown;
    await sleep(pollMs);
  }
  return getCurrentYear();
};

/**
 * Advance the timeline by one year.
 *
 * Clicks `rightBtn` and waits for the readout to change, retrying the click once if it does
 * not. If the readout lands more than one year past `prevYear`, clicks `leftBtn` (at most 10
 * times) to walk back toward `prevYear + 1`.
 *
 * The returned value is always the year the readout actually shows. If the overshoot cannot
 * be corrected, the displayed year is returned (with a warning) rather than an assumed
 * `prevYear + 1`, so captured data is never labelled with a year the UI is not showing.
 *
 * @param {{click: Function}|null} rightBtn - Control that moves the timeline forward.
 * @param {{click: Function}|null} leftBtn - Control that moves it back (used for correction).
 * @param {number} prevYear - Year displayed before stepping.
 * @returns {Promise<number|null>} The displayed year after stepping, or `null` when
 *   `rightBtn` is missing, the readout is unreadable, or the year is still `prevYear`.
 */
const stepRightOneYear = async (rightBtn, leftBtn, prevYear) => {
  if (!rightBtn) return null;

  rightBtn.click();
  let shown = await waitForYearChangeFrom(prevYear);
  if (!Number.isInteger(shown) || shown === prevYear) {
    // The first click had no visible effect; pause briefly and try once more.
    await sleep(250);
    rightBtn.click();
    shown = await waitForYearChangeFrom(prevYear);
  }

  // Overshoot correction (> +1): step back one click at a time while the readout is readable.
  const target = prevYear + 1;
  if (leftBtn) {
    for (let tries = 0; tries < 10 && Number.isInteger(shown) && shown > target; tries++) {
      leftBtn.click();
      shown = await waitForYearChangeFrom(shown, 2500, 120);
    }
  }

  if (!Number.isInteger(shown) || shown === prevYear) return null;
  if (shown > target) {
    console.warn(`Timeline shows ${shown} instead of ${target}; using the displayed year.`);
  }
  return shown;
};

// --- Popup helpers & parsing (DOM-based li extraction) ---

/** @returns {Element|null} The first `.leaflet-popup-content` element in the document. */
const popupEl = () => document.querySelector('.leaflet-popup-content');

/**
 * Fingerprint of the current popup, used to detect when its content has been replaced.
 * @returns {string} The popup's trimmed `innerHTML`, or '' when there is no popup.
 */
const popupSignature = () => (popupEl()?.innerHTML || '').trim();

/**
 * Wait for a popup content element to exist and be rendered.
 *
 * @param {number} [timeoutMs=3500] - Maximum time to keep polling.
 * @param {number} [pollMs=90] - Delay between polls.
 * @returns {Promise<Element|null>} The popup content element, or `null` on timeout.
 */
const waitForPopup = async (timeoutMs = 3500, pollMs = 90) => {
  const deadline = performance.now() + timeoutMs;
  while (performance.now() < deadline) {
    const content = popupEl();
    // A null offsetParent means the element is not laid out (e.g. display: none).
    if (content && content.offsetParent !== null) return content;
    await sleep(pollMs);
  }
  return null;
};

/**
 * Wait until the popup signature is non-empty and differs from `prevSig`.
 *
 * @param {string} prevSig - Signature taken before the new selection was made.
 * @param {number} [timeoutMs=4000] - Maximum time to keep polling.
 * @param {number} [pollMs=90] - Delay between polls.
 * @returns {Promise<string>} The new signature; on timeout, the current signature, which
 *   may be unchanged or empty — callers must compare it against `prevSig` themselves.
 */
const waitForPopupContentChange = async (prevSig, timeoutMs = 4000, pollMs = 90) => {
  const deadline = performance.now() + timeoutMs;
  while (performance.now() < deadline) {
    const current = popupSignature();
    if (current && current !== prevSig) return current;
    await sleep(pollMs);
  }
  return popupSignature(); // may be unchanged
};

/** Click the popup's close button (`.leaflet-popup-close-button`) if one is present. */
const closePopupIfOpen = () => {
  document.querySelector('.leaflet-popup-close-button')?.click();
};

// Extract Assemblies and Tons from <li> elements in the popup

/**
 * Parse the assembly count and tonnage out of a popup's `<li>` items.
 *
 * Recognised items (case-insensitive, matched at the start of the item text):
 *   - "Number of Assemblies: N"                 → `Assemblies` (integer)
 *   - "Metric Tons of Heavy Metal (MTHM): N"    → `Tons` (decimal)
 * Thousands separators (commas) are ignored. A missing item or unparsable value yields 0;
 * if an item occurs more than once, the last occurrence wins.
 *
 * @param {{querySelectorAll: Function}|null} popup - Popup content element.
 * @returns {{Assemblies: number, Tons: number}}
 */
const extractFromPopup = (popup) => {
  let Assemblies = 0;
  let Tons = 0;
  if (!popup) return { Assemblies, Tons };

  // Everything after the first ':' with commas stripped (e.g. "1,234" → "1234").
  const valueText = (text) => text.slice(text.indexOf(':') + 1).replace(/,/g, '');

  for (const li of Array.from(popup.querySelectorAll('li'))) {
    const text = (li.textContent || '').trim();
    if (/^number\s+of\s+assemblies\s*:/i.test(text)) {
      const digits = valueText(text).match(/\d+/);
      Assemblies = digits ? Number.parseInt(digits[0], 10) : 0;
    }
    if (/^metric\s+tons\s+of\s+heavy\s+metal\s*\(mthm\)\s*:/i.test(text)) {
      // Accepts "12", "12.5" and ".5"; without the second alternative ".5" would be read as 5.
      const number = valueText(text).match(/-?(?:\d+(?:\.\d*)?|\.\d+)/);
      Tons = number ? Number.parseFloat(number[0]) : 0;
    }
  }

  return {
    Assemblies: Number.isFinite(Assemblies) ? Assemblies : 0,
    Tons: Number.isFinite(Tons) ? Tons : 0,
  };
};

// --- Locate Site control discovery (native <select> or ARIA combobox) ---

/**
 * Test whether a piece of text mentions "Locate Site".
 * @param {string|null|undefined} txt - Text to inspect; nullish is treated as ''.
 * @returns {boolean} True when the words "locate site" appear as whole words, in any case.
 */
const isLocateLabel = (txt) => /\blocate\s+site\b/i.test(txt || '');

/**
 * Find the native `<select>` that acts as the "Locate Site" control.
 *
 * Strategies, in order (only rendered selects are accepted):
 *   1. A `<label>` mentioning "Locate Site": its `for` target, else a select inside the
 *      label, else a select inside the label's parent element.
 *   2. A select whose `aria-label` or `title` mentions "Locate Site".
 *   3. Any select whose own text, or whose parent's text, mentions "Locate Site".
 *
 * The original used `el.closest('*')` to reach "nearby" content, but that call always returns
 * `el` itself; the parent element is consulted explicitly here.
 *
 * @returns {Element|null} The matching select, or `null` when none is found.
 */
const findLocateSelect = () => {
  const isVisibleSelect = (el) =>
    Boolean(el) && el.tagName?.toLowerCase() === 'select' && inViewport(el);

  // 1) Label + select
  for (const label of Array.from(document.querySelectorAll('label'))) {
    if (!isLocateLabel(label.textContent)) continue;
    const targetId = label.getAttribute('for');
    const candidates = [
      targetId ? document.getElementById(targetId) : null,
      label.querySelector('select'),
      label.parentElement?.querySelector('select'),
    ];
    const match = candidates.find((el) => isVisibleSelect(el));
    if (match) return match;
  }

  // 2) aria-label/title — check visibility per candidate so a hidden match cannot mask a visible one
  const labelled = Array.from(document.querySelectorAll('select[aria-label], select[title]'))
    .find((sel) => inViewport(sel)
      && (isLocateLabel(sel.getAttribute('aria-label')) || isLocateLabel(sel.getAttribute('title'))));
  if (labelled) return labelled;

  // 3) any visible select near text
  for (const sel of Array.from(document.querySelectorAll('select'))) {
    if (!inViewport(sel)) continue;
    if (isLocateLabel(sel.innerText) || isLocateLabel(sel.parentElement?.innerText)) return sel;
  }
  return null;
};

/**
 * Find a custom (non-native) "Locate Site" dropdown trigger.
 *
 * Strategies, in order:
 *   1. A rendered combobox-like element (`[role="combobox"]`, `input[aria-haspopup="listbox"]`
 *      or `[aria-controls]`) whose aria-label, title or text mentions "Locate Site".
 *   2. A rendered trigger (`[role="combobox"]`, input, `.select`, `.dropdown`, button) inside
 *      a rendered element whose text mentions "Locate Site".
 *
 * For strategy 2 the candidate containers are tried from the shortest text to the longest.
 * In document order the first candidates are `<html>`/`<body>`, whose first trigger is simply
 * the first button or input on the page; starting from the smallest matching container picks
 * the trigger closest to the label instead.
 *
 * @returns {Element|null} The trigger element, or `null` when none is found.
 */
const findLocateCombobox = () => {
  const describe = (el) =>
    el.getAttribute('aria-label') || el.getAttribute('title') || el.textContent || '';

  const byAria = Array.from(
    document.querySelectorAll('[role="combobox"], input[aria-haspopup="listbox"], [aria-controls]')
  ).find((el) => inViewport(el) && isLocateLabel(describe(el)));
  if (byAria) return byAria;

  const textLength = (el) => (el.textContent || '').length;
  const containers = Array.from(document.querySelectorAll('*'))
    .filter((el) => inViewport(el) && isLocateLabel(el.textContent || ''))
    .sort((a, b) => textLength(a) - textLength(b)); // innermost (shortest text) first

  for (const container of containers) {
    const trigger = container.querySelector('[role="combobox"], input, .select, .dropdown, button');
    if (trigger && inViewport(trigger)) return trigger;
  }
  return null;
};

// Build fresh list of option texts each year (unique, in order)

/**
 * Collect the selectable facility names from a native `<select>`.
 *
 * Skips disabled options, blank options, and placeholder entries (text starting with
 * "select"/"choose" or containing "locate site"). Texts are trimmed and de-duplicated,
 * keeping first-seen order.
 *
 * @param {{options: ArrayLike<{text: string, disabled?: boolean}>}} sel - The select element.
 * @returns {string[]} Unique option texts in document order.
 */
const getSelectOptionTexts = (sel) => {
  const unique = new Set(); // a Set preserves insertion order and drops repeats
  for (const option of Array.from(sel.options)) {
    const text = (option.text || '').trim();
    if (option.disabled || !text) continue;
    if (/^select\b|^choose\b|locate site/i.test(text)) continue; // placeholders
    unique.add(text);
  }
  return [...unique];
};

/**
 * Open a custom dropdown and list the facility names it offers.
 *
 * Clicks `combo`, waits up to 1.5 s for an option panel, collects the trimmed text of its
 * rendered items (minus placeholder entries, de-duplicated in order), then clicks the
 * document body to dismiss the panel.
 *
 * Panel detection prefers a rendered element among all selector matches, so an unrelated
 * hidden `.menu`/`.dropdown-menu` earlier in the document is not picked. If no match is
 * rendered by the deadline, the first match is used as before.
 *
 * @param {{scrollIntoView: Function, click: Function}} combo - The dropdown trigger.
 * @returns {Promise<{texts: string[]}>} Unique option texts; empty when no panel appears.
 */
const openComboAndGetOptionTexts = async (combo) => {
  combo.scrollIntoView({ block: 'center' });
  combo.click();

  const panelSel = '[role="listbox"], .select-menu, .dropdown-menu, .menu, .listbox';
  let matches = [];
  let panel = null;
  const deadline = performance.now() + 1500;
  while (performance.now() < deadline) {
    matches = Array.from(document.querySelectorAll(panelSel));
    panel = matches.find((el) => el.offsetParent !== null) ?? null;
    if (panel) break;
    await sleep(60);
  }
  // offsetParent is also null for position: fixed elements, so keep the first-match fallback.
  if (!panel) panel = matches[0] ?? null;
  if (!panel) return { texts: [] };

  const unique = new Set();
  for (const item of Array.from(panel.querySelectorAll('[role="option"], li, .option, .menu-item'))) {
    const text = (item.textContent || '').trim();
    if (!text || !inViewport(item)) continue;
    if (/^select\b|^choose\b|locate site/i.test(text)) continue;
    unique.add(text);
  }

  // Close panel to keep a clean state
  document.body.click();
  await sleep(80);
  return { texts: [...unique] };
};

// Select option by exact text

/**
 * Select the option of a native `<select>` whose trimmed text equals `text`, and notify the
 * page by dispatching bubbling `input` and `change` events.
 *
 * @param {{options: ArrayLike<{text: string}>, selectedIndex: number, dispatchEvent: Function}} sel
 *   The select element.
 * @param {string} text - Exact (trimmed) option text to select.
 * @returns {Promise<boolean>} `true` when the option was found and selected, else `false`.
 */
const selectByTextInSelect = async (sel, text) => {
  const index = Array.from(sel.options).findIndex((option) => (option.text || '').trim() === text);
  if (index < 0) return false;

  sel.selectedIndex = index;
  // Setting selectedIndex from script fires no events, so emit them for the page's listeners.
  sel.dispatchEvent(new Event('input', { bubbles: true }));
  sel.dispatchEvent(new Event('change', { bubbles: true }));
  return true;
};

|
|
const openComboAndClickText = async (combo, text) => {
|
|
combo.scrollIntoView({ block: 'center' });
|
|
combo.click();
|
|
const start = performance.now();
|
|
let panel = null;
|
|
while (performance.now() - start < 1500) {
|
|
panel = document.querySelector('[role="listbox"], .select-menu, .dropdown-menu, .menu, .listbox');
|
|
if (panel && panel.offsetParent !== null) break;
|
|
await sleep(60);
|
|
}
|
|
if (!panel) return false;
|
|
const item = Array.from(panel.querySelectorAll('[role="option"], li, .option, .menu-item'))
|
|
.find(el => (el.textContent || '').trim() === text);
|
|
if (!item) {
|
|
document.body.click();
|
|
await sleep(50);
|
|
return false;
|
|
}
|
|
item.scrollIntoView({ block: 'nearest' });
|
|
item.click();
|
|
await sleep(120);
|
|
return true;
|
|
};
|
|
|
|
// --- CSV helpers ---

/**
 * Format one value as a CSV field.
 *
 * `null`/`undefined` become an empty field. Values containing a double quote, comma, carriage
 * return or line feed are wrapped in double quotes with embedded quotes doubled.
 *
 * @param {*} v - Value to format.
 * @returns {string} The CSV-safe field text.
 */
const csvEscape = (v) => {
  if (v == null) return '';
  const s = String(v);
  // A bare \r also breaks a record in CSV readers, so it must trigger quoting as well.
  return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
};

/**
 * Build the CSV text for the captured rows and trigger a browser download named
 * `Curie_Spent_Fuel_Timeline.csv`.
 *
 * The file has a `year,Facility,Assemblies,Tons` header followed by one line per row; every
 * field goes through `csvEscape`. The download is started by clicking a temporary
 * `<a download>` element that points at a Blob URL.
 *
 * @param {Array<{year: number, Facility: string, Assemblies: number, Tons: number}>} rows
 *   Rows to write, in output order.
 * @returns {void}
 */
const downloadCSV = (rows) => {
  const header = ['year', 'Facility', 'Assemblies', 'Tons'];
  const lines = rows.map(({ year, Facility, Assemblies, Tons }) =>
    [year, Facility, Assemblies, Tons].map((field) => csvEscape(field)).join(','));
  const csv = `${header.join(',')}\n${lines.join('\n')}`;

  const blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
  const link = document.createElement('a');
  link.href = URL.createObjectURL(blob);
  link.download = 'Curie_Spent_Fuel_Timeline.csv'; // fixed name
  // The link is attached to the document only for the duration of the click.
  document.body.appendChild(link);
  link.click();
  link.remove();
};

// --- MAIN (declare once; no redeclarations) ---

// Timeline arrow controls: looked up once here and reused for every year step.
const rightBtn = findRightBtn();
const leftBtn = findLeftBtn();

// Cooperative stop flag: set `window._curieScrapeStop = true` from the console to end the
// run early; the capture and stepping loops check it between items.
window._curieScrapeStop = false;

let year = getCurrentYear();
if (!Number.isInteger(year)) {
  console.warn('Could not read the current year; assuming 1968.');
  year = 1968;
}

// Resolve the Locate Site control: prefer a native <select>, else a custom combobox trigger.
const nativeSelect = findLocateSelect();
const comboTrigger = nativeSelect ? null : findLocateCombobox();
if (!nativeSelect && !comboTrigger) {
  // Nothing can be scraped without the control; abort through the wrapper's catch handler,
  // which reports the error on the console.
  throw new Error('Could not find the "Locate Site" dropdown/combobox. Make sure it is visible.');
}

// Accumulates one { year, Facility, Assemblies, Tons } record per captured popup.
const rows = [];
console.log(`Starting capture via "Locate Site" from ${year} to ${STOP_YEAR} (inclusive).`);

// Capture all facilities for a year (debounced popup + precise li parsing)

/**
 * Capture one row per facility for the year currently shown on the timeline.
 *
 * For each "Locate Site" option: close any open popup, select the option, wait for a popup
 * whose content differs from what was there before the selection, parse it, and append
 * `{ year, Facility, Assemblies, Tons }` to `rows`. A facility is skipped (with a warning)
 * when it cannot be selected, when no popup appears, or when the popup content is still
 * unchanged after one retry — the last rule prevents recording the previous facility's
 * numbers under the next facility's name.
 *
 * Stops early when `window._curieScrapeStop` is truthy.
 *
 * @param {number} y - Year label to store on every captured row.
 * @returns {Promise<void>}
 */
const captureYear = async (y) => {
  // Option list is rebuilt on every call rather than cached across years.
  const optionTexts = nativeSelect
    ? getSelectOptionTexts(nativeSelect)
    : (await openComboAndGetOptionTexts(comboTrigger)).texts;

  console.log(`Year ${y}: Locate Site options = ${optionTexts.length}`);

  // Select a facility through whichever control type was discovered.
  const selectFacility = (name) => (nativeSelect
    ? selectByTextInSelect(nativeSelect, name)
    : openComboAndClickText(comboTrigger, name));

  const seenFacilities = new Set(); // dedupe per (year|Facility); `y` is fixed for this call
  const yearRows = [];

  for (let i = 0; i < optionTexts.length; i++) {
    if (window._curieScrapeStop) break;

    const Facility = optionTexts[i];
    if (seenFacilities.has(Facility)) continue;

    // Close popup so content change is detectable
    closePopupIfOpen();
    const staleSig = popupSignature();

    // Select by text
    if (!(await selectFacility(Facility))) {
      console.warn(` [${y}] Could not select "${Facility}".`);
      continue;
    }

    // Wait for popup & its content to CHANGE vs previous signature
    if (!(await waitForPopup(4500, 100))) {
      console.warn(` [${y}] No popup after selecting "${Facility}". Skipping.`);
      continue;
    }
    let freshSig = await waitForPopupContentChange(staleSig, 4500, 100);
    if (!freshSig || freshSig === staleSig) {
      // Retry once
      await selectFacility(Facility);
      if (!(await waitForPopup(4500, 100))) {
        console.warn(` [${y}] Popup did not open for "${Facility}" after retry.`);
        continue;
      }
      freshSig = await waitForPopupContentChange(staleSig, 4500, 100);
      if (!freshSig || freshSig === staleSig) {
        console.warn(` [${y}] Popup content unchanged for "${Facility}". Skipping to avoid offset.`);
        continue;
      }
    }

    // Parse Assemblies and Tons from the popup's <li> elements
    const { Assemblies, Tons } = extractFromPopup(popupEl());

    // Record row
    seenFacilities.add(Facility);
    const row = { year: y, Facility, Assemblies, Tons };
    rows.push(row);
    yearRows.push(row);

    // Close before next selection
    closePopupIfOpen();

    if ((i + 1) % 10 === 0) {
      console.log(` ${i + 1}/${optionTexts.length} facilities captured...`);
    }
  }

  // Per-year quick summary (rows captured by this call)
  const total = (field) => yearRows.reduce(
    (sum, row) => sum + (Number.isFinite(row[field]) ? row[field] : 0),
    0
  );
  console.log(`Year ${y} summary → rows=${yearRows.length}, Sum Assemblies=${total('Assemblies')}, Sum Tons=${total('Tons').toFixed(3)}`);
};

// Capture the starting year, then advance one year at a time up to STOP_YEAR
// (reuses the SAME rightBtn/leftBtn; no redeclaration).
try {
  await captureYear(year);

  if (!rightBtn) {
    console.warn('Could not find the timeline right-arrow button; only the starting year was captured.');
  }

  while (rightBtn && year < STOP_YEAR) {
    if (window._curieScrapeStop) break;

    const next = await stepRightOneYear(rightBtn, leftBtn, year);

    // A missing, unchanged or backwards year means stepping failed; continuing could loop forever.
    if (!Number.isInteger(next) || next <= year) {
      console.warn('Year did not advance; saving partial results and exiting.');
      break;
    }
    if (next > STOP_YEAR) {
      console.warn(`Timeline moved to ${next}, past the stop year ${STOP_YEAR}; exiting.`);
      break;
    }

    year = next;
    await sleep(350); // let map/widgets update
    await captureYear(year);
  }
} finally {
  // Download CSV with fixed filename — also on failure, so rows captured so far are kept.
  downloadCSV(rows);
}

console.log(`Done. CSV downloaded with ${rows.length} rows. Last year captured: ${Math.min(year, STOP_YEAR)}`);

})().catch(e => console.error('[Curie via-locate-site scraper error]', e));
// End of Curie "Locate Site" scraper.