Three layers: a short note at the top, the key lines with our take in the middle, the full source at the bottom.
CI script
check-subprocessor-freshness.mjs
Fails CI when the sub-processor list goes stale relative to its last-reviewed date.
Repo path: scripts/check-subprocessor-freshness.mjs
Language: JavaScript
Short note — more on the way
What this is
Fails CI when the sub-processor list goes stale relative to its last-reviewed date.
What it proves
This file backs one or more of the privacy promises. It is a continuous-integration script that lives versioned in the repository. Read the promise →
What to look for in the source below
- Comments and headers that name what each section does.
- File edges: imports at the top, exports or run-blocks at the bottom.
- Any list, configuration, or assertion that looks load-bearing.
Show the full file (245 lines)
244 lines
#!/usr/bin/env node
//
// Sub-processor list freshness gate (PR-6).
//
// docs/sub-processors.md is a customer commitment: every third party
// that touches Customer Data is named, with 30 days' advance notice
// of changes. This gate flags drift between the list and the actual
// infrastructure declared in apps/api/wrangler.toml + services/*.toml.
//
// Detection is conservative -- the gate looks for known vendor tokens
// in IaC files (e.g. "anthropic", "openai", "twilio", "datadog",
// "amplitude") that are NOT present in docs/sub-processors.md. A new
// vendor must EITHER be added to the doc (and announced to the email
// announcement list) OR be removed from the IaC. There is no middle ground.
import { readFileSync, existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import path from "node:path";
import { execSync } from "node:child_process";
// ES modules have no built-in __dirname; derive this script's directory
// from its own module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parent of the script's directory. Used as the cwd for the git call and
// as the base for every repo-relative path this script reads.
const REPO = path.resolve(__dirname, "..");
// Location of the sub-processor list, relative to REPO.
const SUBPROCESSOR_DOC = "docs/sub-processors.md";
// Known third-party vendor tokens we look for. Add to this list
// when a new vendor lands in the wider ecosystem; the gate will
// flag the same token in IaC if it is not also in the doc.
//
// Each entry is interpolated, unescaped, into a RegExp by tokensIn() and
// parseSubprocessorRows(), and is matched against lowercased text, so
// keep entries lowercase and free of regex metacharacters. The order of
// this list is the order in which matches are collected.
const KNOWN_VENDOR_TOKENS = [
"aws",
"cloudflare",
"fly",
"stripe",
"resend", // email — current shipped provider
"postmark", // tripwire: was named in v2 docs; must not silently re-appear
"sentry",
"anthropic", // tripwire: v4 removed; must not silently re-appear
"openai",
"instructor",
"cohere",
"google",
"vertex",
"twilio",
"sendgrid",
"mailgun",
"datadog",
"newrelic",
"amplitude",
"mixpanel",
"segment",
"intercom",
"zendesk",
"auth0",
"okta",
"workos",
"hcaptcha",
"recaptcha",
"neon",
"intuit", // QuickBooks Online — sub-processor when posting flag flips on
"quickbooks",
"xero",
"supabase",
"planetscale",
];
// Lists the git-tracked files that the gate scans for vendor tokens.
//
// Returns an array of repo-relative paths (one per non-empty line of
// `git ls-files` output). If the git call fails for any reason, a
// warning is written to stderr and an empty array is returned: there
// is no filesystem fallback, so in that case nothing is scanned and
// the warning is the only signal that the undeclared-vendor check was
// skipped.
function listIaCFiles() {
  // git ls-files keeps us aligned with the tracked tree.
  //
  // Audit batch 5 P-F1 widening: the gate now also scans
  // apps/api/src/routes/**/*.ts because integrations like
  // QuickBooks Online (apps/api/src/routes/integrations/
  // quickbooks.ts) talk to vendor hostnames (e.g.
  // appcenter.intuit.com, sandbox-quickbooks.api.intuit.com)
  // directly from the Worker. Hostnames in route handlers are as
  // load-bearing for sub-processor disclosure as IaC config; if the
  // route file references a vendor token, that vendor MUST appear
  // in docs/sub-processors.md.
  try {
    return execSync(
      "git ls-files -- 'infra/**' 'services/**/fly.toml' 'apps/**/wrangler.toml' 'apps/api/src/routes/**/*.ts'",
      { cwd: REPO, encoding: "utf8" },
    )
      .split("\n")
      .filter(Boolean);
  } catch (err) {
    // Do not fail silently: an empty list makes the undeclared-vendor
    // check vacuously pass, so make the skipped scan visible in CI logs.
    const message = err instanceof Error ? err.message : String(err);
    const firstLine = message.split("\n")[0];
    process.stderr.write(
      `check-subprocessor-freshness: WARNING: git ls-files failed (${firstLine}); no IaC files were scanned.\n`,
    );
    return [];
  }
}
// Returns the Set of KNOWN_VENDOR_TOKENS that occur in `text`.
//
// Matching is case-insensitive and "word-ish": a token counts only when
// it is bounded on both sides by the start/end of the text or by a
// character outside [a-z0-9]. Dots, slashes and underscores are
// therefore boundaries, so `aws.kms` and `fly.io` count, while a token
// embedded in a longer alphanumeric run does not.
function tokensIn(text) {
  const haystack = text.toLowerCase();
  const present = KNOWN_VENDOR_TOKENS.filter((token) => {
    const boundaryPattern = new RegExp(`(^|[^a-z0-9])${token}([^a-z0-9]|$)`, "i");
    return boundaryPattern.test(haystack);
  });
  return new Set(present);
}
/**
 * Parse the numbered table rows of docs/sub-processors.md.
 *
 * The doc table has the shape:
 *
 * | # | Provider | Role | Region | DPA on file | Since |
 * |--:|---|---|---|---:|---|
 * | 1 | **AWS** | ... | ... | Yes | 2026-01-01 |
 * | 7 | **Intuit (QuickBooks Online)** ... | ... | US | Pending | Planned ... |
 *
 * Only lines that start with `| <digits> |` are treated as rows, so the
 * header and separator lines are skipped. One result entry is produced
 * for every KNOWN_VENDOR_TOKEN found in a row's Provider cell, so a row
 * naming two tokens yields two entries and a row naming none yields
 * nothing.
 *
 * Rows whose DPA-on-file column is "Yes" are subject to the 30-day
 * freshness assertion. Rows marked "Pending" / "Planned" are not yet
 * active and skip the check.
 *
 * @param {string} docText - Full text of the sub-processor doc.
 * @returns {Array<{token: string, provider: string, since: (Date|null),
 *   planned: boolean, active: boolean, rawSince: string}>} One entry per
 *   (row, vendor token) pair. `since` is null when the Since cell holds
 *   no YYYY-MM-DD date or holds one that is not a real calendar day.
 */
function parseSubprocessorRows(docText) {
  // Compile each vendor pattern once per call instead of once per row.
  const tokenPatterns = KNOWN_VENDOR_TOKENS.map((token) => ({
    token,
    re: new RegExp(`(^|[^a-z0-9])${token}([^a-z0-9]|$)`, "i"),
  }));
  const rows = [];
  for (const line of docText.split("\n")) {
    // Match table rows that start with `| <digit>` (the # column).
    const m = line.match(/^\|\s*\d+\s*\|(.+)\|\s*$/);
    if (!m) continue;
    // Split on `|` to get cells, then trim. With the # column consumed
    // by the regex above, five cells remain:
    // [provider, role, region, dpa_on_file, since].
    const cells = m[1].split("|").map((s) => s.trim());
    if (cells.length < 5) continue;
    const [provider, , , dpaOnFile, since] = cells;
    const lowerProvider = provider.toLowerCase();
    const dpaYes = /^yes$/i.test(dpaOnFile);
    const planned =
      /planned/i.test(since) ||
      /pending/i.test(dpaOnFile) ||
      /planned/i.test(provider);
    // First YYYY-MM-DD anywhere in the Since cell, if present.
    const dateMatch = since.match(/(\d{4}-\d{2}-\d{2})/);
    let sinceDate = null;
    if (dateMatch) {
      const parsed = new Date(`${dateMatch[1]}T00:00:00Z`);
      // An impossible date (e.g. 2026-13-45) produces an Invalid Date,
      // which is a truthy object whose timestamp is NaN; any `<`
      // comparison against NaN is false, so it would slip through the
      // freshness check. Some engines also roll a day like 02-31 over
      // into the next month. Treat both as unparseable by requiring the
      // parsed value to round-trip to the same YYYY-MM-DD text.
      const isRealDay =
        !Number.isNaN(parsed.getTime()) &&
        parsed.toISOString().slice(0, 10) === dateMatch[1];
      if (isRealDay) sinceDate = parsed;
    }
    for (const { token, re } of tokenPatterns) {
      if (!re.test(lowerProvider)) continue;
      rows.push({
        token,
        provider,
        since: sinceDate,
        planned,
        active: dpaYes && !planned,
        rawSince: since,
      });
    }
  }
  return rows;
}
// Entry point: runs both gates and terminates the process.
//
// 1. Undeclared-vendor gate: every vendor token found in a scanned
//    file must also appear somewhere in the sub-processor doc.
// 2. 30-day notice gate: every active doc row (DPA on file "Yes" and
//    not Planned/Pending) must carry a valid Since date that is at
//    least 30 days in the past.
//
// Exits 0 with a one-line summary on stdout when both gates are clean.
// Otherwise writes every violation to stderr and exits 1. A missing
// doc file is an immediate exit 1.
function main() {
  const docPath = path.join(REPO, SUBPROCESSOR_DOC);
  if (!existsSync(docPath)) {
    process.stderr.write(`FAIL: ${SUBPROCESSOR_DOC} not found.\n`);
    process.exit(1);
  }
  const docText = readFileSync(docPath, "utf8");
  const docTokens = tokensIn(docText);
  const subprocessorRows = parseSubprocessorRows(docText);
  const iacFiles = listIaCFiles();

  const violations = [];
  for (const rel of iacFiles) {
    const abs = path.join(REPO, rel);
    // A listed path may be absent from the working tree; skip it.
    if (!existsSync(abs)) continue;
    const tokens = tokensIn(readFileSync(abs, "utf8"));
    for (const token of tokens) {
      if (!docTokens.has(token)) {
        violations.push({ file: rel, token });
      }
    }
  }

  // 30-day notice clause (DPA §6). For every row whose DPA-on-file is
  // "Yes" we require `(now - since) >= 30 days`. Rows marked Planned
  // / Pending are pre-disclosed and intentionally skip the check.
  const DAY_MS = 24 * 60 * 60 * 1000;
  const THIRTY_DAYS_MS = 30 * DAY_MS;
  const now = Date.now();
  const freshnessViolations = [];
  for (const row of subprocessorRows) {
    if (!row.active) continue;
    // An Invalid Date is a truthy object whose timestamp is NaN, and
    // `NaN < THIRTY_DAYS_MS` is false, so without the NaN check an
    // impossible date would pass the gate silently.
    if (!row.since || Number.isNaN(row.since.getTime())) {
      freshnessViolations.push({
        provider: row.provider,
        reason: `Since column missing or unparseable: "${row.rawSince}"`,
      });
      continue;
    }
    const elapsed = now - row.since.getTime();
    if (elapsed < THIRTY_DAYS_MS) {
      const days = Math.floor(elapsed / DAY_MS);
      freshnessViolations.push({
        provider: row.provider,
        reason: `vendor ${row.provider} added at ${row.rawSince}, less than 30 days ago (only ${days} days elapsed); the 30-day notice clause has not elapsed.`,
      });
    }
  }

  if (violations.length === 0 && freshnessViolations.length === 0) {
    process.stdout.write(
      `check-subprocessor-freshness: OK (${iacFiles.length} IaC files, ${docTokens.size} vendors named, ${subprocessorRows.filter((r) => r.active).length} active rows passed the 30-day freshness check).\n`,
    );
    process.exit(0);
  }

  if (violations.length > 0) {
    process.stderr.write(
      `check-subprocessor-freshness: FAIL (${violations.length} undeclared vendors in IaC):\n`,
    );
    for (const v of violations) {
      process.stderr.write(
        ` ${v.file}: vendor token "${v.token}" not in ${SUBPROCESSOR_DOC}\n`,
      );
    }
    process.stderr.write(
      `\nFix: add the vendor to ${SUBPROCESSOR_DOC} (with 30 days notice on the announcement list)\n` +
        ` OR remove the IaC reference. The list is a customer commitment.\n`,
    );
  }

  if (freshnessViolations.length > 0) {
    process.stderr.write(
      `check-subprocessor-freshness: FAIL (${freshnessViolations.length} rows fail the 30-day notice clause):\n`,
    );
    for (const v of freshnessViolations) {
      process.stderr.write(` FAIL: ${v.reason}\n`);
    }
    process.stderr.write(
      `\nFix: wait until 30 days after the Since date before flipping DPA-on-file to Yes,\n` +
        ` or move the row back to Planned/Pending until the clock has run.\n`,
    );
  }

  process.exit(1);
}
main();
This is the file as it lives at the moment of this build. The canonical history lives in git. If you want the full history or a specific commit, write to hello@muntin.digital.