Rewrite docs linter

1. I want to set the stage to deal with #2898 properly.
2. `request` was deprecated years ago. Decided that it's better to just
   move to native Node.js APIs in its place.
3. `glob` was outdated, and it's easier to just toss it than to upgrade
   across a major version.
4. I switched to using Marked's "lexer" directly so I'm not fussing
   with the complexity of renderers. This of course necessitated a more
   complex file processor as its "lexer" is really an AST parser.

I also decided to go a few steps further:
- Drop the cache to simplify everything. I might reverse this later,
  but just caching URLs per-page should be enough to prevent the world
  from crashing down.
- Drop some more dependencies, so I don't have to come back to this
  later nearly as quickly.
- Upgrade to a more modern language version in the scripts.
- Update Marked. It was super outdated.
- Add line and column numbers to the warnings. That took quite a bit of
  work, thanks to a missing Marked feature plus a bug in Marked.
This commit is contained in:
Claudia Meadows 2024-09-01 06:24:43 -07:00
parent 3a633ce99c
commit 0d095d1373
No known key found for this signature in database
GPG key ID: C86B594396786760
15 changed files with 1201 additions and 1154 deletions

1
.gitignore vendored
View file

@ -5,7 +5,6 @@
/.vscode
/.DS_Store
/.eslintcache
/.lint-docs-cache
# These are artifacts from various scripts
/dist

842
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -17,10 +17,10 @@
"build:docs": "node scripts/generate-docs",
"build:min": "node scripts/bundler browser.js -output mithril.min.js -minify -save",
"build:stream-min": "node scripts/minify-stream",
"cleanup:lint": "rimraf .eslintcache .lint-docs-cache",
"cleanup:lint": "rimraf .eslintcache",
"lint": "run-s -cn lint:**",
"lint:js": "eslint . --cache",
"lint:docs": "node scripts/lint-docs --cache",
"lint:docs": "node scripts/lint-docs",
"perf": "node performance/test-perf.js",
"pretest": "npm run lint",
"test": "run-s test:js",
@ -40,13 +40,11 @@
"istanbul": "^0.4.5",
"lint-staged": "^13.2.1",
"locater": "^1.3.0",
"marked": "^4.0.10",
"marked": "^14.1.0",
"minimist": "^1.2.0",
"npm-run-all": "^4.1.5",
"ospec": "4.1.6",
"pinpoint": "^1.1.0",
"request": "^2.88.0",
"request-promise-native": "^1.0.7",
"rimraf": "^3.0.0",
"semver": "^6.3.0",
"terser": "^4.3.4"

View file

@ -4,9 +4,11 @@ module.exports = {
"extends": "../.eslintrc.js",
"env": {
"browser": null,
"node": true,
"es2022": true,
},
"parserOptions": {
"ecmaVersion": 2019,
"ecmaVersion": 2022,
},
"rules": {
"no-process-env": "off",

View file

@ -0,0 +1,20 @@
"use strict"
// Crash the process (with a non-zero exit code) on unhandled promise
// rejections, printing any captured child-process output first so
// subprocess failures are actually debuggable.
process.on("unhandledRejection", (e) => {
	process.exitCode = 1
	// Plain errors carry no captured output: fall back to the default
	// "throw from the handler" crash, which prints the full stack.
	if (!e.stdout || !e.stderr) throw e
	console.error(e.stack)
	if (e.stdout?.length) {
		console.error(e.stdout.toString("utf-8"))
	}
	if (e.stderr?.length) {
		console.error(e.stderr.toString("utf-8"))
	}
	// Exit immediately with the exit code set above.
	// eslint-disable-next-line no-process-exit
	process.exit()
})

View file

@ -0,0 +1,244 @@
// Disabling this globally as I use it a lot to speed up common operations and cut down on
// duplicate comparisons.
/* eslint-disable no-bitwise */
"use strict"
// Windows-1252 -> Unicode mapping for bytes 0x80-0x9F, the only range where
// Windows-1252 differs from latin1. Index = byte - 0x80. Bytes with no
// assigned printable character (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to
// themselves, matching the WHATWG encoding index.
const win1252Map = [
	0x20AC,
	0x81,
	0x201A,
	0x0192,
	0x201E,
	0x2026,
	0x2020,
	0x2021,
	0x02C6,
	0x2030,
	0x0160,
	0x2039,
	0x0152,
	0x8D,
	0x017D,
	0x8F,
	0x90,
	0x2018,
	0x2019,
	0x201C,
	0x201D,
	0x2022,
	0x2013,
	0x2014,
	0x02DC,
	0x2122,
	0x0161,
	0x203A,
	0x0153,
	0x9D,
	0x017E,
	0x0178,
]
/**
 * Decode `buffer` to a string using one of the encodings produced by
 * `detectEncoding` ("utf8" | "utf16le" | "utf16be" | "win1252"). Any other
 * value (including `undefined`) falls through to `buffer.toString`, which
 * defaults to UTF-8.
 */
function decode(buffer, encoding) {
	switch (encoding) {
	case "utf16be":
		// Node has no big-endian UTF-16 decoder: byte-swap in place, then
		// decode as little-endian.
		// NOTE(review): `swap16` mutates the caller's buffer — confirm
		// callers never reuse the body after decoding.
		buffer.swap16()
		encoding = "utf16le"
		break
	case "win1252":
		// Fast path: Windows-1252 equals latin1 except for 0x80-0x9F. Scan
		// for such a byte; if none appears, plain latin1 decoding is correct.
		encoding = "latin1"
		for (let i = 0; i < buffer.length; i++) {
			const value = buffer[i]
			// `(value & 0xE0) === 0x80` is true exactly for 0x80-0x9F.
			if ((value & 0xE0) === 0x80) {
				// Slow path: widen every byte to UTF-16 code units, mapping
				// 0x80-0x9F through `win1252Map` via a branchless mask
				// (`mask` is all-ones when the byte needs remapping).
				const u16 = new Uint16Array(buffer.length)
				u16.set(buffer.subarray(0, i), 0)
				for (; i < buffer.length; i++) {
					const value = buffer[i]
					const mask = -((value & 0xE0) === 0x80)
					u16[i] = value & ~mask | win1252Map[value & 0x1F] & mask
				}
				// NOTE(review): assumes a little-endian host so the
				// Uint16Array's backing bytes read back as utf16le — true on
				// all Node-supported platforms in practice; confirm.
				buffer = Buffer.from(u16.buffer)
				encoding = "utf16le"
				break
			}
		}
		break
	}
	return buffer.toString(encoding)
}
// Ref: https://encoding.spec.whatwg.org/#concept-encoding-get
// Each entry pairs one of the four supported decoders with one of that
// encoding's spec-registered labels (labels are matched ASCII
// case-insensitively by `extractNamedEncoding`).
/** @type {Array<["utf8" | "utf16le" | "utf16be" | "win1252", string]>} */
const encodingMap = [
	["utf8", "UNICODE11UTF8"],
	["utf8", "UNICODE20UTF8"],
	["utf8", "UNICODE-1-1-UTF-8"],
	["utf8", "UTF8"],
	["utf8", "UTF-8"],
	["utf8", "X-UNICODE20UTF8"],
	["win1252", "ANSI_X3.4-1968"],
	["win1252", "ASCII"],
	["win1252", "CP1252"],
	["win1252", "CP819"],
	["win1252", "CSISOLATIN1"],
	["win1252", "IBM819"],
	["win1252", "ISO-8859-1"],
	["win1252", "ISO-IR-100"],
	["win1252", "ISO8859-1"],
	["win1252", "ISO88591"],
	["win1252", "ISO_8859-1"],
	["win1252", "ISO_8859-1:1987"],
	["win1252", "L1"],
	["win1252", "LATIN1"],
	["win1252", "US-ASCII"],
	["win1252", "WINDOWS-1252"],
	["win1252", "X-CP1252"],
	["utf16be", "UNICODEFFFE"],
	["utf16be", "UTF-16BE"],
	["utf16le", "CSUNICODE"],
	["utf16le", "ISO-10646-UCS-2"],
	["utf16le", "UCS-2"],
	["utf16le", "UNICODE"],
	["utf16le", "UNICODEFEFF"],
	["utf16le", "UTF-16"],
	["utf16le", "UTF-16LE"],
]
/**
 * Look up an encoding label (e.g. from a `charset=` parameter) in
 * `encodingMap`, matching ASCII case-insensitively. Returns the decoder name
 * ("utf8" | "utf16le" | "utf16be" | "win1252") or `undefined`.
 *
 * Fix: the previous version uppercased the already-uppercase table entry and
 * then compared a char code against the *string* `expected`, so every
 * comparison failed and the function always returned `undefined`. Now the
 * input character is uppercased and compared code-to-code.
 */
function extractNamedEncoding(name) {
	outer:
	for (const entry of encodingMap) {
		const expected = entry[1]
		if (expected.length !== name.length) continue
		for (let i = 0; i < name.length; i++) {
			let ch = name.charCodeAt(i)
			// ASCII-uppercase the input character; table labels are already
			// uppercase.
			const upper = ch & ~0x20
			if (upper >= 0x41 && upper <= 0x5A) ch = upper
			if (ch !== expected.charCodeAt(i)) continue outer
		}
		return entry[0]
	}
	return undefined
}
/**
 * Whether byte `ch` is ASCII whitespace per the HTML spec:
 * TAB (0x09), LF (0x0A), FF (0x0C), CR (0x0D), or SPACE (0x20).
 *
 * Fix: the previous bitmask version excluded SPACE (its `ch < 0x20` guard
 * rejected 0x20 itself) and false-positived on NUL (`ch - 1` shifted by -1
 * wraps to 31, hitting the space bit). A plain comparison is both correct
 * and clear.
 */
function isAsciiWhitespace(ch) {
	ch |= 0
	// VT (0x0B) is deliberately *not* ASCII whitespace in the HTML spec.
	return ch === 0x20 || (ch >= 0x09 && ch <= 0x0D && ch !== 0x0B)
}
/**
 * ASCII-case-insensitive prefix match of `sequence` against `buffer`,
 * starting at byte `i` and never matching at or past `end`. A SPACE in
 * `sequence` matches one *or more* ASCII whitespace bytes (the HTML prescan
 * convention).
 *
 * Fixes: the whitespace branch consumed a byte with `i++` and then the loop
 * incremented `i` again, skipping a byte after every whitespace run; and the
 * buffer byte was never case-folded, so lowercase input (e.g. `<meta`) could
 * never match. Both sides are now ASCII-uppercased before comparing.
 */
function startsWith(buffer, i, end, sequence) {
	// TAB, LF, FF, CR, SPACE — ASCII whitespace per the HTML spec.
	const isSpace = (ch) =>
		ch === 0x20 || (ch >= 0x09 && ch <= 0x0D && ch !== 0x0B)
	if (buffer.length < i + sequence.length) return false
	for (let j = 0; j < sequence.length; j++) {
		if (i >= end) return false
		let ch = sequence.charCodeAt(j)
		if (ch === 0x20) {
			// One mandatory whitespace byte, then greedily consume the rest.
			if (!isSpace(buffer[i])) return false
			do { i++ } while (i < buffer.length && isSpace(buffer[i]))
		} else {
			const expectedUpper = ch & ~0x20
			if (expectedUpper >= 0x41 && expectedUpper <= 0x5A) ch = expectedUpper
			let actual = buffer[i]
			const actualUpper = actual & ~0x20
			if (actualUpper >= 0x41 && actualUpper <= 0x5A) actual = actualUpper
			if (ch !== actual) return false
			i++
		}
	}
	return true
}
// Every recognized spelling of `<meta charset=...>` and
// `<meta http-equiv=content-type content=...>` — unquoted, double-quoted,
// and single-quoted values, with both `>` and `/>` closers — paired with the
// encoding that label names. Matched via `startsWith`, which is ASCII
// case-insensitive and treats a SPACE as "one or more whitespace bytes".
const metasToCheck = encodingMap.flatMap(([e, n]) => [
	[e, `charset=${n}>`],
	[e, `charset="${n}">`],
	[e, `charset='${n}'>`],
	[e, `charset=${n}/>`],
	[e, `charset="${n}"/>`],
	[e, `charset='${n}'/>`],
	[e, `http-equiv=content-type content=${n}>`],
	[e, `http-equiv="content-type" content=${n}>`],
	[e, `http-equiv='content-type' content=${n}>`],
	[e, `http-equiv=content-type content="${n}">`],
	[e, `http-equiv="content-type" content="${n}">`],
	[e, `http-equiv='content-type' content="${n}">`],
	[e, `http-equiv=content-type content='${n}'>`],
	[e, `http-equiv="content-type" content='${n}'>`],
	[e, `http-equiv='content-type' content='${n}'>`],
	[e, `http-equiv=content-type content=${n}/>`],
	[e, `http-equiv="content-type" content=${n}/>`],
	[e, `http-equiv='content-type' content=${n}/>`],
	[e, `http-equiv=content-type content="${n}"/>`],
	[e, `http-equiv="content-type" content="${n}"/>`],
	[e, `http-equiv='content-type' content="${n}"/>`],
	[e, `http-equiv=content-type content='${n}'/>`],
	[e, `http-equiv="content-type" content='${n}'/>`],
	[e, `http-equiv='content-type' content='${n}'/>`],
])
// Try each known `<meta ...>` pattern at offset `i` (bounded by `end`) and
// return its encoding, or `undefined` when nothing matches.
function extractMetaEncoding(buffer, i, end) {
	// Exceptionally lazy and not quite fully correct
	for (const [encoding, meta] of metasToCheck) {
		if (startsWith(buffer, i, end, meta)) return encoding
	}
	return undefined
}
/**
 * Sniff a response's encoding from its headers and the first bytes of its
 * body, loosely following the HTML byte-stream prescan.
 *
 * Fixes:
 * - The UTF-16 BOM checks were swapped: FE FF is the *big*-endian BOM and
 *   FF FE the *little*-endian one (U+FEFF serialized in each byte order).
 * - The prescan loop never advanced `i` for ordinary (non-`<`, non-comment)
 *   bytes, so any content not made of tags spun forever.
 * - `end` is -1 when the prefix has no comment; passing that to `startsWith`
 *   as a limit is nonsense, so it is clamped to the prefix length.
 * @returns {"utf8" | "utf16le" | "utf16be" | "win1252" | undefined}
 */
function detectEncoding(headers, prefix) {
	// This follows the HTML spec to the extent Node supports the various encodings. I'm *not*,
	// however, going to bend over backwards to support obscure encodings.
	// https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding
	if (startsWith(prefix, 0, prefix.length, "\xEF\xBB\xBF")) return "utf8"
	if (startsWith(prefix, 0, prefix.length, "\xFE\xFF")) return "utf16be"
	if (startsWith(prefix, 0, prefix.length, "\xFF\xFE")) return "utf16le"
	const contentType = headers["content-type"]
	if (contentType) {
		const result = (/;\s*charset="?([\w-]+)"?/i).exec(contentType)
		if (result) {
			const encoding = extractNamedEncoding(result[1])
			if (encoding) return encoding
		}
	}
	// "<?x" (start of an XML declaration) in each UTF-16 byte order.
	if (startsWith(prefix, 0, prefix.length, "\x3c\x00\x3F\x00\x78\x00")) return "utf16le"
	if (startsWith(prefix, 0, prefix.length, "\x00\x3c\x00\x3F\x00\x78")) return "utf16be"
	// Prescan for `<meta charset=...>`, skipping HTML comments. `end` tracks
	// the next comment start (or -1 when there is none).
	for (let i = 0, end = prefix.indexOf("<!--", 0, "latin1"); i < prefix.length;) {
		if (i === end) {
			// Skip the comment entirely; an unterminated comment means the
			// prescan is inconclusive.
			i = prefix.indexOf("-->", i + 4, "latin1")
			if (i < 0) return undefined
			i += 3
			end = prefix.indexOf("<!--", i, "latin1")
		} else if (prefix[i] === 0x3C) {
			i++
			if (i === prefix.length) return "win1252"
			// Only match up to the next comment (or the end of the prefix).
			const limit = end < 0 ? prefix.length : end
			if (startsWith(prefix, i, limit, "meta ")) {
				const encoding = extractMetaEncoding(prefix, i, limit)
				if (encoding) return encoding
			} else if (prefix[i] === 0x21 || prefix[i] === 0x2F || prefix[i] === 0x3F) {
				// `<!`, `</`, `<?`: skip to the closing `>`.
				i = prefix.indexOf(0x3E, i)
				if (i < 0) return "win1252"
				i++
			}
		} else {
			// Ordinary byte: advance. (Previously this case fell through
			// without advancing and looped forever.)
			i++
		}
	}
	return "win1252"
}
/**
 * Decode an HTTP response `body` (a Buffer) to a string, sniffing the
 * encoding from the response `headers` and the first 1024 body bytes.
 */
function decodeResponse(headers, body) {
	return decode(body, detectEncoding(headers, body.subarray(0, 1024)))
}
module.exports = {
	decodeResponse,
}

View file

@ -0,0 +1,162 @@
"use strict"
const fs = require("fs")
const path = require("path")
const {submitTask} = require("./task-queue.js")
const {processOne} = require("./process-file.js")
const {root, rel, p, warnError} = require("../_utils.js")
// Paths the linter never descends into: node_modules, plus the
// changelog/recent-changes/migration docs pages.
const doNotVisit = /[\\/]node_modules(?:$|[\\/])|[\\/]docs[\\/](?:changelog|recent-changes|migration-[^\\/]*)\.md$/
/**
 * Recursively lint `file` (a markdown file or a directory tree), invoking
 * `callback(warnings, errors, files)` exactly once after every file has been
 * processed.
 *
 * Fix: `pending` is seeded with 1 so `callback` can't fire while the
 * synchronous walk is still scheduling work, but that initial token was
 * never released — `pending` bottomed out at 1 and `callback` never ran.
 * The matching `settle()` after the root `visit` releases it.
 */
function lintOne(file, callback) {
	let warnings = 0
	let errors = 0
	let files = 0
	// Outstanding async operations + 1 for the synchronous walk itself.
	let pending = 1
	function settle() {
		if (--pending === 0) {
			callback(warnings, errors, files)
		}
	}
	function visitNext(file, contents) {
		if (contents !== undefined) {
			// A readable markdown file: lint it.
			files++
			pending++
			processOne(file, contents, (w, e) => {
				warnings += w
				errors += e
				settle()
			})
		} else {
			// Possibly a directory: recurse into its children.
			pending++
			submitTask(fs.readdir.bind(null, file), (err, files) => {
				if (err) {
					// ENOTDIR just means it was a non-directory; skip it.
					if (err.code !== "ENOTDIR") {
						warnError(err)
					}
				} else {
					for (const child of files) {
						const joined = path.join(file, child)
						if (!doNotVisit.test(joined)) {
							visit(joined)
						}
					}
				}
				settle()
			})
		}
	}
	function visit(file) {
		if (file.endsWith(".md")) {
			pending++
			submitTask(fs.readFile.bind(null, file, "utf8"), (err, contents) => {
				// Not found is fine. Just ignore it.
				if (!err || err.code === "EISDIR") {
					visitNext(file, err ? undefined : contents)
				} else if (err.code !== "ENOENT") {
					warnError(err)
				}
				settle()
			})
		} else {
			visitNext(file, undefined)
		}
	}
	visit(file)
	// Release the initial `pending` token now that the walk has started;
	// `visit` has already incremented the counter for its own async work.
	settle()
}
/**
 * Lint the entire repository's docs and print a summary. Sets a failing exit
 * code when any error is found.
 */
function lintAll() {
	lintOne(root, (warnings, errors, files) => {
		const counted = (n, word) => `${n} ${word}${n === 1 ? "" : "s"}`
		let problems = ""
		if (errors !== 0) {
			process.exitCode = 1
			problems += `\n${counted(errors, "error")}`
		}
		if (warnings !== 0) {
			problems += `\n${counted(warnings, "warning")}`
		}
		const scanned = `Scanned ${counted(files, "file")}\n`
		if (problems === "") {
			console.log("The docs are in good shape!\n")
			console.log(scanned)
		} else {
			console.error(`${problems} found in the docs\n`)
			console.error(scanned)
		}
	})
}
/**
 * Lint a single file (or subtree), printing a per-file problem summary.
 * Excluded paths are skipped, but `callback` still fires asynchronously.
 */
function lintFile(file, callback) {
	if (doNotVisit.test(file)) {
		if (typeof callback === "function") process.nextTick(callback)
		return
	}
	lintOne(file, (warnings, errors) => {
		const counted = (n, word) => `${n} ${word}${n === 1 ? "" : "s"}`
		const parts = []
		if (errors !== 0) parts.push(counted(errors, "error"))
		if (warnings !== 0) parts.push(counted(warnings, "warning"))
		if (parts.length !== 0) {
			const problems = parts.map((part) => `\n${part}`).join("")
			console.error(`${problems} found in ${rel(file)}\n`)
		}
		callback?.()
	})
}
/**
 * Lint everything once, then watch the repo and re-lint files as they
 * change. Change events are debounced per file by 400ms, and events that
 * arrive during the initial full lint are buffered until it completes.
 */
function lintWatch() {
	const pendingTimers = new Map()
	const lintNow = (filename) => {
		pendingTimers.delete(filename)
		lintFile(p(filename))
	}
	const schedule = (filename) => {
		const existing = pendingTimers.get(filename)
		if (existing === undefined) {
			pendingTimers.set(filename, setTimeout(lintNow, 400, filename))
		} else {
			// Already scheduled: push the debounce window back.
			existing.refresh()
		}
	}
	// `undefined` once the initial lint completes and buffering stops.
	let queued = []
	lintFile(root, () => {
		const buffered = queued
		queued = undefined
		for (const filename of buffered) schedule(filename)
	})
	fs.watch(root, {recursive: true}, (_, filename) => {
		if (queued === undefined) {
			schedule(filename)
		} else {
			queued.push(filename)
		}
	})
}
module.exports = {
lintAll,
lintWatch,
}

View file

@ -0,0 +1,145 @@
"use strict"
// Accept just about anything by using Babel's parser.
const babelParser = require("@babel/parser")
/** Returns `undefined` when `code` is valid JSON, or the parse error. */
function getJsonError(code) {
	let error
	try {
		JSON.parse(code)
	} catch (e) {
		error = e
	}
	return error
}
/**
 * Parse `code` with Babel's parser (optionally as TypeScript) and return the
 * parse error, or `undefined` when it parses cleanly.
 * @returns {undefined | Error}
 */
function getBabelError(code, asTypeScript) {
	// A docs snippet could be within any production, so enable a wide net of
	// syntax plugins and permissive top-level options.
	/** @type {babelParser.ParserPlugin[]} */
	const plugins = [
		"bigInt",
		"asyncGenerators",
		"classPrivateMethods",
		"classPrivateProperties",
		"classProperties",
		"dynamicImport",
		"logicalAssignment",
		"nullishCoalescingOperator",
		"numericSeparator",
		"objectRestSpread",
		"optionalCatchBinding",
		"optionalChaining",
		"topLevelAwait",
		"jsx",
	]
	if (asTypeScript) plugins.push("typescript")
	let error
	try {
		babelParser.parse(code, {
			sourceType: "unambiguous",
			allowReturnOutsideFunction: true,
			allowAwaitOutsideFunction: true,
			allowSuperOutsideMethod: true,
			allowUndeclaredExports: true,
			plugins,
		})
	} catch (e) {
		error = e
	}
	return error
}
/**
 * @typedef LangEntry
 * @property {string} name
 * @property {undefined | RegExp} unspacedComment
 * @property {(code: string) => undefined | Error} getError
 */
/**
 * Languages the linter can check, keyed by fence tag. String values are
 * aliases of other keys (resolved by `lookupLang`).
 *
 * Fix: the `unspacedComment` regexps no longer carry the `/g` flag — `.test`
 * on a global regexp is stateful (it advances `lastIndex`), so a shared
 * global regexp silently skipped matches on alternating code blocks.
 * @type {Map<string, string | LangEntry>}
 */
const recognizedLangTags = new Map([
	["js", {
		name: "JavaScript",
		unspacedComment: /(^|\s)\/\/\S/,
		getError: (code) => getBabelError(code, false),
	}],
	["ts", {
		name: "TypeScript",
		unspacedComment: /(^|\s)\/\/\S/,
		getError: (code) => getBabelError(code, true),
	}],
	["json", {
		name: "JSON",
		unspacedComment: undefined,
		getError: getJsonError,
	}],
	["javascript", "js"],
	["typescript", "ts"],
])
/**
 * Resolve a fence language tag to its `LangEntry`, following string aliases
 * (e.g. "javascript" -> "js") until a real entry or `undefined` is reached.
 * @param {undefined | string} lang
 * @returns {undefined | LangEntry}
 */
function lookupLang(lang) {
	return typeof lang === "string"
		? lookupLang(recognizedLangTags.get(lang))
		: lang
}
/**
 * For untagged code blocks, probe every known language's checker against the
 * block's code and suggest the tags that would parse cleanly.
 *
 * Fixes: the map iteration type-checked the entry itself (an object or alias
 * string, never a function), and probed with the *tag name* instead of the
 * code, so no suggestion was ever produced. The code is now threaded in via
 * a new optional `code` parameter (existing two-argument callers keep their
 * old no-suggestion behavior).
 */
function lintCodeIsHighlightable(codeErrors, lang, code) {
	// We only care about what's not tagged here.
	if (lang === "") {
		// TODO: ensure all code blocks have tags, and check this in CI.
		const langTags = []
		for (const [tag, entry] of recognizedLangTags) {
			// Skip alias entries; probe real entries with the actual code.
			if (typeof entry !== "string" && !entry.getError(code)) {
				langTags.push(tag)
			}
		}
		if (langTags.length === 1) {
			codeErrors.push(`Code block possibly missing \`${langTags[0]}\` language tag.`)
		} else if (langTags.length !== 0) {
			codeErrors.push([
				"Code block possibly missing a language tag. Possible tags that could apply:",
				...langTags.map((tag) => `- ${tag}`),
			].join("\n"))
		}
	}
}
/** Report a parse error from a tagged code block, if one was produced. */
function lintCodeIsSyntaticallyValid(codeErrors, langEntry, error) {
	if (!error) return
	codeErrors.push(`${langEntry.name} code block has invalid syntax: ${error.message}`)
}
/**
 * Flag `//comment`-style comments that are missing the space after `//`.
 *
 * Fix: `unspacedComment` is (historically) a `/g` regexp, and `.test` on a
 * global regexp is stateful via `lastIndex` — alternate calls silently
 * missed matches. Reset `lastIndex` before testing so every block is
 * checked from the start. (Harmless for non-global regexps.)
 */
function lintCodeCommentStyle(codeErrors, langEntry, code) {
	const pattern = langEntry?.unspacedComment
	if (pattern == null) return
	pattern.lastIndex = 0
	if (pattern.test(code)) {
		codeErrors.push("Comment is missing a preceding space.")
	}
}
/**
 * Run all code-block lint checks for one fenced block and return the list of
 * error messages (empty when the block is clean).
 *
 * Fix: `code` is now passed to `lintCodeIsHighlightable` so it can actually
 * probe the parsers against the block's contents (the original's version of
 * that function ignores extra arguments, so this is backward compatible).
 */
function getCodeLintErrors(code, lang) {
	const langEntry = lookupLang(lang)
	const error = langEntry?.getError(code)
	const codeErrors = []
	lintCodeIsHighlightable(codeErrors, lang, code)
	lintCodeIsSyntaticallyValid(codeErrors, langEntry, error)
	lintCodeCommentStyle(codeErrors, langEntry, code)
	return codeErrors
}
module.exports = {
getCodeLintErrors,
}

View file

@ -0,0 +1,72 @@
"use strict"
const {tryFetch} = require("./try-fetch.js")
/**
 * Build the warning message for a failed request. 4xx statuses are reported
 * as genuine broken links; anything else gets a verbose dump of the status,
 * headers, and (truncated) body — it's the internet, weird things happen.
 */
function checkKnownCorrectRequestFail(href, headers, status, body) {
	if (status >= 400 && status <= 499) {
		return `${href} is a broken link (status: ${status})`
	}
	const lines = [`HTTP error for ${href} (status: ${status})`]
	for (const [name, value] of Object.entries(headers)) {
		// Multi-valued headers arrive as arrays; print one line per value.
		const values = Array.isArray(value) ? value : [value]
		for (const single of values) {
			lines.push(`>${name}: ${single}`)
		}
	}
	if (body !== "") {
		lines.push(`>${body}`)
	}
	return lines.join("\n")
}
/**
 * Check an external `http:`/`https:` link. Calls `callback()` when the link
 * is fine, or `callback(message)` with a warning string otherwise.
 * Returns `undefined` if no error, a string if an error does occur.
 * @param {(message?: string) => void} callback
 */
function checkHttp(href, callback) {
	// Prefer https: > http: where possible, but allow http: when https: is inaccessible.
	const url = new URL(href)
	url.hash = ""
	const isHTTPS = url.protocol === "https:"
	// Always probe https: first, even for http: links, so upgradable links
	// get flagged.
	url.protocol = "https:"
	tryFetch(url, (headers, status, body, sslError) => {
		if (status >= 200 && status <= 299) {
			if (isHTTPS) {
				return callback()
			} else {
				// The http: link works over https: too — suggest upgrading.
				return callback(`Change ${href} to use \`https:\``)
			}
		}
		if (!sslError) {
			return callback(checkKnownCorrectRequestFail(href, headers, status, body))
		}
		// https: failed at the TLS layer; retry the same URL over http:.
		url.protocol = "http:"
		tryFetch(url, (headers, status, body) => {
			if (status >= 200 && status <= 299) {
				if (isHTTPS) {
					// The https: link only works over http: — flag the downgrade.
					return callback(`Change ${href} to use \`http:\``)
				} else {
					return callback()
				}
			}
			return callback(checkKnownCorrectRequestFail(href, headers, status, body))
		})
	})
}
module.exports = {
checkHttp,
}

View file

@ -0,0 +1,22 @@
"use strict"
const fs = require("fs")
const path = require("path")
/**
 * Check a relative link: if it points at a markdown file, verify the file
 * exists. Calls `callback()` when fine, `callback(message)` when broken.
 *
 * Fix: when `href` wasn't a markdown link (pure anchors, directories, ...)
 * the callback was never invoked, leaking the caller's pending counter and
 * hanging the whole lint run. The callback now always fires exactly once,
 * asynchronously in every path.
 * @param {(message?: string) => void} callback
 */
function checkLocal(base, href, callback) {
	const exec = (/^([^#?]*\.md)(?:$|\?|#)/).exec(href)
	if (exec === null) {
		// Nothing to check, but completion must still be reported.
		process.nextTick(callback)
		return
	}
	fs.access(path.join(base, exec[1]), (err) => {
		if (err) {
			callback(`Broken internal link: ${href}`)
		} else {
			callback()
		}
	})
}
module.exports = {
checkLocal,
}

View file

@ -0,0 +1,230 @@
"use strict"
const path = require("path")
const {marked} = require("marked")
const {getCodeLintErrors} = require("./lint-code.js")
const {checkHttp} = require("./lint-http-link.js")
const {checkLocal} = require("./lint-relative-link.js")
const {rel} = require("../_utils.js")
const {submitTask} = require("./task-queue.js")
/**
 * Lint one markdown file's `contents`: walk Marked's token tree tracking
 * character offsets, check code fences synchronously and links
 * asynchronously, and call `callback(warnings, errors)` once everything has
 * settled.
 *
 * Fixes:
 * - The GitHub Actions annotations used `line:`/`col:` — workflow-command
 *   parameters use `=` (`line=`, `col=`), so line info was being dropped in CI.
 * - The href-trimming regex lacked `/g`, so only the first of
 *   leading-space/trailing-space/fragment was stripped.
 * - The synthetic root token's `raw` must replace each tab with *four*
 *   spaces to mirror the lexer (see `advanceTabViaSpaceReplacement`).
 * @param {string} contents
 */
function processOne(file, contents, callback) {
	/*
	Unfortunately, most of this code is just working around a missing feature that's compounded on
	by a lexer bug.
	- No location info on lexer tokens: https://github.com/markedjs/marked/issues/2134
	- Tabs not preserved in lexer tokens' raw text: https://github.com/markedjs/marked/issues/3440
	This took far too long to debug, like several hours of it. But I do have correct offsets now.
	*/
	const relativePath = rel(file)
	const base = path.dirname(file)
	const syncErrors = []
	let errors = 0
	let warnings = 0
	// Outstanding async link checks + 1 for the synchronous walk itself.
	let pending = 1
	const settle = () => {
		if (--pending === 0) {
			callback(warnings, errors)
		}
	}
	// Translate a [startOffset, endOffset) span into 1-based line/column pairs.
	const getSpanLineCol = (startOffset, endOffset) => {
		let source = contents.slice(0, startOffset)
		let line = 1
		let next = -1
		let prev = -1
		while ((next = source.indexOf("\n", prev + 1)) >= 0) {
			line++
			prev = next
		}
		const startLine = line
		const startCol = startOffset - prev
		source = contents.slice(0, endOffset)
		while ((next = source.indexOf("\n", prev + 1)) >= 0) {
			line++
			prev = next
		}
		const endLine = line
		const endCol = endOffset - prev
		return {startLine, startCol, endLine, endCol}
	}
	const showMessage = (startOffset, endOffset, label, message) => {
		const {startLine, startCol, endLine, endCol} = getSpanLineCol(startOffset, endOffset)
		if (!message.endsWith("\n")) message += "\n"
		if (process.env.CI === "true") {
			// GitHub Actions workflow command; every parameter uses `=`.
			console.error(
				`::${label.toLowerCase()} file=${relativePath}` +
				`,line=${startLine}` +
				`,endLine=${endLine}` +
				`,col=${startCol}` +
				`,endColumn=${endCol}` +
				`::${relativePath}:${startLine}:${startCol}: ${message}`
			)
		} else {
			console.error(`${label} in ${relativePath}:${startLine}:${startCol}: ${message}`)
		}
	}
	const asyncWarnCallback = (startOffset, endOffset, message) => {
		if (message !== undefined) {
			warnings++
			showMessage(startOffset, endOffset, "Warning", message)
		}
		settle()
	}
	const asyncErrorCallback = (startOffset, endOffset, message) => {
		if (message !== undefined) {
			errors++
			showMessage(startOffset, endOffset, "Error", message)
		}
		settle()
	}
	/**
	 * @param {number} startOffset
	 * @param {import("marked").Tokens.TableCell[]} cells
	 */
	const visitCellList = (startOffset, parentOffset, cells, parent) => {
		for (const cell of cells) {
			parentOffset = visitList(startOffset, parentOffset, cell.tokens, parent)
		}
		return parentOffset
	}
	// Nasty workaround for https://github.com/markedjs/marked/issues/3440
	// Advance `offset` (into the real `contents`) past the span [start, end)
	// of `raw` (the lexer's text, which has each tab expanded to 4 spaces):
	// when the real char is a tab mirrored by 4 spaces, consume all 4.
	const advanceTabViaSpaceReplacement = (offset, raw, start, end) => {
		while (start < end) {
			const real = contents.charCodeAt(offset++)
			const synthetic = raw.charCodeAt(start++)
			if (
				real === 0x09 && synthetic === 0x20 &&
				raw.charCodeAt(start) === 0x20 &&
				raw.charCodeAt(++start) === 0x20 &&
				raw.charCodeAt(++start) === 0x20
			) {
				start++
			}
		}
		return offset
	}
	/**
	 * Visit each child token, mapping its position in `parent.raw` back to an
	 * offset in the real file contents.
	 * @param {number} startOffset
	 * @param {import("marked").MarkedToken[]} tokens
	 */
	const visitList = (startOffset, parentOffset, tokens, parent) => {
		for (const child of tokens) {
			const nextIndex = parent.raw.indexOf(child.raw, parentOffset)
			const innerStart = advanceTabViaSpaceReplacement(startOffset, parent.raw, parentOffset, nextIndex)
			const outerStart = advanceTabViaSpaceReplacement(innerStart, child.raw, 0, child.raw.length)
			parentOffset = nextIndex + child.raw.length
			startOffset = outerStart
			visit(innerStart, child)
		}
		return parentOffset
	}
	// URLs already checked in this file — checking each link once per page
	// is enough.
	const visited = new Set()
	/**
	 * @param {number} startOffset
	 * @param {import("marked").MarkedToken} token
	 */
	const visit = (startOffset, token) => {
		const endOffset = startOffset + token.raw.length
		switch (token.type) {
			case "link": {
				// Make sure it's trimmed, so I don't have to worry about errors elsewhere.
				// `/g` so leading space, trailing space, *and* the fragment
				// all get stripped, not just the first match.
				const href = token.href.replace(/^\s+|\s+$|#[\s\S]*$/g, "")
				if (!visited.has(href)) {
					visited.add(href)
					// Prefer https: > http: where possible, but allow http: when https: is
					// inaccessible.
					if ((/^https?:\/\//).test(href)) {
						submitTask(
							checkHttp.bind(null, href),
							asyncWarnCallback.bind(null, startOffset, endOffset),
						)
						pending++
					} else if (!href.includes(":")) {
						submitTask(
							checkLocal.bind(null, base, href),
							asyncErrorCallback.bind(null, startOffset, endOffset),
						)
						pending++
					}
				}
				visitList(startOffset, 0, token.tokens, token)
				break
			}
			case "code": {
				const code = token.text
				const lang = token.lang || ""
				const codeErrors = getCodeLintErrors(code, lang)
				if (codeErrors.length !== 0) {
					errors += codeErrors.length
					for (const error of codeErrors) {
						syncErrors.push({startOffset, endOffset, message: error})
					}
				}
				break
			}
			case "list":
				visitList(startOffset, 0, token.items, token)
				break
			case "table": {
				// NOTE(review): adding `parentOffset` to `startOffset` here
				// looks suspect (offsets may drift for multi-row tables) —
				// confirm against a table fixture before relying on spans
				// inside tables.
				let parentOffset = visitCellList(startOffset, 0, token.header, token)
				startOffset += parentOffset
				for (const row of token.rows) {
					parentOffset = visitCellList(startOffset, parentOffset, row, token)
					startOffset += parentOffset
				}
				break
			}
			default:
				if (token.tokens !== undefined) {
					visitList(startOffset, 0, token.tokens, token)
				}
		}
	}
	// The lexer expands each tab in token raw text to four spaces
	// (markedjs/marked#3440), so the synthetic root's `raw` must match for
	// the `indexOf` searches in `visitList` to line up.
	visitList(0, 0, marked.lexer(contents), {raw: contents.replace(/\t/g, "    ")})
	for (const {startOffset, endOffset, message} of syncErrors) {
		showMessage(startOffset, endOffset, "Error", message)
	}
	syncErrors.length = 0
	settle()
}
module.exports = {
processOne,
}

View file

@ -0,0 +1,35 @@
"use strict"
// CI needs a much lower limit so it doesn't hang.
const maxConcurrency = process.env.CI === "true" ? 5 : 20
// Waiting work, stored flat as [task, callback, task, callback, ...].
const queue = []
let running = 0
/**
 * Run `task` now. When it completes, either reuse this concurrency slot for
 * the next queued task or release the slot.
 *
 * Fix: `running` was never decremented, and a completion with an empty queue
 * called `runTask(undefined, undefined)` (the splice of an empty array),
 * crashing inside `process.nextTick`. Now the slot is released when there is
 * nothing queued.
 */
function runTask(task, callback) {
	process.nextTick(task, (...args) => {
		process.nextTick(callback, ...args)
		if (queue.length !== 0) {
			runTask(queue.shift(), queue.shift())
		} else {
			running--
		}
	})
}
/**
 * Run `task` as soon as a concurrency slot is free, forwarding its callback
 * arguments to `callback`.
 * @template {any[]} A
 * @param {(callback: (...args: A) => void) => void} task
 * @param {(...args: A) => void} callback
 */
function submitTask(task, callback) {
	if (running < maxConcurrency) {
		running++
		runTask(task, callback)
	} else {
		queue.push(task, callback)
	}
}
module.exports = {
submitTask,
}

View file

@ -0,0 +1,224 @@
"use strict"
const http = require("http")
const https = require("https")
const {decodeResponse} = require("./decode-response.js")
const {warnError, noop} = require("../_utils.js")
/**
 * Fetch `url`, following redirects, retrying transient failures, and capping
 * the captured body at 64 KiB. Always calls `callback` exactly once.
 *
 * Fixes:
 * - Redirects were never followed: the 3xx response's body was never drained
 *   (so "end" might never fire), and `onEnd` settled with the 3xx status
 *   instead of fetching the new URL. A `followRedirect` flag now routes
 *   `onEnd` back into `loop()`.
 * - `Retry-After` is specified in *seconds*; it was treated as milliseconds
 *   and combined with `Math.max`, forcing a 10s minimum wait instead of a
 *   10s cap.
 * - `responseBytes` was never reset between attempts, so a retried request
 *   appended its body after the previous attempt's bytes.
 * - The destroyed response stream now gets a noop "error" listener (like the
 *   request) so late errors can't crash the process.
 * @param {URL} url
 * @param {(
 *     headers: Record<string, string | string[]>,
 *     status: number,
 *     body: string,
 *     sslError: boolean,
 * ) => void} callback
 */
function tryFetch(url, callback) {
	const maxResponseBytes = 64 * 1024
	const maxTimeoutMs = 5000
	const maxDelayMs = 10000
	const allowedAttempts = 3
	const allowedRedirects = 10
	let remainingAttempts = allowedAttempts
	let remainingRedirects = allowedRedirects
	let lastIsSSL = false
	let lastStatus = 0
	let lastHeaders, lastMessage
	let request, response
	// Set while draining a redirect response; tells `onEnd` to fetch the new
	// URL instead of settling with the 3xx response.
	let followRedirect = false
	const responseBuffer = Buffer.alloc(maxResponseBytes)
	let responseBytes = 0
	let timer
	// Detach and destroy the in-flight request/response and cancel the
	// timeout, swallowing any late "error" events from the destroyed streams.
	function cleanup() {
		const prevReq = request
		const prevRes = response
		const prevTimer = timer
		request = undefined
		response = undefined
		timer = undefined
		clearTimeout(prevTimer)
		try {
			prevReq?.off("response", onResponse)
			prevReq?.off("error", onError)
			prevReq?.on("error", noop)
			prevReq?.destroy()
		} catch (e) {
			warnError(e)
		}
		try {
			prevRes?.off("data", onChunk)
			prevRes?.off("end", onEnd)
			prevRes?.off("error", onError)
			prevRes?.on("error", noop)
			prevRes?.destroy()
		} catch (e) {
			warnError(e)
		}
	}
	function settle() {
		cleanup()
		callback(lastHeaders, lastStatus, lastMessage, lastIsSSL)
	}
	function onEnd() {
		cleanup()
		if (lastMessage === "") {
			lastMessage = decodeResponse(lastHeaders, responseBuffer.subarray(0, responseBytes))
		}
		if (followRedirect) {
			// The redirect target was recorded in `onResponse`; fetch it now
			// that the 3xx response has been fully drained.
			loop()
		} else if (lastStatus === 429 || lastStatus >= 500) {
			// `Retry-After` is in seconds (or an HTTP date, which `Number`
			// turns into NaN and the fallback handles). Cap at `maxDelayMs`.
			const retryAfter = Number(lastHeaders["retry-after"])
			if (retryAfter > 0) {
				setTimeout(loop, Math.min(maxDelayMs, retryAfter * 1000))
			} else {
				setTimeout(loop, 5000)
			}
		} else {
			settle()
		}
	}
	function onError(e) {
		cleanup()
		if (lastMessage === "") {
			lastMessage = e.message
			if (e.code === "ECONNRESET" || e.code === "ECONNABORT" || e.code === "ECONNREFUSED") {
				lastMessage = "Request socket dropped"
			} else if (
				url.protocol === "https:" &&
				(e.code === "ERR_TLS_CERT_ALTNAME_INVALID" || (/ssl/i).test(e.message))
			) {
				lastIsSSL = true
			} else if (!("code" in e)) {
				// Programmer error rather than a network error; keep the stack.
				lastMessage = e.stack
			}
		}
		loop()
	}
	function onChunk(chunk) {
		const length = chunk.length
		if (length === 0) return
		let next = responseBytes + length
		if (next > maxResponseBytes) {
			chunk = chunk.subarray(0, length - (next - maxResponseBytes))
			next = maxResponseBytes
		}
		responseBuffer.set(chunk, responseBytes)
		responseBytes = next
		if (next === maxResponseBytes) {
			// Buffer full: stop capturing, but keep draining so "end" fires.
			response.off("data", onChunk)
			response.resume()
		}
	}
	function onResponse(res) {
		request.off("response", onResponse)
		request.off("error", onError)
		response = res
		response.on("end", onEnd)
		response.on("error", onError)
		lastStatus = res.statusCode
		lastHeaders = res.headers
		if (
			lastStatus === 301 ||
			lastStatus === 302 ||
			lastStatus === 303 ||
			lastStatus === 307 ||
			lastStatus === 308
		) {
			if (!lastHeaders.location) {
				lastMessage = "Redirect missing location"
				response.resume()
				return
			}
			try {
				url = new URL(lastHeaders.location, url)
			} catch {
				lastMessage = `Redirection to invalid URL ${lastHeaders.location}`
				response.resume()
				return
			}
			// A redirect refreshes the attempt budget but consumes a
			// redirect. Drain the body so "end" fires; `onEnd` then follows
			// the new URL.
			remainingAttempts = allowedAttempts
			remainingRedirects--
			followRedirect = true
			response.resume()
		} else if (lastStatus >= 200 && lastStatus <= 299) {
			// Success: the body is irrelevant, just drain it.
			response.resume()
		} else {
			// Failure: capture the body for diagnostics.
			response.on("data", onChunk)
		}
	}
	function onTimeout() {
		if (lastMessage === "") {
			lastMessage = "Request timed out"
		}
		loop()
	}
	// Start (or restart) one attempt against the current `url`.
	function loop() {
		cleanup()
		if (remainingAttempts === 0) {
			return settle()
		}
		followRedirect = false
		lastIsSSL = false
		lastStatus = 0
		lastMessage = ""
		lastHeaders = {}
		// Each attempt starts with an empty body buffer.
		responseBytes = 0
		remainingAttempts--
		if (remainingRedirects === 0) {
			lastMessage = "Too many redirects"
			return settle()
		}
		timer = setTimeout(onTimeout, maxTimeoutMs)
		request = (url.protocol === "https:" ? https : http).get(url, {
			// pass along realistic headers, some sites (i.e. the IETF) return a 403 otherwise.
			headers: {
				"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:71.0) Gecko/20100101 Firefox/71.0",
				"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
				"Accept-Language": "en-US,en;q=0.5",
				"Accept-Encoding": "gzip, deflate, br",
				"DNT": "1",
				"Connection": "keep-alive",
				"Upgrade-Insecure-Requests": "1",
				"Pragma": "no-cache",
				"Cache-Control": "no-cache",
			},
		})
		request.on("response", onResponse)
		request.on("error", onError)
		request.end()
	}
	loop()
}
module.exports = {
tryFetch,
}

26
scripts/_utils.js Normal file
View file

@ -0,0 +1,26 @@
"use strict"
const path = require("path")
// Repository root (this file lives in `<root>/scripts`).
const root = path.dirname(__dirname)
// Resolve a path relative to the repo root.
const p = (...args) => path.resolve(root, ...args)
// Repo-root-relative path with forward slashes, for stable display output.
const rel = (file) => path.relative(root, file).replace(/\\/g, "/")
const noop = () => {}
/**
 * Report an unexpected error and mark the run as failed. Routine
 * connection-drop errors are network noise and are silently ignored.
 */
function warnError(e) {
	const ignorable = /^(?:ECONNRESET|ECONNABORT|EPIPE)$/
	if (ignorable.test(e.code)) return
	process.exitCode = 1
	console.warn(e.stack)
}
module.exports = {
root,
p,
rel,
warnError,
noop,
}

View file

@ -1,320 +1,12 @@
#!/usr/bin/env node
"use strict"
const {promises: fs} = require("fs")
const path = require("path")
const {Glob} = require("glob")
const {marked} = require("marked")
// Accept just about anything
const babelParser = require("@babel/parser")
// Peer dependency on `request`
const request = require("request-promise-native")
require("./_improve-rejection-crashing.js")
// lint rules
class LintRenderer extends marked.Renderer {
constructor(file) {
super()
this._dir = path.dirname(file)
this._context = undefined
this._code = undefined
this._lang = undefined
this._error = undefined
this._awaiting = []
this._warnings = []
this._errors = []
}
const {lintAll, lintWatch} = require("./_lint-docs/do-lint.js")
_addWarning(...data) {
this._warnings.push(formatMessage(...data))
}
_addError(...data) {
this._errors.push(formatMessage(...data))
}
_block() {
return `\`\`\`${this._lang || ""}\n${this._code}\n\`\`\``
}
link(href) {
// Don't fail if something byzantine shows up - it's the freaking
// internet. Just log it and move on.
const httpError = (e) =>
this._addWarning(`http error for ${href}`, e.message)
// Prefer https: > http: where possible, but allow http: when https: is
// inaccessible.
if ((/^https?:\/\//).test(href)) {
const url = href.replace(/#.*$/, "")
const isHTTPS = href.startsWith("https:")
// pass along realistic headers, some sites (i.e. the IETF) return a 403 otherwise.
const headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:71.0) Gecko/20100101 Firefox/71.0",
}
// some more headers if more were ever needed (from my local Firefox)
// "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
// "Accept-Language": "en-US,en;q=0.5",
// "Accept-Encoding": "gzip, deflate, br",
// "DNT": "1",
// "Connection": "keep-alive",
// "Upgrade-Insecure-Requests": "1",
// "Pragma": "no-cache",
// "Cache-Control": "no-cache"
this._awaiting.push(request.head(url, {headers}).then(() => {
if (!isHTTPS) {
return request.head(`https:${url.slice(7)}`, {headers}).then(
() => this._addError("change http: to https:"),
() => { /* ignore inner errors */ }
)
}
}, (e) => {
if (e.statusCode === 404) {
this._addError(`broken external link: ${href}`)
}
else {
if (
isHTTPS && (
e.error.code === "ERR_TLS_CERT_ALTNAME_INVALID" ||
(/ssl/i).test(e.message)
)
) {
return request.head(`http:${url.slice(6)}`, {headers}).then(
() => this._addError(`change ${href} to use http:`),
// ignore inner errors
() => httpError(e)
)
}
httpError(e)
}
}))
}
else {
const exec = (/^([^#?]*\.md)(?:$|\?|#)/).exec(href)
if (exec != null) {
const resolved = path.resolve(this._dir, exec[1])
this._awaiting.push(fs.access(resolved).catch(() => {
this._addError(`broken internal link: ${href}`)
}))
}
}
}
code(code, lang) {
this._code = code
this._lang = lang
this._error = null
if (!lang || lang === "js" || lang === "javascript") {
try {
// Could be within any production.
babelParser.parse(code, {
sourceType: "unambiguous",
allowReturnOutsideFunction: true,
allowAwaitOutsideFunction: true,
allowSuperOutsideMethod: true,
allowUndeclaredExports: true,
plugins: ["dynamicImport"],
})
}
catch (e) {
this._error = e
}
}
this._ensureCodeIsHighlightable()
this._ensureCodeIsSyntaticallyValid()
this._ensureCommentStyle()
}
_ensureCodeIsHighlightable() {
// We only care about what's not tagged here.
if (!this._lang) {
// TODO: ensure all code blocks have tags, and check this in CI.
if (this._error == null) {
this._addError(
"Code block possibly missing `javascript` language tag",
this._block(),
)
}
try {
JSON.parse(this._code)
this._addError(
"Code block possibly missing `json` language tag",
this._block(),
)
}
catch (_) {
// ignore
}
}
}
_ensureCodeIsSyntaticallyValid() {
if (!this._lang || !(/^js$|^javascript$/).test(this._lang)) return
if (this._error != null) {
this._addError(
"JS code block has invalid syntax", this._error.message,
this._block()
)
}
}
_ensureCommentStyle() {
if (!this._lang || !(/^js$|^javascript$/).test(this._lang)) return
if ((/(^|\s)\/\/[\S]/).test(this._code)) {
this._addError("Comment is missing a preceding space", this._block())
}
}
}
// Capture the two bits of file metadata the lint cache keys on: byte size
// and last-modified time (as a millisecond epoch number, for easy JSON
// round-tripping).
async function getFileInfo(file) {
	const stats = await fs.stat(file)
	return {size: stats.size, timestamp: Number(stats.mtime)}
}
// Print one file's lint results, accumulate them into `totals` (when given),
// and record the result in `nextCache` (when given). Any error flips the
// process exit code to 1.
function report(file, data, totals, nextCache) {
	const {_warnings, _errors} = data
	if (_warnings.length + _errors.length > 0) {
		// Horizontal rule roughly as wide as the file name.
		const rule = "- ".repeat(file.length / 2 + 1)
		console.log(rule)
		console.log(file)
		console.log(`${rule}\n`)
		if (_errors.length > 0) {
			process.exitCode = 1
			const suffix = _errors.length > 1 ? "s " : " -"
			console.log(`-- ${_errors.length} Error${suffix}----------`)
			for (const msg of _errors) console.log(`\n${msg}`)
			console.log("\n")
		}
		if (_warnings.length > 0) {
			const suffix = _warnings.length > 1 ? "s " : " -"
			console.log(`-- ${_warnings.length} Warning${suffix}--------`)
			for (const msg of _warnings) console.log(`\n${msg}`)
			console.log("\n")
		}
		if (totals != null) {
			totals.errors += _errors.length
			totals.warnings += _warnings.length
		}
	}
	if (nextCache != null) nextCache[file] = data
}
// Join message parts with newlines, trimming at most one trailing newline.
function formatMessage(...data) {
	const joined = data.join("\n")
	return joined.endsWith("\n") ? joined.slice(0, -1) : joined
}
exports.lintOne = lintOne
// `cache` and `nextCache` are only passed from `lintAll()`, not when watching
// Lint a single markdown file: verify links and code blocks, print the
// results, and (when caching) record the file's size + mtime so a clean file
// can be skipped on the next run.
async function lintOne(file, totals, cache, nextCache) {
	// check for nextCache, because cache will be undefined the first time the
	// linter runs
	const {size, timestamp} = (nextCache != null) ? await getFileInfo(file) : {}
	if (cache != null && cache[file] != null) {
		const cached = cache[file]
		if (
			size === cached.size &&
			timestamp === cached.timestamp &&
			cached._errors.length + cached._warnings.length === 0
		) {
			// Clean cache hit: report the cached (empty) result and skip
			// reading and re-parsing the file entirely.
			report(file, cached, totals, nextCache)
			return
		}
	}
	// Only read the file once we know it actually needs linting - reading it
	// unconditionally up front would defeat the point of the cache.
	const contents = await fs.readFile(file, "utf-8")
	const renderer = new LintRenderer(file)
	marked(contents, {renderer})
	// The renderer kicks off async checks (HTTP HEADs, fs.access) as it walks
	// the document; wait for all of them before reporting.
	await Promise.all(renderer._awaiting)
	const {_warnings, _errors} = renderer
	report(file, {_warnings, _errors, size, timestamp}, totals, nextCache)
}
const cachePath = path.join(process.cwd(), ".lint-docs-cache")

// Read the previous run's cache, or `undefined` when it's missing or corrupt
// (either way, everything just gets re-linted from scratch).
async function loadCache() {
	let source
	try {
		source = await fs.readFile(cachePath, "utf-8")
	}
	catch (e) {
		// No cache file yet, or it's unreadable.
		return undefined
	}
	try {
		return JSON.parse(source)
	}
	catch (e) {
		// Corrupt cache: surface the parse error, then fall through.
		console.error(e)
		return undefined
	}
}

// Persist the cache for the next run.
async function saveCache(nextCache) {
	await fs.writeFile(cachePath, JSON.stringify(nextCache), "utf-8")
}
// Print the grand total across all linted files.
function finalReport(totals) {
	const parts = []
	if (totals.errors > 0) {
		parts.push(`${totals.errors} error${totals.errors === 1 ? "" : "s"}`)
	}
	if (totals.warnings > 0) {
		parts.push(`${totals.warnings} warning${totals.warnings === 1 ? "" : "s"}`)
	}
	if (parts.length === 0) {
		console.log("The docs are in good shape!\n")
	}
	else {
		console.log(`\n${parts.join(", ")} found in the docs\n`)
	}
}
exports.lintAll = lintAll
// Lint every markdown file in the repo (minus generated/vendored files).
// When `useCache` is set, files whose size + mtime match a previous clean run
// are skipped.
//
// The options bag defaults to `{}`: this file's CLI fallback calls a bare
// `lintAll()`, which previously threw `TypeError: Cannot destructure...`.
async function lintAll({useCache} = {}) {
	const cache = useCache ? await loadCache() : null
	const totals = {
		errors: 0,
		warnings: 0,
	}
	// always populate the cache, even if we don't read from it
	const nextCache = {}
	await new Promise((resolve, reject) => {
		const glob = new Glob(path.resolve(__dirname, "../**/*.md"), {
			ignore: [
				"**/changelog.md",
				"**/migration-*.md",
				"**/node_modules/**",
				"**/recent-changes.md"
			],
			nodir: true,
		})
		const awaiting = []
		glob.on("match", (file) => {
			awaiting.push(lintOne(file, totals, cache, nextCache))
		})
		glob.on("error", reject)
		// "end" fires once matching finishes; still wait for every in-flight
		// lint before reporting totals.
		glob.on("end", () => resolve(Promise.all(awaiting)))
	})
	finalReport(totals)
	await saveCache(nextCache)
	// don't return anything so that _command.js picks up the errorCode.
}
/* eslint-disable global-require */
if (require.main === module) {
require("./_command")({
exec: lintAll,
watch() {
require("chokidar")
.watch(path.resolve(__dirname, "../docs/**/*.md"), {
ignore: [
"**/changelog.md",
"**/migration-*.md",
"**/node_modules/**",
"**/recent-changes.md"
],
})
.on("add", lintOne)
.on("change", lintOne)
},
})
if (process.argv.includes("--watch", 2)) {
lintWatch()
} else {
lintAll()
}