mithril-vndb/scripts/_lint-docs/try-fetch.js
Claudia Meadows 0d095d1373
Rewrite docs linter
1. I want to set the stage to deal with #2898 properly.
2. `request` was deprecated years ago. Decided that it's better to just
   move to native Node.js APIs in its place.
3. `glob` was outdated, and it's easier to just toss it than to upgrade
   across a major version.
4. I switched to using Marked's "lexer" directly so I'm not fussing
   with the complexity of renderers. This of course necessitated a more
   complex file processor as its "lexer" is really an AST parser.

I also decided to go a few steps further:
- Drop the cache to simplify everything. I might reverse this later,
  but just caching URLs per-page should be enough to prevent the world
  from crashing down.
- Drop some more dependencies, so I don't have to come back to this
  later nearly as quickly.
- Upgrade to a more modern language version in the scripts.
- Update Marked. It was super outdated.
- Add line and column numbers to the warnings. That took quite a bit of
  work, thanks to a missing Marked feature plus a bug in Marked.
2024-09-23 04:54:17 -07:00

224 lines
4.7 KiB
JavaScript

"use strict"
const http = require("http")
const https = require("https")
const {decodeResponse} = require("./decode-response.js")
const {warnError, noop} = require("../_utils.js")
/**
 * Issues a GET request for `url`, following redirects and retrying transient
 * failures, then reports the final outcome through `callback`.
 *
 * Failures are never thrown: network errors, timeouts and redirect problems are
 * reported with `status` 0 (or the last HTTP status seen) and a human-readable
 * message in `body`. For non-2xx responses `body` is the decoded response body,
 * truncated to the first 64 KiB. Successful (2xx) bodies are discarded.
 *
 * Retry policy: up to 3 attempts per URL (the counter restarts after each
 * redirect), at most 10 redirects, and a 5 second budget per attempt. 429 and
 * 5xx responses are retried after a delay that honors `Retry-After`
 * (delay-seconds form) capped at 10 seconds, or 5 seconds when absent.
 *
 * @param {URL} url
 * @param {(
 *   headers: Record<string, string | string[]>,
 *   status: number,
 *   body: string,
 *   sslError: boolean,
 * ) => void} callback
 */
function tryFetch(url, callback) {
	const maxResponseBytes = 64 * 1024
	const maxTimeoutMs = 5000
	const maxDelayMs = 10000
	const defaultDelayMs = 5000
	const allowedAttempts = 3
	const allowedRedirects = 10

	// Tracks the redirect target without reassigning the caller's parameter.
	let currentUrl = url
	let remainingAttempts = allowedAttempts
	let remainingRedirects = allowedRedirects
	let lastIsSSL = false
	let lastStatus = 0
	let lastHeaders, lastMessage
	let request, response
	const responseBuffer = Buffer.alloc(maxResponseBytes)
	let responseBytes = 0
	let timer

	// Detaches every listener from the in-flight request/response, cancels the
	// attempt timeout, and destroys the underlying streams. Safe to call twice.
	function cleanup() {
		const prevReq = request
		const prevRes = response
		const prevTimer = timer
		request = undefined
		response = undefined
		timer = undefined
		clearTimeout(prevTimer)
		try {
			prevReq?.off("response", onResponse)
			prevReq?.off("error", onError)
			// Destroying may still emit "error"; swallow it so it is not unhandled.
			prevReq?.on("error", noop)
			prevReq?.destroy()
		} catch (e) {
			warnError(e)
		}
		try {
			prevRes?.off("data", onChunk)
			prevRes?.off("end", onEnd)
			prevRes?.off("error", onError)
			prevRes?.destroy()
		} catch (e) {
			warnError(e)
		}
	}

	// Reports the most recent outcome to the caller.
	function settle() {
		cleanup()
		callback(lastHeaders, lastStatus, lastMessage, lastIsSSL)
	}

	function onEnd() {
		cleanup()
		if (lastMessage === "") {
			lastMessage = decodeResponse(lastHeaders, responseBuffer.subarray(0, responseBytes))
		}

		const isRetryable = lastStatus === 429 || lastStatus >= 500
		// With no attempts left, waiting would only postpone the same result.
		if (!isRetryable || remainingAttempts === 0) {
			settle()
			return
		}

		// `Retry-After` is expressed in seconds. The HTTP-date form yields NaN
		// here and falls back to the default delay.
		const retryAfterSeconds = Number(lastHeaders["retry-after"])
		const delayMs = retryAfterSeconds > 0
			? Math.min(maxDelayMs, retryAfterSeconds * 1000)
			: defaultDelayMs
		setTimeout(loop, delayMs)
	}

	function onError(e) {
		cleanup()
		if (lastMessage === "") {
			lastMessage = e.message
			if (e.code === "ECONNRESET" || e.code === "ECONNABORTED" || e.code === "ECONNREFUSED") {
				lastMessage = "Request socket dropped"
			} else if (
				currentUrl.protocol === "https:" &&
				(e.code === "ERR_TLS_CERT_ALTNAME_INVALID" || (/ssl/i).test(e.message))
			) {
				lastIsSSL = true
			} else if (!("code" in e)) {
				// No error code: likely a programming error, so keep the stack.
				lastMessage = e.stack
			}
		}
		loop()
	}

	// Accumulates at most `maxResponseBytes` of the body, then drains the rest.
	function onChunk(chunk) {
		const length = chunk.length
		if (length === 0) return
		let next = responseBytes + length
		if (next > maxResponseBytes) {
			chunk = chunk.subarray(0, length - (next - maxResponseBytes))
			next = maxResponseBytes
		}
		responseBuffer.set(chunk, responseBytes)
		responseBytes = next
		if (next === maxResponseBytes) {
			response.off("data", onChunk)
			response.resume()
		}
	}

	function onResponse(res) {
		// The request's "error" listener intentionally stays attached until
		// `cleanup()`, so a late request-level error is handled rather than
		// raised as an unhandled "error" event.
		request.off("response", onResponse)
		response = res
		response.on("end", onEnd)
		response.on("error", onError)
		lastStatus = res.statusCode
		lastHeaders = res.headers

		const isRedirect =
			lastStatus === 301 ||
			lastStatus === 302 ||
			lastStatus === 303 ||
			lastStatus === 307 ||
			lastStatus === 308

		if (isRedirect) {
			if (!lastHeaders.location) {
				lastMessage = "Redirect missing location"
				response.resume()
				return
			}
			try {
				currentUrl = new URL(lastHeaders.location, currentUrl)
			} catch {
				lastMessage = `Redirection to invalid URL ${lastHeaders.location}`
				response.resume()
				return
			}
			remainingAttempts = allowedAttempts
			remainingRedirects--
			// Follow the redirect right away; `loop()` tears down this response.
			loop()
		} else if (lastStatus >= 200 && lastStatus <= 299) {
			// Body is not needed on success; drain it so "end" fires.
			response.resume()
		} else {
			response.on("data", onChunk)
		}
	}

	function onTimeout() {
		if (lastMessage === "") {
			lastMessage = "Request timed out"
		}
		loop()
	}

	// Starts the next attempt, or settles when attempts/redirects are exhausted.
	function loop() {
		cleanup()
		if (remainingAttempts === 0) {
			return settle()
		}
		lastIsSSL = false
		lastStatus = 0
		lastMessage = ""
		lastHeaders = {}
		// Each attempt starts with an empty body buffer.
		responseBytes = 0
		remainingAttempts--
		if (remainingRedirects === 0) {
			lastMessage = "Too many redirects"
			return settle()
		}
		timer = setTimeout(onTimeout, maxTimeoutMs)
		request = (currentUrl.protocol === "https:" ? https : http).get(currentUrl, {
			// pass along realistic headers, some sites (i.e. the IETF) return a 403 otherwise.
			headers: {
				"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:71.0) Gecko/20100101 Firefox/71.0",
				"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
				"Accept-Language": "en-US,en;q=0.5",
				"Accept-Encoding": "gzip, deflate, br",
				"DNT": "1",
				"Connection": "keep-alive",
				"Upgrade-Insecure-Requests": "1",
				"Pragma": "no-cache",
				"Cache-Control": "no-cache",
			},
		})
		request.on("response", onResponse)
		request.on("error", onError)
		request.end()
	}

	loop()
}
// Public API of this module: the callback-based fetch helper defined above.
module.exports = {
tryFetch,
}