json_format: Add html_safe option

This commit is contained in:
Yorhel 2025-03-16 15:03:32 +01:00
parent 3fad7feec3
commit f8fe53cba9
3 changed files with 37 additions and 20 deletions

View file

@ -212,13 +212,6 @@ roughly similar to:
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
Some modules escape the slash character in encoded strings to prevent a
potential XSS vulnerability when embedding JSON inside C<< <script> ..
</script> >> tags. This function does I<not> do that because it might not even
be sufficient. The following is probably an improvement:
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
This function generates invalid JSON if you pass it a string with invalid This function generates invalid JSON if you pass it a string with invalid
Unicode characters; I don't see how you'd ever accidentally end up with such a Unicode characters; I don't see how you'd ever accidentally end up with such a
string, anyway. string, anyway.
@ -244,6 +237,26 @@ versions.
Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string. Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string.
=item html_safe
Boolean. When set, the encoded JSON is safe for (unescaped) inclusion into HTML
or XML content. This encodes C<< < >>, C<< > >> and C<< & >> as Unicode escapes.
Commonly used to embed data inside an HTML page:
$html = '<script id="site_data" type="application/json">'
. json_format($data, html_safe => 1)
. '</script>';
This option does NOT make it safe to include the encoded JSON as an attribute
value. There is no way to do that without violating JSON specs, so you should
use entity escaping instead.
Some JSON modules escape the forward slash (C</>) character instead, but that
is, at best, B<only> sufficient for embedding inside a C<< <script> >> tag (I'm
not sure how C<< <!-- >> and C<< <![CDATA[ >> are treated in that context). In
any other context, you'll need the more thorough escaping provided by this
C<html_safe> option.
=item max_size =item max_size
Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB. Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB.

View file

@ -3,6 +3,7 @@ typedef struct {
UV depth; UV depth;
int canon; int canon;
int pretty; /* <0 when disabled, current nesting level otherwise */ int pretty; /* <0 when disabled, current nesting level otherwise */
int htmlsafe;
} fujson_fmt_ctx; } fujson_fmt_ctx;
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *); static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
@ -27,21 +28,22 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
while (off < len) { while (off < len) {
/* Fast path: no escaping needed */ /* Fast path: no escaping needed */
loff = off; loff = off;
#define SKIPUNTIL(cond) \
while (off < len) { \
x = str[off]; \
if (x <= 0x1f || x == '"' || x == '\\' || cond) break; \
off++;\
}
if (utf8) { if (utf8) {
/* assume >=0x80 is valid utf8 */ if (!ctx->htmlsafe) { SKIPUNTIL(x == 0x7f) }
while (off < len) { else { SKIPUNTIL(x == 0x7f || x == '<' || x == '>' || x == '&') }
x = str[off];
if (x <= 0x1f || x == '"' || x == '\\' || x == 0x7f) break;
off++;
}
} else { } else {
/* binary strings need special handling for >=0x80 */ if (!ctx->htmlsafe) { SKIPUNTIL(x >= 0x7f) }
while (off < len) { else { SKIPUNTIL(x >= 0x7f || x == '<' || x == '>' || x == '&') }
x = str[off];
if (x <= 0x1f || x == '"' || x == '\\' || x >= 0x7f) break;
off++;
}
} }
#undef SKIPUNTIL
fustr_write(ctx->out, (char *)str+loff, off-loff); fustr_write(ctx->out, (char *)str+loff, off-loff);
if (off < len) { /* early break, which means current byte needs special processing */ if (off < len) { /* early break, which means current byte needs special processing */
@ -279,7 +281,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
ctx.out = &out; ctx.out = &out;
ctx.depth = 0; ctx.depth = 0;
ctx.pretty = INT_MIN; ctx.pretty = INT_MIN;
ctx.canon = 0; ctx.canon = ctx.htmlsafe = 0;
while (i < argc) { while (i < argc) {
arg = SvPV_nolen(ST(i)); arg = SvPV_nolen(ST(i));
i++; i++;
@ -289,6 +291,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r); if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN; else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN;
else if (strcmp(arg, "html_safe") == 0) ctx.htmlsafe = !!SvPVXtrue(r);
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r); else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r);
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);

View file

@ -110,6 +110,7 @@ is json_format(
} }
_ _
is json_format('<hello & world>', html_safe => 1), '"\u003chello \u0026 world\u003e"';
eval { json_format [[]], max_depth => 2 }; eval { json_format [[]], max_depth => 2 };
like $@, qr/max_depth exceeded while formatting JSON/; like $@, qr/max_depth exceeded while formatting JSON/;