json_format: Add html_safe option
This commit is contained in:
parent
3fad7feec3
commit
f8fe53cba9
3 changed files with 37 additions and 20 deletions
27
FU/Util.pm
27
FU/Util.pm
|
|
@ -212,13 +212,6 @@ roughly similar to:
|
||||||
|
|
||||||
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
|
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
|
||||||
|
|
||||||
Some modules escape the slash character in encoded strings to prevent a
|
|
||||||
potential XSS vulnerability when embedding JSON inside C<< <script> ..
|
|
||||||
</script> >> tags. This function does I<not> do that because it might not even
|
|
||||||
be sufficient. The following is probably an improvement:
|
|
||||||
|
|
||||||
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
|
|
||||||
|
|
||||||
This function generates invalid JSON if you pass it a string with invalid
|
This function generates invalid JSON if you pass it a string with invalid
|
||||||
Unicode characters; I don't see how you'd ever accidentally end up with such a
|
Unicode characters; I don't see how you'd ever accidentally end up with such a
|
||||||
string, anyway.
|
string, anyway.
|
||||||
|
|
@ -244,6 +237,26 @@ versions.
|
||||||
|
|
||||||
Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string.
|
Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string.
|
||||||
|
|
||||||
|
=item html_safe
|
||||||
|
|
||||||
|
Boolean. When set, the encoded JSON is safe for (unescaped) inclusion into HTML
|
||||||
|
or XML content. This encodes C<< < >>, C<< > >> and C<< & >> as Unicode escapes.
|
||||||
|
Commonly used to embed data inside an HTML page:
|
||||||
|
|
||||||
|
$html = '<script id="site_data" type="application/json">'
|
||||||
|
. json_format($data, html_safe => 1)
|
||||||
|
. '</script>';
|
||||||
|
|
||||||
|
This option does NOT make it safe to include the encoded JSON as an attribute
|
||||||
|
value. There is no way to do that without violating JSON specs, so you should
|
||||||
|
use entity escaping instead.
|
||||||
|
|
||||||
|
Some JSON modules escape the forward slash (C</>) character instead, but that
|
||||||
|
is, at best, B<only> sufficient for embedding inside a C<< <script> >> tag (I'm
|
||||||
|
not sure how C<< <!-- >> and C<< <![CDATA[ >> are treated in that context). In
|
||||||
|
any other context, you'll need the more thorough escaping provided by this
|
||||||
|
C<html_safe> option.
|
||||||
|
|
||||||
=item max_size
|
=item max_size
|
||||||
|
|
||||||
Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB.
|
Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB.
|
||||||
|
|
|
||||||
29
c/jsonfmt.c
29
c/jsonfmt.c
|
|
@ -3,6 +3,7 @@ typedef struct {
|
||||||
UV depth;
|
UV depth;
|
||||||
int canon;
|
int canon;
|
||||||
int pretty; /* <0 when disabled, current nesting level otherwise */
|
int pretty; /* <0 when disabled, current nesting level otherwise */
|
||||||
|
int htmlsafe;
|
||||||
} fujson_fmt_ctx;
|
} fujson_fmt_ctx;
|
||||||
|
|
||||||
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
|
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
|
||||||
|
|
@ -27,21 +28,22 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
|
||||||
while (off < len) {
|
while (off < len) {
|
||||||
/* Fast path: no escaping needed */
|
/* Fast path: no escaping needed */
|
||||||
loff = off;
|
loff = off;
|
||||||
|
|
||||||
|
#define SKIPUNTIL(cond) \
|
||||||
|
while (off < len) { \
|
||||||
|
x = str[off]; \
|
||||||
|
if (x <= 0x1f || x == '"' || x == '\\' || cond) break; \
|
||||||
|
off++;\
|
||||||
|
}
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
/* assume >=0x80 is valid utf8 */
|
if (!ctx->htmlsafe) { SKIPUNTIL(x == 0x7f) }
|
||||||
while (off < len) {
|
else { SKIPUNTIL(x == 0x7f || x == '<' || x == '>' || x == '&') }
|
||||||
x = str[off];
|
|
||||||
if (x <= 0x1f || x == '"' || x == '\\' || x == 0x7f) break;
|
|
||||||
off++;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
/* binary strings need special handling for >=0x80 */
|
if (!ctx->htmlsafe) { SKIPUNTIL(x >= 0x7f) }
|
||||||
while (off < len) {
|
else { SKIPUNTIL(x >= 0x7f || x == '<' || x == '>' || x == '&') }
|
||||||
x = str[off];
|
|
||||||
if (x <= 0x1f || x == '"' || x == '\\' || x >= 0x7f) break;
|
|
||||||
off++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
#undef SKIPUNTIL
|
||||||
|
|
||||||
fustr_write(ctx->out, (char *)str+loff, off-loff);
|
fustr_write(ctx->out, (char *)str+loff, off-loff);
|
||||||
|
|
||||||
if (off < len) { /* early break, which means current byte needs special processing */
|
if (off < len) { /* early break, which means current byte needs special processing */
|
||||||
|
|
@ -279,7 +281,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
||||||
ctx.out = &out;
|
ctx.out = &out;
|
||||||
ctx.depth = 0;
|
ctx.depth = 0;
|
||||||
ctx.pretty = INT_MIN;
|
ctx.pretty = INT_MIN;
|
||||||
ctx.canon = 0;
|
ctx.canon = ctx.htmlsafe = 0;
|
||||||
while (i < argc) {
|
while (i < argc) {
|
||||||
arg = SvPV_nolen(ST(i));
|
arg = SvPV_nolen(ST(i));
|
||||||
i++;
|
i++;
|
||||||
|
|
@ -289,6 +291,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
||||||
|
|
||||||
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
|
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
|
||||||
else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN;
|
else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN;
|
||||||
|
else if (strcmp(arg, "html_safe") == 0) ctx.htmlsafe = !!SvPVXtrue(r);
|
||||||
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
||||||
else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r);
|
else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r);
|
||||||
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,7 @@ is json_format(
|
||||||
}
|
}
|
||||||
_
|
_
|
||||||
|
|
||||||
|
is json_format('<hello & world>', html_safe => 1), '"\u003chello \u0026 world\u003e"';
|
||||||
|
|
||||||
eval { json_format [[]], max_depth => 2 };
|
eval { json_format [[]], max_depth => 2 };
|
||||||
like $@, qr/max_depth exceeded while formatting JSON/;
|
like $@, qr/max_depth exceeded while formatting JSON/;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue