diff --git a/FU.xs b/FU.xs index 666f342..a702cbc 100644 --- a/FU.xs +++ b/FU.xs @@ -26,6 +26,8 @@ #include "c/jsonparse.c" #include "c/fdpass.c" #include "c/fcgi.c" +#include "c/xmlwr.c" + #include "c/libpq.h" #include "c/pgtypes.c" #include "c/pgconn.c" @@ -57,6 +59,7 @@ PROTOTYPES: DISABLE TYPEMAP: <stflags, SvPVutf8_nolen(sv), ax, items); + MODULE = FU PACKAGE = FU::Pg::txn void DESTROY(fupg_txn *t) @@ -251,6 +261,7 @@ void q(fupg_txn *t, SV *sv, ...) ST(0) = fupg_q(aTHX_ t->conn, t->stflags, SvPVutf8_nolen(sv), ax, items); + MODULE = FU PACKAGE = FU::Pg::st void cache(fupg_st *x, ...) @@ -330,3 +341,42 @@ void kvh(fupg_st *st) void DESTROY(fupg_st *st) CODE: fupg_st_destroy(aTHX_ st); + + + +MODULE = FU PACKAGE = FU::XMLWriter + +void _new() + CODE: + ST(0) = fuxmlwr_new(aTHX); + +void _done(fuxmlwr *wr) + CODE: + ST(0) = fustr_done(&wr->out); + fustr_init(aTHX_ &wr->out, NULL, SIZE_MAX); + +void lit_(SV *sv) + CODE: + if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance"); + STRLEN len; + const char *buf = SvPVutf8(sv, len); + fustr_write(aTHX_ &fuxmlwr_tail->out, buf, len); + +void txt_(SV *sv) + CODE: + if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance"); + fuxmlwr_escape(aTHX_ fuxmlwr_tail, sv); + +void tag_(SV *sv, ...) + CODE: + if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance"); + STRLEN len; + const char *tagname = SvPV(sv, len); + fuxmlwr_isname(tagname); + fuxmlwr_tag(aTHX_ fuxmlwr_tail, ax, 1, items, 0, tagname, len); + +INCLUDE_COMMAND: $^X -e '$FU::XMLWriter::XSPRINT=1; require "./FU/XMLWriter.pm"' + +void DESTROY(fuxmlwr *wr) + CODE: + fuxmlwr_destroy(aTHX_ wr); diff --git a/FU/Util.pm b/FU/Util.pm index 289f2e5..f56d6a4 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -163,6 +163,10 @@ be sufficient. The following is probably an improvement: json_format($data) =~ s{ are supported: =over diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm new file mode 100644 index 0000000..a36fe84 --- /dev/null +++ b/FU/XMLWriter.pm @@ -0,0 +1,284 @@ +package FU::XMLWriter 0.1; +use v5.36; +use Carp 'confess'; +use Exporter 'import'; + +our $XSPRINT; +BEGIN { require FU::XS unless $XSPRINT } + +my @NORMAL_TAGS = qw/ + a_ abbr_ address_ article_ aside_ audio_ b_ bb_ bdo_ blockquote_ body_ + button_ canvas_ caption_ cite_ code_ colgroup_ datagrid_ datalist_ dd_ del_ + details_ dfn_ dialog_ div_ dl_ dt_ em_ fieldset_ figure_ footer_ form_ h1_ + h2_ h3_ h4_ h5_ h6_ head_ header_ i_ iframe_ ins_ kbd_ label_ legend_ li_ + main_ map_ mark_ menu_ meter_ nav_ noscript_ object_ ol_ optgroup_ option_ + output_ p_ pre_ progress_ q_ rp_ rt_ ruby_ samp_ script_ section_ select_ + small_ span_ strong_ style_ sub_ summary_ sup_ table_ tbody_ td_ textarea_ + tfoot_ th_ thead_ time_ title_ tr_ ul_ var_ video_ +/; + +my @SELFCLOSE_TAGS = qw/ + area_ base_ br_ col_ command_ embed_ hr_ img_ input_ link_ meta_ param_ + source_ +/; + +# Used by FU.xs to generate an XS function for each tag. +# (Wrapping tag_() within Perl is slow, using ALIAS is possible but still benefits from code gen) +if ($XSPRINT) { + sub f($name, $selfclose) { + my $tag = $name =~ s/_$//r; + my $len = length $tag; + printf <<~_; + void $name(...) + CODE: + if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance"); + fuxmlwr_tag(aTHX_ fuxmlwr_tail, ax, 0, items, $selfclose, "$tag", $len); + + _ + } + f $_, 0 for @NORMAL_TAGS; + f $_, 1 for @SELFCLOSE_TAGS; +} + + +our %EXPORT_TAGS = ( + html5_ => [ qw/tag_ html_ lit_ txt_/, @NORMAL_TAGS, @SELFCLOSE_TAGS ], + xml_ => [ qw/xml_ tag_ lit_ txt_/ ], +); + +our @EXPORT_OK = ( + qw/fragment xml_ xml_escape/, + @{$EXPORT_TAGS{html5_}}, +); + +my %XML = qw/& & < < " "/; +sub xml_escape($s) { + return '' if !defined $s; + $s =~ s/([&<"])/$XML{$1}/gr; +} + +sub fragment :prototype(&) ($f) { + my $wr = _new(); + $f->(); + $wr->_done; +} + +sub html_(@arg) { + fragment { + lit_("\n"); + tag_('html', @arg); + } +} + +sub xml_ :prototype(&) ($f) { + fragment { + lit_(qq{\n}); + $f->(); + } +} + +1; +__END__ +=head1 NAME + +FU::XMLWriter - Convenient and efficient XML and HTML generator. + +=head1 SYNOPSIS + + use FU::XMLWriter ':html5'; + + my $html_string = html_ sub { + head_ sub { + title_ 'Document title!'; + }; + body_ sub { + h1_ 'Main title!'; + p_ class => 'description', sub { + txt_ 'Here we have data.'; + br_; + a_ href => '/path', 'And a link.'; + }; + }; + }; + + # Or XML: + + use FU::XMLWriter ':xml'; + + my $xml_string = xml_ sub { + tag_ feed => xmlns => 'http://www.w3.org/2005/Atom', + 'xml:lang' => 'en', 'xml:base' => 'https://mywebsite/atom.feed', sub { + tag_ title => 'My awesome Atom feed'; + # etc + }; + }; + +=head1 DESCRIPTION + +This is a convenient XML writer that provides an imperative API to generating +dynamic XML. It just so happens that XML syntax is also completely valid for +HTML5, so this module is primarily abused for that purpose. + +As a naming convention, all XML/HTML output functions are suffixed with an +underscore (C<_>) to make their functionality easy to identify and avoid +potential naming collisions. You are encouraged to follow this convention in +your own code. For example, if you have a function to convert some data into a +nicely formatted table, you could name it C or something. It's +like having composable custom HTML elements, but in the backend! + +=head2 Top-level functions + +These functions all return a byte string with (UTF-8) encoded XML. + +=over + +=item fragment($block) + +Executes C<$block> and captures the output of all I +called within the same scope into a string. This function can be safely nested: + + my $string = fragment { + p_ 'Stuff here'; + + my $subfragment = fragment { + div_ 'More stuff here'; + }; + # $subfragment = '
More stuff here
' + }; + # $string = '

Stuff here

' + +=item xml_($block) + +Like C but adds a C<< >> declaration. + +=item html_(@args) + +Like C but adds a suitable DOCTYPE of HTML5. The C<@args> are +passed to the C call for the top-level C<< >> element. + +=back + +=head2 Output functions + +=over + +=item tag_($name, @attrs, $content) + +This is the meat of this module. Output an XML element with the given C<$name>. +C<$content> can either be C to create a self-closing tag: + + tag_ 'br', undef; + #
+ +Or a string: + + tag_ 'title', 'My title & stuff'; + # My title & stuff + +Or a subroutine: + + tag_ 'div', sub { + tag_ 'br', undef; + }; + #

+ +Attributes can be given as key/value pairs: + + tag_ 'a', href => '/?f&c', title => 'Homepage', 'link'; + # link + +An C value causes the attribute to be ignored: + + tag_ 'option', selected => time % 2 == 0 ? 'selected' : undef, ''; + # Depending on the time: + # + # Or + # + +A C<'+'> attribute name can be used to append a string to the previously given +attribute: + + tag_ 'div', class => $is_hidden ? 'hidden' : undef, + '+' => $is_warning ? 'warning' : undef, 'Text'; + # Results in either: + #
Text
+ # + #
Text
+ # + +=item txt_($string) + +Takes a Unicode string and outputs it, escaping any special XML characters in +the process. + +=item lit_($string) + +Takes a Unicode string and outputs it literally, i.e. without any XML escaping. + +=item _(@attrs, $content) + +This module provides a short-hand function for every HTML5 tag. Using these is +less typing and also slightly more performant than calling C. The +following C-like wrapper functions are provided: + + a_ abbr_ address_ article_ aside_ audio_ b_ bb_ bdo_ blockquote_ body_ + button_ canvas_ caption_ cite_ code_ colgroup_ datagrid_ datalist_ dd_ del_ + details_ dfn_ dialog_ div_ dl_ dt_ em_ fieldset_ figure_ footer_ form_ h1_ + h2_ h3_ h4_ h5_ h6_ head_ header_ i_ iframe_ ins_ kbd_ label_ legend_ li_ + main_ map_ mark_ menu_ meter_ nav_ noscript_ object_ ol_ optgroup_ option_ + output_ p_ pre_ progress_ q_ rp_ rt_ ruby_ samp_ script_ section_ select_ + small_ span_ strong_ style_ sub_ summary_ sup_ table_ tbody_ td_ textarea_ + tfoot_ th_ thead_ time_ title_ tr_ ul_ var_ video_ + +Additionally, the following self-closing-tag functions are provided: + + area_ base_ br_ col_ command_ embed_ hr_ img_ input_ link_ meta_ param_ + source_ + +The self-closing functions do not require a C<$content> argument; if none is +provided it defaults to C. + +=back + +=head2 Utility function + +=over + +=item xml_escape($string) + +Return the XML-escaped version of C<$string>. The characters C<&>, C>, +and C<"> are replaced with their XML entity. + +=back + +=head2 Import options + +All of the functions mentioned in this document can be imported individually. +There are also two import groups: + + use FU::XMLWriter ':html'; + +Exports C, C, C, C and all of the C<< +_ >> functions mentioned above. + + use FU::XMLWriter ':xml'; + +Exports C, C, C and C. + +=head1 SEE ALSO + +This module is part of the L framework, although it can be used +independently of it. + +This module was based on L, which was in turn inspired by +L, which is more powerful but less convenient. + +There's also L, a slightly more featureful, heavyweight and +opinionated HTML-templating-inside-Perl module, based on L. + +And there's L, which is conceptually simpler than both this and +L, but its syntax isn't quite as nice. + +And there's also L, L, L and many +more modules on CPAN. In fact I don't know why you should use this module +instead of whatever is available on CPAN. diff --git a/README.md b/README.md index 90a0a58..0db5808 100644 --- a/README.md +++ b/README.md @@ -20,5 +20,4 @@ Things that may or may not happen: - FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy. - FU::Log - Basic logger. - FU::Validate - TUWF::Validate & normalization with some improvements. -- FU::XML - TUWF::XMLXS with some improvements. - FU::Mailer - Simple sendmail wrapper diff --git a/c/jsonfmt.c b/c/jsonfmt.c index bd50f9b..f13eed8 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -21,15 +21,6 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l unsigned char *buf; unsigned char x = 0; - /* Validate entire string for conformance if this is flagged as a utf8 - * string, this lets us be lazy further on. - * Commenting this out doubles the performance for formatting unicode - * strings, I suspect there's room for optimizations in - * is_c9strict_utf8_string(). */ - if (utf8 && !is_c9strict_utf8_string(str, len)) { - croak("invalid codepoint encountered in string, cannot format to JSON"); - } - fustr_write_ch(ctx->out, '\"'); fustr_reserve(ctx->out, len); @@ -37,7 +28,7 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l /* Fast path: no escaping needed */ loff = off; if (utf8) { - /* we already validated everything >=0x80 */ + /* assume >=0x80 is valid utf8 */ while (off < len) { x = str[off]; if (x <= 0x1f || x == '"' || x == '\\' || x == 0x7f) break; diff --git a/c/xmlwr.c b/c/xmlwr.c new file mode 100644 index 0000000..e6d8ac3 --- /dev/null +++ b/c/xmlwr.c @@ -0,0 +1,161 @@ +typedef struct fuxmlwr fuxmlwr; +struct fuxmlwr { + SV *self; + fuxmlwr *next, *prev; + fustr out; +}; + +static fuxmlwr *fuxmlwr_tail = NULL; + +static SV *fuxmlwr_new(pTHX) { + fuxmlwr *wr = safemalloc(sizeof(*wr)); + wr->next = NULL; + wr->prev = fuxmlwr_tail; + if (fuxmlwr_tail) fuxmlwr_tail->next = wr; + fuxmlwr_tail = wr; + fustr_init(&wr->out, NULL, SIZE_MAX); + return fu_selfobj(wr, "FU::XMLWriter"); +} + +static void fuxmlwr_destroy(pTHX_ fuxmlwr *wr) { + if (fuxmlwr_tail == wr) fuxmlwr_tail = wr->next ? wr->next : wr->prev; + if (wr->next) wr->next->prev = wr->prev; + if (wr->prev) wr->prev->next = wr->next; + if (wr->out.sv) SvREFCNT_dec(wr->out.sv); + safefree(wr); +} + + +static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) { + STRLEN len; + const unsigned char *str = (unsigned char *)SvPV_const(sv, len); + const unsigned char *tmp, *end = str + len; + unsigned char x = 0; + int utf8 = SvUTF8(sv); + + while (str < end) { + tmp = str; + if (utf8) { + while (tmp < end) { + x = *tmp; + if (x == '<' || x == '&' || x == '"') break; + tmp++; + } + } else { + while (tmp < end) { + x = *tmp; + if (x == '<' || x == '&' || x == '"' || x >= 0x80) break; + tmp++; + } + } + fustr_write(&wr->out, (const char *)str, tmp-str); + if (tmp == end) return; + switch (x) { + case '<': fustr_write(&wr->out, "<", 4); break; + case '&': fustr_write(&wr->out, "&", 5); break; + case '"': fustr_write(&wr->out, """, 6); break; + default: + unsigned char *buf = (unsigned char *)fustr_write_buf(&wr->out, 2); + buf[0] = 0xc0 | (x >> 6); + buf[1] = 0x80 | (x & 0x3f); + break; + } + str = tmp + 1; + } +} + + +static int fuxmlwr_isnamechar(unsigned int x) { + return (x|32)-'a' < 26 || x-'0' < 10 || x == '_' || x == ':' || x == '-'; +} + +// Validate a tag or attribute name. Pretty much /^[a-z0-9_:-]+$/i. +// This does not at all match with the XML and HTML standards, but this +// approach is simpler and catches the most important bugs anyway. +static void fuxmlwr_isname(const char *str) { + const char *x = str; + while (fuxmlwr_isnamechar(*x)) x++; + if (*x || x == str) fu_confess("Invalid tag or attribute name: '%s'", str); +} + + +static void fuxmlwr_tag(pTHX_ fuxmlwr *wr, I32 ax, I32 offset, I32 argc, int selfclose, const char *tagname, int tagnamelen) { + SV *key, *val; + const char *keys, *lastkey = NULL; + int isopen = 0; + dSP; + + if (!selfclose && ((argc - offset) & 1) == 0) fu_confess("Invalid number of arguments"); + fustr_write_ch(&wr->out, '<'); + fustr_write(&wr->out, tagname, tagnamelen); + + while (offset < argc-1) { + key = ST(offset); + offset++; + val = ST(offset); + offset++; + + // Don't even try to stringify other arguments; non-string keys are always a bug. + if (!SvPOK(key)) fu_confess("Non-string attribute"); + keys = SvPVX(key); + + SvGETMAGIC(val); + /* TODO: Support boolean values */ + if (keys[0] == '+' && keys[1] == 0) { + if (!SvOK(val)) { + // ignore + } else if (isopen) { + fustr_write_ch(&wr->out, ' '); + fuxmlwr_escape(aTHX_ wr, val); + } else if (lastkey) { + fustr_write_ch(&wr->out, ' '); + fustr_write(&wr->out, lastkey, strlen(lastkey)); + fustr_write(&wr->out, "=\"", 2); + fuxmlwr_escape(aTHX_ wr, val); + isopen = 1; + } else { + fu_confess("Cannot use '+' as first attribute"); + } + } else { + if (isopen) { + fustr_write_ch(&wr->out, '"'); + isopen = 0; + } + fuxmlwr_isname(keys); + if (!SvOK(val)) { + lastkey = keys; + } else { + fustr_write_ch(&wr->out, ' '); + fustr_write(&wr->out, keys, SvCUR(key)); + fustr_write(&wr->out, "=\"", 2); + fuxmlwr_escape(aTHX_ wr, val); + isopen = 1; + } + } + } + + if (isopen) fustr_write_ch(&wr->out, '"'); + + if (offset < argc) { + val = ST(offset); + SvGETMAGIC(val); + } else + val = &PL_sv_undef; + + if (!SvOK(val)) { // undef + fustr_write(&wr->out, " />", 3); + } else if (SvROK(val) && strcmp(sv_reftype(SvRV(val), 0), "CODE") == 0) { // CODE ref + fustr_write_ch(&wr->out, '>'); + PUSHMARK(SP); + call_sv(val, G_VOID|G_DISCARD|G_NOARGS); + fustr_write(&wr->out, "out, tagname, tagnamelen); + fustr_write_ch(&wr->out, '>'); + } else { + fustr_write_ch(&wr->out, '>'); + fuxmlwr_escape(aTHX_ wr, val); + fustr_write(&wr->out, "out, tagname, tagnamelen); + fustr_write_ch(&wr->out, '>'); + } +} diff --git a/t/json_format.t b/t/json_format.t index ca43242..d631bf7 100644 --- a/t/json_format.t +++ b/t/json_format.t @@ -62,9 +62,6 @@ my @errors = ( *STDOUT, qr/unable to format unknown value/, 'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/, 'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/, - "\x{D83D}", qr/invalid codepoint encountered in string/, - "\x{DE03}", qr/invalid codepoint encountered in string/, - do { no warnings 'portable'; "\x{ffffffff}" }, qr/invalid codepoint encountered in string/, do { my $o = {}; bless $o, 'FU::Whatever' }, qr/unable to format 'FU::Whatever' object as JSON/, do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/, ); diff --git a/t/xmlwr.t b/t/xmlwr.t new file mode 100644 index 0000000..e8b2d95 --- /dev/null +++ b/t/xmlwr.t @@ -0,0 +1,68 @@ +use v5.36; +use Test::More; +use FU::XMLWriter qw/:html5_ fragment/; + +is fragment {}, ''; +is fragment { lit_ ''; txt_ '' }, '<hi>'; +is fragment { tag_ 'br', undef }, '
'; +is fragment { tag_ 'a', href => '/&ops', 't&xt' }, 't&xt'; +is fragment { a_ href => '/&ops', 't&xt' }, 't&xt'; +is fragment { txt_ "\x{1f973}" }, '🥳'; + +ok !eval { lit_ 'hi'; 1 }; +ok !eval { txt_ 'hi'; 1 }; +ok !eval { a_ 'hi'; 1 }; + +is fragment { + ok !eval { a_; 1 }; + ok !eval { lit_; 1 }; + ok !eval { tag_ 'é'; 1 }; + ok !eval { tag_ ';'; 1 }; + ok !eval { tag_ ''; 1 }; + ok !eval { tag_ 'a', 'é', 1, 1 }; + ok !eval { tag_ 'a', ';', 1, 1 }; + ok !eval { tag_ 'a', '', 1, 1 }; + ok !eval { a_ undef, 1, 1 }; + ok !eval { a_ [], 1, 1 }; +}, '1'; + +is fragment { div_ x => 1, '+' => 2, '+', 3, undef }, '
'; +is fragment { div_ x => 1, '+' => 2, '+', undef, undef }, '
'; +is fragment { div_ x => 1, '+' => undef, '+', 3, undef }, '
'; +is fragment { div_ x => 1, '+' => undef, y => undef, '+', 3, undef }, '
'; +is fragment { div_ x => undef, '+' => undef, y => undef, '+', 3, undef }, '
'; +is fragment { div_ x => undef, '+' => undef, '+', 1, undef }, '
'; + +ok !eval { fragment { div_ '+' => 1, undef } }; + +sub lit { lit_ ""; } + +sub t { + is $_[0], 'arg'; + div_ attr1 => $_[0], sub { + is $_[0], 'arg'; + + span_ 'ab" < c &< d'; + span_ \&lit; + + is fragment(\&lit), ""; + + is fragment { + is fragment { br_ }, '
'; + }, ''; + + eval { fragment { tag_ '', '' } }; + like $@, qr/Invalid tag or attribute name/; + + txt_ "\x{1f973}"; + }; +} + +is fragment { t 'arg' }, '
ab" < c &< d🥳
'; + +done_testing;