Add FU::XMLWriter
And remove UTF-8 check in JSON writer. It honestly feels kind of silly to do that validation there while I've never done similar validations in any other output routines - including this XML writer. FU::XMLWriter is a copy of TUWF::XMLXS with a bunch of improvements applied: now uses refcounts to determine the current output instance, auto-generates XS functions and has faster escaped string output - inspired by the JSON writer. TODO: - Integrate into FU - Do something with bool attribute values - Benchmarks - Should $content be optional for all tags? The reason they weren't in TUWF::XMLXS is because TUWF::XML supports opening tags without closing them, but that idea turned out to suck and isn't supported anymore. This is hopefully the last XS module for the FU framework. The only C code being written now should be bug fixes and extending FU::Pg with some planned features. Already ended up with more C than I had planned...
This commit is contained in:
parent
67e6d99f01
commit
9014e2900c
8 changed files with 568 additions and 14 deletions
50
FU.xs
50
FU.xs
|
|
@ -26,6 +26,8 @@
|
||||||
#include "c/jsonparse.c"
|
#include "c/jsonparse.c"
|
||||||
#include "c/fdpass.c"
|
#include "c/fdpass.c"
|
||||||
#include "c/fcgi.c"
|
#include "c/fcgi.c"
|
||||||
|
#include "c/xmlwr.c"
|
||||||
|
|
||||||
#include "c/libpq.h"
|
#include "c/libpq.h"
|
||||||
#include "c/pgtypes.c"
|
#include "c/pgtypes.c"
|
||||||
#include "c/pgconn.c"
|
#include "c/pgconn.c"
|
||||||
|
|
@ -57,6 +59,7 @@ PROTOTYPES: DISABLE
|
||||||
TYPEMAP: <<EOT
|
TYPEMAP: <<EOT
|
||||||
TYPEMAP
|
TYPEMAP
|
||||||
fufcgi * FUFCGI
|
fufcgi * FUFCGI
|
||||||
|
fuxmlwr * FUXMLWR
|
||||||
fupg_conn * FUPG_CONN
|
fupg_conn * FUPG_CONN
|
||||||
fupg_txn * FUPG_TXN
|
fupg_txn * FUPG_TXN
|
||||||
fupg_st * FUPG_ST
|
fupg_st * FUPG_ST
|
||||||
|
|
@ -66,6 +69,10 @@ FUFCGI
|
||||||
if (sv_derived_from($arg, \"FU::fcgi\")) $var = (fufcgi *)SvIVX(SvRV($arg));
|
if (sv_derived_from($arg, \"FU::fcgi\")) $var = (fufcgi *)SvIVX(SvRV($arg));
|
||||||
else fu_confess(\"invalid FastCGI object\");
|
else fu_confess(\"invalid FastCGI object\");
|
||||||
|
|
||||||
|
FUXMLWR
|
||||||
|
if (sv_derived_from($arg, \"FU::XMLWriter\")) $var = (fuxmlwr *)SvIVX(SvRV($arg));
|
||||||
|
else fu_confess(\"invalid FU::XMLWriter object\");
|
||||||
|
|
||||||
FUPG_CONN
|
FUPG_CONN
|
||||||
if (sv_derived_from($arg, \"FU::Pg::conn\")) $var = (fupg_conn *)SvIVX(SvRV($arg));
|
if (sv_derived_from($arg, \"FU::Pg::conn\")) $var = (fupg_conn *)SvIVX(SvRV($arg));
|
||||||
else fu_confess(\"invalid connection object\");
|
else fu_confess(\"invalid connection object\");
|
||||||
|
|
@ -137,6 +144,7 @@ void DESTROY(fufcgi *ctx)
|
||||||
safefree(ctx);
|
safefree(ctx);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MODULE = FU PACKAGE = FU::Pg
|
MODULE = FU PACKAGE = FU::Pg
|
||||||
|
|
||||||
void _load_libpq()
|
void _load_libpq()
|
||||||
|
|
@ -153,6 +161,7 @@ void connect(const char *pkg, const char *conninfo)
|
||||||
ST(0) = fupg_connect(aTHX_ conninfo);
|
ST(0) = fupg_connect(aTHX_ conninfo);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MODULE = FU PACKAGE = FU::Pg::conn
|
MODULE = FU PACKAGE = FU::Pg::conn
|
||||||
|
|
||||||
void server_version(fupg_conn *c)
|
void server_version(fupg_conn *c)
|
||||||
|
|
@ -207,6 +216,7 @@ void q(fupg_conn *c, SV *sv, ...)
|
||||||
ST(0) = fupg_q(aTHX_ c, c->stflags, SvPVutf8_nolen(sv), ax, items);
|
ST(0) = fupg_q(aTHX_ c, c->stflags, SvPVutf8_nolen(sv), ax, items);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MODULE = FU PACKAGE = FU::Pg::txn
|
MODULE = FU PACKAGE = FU::Pg::txn
|
||||||
|
|
||||||
void DESTROY(fupg_txn *t)
|
void DESTROY(fupg_txn *t)
|
||||||
|
|
@ -251,6 +261,7 @@ void q(fupg_txn *t, SV *sv, ...)
|
||||||
ST(0) = fupg_q(aTHX_ t->conn, t->stflags, SvPVutf8_nolen(sv), ax, items);
|
ST(0) = fupg_q(aTHX_ t->conn, t->stflags, SvPVutf8_nolen(sv), ax, items);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MODULE = FU PACKAGE = FU::Pg::st
|
MODULE = FU PACKAGE = FU::Pg::st
|
||||||
|
|
||||||
void cache(fupg_st *x, ...)
|
void cache(fupg_st *x, ...)
|
||||||
|
|
@ -330,3 +341,42 @@ void kvh(fupg_st *st)
|
||||||
void DESTROY(fupg_st *st)
|
void DESTROY(fupg_st *st)
|
||||||
CODE:
|
CODE:
|
||||||
fupg_st_destroy(aTHX_ st);
|
fupg_st_destroy(aTHX_ st);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
MODULE = FU PACKAGE = FU::XMLWriter
|
||||||
|
|
||||||
|
void _new()
|
||||||
|
CODE:
|
||||||
|
ST(0) = fuxmlwr_new(aTHX);
|
||||||
|
|
||||||
|
void _done(fuxmlwr *wr)
|
||||||
|
CODE:
|
||||||
|
ST(0) = fustr_done(&wr->out);
|
||||||
|
fustr_init(aTHX_ &wr->out, NULL, SIZE_MAX);
|
||||||
|
|
||||||
|
void lit_(SV *sv)
|
||||||
|
CODE:
|
||||||
|
if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance");
|
||||||
|
STRLEN len;
|
||||||
|
const char *buf = SvPVutf8(sv, len);
|
||||||
|
fustr_write(aTHX_ &fuxmlwr_tail->out, buf, len);
|
||||||
|
|
||||||
|
void txt_(SV *sv)
|
||||||
|
CODE:
|
||||||
|
if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance");
|
||||||
|
fuxmlwr_escape(aTHX_ fuxmlwr_tail, sv);
|
||||||
|
|
||||||
|
void tag_(SV *sv, ...)
|
||||||
|
CODE:
|
||||||
|
if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance");
|
||||||
|
STRLEN len;
|
||||||
|
const char *tagname = SvPV(sv, len);
|
||||||
|
fuxmlwr_isname(tagname);
|
||||||
|
fuxmlwr_tag(aTHX_ fuxmlwr_tail, ax, 1, items, 0, tagname, len);
|
||||||
|
|
||||||
|
INCLUDE_COMMAND: $^X -e '$FU::XMLWriter::XSPRINT=1; require "./FU/XMLWriter.pm"'
|
||||||
|
|
||||||
|
void DESTROY(fuxmlwr *wr)
|
||||||
|
CODE:
|
||||||
|
fuxmlwr_destroy(aTHX_ wr);
|
||||||
|
|
|
||||||
|
|
@ -163,6 +163,10 @@ be sufficient. The following is probably an improvement:
|
||||||
|
|
||||||
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
|
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
|
||||||
|
|
||||||
|
This function generates invalid JSON if you pass it a string with invalid
|
||||||
|
Unicode characters; I don't see how you'd ever accidentally end up with such a
|
||||||
|
string, anyway.
|
||||||
|
|
||||||
The following C<%options> are supported:
|
The following C<%options> are supported:
|
||||||
|
|
||||||
=over
|
=over
|
||||||
|
|
|
||||||
284
FU/XMLWriter.pm
Normal file
284
FU/XMLWriter.pm
Normal file
|
|
@ -0,0 +1,284 @@
|
||||||
|
package FU::XMLWriter 0.1;
|
||||||
|
use v5.36;
|
||||||
|
use Carp 'confess';
|
||||||
|
use Exporter 'import';
|
||||||
|
|
||||||
|
our $XSPRINT;
|
||||||
|
BEGIN { require FU::XS unless $XSPRINT }
|
||||||
|
|
||||||
|
my @NORMAL_TAGS = qw/
|
||||||
|
a_ abbr_ address_ article_ aside_ audio_ b_ bb_ bdo_ blockquote_ body_
|
||||||
|
button_ canvas_ caption_ cite_ code_ colgroup_ datagrid_ datalist_ dd_ del_
|
||||||
|
details_ dfn_ dialog_ div_ dl_ dt_ em_ fieldset_ figure_ footer_ form_ h1_
|
||||||
|
h2_ h3_ h4_ h5_ h6_ head_ header_ i_ iframe_ ins_ kbd_ label_ legend_ li_
|
||||||
|
main_ map_ mark_ menu_ meter_ nav_ noscript_ object_ ol_ optgroup_ option_
|
||||||
|
output_ p_ pre_ progress_ q_ rp_ rt_ ruby_ samp_ script_ section_ select_
|
||||||
|
small_ span_ strong_ style_ sub_ summary_ sup_ table_ tbody_ td_ textarea_
|
||||||
|
tfoot_ th_ thead_ time_ title_ tr_ ul_ var_ video_
|
||||||
|
/;
|
||||||
|
|
||||||
|
my @SELFCLOSE_TAGS = qw/
|
||||||
|
area_ base_ br_ col_ command_ embed_ hr_ img_ input_ link_ meta_ param_
|
||||||
|
source_
|
||||||
|
/;
|
||||||
|
|
||||||
|
# Used by FU.xs to generate an XS function for each tag.
|
||||||
|
# (Wrapping tag_() within Perl is slow, using ALIAS is possible but still benefits from code gen)
|
||||||
|
if ($XSPRINT) {
|
||||||
|
sub f($name, $selfclose) {
|
||||||
|
my $tag = $name =~ s/_$//r;
|
||||||
|
my $len = length $tag;
|
||||||
|
printf <<~_;
|
||||||
|
void $name(...)
|
||||||
|
CODE:
|
||||||
|
if (!fuxmlwr_tail) fu_confess("No active FU::XMLWriter instance");
|
||||||
|
fuxmlwr_tag(aTHX_ fuxmlwr_tail, ax, 0, items, $selfclose, "$tag", $len);
|
||||||
|
|
||||||
|
_
|
||||||
|
}
|
||||||
|
f $_, 0 for @NORMAL_TAGS;
|
||||||
|
f $_, 1 for @SELFCLOSE_TAGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
our %EXPORT_TAGS = (
|
||||||
|
html5_ => [ qw/tag_ html_ lit_ txt_/, @NORMAL_TAGS, @SELFCLOSE_TAGS ],
|
||||||
|
xml_ => [ qw/xml_ tag_ lit_ txt_/ ],
|
||||||
|
);
|
||||||
|
|
||||||
|
our @EXPORT_OK = (
|
||||||
|
qw/fragment xml_ xml_escape/,
|
||||||
|
@{$EXPORT_TAGS{html5_}},
|
||||||
|
);
|
||||||
|
|
||||||
|
my %XML = qw/& & < < " "/;
|
||||||
|
sub xml_escape($s) {
|
||||||
|
return '' if !defined $s;
|
||||||
|
$s =~ s/([&<"])/$XML{$1}/gr;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub fragment :prototype(&) ($f) {
|
||||||
|
my $wr = _new();
|
||||||
|
$f->();
|
||||||
|
$wr->_done;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub html_(@arg) {
|
||||||
|
fragment {
|
||||||
|
lit_("<!DOCTYPE html>\n");
|
||||||
|
tag_('html', @arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub xml_ :prototype(&) ($f) {
|
||||||
|
fragment {
|
||||||
|
lit_(qq{<?xml version="1.0" encoding="UTF-8"?>\n});
|
||||||
|
$f->();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
1;
|
||||||
|
__END__
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
FU::XMLWriter - Convenient and efficient XML and HTML generator.
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
use FU::XMLWriter ':html5';
|
||||||
|
|
||||||
|
my $html_string = html_ sub {
|
||||||
|
head_ sub {
|
||||||
|
title_ 'Document title!';
|
||||||
|
};
|
||||||
|
body_ sub {
|
||||||
|
h1_ 'Main title!';
|
||||||
|
p_ class => 'description', sub {
|
||||||
|
txt_ 'Here we have <textual> data.';
|
||||||
|
br_;
|
||||||
|
a_ href => '/path', 'And a link.';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Or XML:
|
||||||
|
|
||||||
|
use FU::XMLWriter ':xml';
|
||||||
|
|
||||||
|
my $xml_string = xml_ sub {
|
||||||
|
tag_ feed => xmlns => 'http://www.w3.org/2005/Atom',
|
||||||
|
'xml:lang' => 'en', 'xml:base' => 'https://mywebsite/atom.feed', sub {
|
||||||
|
tag_ title => 'My awesome Atom feed';
|
||||||
|
# etc
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
This is a convenient XML writer that provides an imperative API to generating
|
||||||
|
dynamic XML. It just so happens that XML syntax is also completely valid for
|
||||||
|
HTML5, so this module is primarily abused for that purpose.
|
||||||
|
|
||||||
|
As a naming convention, all XML/HTML output functions are suffixed with an
|
||||||
|
underscore (C<_>) to make their functionality easy to identify and avoid
|
||||||
|
potential naming collisions. You are encouraged to follow this convention in
|
||||||
|
your own code. For example, if you have a function to convert some data into a
|
||||||
|
nicely formatted table, you could name it C<info_table_()> or something. It's
|
||||||
|
like having composable custom HTML elements, but in the backend!
|
||||||
|
|
||||||
|
=head2 Top-level functions
|
||||||
|
|
||||||
|
These functions all return a byte string with (UTF-8) encoded XML.
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item fragment($block)
|
||||||
|
|
||||||
|
Executes C<$block> and captures the output of all I</"Output functions">
|
||||||
|
called within the same scope into a string. This function can be safely nested:
|
||||||
|
|
||||||
|
my $string = fragment {
|
||||||
|
p_ 'Stuff here';
|
||||||
|
|
||||||
|
my $subfragment = fragment {
|
||||||
|
div_ 'More stuff here';
|
||||||
|
};
|
||||||
|
# $subfragment = '<div>More stuff here</div>'
|
||||||
|
};
|
||||||
|
# $string = '<p>Stuff here</p>'
|
||||||
|
|
||||||
|
=item xml_($block)
|
||||||
|
|
||||||
|
Like C<fragment()> but adds a C<< <?xml ..> >> declaration.
|
||||||
|
|
||||||
|
=item html_(@args)
|
||||||
|
|
||||||
|
Like C<fragment()> but adds a suitable DOCTYPE of HTML5. The C<@args> are
|
||||||
|
passed to the C<tag_()> call for the top-level C<< <html> >> element.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 Output functions
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item tag_($name, @attrs, $content)
|
||||||
|
|
||||||
|
This is the meat of this module. Output an XML element with the given C<$name>.
|
||||||
|
C<$content> can either be C<undef> to create a self-closing tag:
|
||||||
|
|
||||||
|
tag_ 'br', undef;
|
||||||
|
# <br />
|
||||||
|
|
||||||
|
Or a string:
|
||||||
|
|
||||||
|
tag_ 'title', 'My title & stuff';
|
||||||
|
# <title>My title & stuff</title>
|
||||||
|
|
||||||
|
Or a subroutine:
|
||||||
|
|
||||||
|
tag_ 'div', sub {
|
||||||
|
tag_ 'br', undef;
|
||||||
|
};
|
||||||
|
# <div><br /></div>
|
||||||
|
|
||||||
|
Attributes can be given as key/value pairs:
|
||||||
|
|
||||||
|
tag_ 'a', href => '/?f&c', title => 'Homepage', 'link';
|
||||||
|
# <a href="/f&c" title="Homepage">link</a>
|
||||||
|
|
||||||
|
An C<undef> value causes the attribute to be ignored:
|
||||||
|
|
||||||
|
tag_ 'option', selected => time % 2 == 0 ? 'selected' : undef, '';
|
||||||
|
# Depending on the time:
|
||||||
|
# <option></option>
|
||||||
|
# Or
|
||||||
|
# <option selected="selected"></option>
|
||||||
|
|
||||||
|
A C<'+'> attribute name can be used to append a string to the previously given
|
||||||
|
attribute:
|
||||||
|
|
||||||
|
tag_ 'div', class => $is_hidden ? 'hidden' : undef,
|
||||||
|
'+' => $is_warning ? 'warning' : undef, 'Text';
|
||||||
|
# Results in either:
|
||||||
|
# <div>Text</div>
|
||||||
|
# <div class="hidden">Text</div>
|
||||||
|
# <div class="warning">Text</div>
|
||||||
|
# <div class="hidden warning">Text</div>
|
||||||
|
|
||||||
|
=item txt_($string)
|
||||||
|
|
||||||
|
Takes a Unicode string and outputs it, escaping any special XML characters in
|
||||||
|
the process.
|
||||||
|
|
||||||
|
=item lit_($string)
|
||||||
|
|
||||||
|
Takes a Unicode string and outputs it literally, i.e. without any XML escaping.
|
||||||
|
|
||||||
|
=item <html-tag>_(@attrs, $content)
|
||||||
|
|
||||||
|
This module provides a short-hand function for every HTML5 tag. Using these is
|
||||||
|
less typing and also slightly more performant than calling C<tag_()>. The
|
||||||
|
following C<tag_()>-like wrapper functions are provided:
|
||||||
|
|
||||||
|
a_ abbr_ address_ article_ aside_ audio_ b_ bb_ bdo_ blockquote_ body_
|
||||||
|
button_ canvas_ caption_ cite_ code_ colgroup_ datagrid_ datalist_ dd_ del_
|
||||||
|
details_ dfn_ dialog_ div_ dl_ dt_ em_ fieldset_ figure_ footer_ form_ h1_
|
||||||
|
h2_ h3_ h4_ h5_ h6_ head_ header_ i_ iframe_ ins_ kbd_ label_ legend_ li_
|
||||||
|
main_ map_ mark_ menu_ meter_ nav_ noscript_ object_ ol_ optgroup_ option_
|
||||||
|
output_ p_ pre_ progress_ q_ rp_ rt_ ruby_ samp_ script_ section_ select_
|
||||||
|
small_ span_ strong_ style_ sub_ summary_ sup_ table_ tbody_ td_ textarea_
|
||||||
|
tfoot_ th_ thead_ time_ title_ tr_ ul_ var_ video_
|
||||||
|
|
||||||
|
Additionally, the following self-closing-tag functions are provided:
|
||||||
|
|
||||||
|
area_ base_ br_ col_ command_ embed_ hr_ img_ input_ link_ meta_ param_
|
||||||
|
source_
|
||||||
|
|
||||||
|
The self-closing functions do not require a C<$content> argument; if none is
|
||||||
|
provided it defaults to C<undef>.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 Utility function
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item xml_escape($string)
|
||||||
|
|
||||||
|
Return the XML-escaped version of C<$string>. The characters C<&>, C<E<lt>>,
|
||||||
|
and C<"> are replaced with their XML entity.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head2 Import options
|
||||||
|
|
||||||
|
All of the functions mentioned in this document can be imported individually.
|
||||||
|
There are also two import groups:
|
||||||
|
|
||||||
|
use FU::XMLWriter ':html';
|
||||||
|
|
||||||
|
Exports C<tag_()>, C<html_()>, C<lit_()>, C<txt_()> and all of the C<<
|
||||||
|
<html-tag>_ >> functions mentioned above.
|
||||||
|
|
||||||
|
use FU::XMLWriter ':xml';
|
||||||
|
|
||||||
|
Exports C<xml_()>, C<tag_()>, C<lit_()> and C<txt_()>.
|
||||||
|
|
||||||
|
=head1 SEE ALSO
|
||||||
|
|
||||||
|
This module is part of the L<FU> framework, although it can be used
|
||||||
|
independently of it.
|
||||||
|
|
||||||
|
This module was based on L<TUWF::XML>, which was in turn inspired by
|
||||||
|
L<XML::Writer>, which is more powerful but less convenient.
|
||||||
|
|
||||||
|
There's also L<DSL::HTML>, a slightly more featureful, heavyweight and
|
||||||
|
opinionated HTML-templating-inside-Perl module, based on L<HTML::Tree>.
|
||||||
|
|
||||||
|
And there's L<HTML::Declare>, which is conceptually simpler than both this and
|
||||||
|
L<DSL::HTML>, but its syntax isn't quite as nice.
|
||||||
|
|
||||||
|
And there's also L<HTML::FromArrayref>, L<HTML::Tiny>, L<HTML::Untidy> and many
|
||||||
|
more modules on CPAN. In fact I don't know why you should use this module
|
||||||
|
instead of whatever is available on CPAN.
|
||||||
|
|
@ -20,5 +20,4 @@ Things that may or may not happen:
|
||||||
- FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy.
|
- FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy.
|
||||||
- FU::Log - Basic logger.
|
- FU::Log - Basic logger.
|
||||||
- FU::Validate - TUWF::Validate & normalization with some improvements.
|
- FU::Validate - TUWF::Validate & normalization with some improvements.
|
||||||
- FU::XML - TUWF::XMLXS with some improvements.
|
|
||||||
- FU::Mailer - Simple sendmail wrapper
|
- FU::Mailer - Simple sendmail wrapper
|
||||||
|
|
|
||||||
11
c/jsonfmt.c
11
c/jsonfmt.c
|
|
@ -21,15 +21,6 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
|
||||||
unsigned char *buf;
|
unsigned char *buf;
|
||||||
unsigned char x = 0;
|
unsigned char x = 0;
|
||||||
|
|
||||||
/* Validate entire string for conformance if this is flagged as a utf8
|
|
||||||
* string, this lets us be lazy further on.
|
|
||||||
* Commenting this out doubles the performance for formatting unicode
|
|
||||||
* strings, I suspect there's room for optimizations in
|
|
||||||
* is_c9strict_utf8_string(). */
|
|
||||||
if (utf8 && !is_c9strict_utf8_string(str, len)) {
|
|
||||||
croak("invalid codepoint encountered in string, cannot format to JSON");
|
|
||||||
}
|
|
||||||
|
|
||||||
fustr_write_ch(ctx->out, '\"');
|
fustr_write_ch(ctx->out, '\"');
|
||||||
fustr_reserve(ctx->out, len);
|
fustr_reserve(ctx->out, len);
|
||||||
|
|
||||||
|
|
@ -37,7 +28,7 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
|
||||||
/* Fast path: no escaping needed */
|
/* Fast path: no escaping needed */
|
||||||
loff = off;
|
loff = off;
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
/* we already validated everything >=0x80 */
|
/* assume >=0x80 is valid utf8 */
|
||||||
while (off < len) {
|
while (off < len) {
|
||||||
x = str[off];
|
x = str[off];
|
||||||
if (x <= 0x1f || x == '"' || x == '\\' || x == 0x7f) break;
|
if (x <= 0x1f || x == '"' || x == '\\' || x == 0x7f) break;
|
||||||
|
|
|
||||||
161
c/xmlwr.c
Normal file
161
c/xmlwr.c
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
typedef struct fuxmlwr fuxmlwr;
|
||||||
|
struct fuxmlwr {
|
||||||
|
SV *self;
|
||||||
|
fuxmlwr *next, *prev;
|
||||||
|
fustr out;
|
||||||
|
};
|
||||||
|
|
||||||
|
static fuxmlwr *fuxmlwr_tail = NULL;
|
||||||
|
|
||||||
|
static SV *fuxmlwr_new(pTHX) {
|
||||||
|
fuxmlwr *wr = safemalloc(sizeof(*wr));
|
||||||
|
wr->next = NULL;
|
||||||
|
wr->prev = fuxmlwr_tail;
|
||||||
|
if (fuxmlwr_tail) fuxmlwr_tail->next = wr;
|
||||||
|
fuxmlwr_tail = wr;
|
||||||
|
fustr_init(&wr->out, NULL, SIZE_MAX);
|
||||||
|
return fu_selfobj(wr, "FU::XMLWriter");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fuxmlwr_destroy(pTHX_ fuxmlwr *wr) {
|
||||||
|
if (fuxmlwr_tail == wr) fuxmlwr_tail = wr->next ? wr->next : wr->prev;
|
||||||
|
if (wr->next) wr->next->prev = wr->prev;
|
||||||
|
if (wr->prev) wr->prev->next = wr->next;
|
||||||
|
if (wr->out.sv) SvREFCNT_dec(wr->out.sv);
|
||||||
|
safefree(wr);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) {
|
||||||
|
STRLEN len;
|
||||||
|
const unsigned char *str = (unsigned char *)SvPV_const(sv, len);
|
||||||
|
const unsigned char *tmp, *end = str + len;
|
||||||
|
unsigned char x = 0;
|
||||||
|
int utf8 = SvUTF8(sv);
|
||||||
|
|
||||||
|
while (str < end) {
|
||||||
|
tmp = str;
|
||||||
|
if (utf8) {
|
||||||
|
while (tmp < end) {
|
||||||
|
x = *tmp;
|
||||||
|
if (x == '<' || x == '&' || x == '"') break;
|
||||||
|
tmp++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (tmp < end) {
|
||||||
|
x = *tmp;
|
||||||
|
if (x == '<' || x == '&' || x == '"' || x >= 0x80) break;
|
||||||
|
tmp++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fustr_write(&wr->out, (const char *)str, tmp-str);
|
||||||
|
if (tmp == end) return;
|
||||||
|
switch (x) {
|
||||||
|
case '<': fustr_write(&wr->out, "<", 4); break;
|
||||||
|
case '&': fustr_write(&wr->out, "&", 5); break;
|
||||||
|
case '"': fustr_write(&wr->out, """, 6); break;
|
||||||
|
default:
|
||||||
|
unsigned char *buf = (unsigned char *)fustr_write_buf(&wr->out, 2);
|
||||||
|
buf[0] = 0xc0 | (x >> 6);
|
||||||
|
buf[1] = 0x80 | (x & 0x3f);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
str = tmp + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int fuxmlwr_isnamechar(unsigned int x) {
|
||||||
|
return (x|32)-'a' < 26 || x-'0' < 10 || x == '_' || x == ':' || x == '-';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate a tag or attribute name. Pretty much /^[a-z0-9_:-]+$/i.
|
||||||
|
// This does not at all match with the XML and HTML standards, but this
|
||||||
|
// approach is simpler and catches the most important bugs anyway.
|
||||||
|
static void fuxmlwr_isname(const char *str) {
|
||||||
|
const char *x = str;
|
||||||
|
while (fuxmlwr_isnamechar(*x)) x++;
|
||||||
|
if (*x || x == str) fu_confess("Invalid tag or attribute name: '%s'", str);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void fuxmlwr_tag(pTHX_ fuxmlwr *wr, I32 ax, I32 offset, I32 argc, int selfclose, const char *tagname, int tagnamelen) {
|
||||||
|
SV *key, *val;
|
||||||
|
const char *keys, *lastkey = NULL;
|
||||||
|
int isopen = 0;
|
||||||
|
dSP;
|
||||||
|
|
||||||
|
if (!selfclose && ((argc - offset) & 1) == 0) fu_confess("Invalid number of arguments");
|
||||||
|
fustr_write_ch(&wr->out, '<');
|
||||||
|
fustr_write(&wr->out, tagname, tagnamelen);
|
||||||
|
|
||||||
|
while (offset < argc-1) {
|
||||||
|
key = ST(offset);
|
||||||
|
offset++;
|
||||||
|
val = ST(offset);
|
||||||
|
offset++;
|
||||||
|
|
||||||
|
// Don't even try to stringify other arguments; non-string keys are always a bug.
|
||||||
|
if (!SvPOK(key)) fu_confess("Non-string attribute");
|
||||||
|
keys = SvPVX(key);
|
||||||
|
|
||||||
|
SvGETMAGIC(val);
|
||||||
|
/* TODO: Support boolean values */
|
||||||
|
if (keys[0] == '+' && keys[1] == 0) {
|
||||||
|
if (!SvOK(val)) {
|
||||||
|
// ignore
|
||||||
|
} else if (isopen) {
|
||||||
|
fustr_write_ch(&wr->out, ' ');
|
||||||
|
fuxmlwr_escape(aTHX_ wr, val);
|
||||||
|
} else if (lastkey) {
|
||||||
|
fustr_write_ch(&wr->out, ' ');
|
||||||
|
fustr_write(&wr->out, lastkey, strlen(lastkey));
|
||||||
|
fustr_write(&wr->out, "=\"", 2);
|
||||||
|
fuxmlwr_escape(aTHX_ wr, val);
|
||||||
|
isopen = 1;
|
||||||
|
} else {
|
||||||
|
fu_confess("Cannot use '+' as first attribute");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (isopen) {
|
||||||
|
fustr_write_ch(&wr->out, '"');
|
||||||
|
isopen = 0;
|
||||||
|
}
|
||||||
|
fuxmlwr_isname(keys);
|
||||||
|
if (!SvOK(val)) {
|
||||||
|
lastkey = keys;
|
||||||
|
} else {
|
||||||
|
fustr_write_ch(&wr->out, ' ');
|
||||||
|
fustr_write(&wr->out, keys, SvCUR(key));
|
||||||
|
fustr_write(&wr->out, "=\"", 2);
|
||||||
|
fuxmlwr_escape(aTHX_ wr, val);
|
||||||
|
isopen = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isopen) fustr_write_ch(&wr->out, '"');
|
||||||
|
|
||||||
|
if (offset < argc) {
|
||||||
|
val = ST(offset);
|
||||||
|
SvGETMAGIC(val);
|
||||||
|
} else
|
||||||
|
val = &PL_sv_undef;
|
||||||
|
|
||||||
|
if (!SvOK(val)) { // undef
|
||||||
|
fustr_write(&wr->out, " />", 3);
|
||||||
|
} else if (SvROK(val) && strcmp(sv_reftype(SvRV(val), 0), "CODE") == 0) { // CODE ref
|
||||||
|
fustr_write_ch(&wr->out, '>');
|
||||||
|
PUSHMARK(SP);
|
||||||
|
call_sv(val, G_VOID|G_DISCARD|G_NOARGS);
|
||||||
|
fustr_write(&wr->out, "</", 2);
|
||||||
|
fustr_write(&wr->out, tagname, tagnamelen);
|
||||||
|
fustr_write_ch(&wr->out, '>');
|
||||||
|
} else {
|
||||||
|
fustr_write_ch(&wr->out, '>');
|
||||||
|
fuxmlwr_escape(aTHX_ wr, val);
|
||||||
|
fustr_write(&wr->out, "</", 2);
|
||||||
|
fustr_write(&wr->out, tagname, tagnamelen);
|
||||||
|
fustr_write_ch(&wr->out, '>');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -62,9 +62,6 @@ my @errors = (
|
||||||
*STDOUT, qr/unable to format unknown value/,
|
*STDOUT, qr/unable to format unknown value/,
|
||||||
'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
||||||
'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
||||||
"\x{D83D}", qr/invalid codepoint encountered in string/,
|
|
||||||
"\x{DE03}", qr/invalid codepoint encountered in string/,
|
|
||||||
do { no warnings 'portable'; "\x{ffffffff}" }, qr/invalid codepoint encountered in string/,
|
|
||||||
do { my $o = {}; bless $o, 'FU::Whatever' }, qr/unable to format 'FU::Whatever' object as JSON/,
|
do { my $o = {}; bless $o, 'FU::Whatever' }, qr/unable to format 'FU::Whatever' object as JSON/,
|
||||||
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
||||||
);
|
);
|
||||||
|
|
|
||||||
68
t/xmlwr.t
Normal file
68
t/xmlwr.t
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
use v5.36;
|
||||||
|
use Test::More;
|
||||||
|
use FU::XMLWriter qw/:html5_ fragment/;
|
||||||
|
|
||||||
|
is fragment {}, '';
|
||||||
|
is fragment { lit_ '<hi>'; txt_ '<hi>' }, '<hi><hi>';
|
||||||
|
is fragment { tag_ 'br', undef }, '<br />';
|
||||||
|
is fragment { tag_ 'a', href => '/&ops', 't&xt' }, '<a href="/&ops">t&xt</a>';
|
||||||
|
is fragment { a_ href => '/&ops', 't&xt' }, '<a href="/&ops">t&xt</a>';
|
||||||
|
is fragment { txt_ "\x{1f973}" }, '🥳';
|
||||||
|
|
||||||
|
ok !eval { lit_ 'hi'; 1 };
|
||||||
|
ok !eval { txt_ 'hi'; 1 };
|
||||||
|
ok !eval { a_ 'hi'; 1 };
|
||||||
|
|
||||||
|
is fragment {
|
||||||
|
ok !eval { a_; 1 };
|
||||||
|
ok !eval { lit_; 1 };
|
||||||
|
ok !eval { tag_ 'é'; 1 };
|
||||||
|
ok !eval { tag_ ';'; 1 };
|
||||||
|
ok !eval { tag_ ''; 1 };
|
||||||
|
ok !eval { tag_ 'a', 'é', 1, 1 };
|
||||||
|
ok !eval { tag_ 'a', ';', 1, 1 };
|
||||||
|
ok !eval { tag_ 'a', '', 1, 1 };
|
||||||
|
ok !eval { a_ undef, 1, 1 };
|
||||||
|
ok !eval { a_ [], 1, 1 };
|
||||||
|
}, '<a<a<a<a<a'; # Arguably a bug, but rolling back earlier writes on error seems not worth the effort.
|
||||||
|
|
||||||
|
is fragment {
|
||||||
|
tag_ 'customTag', 1;
|
||||||
|
tag_ 'custom-selfclose', undef;
|
||||||
|
}, '<customTag>1</customTag><custom-selfclose />';
|
||||||
|
|
||||||
|
is fragment { div_ x => 1, '+' => 2, '+', 3, undef }, '<div x="1 2 3" />';
|
||||||
|
is fragment { div_ x => 1, '+' => 2, '+', undef, undef }, '<div x="1 2" />';
|
||||||
|
is fragment { div_ x => 1, '+' => undef, '+', 3, undef }, '<div x="1 3" />';
|
||||||
|
is fragment { div_ x => 1, '+' => undef, y => undef, '+', 3, undef }, '<div x="1" y="3" />';
|
||||||
|
is fragment { div_ x => undef, '+' => undef, y => undef, '+', 3, undef }, '<div y="3" />';
|
||||||
|
is fragment { div_ x => undef, '+' => undef, '+', 1, undef }, '<div x="1" />';
|
||||||
|
|
||||||
|
ok !eval { fragment { div_ '+' => 1, undef } };
|
||||||
|
|
||||||
|
sub lit { lit_ "<ok\x{1f973}ay>"; }
|
||||||
|
|
||||||
|
sub t {
|
||||||
|
is $_[0], 'arg';
|
||||||
|
div_ attr1 => $_[0], sub {
|
||||||
|
is $_[0], 'arg';
|
||||||
|
|
||||||
|
span_ 'ab" < c &< d';
|
||||||
|
span_ \&lit;
|
||||||
|
|
||||||
|
is fragment(\&lit), "<ok🥳ay>";
|
||||||
|
|
||||||
|
is fragment {
|
||||||
|
is fragment { br_ }, '<br />';
|
||||||
|
}, '';
|
||||||
|
|
||||||
|
eval { fragment { tag_ '<oops>', '' } };
|
||||||
|
like $@, qr/Invalid tag or attribute name/;
|
||||||
|
|
||||||
|
txt_ "\x{1f973}";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
is fragment { t 'arg' }, '<div attr1="arg"><span>ab" < c &< d</span><span><ok🥳ay></span>🥳</div>';
|
||||||
|
|
||||||
|
done_testing;
|
||||||
Loading…
Add table
Add a link
Reference in a new issue