A few more HTML conversion improvements
- Fix segfault on empty output (bug was in XS code)
- Still better end-of-URL detection
- Recognize a few common multicharacter sections in man references
This commit is contained in:
parent
1ccc86ce86
commit
746889851c
3 changed files with 18 additions and 5 deletions
2
Makefile
2
Makefile
|
|
@ -6,7 +6,7 @@ all: ManUtils indexer
|
||||||
ManUtils: lib/ManUtils/inst/lib/perl5/x86_64-linux/ManUtils.pm
|
ManUtils: lib/ManUtils/inst/lib/perl5/x86_64-linux/ManUtils.pm
|
||||||
|
|
||||||
lib/ManUtils/inst/lib/perl5/x86_64-linux/ManUtils.pm: lib/ManUtils/Build.PL lib/ManUtils/ManUtils.pm lib/ManUtils/ManUtils.xs web/target/release/libweb.a
|
lib/ManUtils/inst/lib/perl5/x86_64-linux/ManUtils.pm: lib/ManUtils/Build.PL lib/ManUtils/ManUtils.pm lib/ManUtils/ManUtils.xs web/target/release/libweb.a
|
||||||
test lib/ManUtils/ManUtils.xs -ot web/target/release/libweb.a && touch -r web/target/release/libweb.a lib/ManUtils/ManUtils.xs
|
-test lib/ManUtils/ManUtils.xs -ot web/target/release/libweb.a && touch -r web/target/release/libweb.a lib/ManUtils/ManUtils.xs
|
||||||
cd lib/ManUtils && perl Build.PL && ./Build install --install-base=inst
|
cd lib/ManUtils && perl Build.PL && ./Build install --install-base=inst
|
||||||
touch lib/ManUtils/inst/lib/perl5/x86_64-linux/ManUtils.pm
|
touch lib/ManUtils/inst/lib/perl5/x86_64-linux/ManUtils.pm
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ html(str)
|
||||||
STRLEN len;
|
STRLEN len;
|
||||||
char *inbuf = SvPV(str, len);
|
char *inbuf = SvPV(str, len);
|
||||||
struct StringWrap buf = grotty2html_wrap(inbuf, len);
|
struct StringWrap buf = grotty2html_wrap(inbuf, len);
|
||||||
SV *dest = newSVpv(buf.buf, buf.len);
|
SV *dest = buf.len ? newSVpv(buf.buf, buf.len) : newSVpv("", 0);
|
||||||
grotty2html_free(buf);
|
grotty2html_free(buf);
|
||||||
SvUTF8_on(dest);
|
SvUTF8_on(dest);
|
||||||
RETVAL = dest;
|
RETVAL = dest;
|
||||||
|
|
|
||||||
|
|
@ -181,7 +181,9 @@ impl FmtBuf {
|
||||||
// - https://manned.org/urn/8cb83e85
|
// - https://manned.org/urn/8cb83e85
|
||||||
// TODO: Add heuristic to only remove ) at the end of the URL if there is no matching (
|
// TODO: Add heuristic to only remove ) at the end of the URL if there is no matching (
|
||||||
// inside the URL.
|
// inside the URL.
|
||||||
let url = url.trim_right_matches('.').trim_right_matches(',').trim_right_matches(';').trim_right_matches(')').trim_right_matches('⟩').trim_right_matches('\'');
|
let url = url.trim_right_matches(|c|
|
||||||
|
match c { '.' | ',' | ';' | ')' | '⟩' | '\'' | ':' | ']' | '}' => true, _ => false }
|
||||||
|
);
|
||||||
if url.len() < 10 {
|
if url.len() < 10 {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -194,8 +196,19 @@ impl FmtBuf {
|
||||||
fn flush_ref(&self, st: &mut Flush, end: usize) {
|
fn flush_ref(&self, st: &mut Flush, end: usize) {
|
||||||
// We know where the closing bracket is in the string, so this regex is used to search
|
// We know where the closing bracket is in the string, so this regex is used to search
|
||||||
// backwards from there and find the start of the reference.
|
// backwards from there and find the start of the reference.
|
||||||
|
// There are a lot of 'special' multi-character section names, so it might not make sense
|
||||||
|
// to parse all of them. Here's an estimate of a few 'special' section references, in
|
||||||
|
// number of man pages using the reference (using ~ '%(3pm)%' on the 2017-01-14 database):
|
||||||
|
// - 3pm 17810
|
||||||
|
// - 3w 8729 (just a few packages)
|
||||||
|
// - 3tcl 2000
|
||||||
|
// - 3tk 758
|
||||||
|
// - 3p 309
|
||||||
|
// - 3perl 268
|
||||||
|
// - 3ssl 198
|
||||||
lazy_static!(
|
lazy_static!(
|
||||||
static ref REF: Regex = Regex::new(r"([A-Za-z0-9:\._-]+)\(([1-9nl])\)$").unwrap();
|
// XXX: Make sure to keep this regex in sync with the one in flush()
|
||||||
|
static ref REF: Regex = Regex::new(r"([A-Za-z0-9:\._-]+)\(([1-9nl]|3tcl|3pm|3tk)\)$").unwrap();
|
||||||
);
|
);
|
||||||
|
|
||||||
// Disallow some characters following a reference
|
// Disallow some characters following a reference
|
||||||
|
|
@ -223,7 +236,7 @@ impl FmtBuf {
|
||||||
// This regex is used to quickly *find* interesting patterns, any further validation
|
// This regex is used to quickly *find* interesting patterns, any further validation
|
||||||
// and processing is done afterwards by the (slower) specialized flush_ methods.
|
// and processing is done afterwards by the (slower) specialized flush_ methods.
|
||||||
lazy_static!(
|
lazy_static!(
|
||||||
static ref SEARCH: Regex = Regex::new(r"(?m)(^\[\[\[MANNEDINCLUDE|https?://|[A-Za-z0-9]+\([1-9nl]\))").unwrap();
|
static ref SEARCH: Regex = Regex::new(r"(?m)(^\[\[\[MANNEDINCLUDE|https?://|[A-Za-z0-9]+\(([1-9nl]|3tcl|3pm|3tk)\))").unwrap();
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut st = Flush{
|
let mut st = Flush{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue