Fix handling of URLs ending in a ⟩
I was already aware of this issue, but hadn't realized how widespread it was. This fixes many links.
This commit is contained in:
parent
7d31f41ba8
commit
17fc298217
2 changed files with 15 additions and 3 deletions
|
|
@ -5,6 +5,7 @@ use Module::Build;
|
||||||
Module::Build->new(
|
Module::Build->new(
|
||||||
dist_name => 'ManUtils',
|
dist_name => 'ManUtils',
|
||||||
dist_version_from => 'ManUtils.pm',
|
dist_version_from => 'ManUtils.pm',
|
||||||
|
dist_abstract => 'Utils for manned.org',
|
||||||
pm_files => {
|
pm_files => {
|
||||||
'ManUtils.pm' => 'lib/ManUtils.pm',
|
'ManUtils.pm' => 'lib/ManUtils.pm',
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -169,9 +169,8 @@ static void flushline(ctx_t *x) {
|
||||||
|
|
||||||
// HTTP(s) URL.
|
// HTTP(s) URL.
|
||||||
// This is just a simple q{https?://[^ ][.,;"\)>]?( |$)} match, doesn't
|
// This is just a simple q{https?://[^ ][.,;"\)>]?( |$)} match, doesn't
|
||||||
// always work right:
|
// always work right, e.g.:
|
||||||
// - troff.1: ⟨http://www.gnu.org/copyleft/fdl.html⟩. <- yes, that's a Unicode character.
|
// - https://manned.org/spu_run/414316a1 -> URL wrapped to new line
|
||||||
// - roff.7: Has quite a few issues with wrapped URLs and situations similar to the above.
|
|
||||||
// Note: Don't use strncmp() before manually checking for 'http'. The parse
|
// Note: Don't use strncmp() before manually checking for 'http'. The parse
|
||||||
// time is otherwise increased by a factor 2.
|
// time is otherwise increased by a factor 2.
|
||||||
if(s[0] == 'h' && s[1] == 't' && s[2] == 't' && s[3] == 'p' && (strncmp(s, "http://", 7) == 0 || strncmp(s, "https://", 8) == 0)) {
|
if(s[0] == 'h' && s[1] == 't' && s[2] == 't' && s[3] == 'p' && (strncmp(s, "http://", 7) == 0 || strncmp(s, "https://", 8) == 0)) {
|
||||||
|
|
@ -189,6 +188,18 @@ static void flushline(ctx_t *x) {
|
||||||
endchr = *sp;
|
endchr = *sp;
|
||||||
*(sp--) = 0;
|
*(sp--) = 0;
|
||||||
}
|
}
|
||||||
|
// Also catch a Unicode '⟩', which is how groff sometimes ends a .UR, e.g.:
|
||||||
|
// - https://manned.org/troff/c4467840
|
||||||
|
// - https://manned.org/pass/78413b49
|
||||||
|
// - https://manned.org/empathy-accounts/8c05b2c1
|
||||||
|
// - https://manned.org/urn/8cb83e85
|
||||||
|
// - https://manned.org/wine/4a699a22
|
||||||
|
if(*sp == '\xa9' && *(sp-1) == '\x9f' && *(sp-2) == '\xe2') {
|
||||||
|
sp[1] = endchr;
|
||||||
|
sp -= 3;
|
||||||
|
endchr = sp[1];
|
||||||
|
sp[1] = 0;
|
||||||
|
}
|
||||||
sv_catpvf(x->dest, "<a href=\"%s\" rel=\"nofollow\">%s</a>", s, s);
|
sv_catpvf(x->dest, "<a href=\"%s\" rel=\"nofollow\">%s</a>", s, s);
|
||||||
*(++sp) = endchr;
|
*(++sp) = endchr;
|
||||||
es = s = sp;
|
es = s = sp;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue