From 53857d5b84b110b76aa0abedeadacb0c365cd635 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 17 Jul 2012 21:22:38 +0200 Subject: [PATCH] ManUtils: Fix XSS vulnerability and rendering bug in URL formatting The characters <>" are now simply not allowed in URLs. Incidentally, this also fixes formatting of some URLs within brackets, e.g. <http://example.com/>. --- lib/ManUtils/ManUtils.xs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/ManUtils/ManUtils.xs b/lib/ManUtils/ManUtils.xs index 711cdde..8335e22 100644 --- a/lib/ManUtils/ManUtils.xs +++ b/lib/ManUtils/ManUtils.xs @@ -170,23 +170,21 @@ static void flushline(ctx_t *x) { // HTTP(s) URL. // This is just a simple q{https?://[^ ][.,;"\)>]?( |$)} match, doesn't // always work right: - // - chmod.1: . - // - pod2man.1: . // - troff.1: ⟨http://www.gnu.org/copyleft/fdl.html⟩. <- yes, that's an Unicode character. // - roff.7: Has quite a few issues with wrapped URLs and situations similar to the above. - // - JSON.3pm: "RFC4627"(). // Note: Don't use strncmp() before manually checking for 'http'. The parse // time is otherwise increased by a factor 2. if(s[0] == 'h' && s[1] == 't' && s[2] == 't' && s[3] == 'p' && (strncmp(s, "http://", 7) == 0 || strncmp(s, "https://", 8) == 0)) { - char *sep = strchr(s, ' '); - if(!sep) - sep = s+strlen(s); + // Find the end of the URL (space or some other weird character). + char *sep = s; + while(*sep && *sep != '>' && *sep != '<' && *sep != ' ' && *sep != '"') + sep++; char *sp = sep; if(sp > s+10) { flush(s); char endchr = *sp; *(sp--) = 0; - if(*sp == '.' || *sp == ',' || *sp == ';' || *sp == '"' || *sp == ')' || *sp == '>') { + if(*sp == '.' || *sp == ',' || *sp == ';' || *sp == ')') { sp[1] = endchr; endchr = *sp; *(sp--) = 0;