From 7c561aa5fb408ae896ed7666967607b1c6f73273 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 23 Jan 2013 19:18:27 +0100 Subject: [PATCH 1/4] mark-yank-urls: match generic URLs This commit introduces a more generic URL parser, although not fully conforming to RFC3986, it does try to place as few restrictions as possible on the URL, while still not matching noise. --- mark-yank-urls | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mark-yank-urls b/mark-yank-urls index 831f95e..5373fc5 100644 --- a/mark-yank-urls +++ b/mark-yank-urls @@ -7,9 +7,14 @@ use strict; use warnings; my $url_matcher = qr{( - (?:https?://|ftp://|news://|mailto:|file://|www\.)[ab-zA-Z0-9\-\@;\/?:&=%\$_.+!*\x27(),~#]+ - [ab-zA-Z0-9\-\@;\/?&=%\$_+!*\x27()~] # exclude some trailing characters (heuristic) -)}x; + \b + [a-z][a-z0-9+]+:// # scheme + [\[\]a-z0-9\@:;.-]* # hostname + (?: + /[a-z0-9.,:;~\@?\$\&()/=#%+!*_-]* # path (and query string etc) + )? + (? Date: Wed, 23 Jan 2013 19:25:11 +0100 Subject: [PATCH 2/4] mark-yank-urls: Ability to define handlers per protocol This change introduces the concept of "handler", a command that handles a specific protocol. E.g., x-www-browser for handling http, https and ftp (default). Pressing enter when marking URLs with other schemes will no op. This still leaves us with the possibility to yank the URL to the clipboard. --- mark-yank-urls | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/mark-yank-urls b/mark-yank-urls index 5373fc5..7b38cb4 100644 --- a/mark-yank-urls +++ b/mark-yank-urls @@ -16,6 +16,23 @@ my $url_matcher = qr{( (? 'x-www-browser', + https => 'x-www-browser', + ftp => 'x-www-browser', +); + +sub scheme { + my $uri = shift; + my ($scheme) = $uri =~ m|^([^:]+)://|; + return $scheme; +} + +sub get_handler { + my $uri = shift; + my $scheme = scheme($uri); + return $handlers{$scheme}; +} sub on_start { my ($term) = @_; @@ -25,15 +42,11 @@ sub on_start { import Clipboard; } - eval { require Regexp::Common::URI }; - if(!$@) { - require Regexp::Common; - Regexp::Common->import('URI'); - - $url_matcher = $Regexp::Common::RE{URI}{HTTP}; - } - - $term->{browser} = $term->x_resource ("urlLauncher") || "x-www-browser"; + if (my $browser = $term->x_resource ('urlLauncher')) { + $handlers{http} = $browser; + $handlers{https} = $browser; + $handlers{ftp} = $browser; + } () } @@ -82,10 +95,11 @@ sub on_button_release { while ($text =~ /($url_matcher)/g) { my ($url, $first, $last) = ($1, $-[1], $+[1]); + my $handler = get_handler{$url}; - if($first <= $col && $last >= $col) { + if($handler and $first <= $col && $last >= $col) { $url =~ s/["']$//; - $term->exec_async($term->{browser}, $url); + $term->exec_async($handler, $url); return 1; } } @@ -176,7 +190,8 @@ sub on_key_release { } elsif ($keysym == 65293) { # my $url = get_active_url($term); - $term->exec_async($term->{browser}, $url); + my $handler = get_handler($url); + $term->exec_async($handler, $url) if $handler; deactivate_mark_mode ($term); return 1; From add7a5195cea859e65ffa736389acd75ead4329a Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 23 Jan 2013 19:46:48 +0100 Subject: [PATCH 3/4] Support scheme less www domains Guess scheme to be "http" if there is no real scheme in the URL, but the domain is "www.*". --- mark-yank-urls | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mark-yank-urls b/mark-yank-urls index 7b38cb4..9a7259d 100644 --- a/mark-yank-urls +++ b/mark-yank-urls @@ -8,7 +8,10 @@ use warnings; my $url_matcher = qr{( \b - [a-z][a-z0-9+]+:// # scheme + (?: + [a-z][a-z0-9+]+:// | # scheme + www\. # or www. subdomain + ) [\[\]a-z0-9\@:;.-]* # hostname (?: /[a-z0-9.,:;~\@?\$\&()/=#%+!*_-]* # path (and query string etc) @@ -31,6 +34,8 @@ sub scheme { sub get_handler { my $uri = shift; my $scheme = scheme($uri); + + $scheme = 'http' if not $scheme and $uri =~ /^www\./; return $handlers{$scheme}; } From 251293beed6837095950c8193e8d1aeaac5c0654 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 26 Jun 2020 15:37:41 +0200 Subject: [PATCH 4/4] Fix function call syntax error when using mouse clicks foo() vs foo{} This fixes warnings like (as seen on urxvt's stdout/err), if one ever gets the idea of middle clicking a link: Odd number of elements in anonymous hash at $HOME/.urxvt/mark-yank-urls line 104. Use of uninitialized value $scheme in hash element at $HOME/.urxvt/mark-yank-urls line 40. This bug was introduced in my commit adding "ability to define handlers per protocol". Because of the age of the branch, I will refrain from rebasing and let this be a tombstone of one of my many mistakes throughout the years. The expected behavior (processing links on mouse button2 key release) was confirmed to be working with this change applied. --- mark-yank-urls | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mark-yank-urls b/mark-yank-urls index 9a7259d..3fcbccc 100644 --- a/mark-yank-urls +++ b/mark-yank-urls @@ -100,7 +100,7 @@ sub on_button_release { while ($text =~ /($url_matcher)/g) { my ($url, $first, $last) = ($1, $-[1], $+[1]); - my $handler = get_handler{$url}; + my $handler = get_handler($url); if($handler and $first <= $col && $last >= $col) { $url =~ s/["']$//;