diff --git a/README.markdown b/README.markdown index 1a20a7e..cfb5136 100644 --- a/README.markdown +++ b/README.markdown @@ -1,3 +1,22 @@ +# What this fork adds: + +## Twitter option + + linkify('@mahemoff', { twitter: true }) +becomes: + @mahemoff + +In doing so, there's some refactoring to simplify the way the uber URI regexp is built up. +There's also some basic support for Node and a tiny test (in CoffeeScript). + +## Attribs option + + linkify('a.com b.com', { attribs: { target: '_blank' } }) +becomes: + a.com b.com + +Note these links have target='_blank' as an extra attribute, in addition to the regular href and title. + # JavaScript Linkify: Process links in text! # [http://benalman.com/projects/javascript-linkify/](http://benalman.com/projects/javascript-linkify/) diff --git a/ba-linkify.js b/ba-linkify.js index 4bf27ef..d92ad62 100644 --- a/ba-linkify.js +++ b/ba-linkify.js @@ -68,147 +68,170 @@ // // (String) An HTML string containing links. -window.linkify = (function(){ - var - SCHEME = "[a-z\\d.-]+://", - IPV4 = "(?:(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.){3}(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])", - HOSTNAME = "(?:(?:[^\\s!@#$%^&*()_=+[\\]{}\\\\|;:'\",.<>/?]+)\\.)+", - TLD = 
"(?:ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|coop|com|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp|xn--kgbechtv|xn--zckzah|ye|yt|yu|za|zm|zw)", - HOST_OR_IP = "(?:" + HOSTNAME + TLD + "|" + IPV4 + ")", - PATH = "(?:[;/][^#?<>\\s]*)?", - QUERY_FRAG = "(?:\\?[^#<>\\s]*)?(?:#[^<>\\s]*)?", - URI1 = "\\b" + SCHEME + "[^<>\\s]+", - URI2 = "\\b" + HOST_OR_IP + PATH + QUERY_FRAG + "(?!\\w)", - - MAILTO = "mailto:", - EMAIL = "(?:" + MAILTO + ")?[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@" + HOST_OR_IP + QUERY_FRAG + "(?!\\w)", - - URI_RE = new RegExp( "(?:" + URI1 + "|" + URI2 + "|" + EMAIL + ")", "ig" ), - SCHEME_RE = new RegExp( "^" + SCHEME, "i" ), - - quotes = { - "'": "`", - '>': '<', - ')': '(', - ']': '[', - '}': '{', - '»': '«', - '›': '‹' - }, - - default_options = { - callback: function( text, href ) { - return href ? '' + text + '' : text; - }, - punct_regexp: /(?:[!?.,:;'"]|(?:&|&)(?:lt|gt|quot|apos|raquo|laquo|rsaquo|lsaquo);)$/ - }; - - return function( txt, options ) { - options = options || {}; +(function() { + + var linkify = (function(){ - // Temp variables. 
- var arr, - i, - link, - href, + var + SCHEME = "[a-z\\d.-]+://", + IPV4 = "(?:(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.){3}(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])", + HOSTNAME = "(?:(?:[^\\s!@#$%^&*()_=+[\\]{}\\\\|;:'\",.<>/?]+)\\.)+", + TLD = "(?:ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|coop|com|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp|xn--kgbechtv|xn--zckzah|ye|yt|yu|za|zm|zw)", + HOST_OR_IP = "(?:" + HOSTNAME + TLD + "|" + IPV4 + ")", + PATH = "(?:[;/][^#?<>\\s]*)?", + QUERY_FRAG = "(?:\\?[^#<>\\s]*)?(?:#[^<>\\s]*)?", + URI1 = "\\b" + SCHEME + "[^<>\\s]+", + URI2 = "\\b" + HOST_OR_IP + PATH + QUERY_FRAG + "(?!\\w)", - // Output HTML. - html = '', + MAILTO = "mailto:", + EMAIL = "(?:" + MAILTO + ")?[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@" + HOST_OR_IP + QUERY_FRAG + "(?!\\w)", - // Store text / link parts, in order, for re-combination. - parts = [], + URI_RE_LIST = [URI1, URI2, EMAIL], + SCHEME_RE = new RegExp( "^" + SCHEME, "i" ), - // Used for keeping track of indices in the text. 
- idx_prev, - idx_last, - idx, - link_last, + quotes = { + "'": "`", + '>': '<', + ')': '(', + ']': '[', + '}': '{', + '»': '«', + '›': '‹' + }, - // Used for trimming trailing punctuation and quotes from links. - matches_begin, - matches_end, - quote_begin, - quote_end; - - // Initialize options. - for ( i in default_options ) { - if ( options[ i ] === undefined ) { - options[ i ] = default_options[ i ]; - } - } + default_options = { + callback: function( text, href, options ) { + return href ? '' + text + '' : text; + }, + punct_regexp: /(?:[!?.,:;'"]|(?:&|&)(?:lt|gt|quot|apos|raquo|laquo|rsaquo|lsaquo);)$/, + twitter: false, + attribs: {} + }; - // Find links. - while ( arr = URI_RE.exec( txt ) ) { + return function( txt, options ) { + options = options || {}; + + if (options.twitter) URI_RE_LIST.push("@[a-zA-Z0-9_]+"); + + URI_RE = new RegExp( "(?:" + URI_RE_LIST.join("|") + ")", "ig" ); - link = arr[0]; - idx_last = URI_RE.lastIndex; - idx = idx_last - link.length; + // Temp variables. + var arr, + i, + link, + href, + + // Output HTML. + html = '', + + // Store text / link parts, in order, for re-combination. + parts = [], + + // Used for keeping track of indices in the text. + idx_prev, + idx_last, + idx, + link_last, + + // Used for trimming trailing punctuation and quotes from links. + matches_begin, + matches_end, + quote_begin, + quote_end; - // Not a link if preceded by certain characters. - if ( /[\/:]/.test( txt.charAt( idx - 1 ) ) ) { - continue; + // Initialize options. + for ( i in default_options ) { + if ( options[ i ] === undefined ) { + options[ i ] = default_options[ i ]; + } } - // Trim trailing punctuation. - do { - // If no changes are made, we don't want to loop forever! - link_last = link; + // Find links. + while ( arr = URI_RE.exec( txt ) ) { - quote_end = link.substr( -1 ) - quote_begin = quotes[ quote_end ]; + link = arr[0]; + idx_last = URI_RE.lastIndex; + idx = idx_last - link.length; - // Ending quote character? 
- if ( quote_begin ) { - matches_begin = link.match( new RegExp( '\\' + quote_begin + '(?!$)', 'g' ) ); - matches_end = link.match( new RegExp( '\\' + quote_end, 'g' ) ); + // Not a link if preceded by certain characters. + if ( /[\/:]/.test( txt.charAt( idx - 1 ) ) ) { + continue; + } + + // Trim trailing punctuation. + do { + // If no changes are made, we don't want to loop forever! + link_last = link; + + quote_end = link.substr( -1 ) + quote_begin = quotes[ quote_end ]; + + // Ending quote character? + if ( quote_begin ) { + matches_begin = link.match( new RegExp( '\\' + quote_begin + '(?!$)', 'g' ) ); + matches_end = link.match( new RegExp( '\\' + quote_end, 'g' ) ); + + // If quotes are unbalanced, remove trailing quote character. + if ( ( matches_begin ? matches_begin.length : 0 ) < ( matches_end ? matches_end.length : 0 ) ) { + link = link.substr( 0, link.length - 1 ); + idx_last--; + } + } - // If quotes are unbalanced, remove trailing quote character. - if ( ( matches_begin ? matches_begin.length : 0 ) < ( matches_end ? matches_end.length : 0 ) ) { - link = link.substr( 0, link.length - 1 ); - idx_last--; + // Ending non-quote punctuation character? + if ( options.punct_regexp ) { + link = link.replace( options.punct_regexp, function(a){ + idx_last -= a.length; + return ''; + }); } + } while ( link.length && link !== link_last ); + + href = link; + + // Add appropriate protocol to naked links. + if (options.twitter && href.indexOf( '@' ) == 0) + href = 'http://twitter.com/' + href.substr(1); + else if ( !SCHEME_RE.test( href ) ) { + href = ( href.indexOf( '@' ) !== -1 ? ( !href.indexOf( MAILTO ) ? '' : MAILTO ) + : !href.indexOf( 'irc.' ) ? 'irc://' + : !href.indexOf( 'ftp.' ) ? 'ftp://' + : 'http://' ) + + href; } - // Ending non-quote punctuation character? - if ( options.punct_regexp ) { - link = link.replace( options.punct_regexp, function(a){ - idx_last -= a.length; - return ''; - }); + // Push preceding non-link text onto the array. 
+ if ( idx_prev != idx ) { + parts.push([ txt.slice( idx_prev, idx ), null ]); + idx_prev = idx_last; } - } while ( link.length && link !== link_last ); - - href = link; - - // Add appropriate protocol to naked links. - if ( !SCHEME_RE.test( href ) ) { - href = ( href.indexOf( '@' ) !== -1 ? ( !href.indexOf( MAILTO ) ? '' : MAILTO ) - : !href.indexOf( 'irc.' ) ? 'irc://' - : !href.indexOf( 'ftp.' ) ? 'ftp://' - : 'http://' ) - + href; - } + + // Push massaged link onto the array + parts.push([ link, href ]); + }; - // Push preceding non-link text onto the array. - if ( idx_prev != idx ) { - parts.push([ txt.slice( idx_prev, idx ) ]); - idx_prev = idx_last; + // Push remaining non-link text onto the array. + parts.push([ txt.substr( idx_prev ), null ]); + + // Process the array items. + for ( i = 0; i < parts.length; i++ ) { + html += options.callback.apply( null, parts[i].concat(options) ); } - // Push massaged link onto the array - parts.push([ link, href ]); + // In case of catastrophic failure, return the original text; + return html || txt; }; - - // Push remaining non-link text onto the array. - parts.push([ txt.substr( idx_prev ) ]); - - // Process the array items. - for ( i = 0; i < parts.length; i++ ) { - html += options.callback.apply( window, parts[i] ); + + function buildAttribsString(attribs) { + var s=" "; + for (var key in attribs) + s+= key + '="' + attribs[key] + '" '; + return s.replace(/ $/,'') } - // In case of catastrophic failure, return the original text; - return html || txt; - }; - + })(); + + (typeof(window)=='undefined') ? 
module.exports = linkify : window.linkify = linkify; + })(); diff --git a/test/test.coffee b/test/test.coffee new file mode 100644 index 0000000..7ee9f95 --- /dev/null +++ b/test/test.coffee @@ -0,0 +1,6 @@ +linkify = require '../ba-linkify.js' +console.log linkify 'abc http://def.com @cowboy ghijk good.com @mahemoff' +console.log linkify '@mahemoff', twitter: true +console.log linkify 'abc http://def.com @cowboy ghijk irc.freenode.org good.com @mahemoff', twitter: true +console.log linkify 'http://google.com', attribs: { target: '_blank' } +console.log linkify 'a.com b.com', attribs: { target: '_blank' }