Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# What this fork adds:

## Twitter option

linkify('@mahemoff', { twitter: true })
becomes:
<a href="http://twitter.com/mahemoff" title="http://twitter.com/mahemoff">@mahemoff</a>

In doing so, there's some refactoring to simplify the way the uber URI regexp is built up.
There's also some basic support for Node and a tiny test (in CoffeeScript).

## Attribs option

linkify('a.com b.com', { attribs: { target: '_blank' } })
becomes:
<a href="http://a.com" title="http://a.com" target="_blank">a.com</a> <a href="http://b.com" title="http://b.com" target="_blank">b.com</a>

Note these links have target='_blank' as an extra attribute, in addition to the regular href and title.

# JavaScript Linkify: Process links in text! #
[http://benalman.com/projects/javascript-linkify/](http://benalman.com/projects/javascript-linkify/)

Expand Down
263 changes: 143 additions & 120 deletions ba-linkify.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,147 +68,170 @@
//
// (String) An HTML string containing links.

window.linkify = (function(){
var
SCHEME = "[a-z\\d.-]+://",
IPV4 = "(?:(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.){3}(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])",
HOSTNAME = "(?:(?:[^\\s!@#$%^&*()_=+[\\]{}\\\\|;:'\",.<>/?]+)\\.)+",
TLD = "(?:ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|coop|com|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp|xn--kgbechtv|xn--zckzah|ye|yt|yu|za|zm|zw)",
HOST_OR_IP = "(?:" + HOSTNAME + TLD + "|" + IPV4 + ")",
PATH = "(?:[;/][^#?<>\\s]*)?",
QUERY_FRAG = "(?:\\?[^#<>\\s]*)?(?:#[^<>\\s]*)?",
URI1 = "\\b" + SCHEME + "[^<>\\s]+",
URI2 = "\\b" + HOST_OR_IP + PATH + QUERY_FRAG + "(?!\\w)",

MAILTO = "mailto:",
EMAIL = "(?:" + MAILTO + ")?[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@" + HOST_OR_IP + QUERY_FRAG + "(?!\\w)",

URI_RE = new RegExp( "(?:" + URI1 + "|" + URI2 + "|" + EMAIL + ")", "ig" ),
SCHEME_RE = new RegExp( "^" + SCHEME, "i" ),

quotes = {
"'": "`",
'>': '<',
')': '(',
']': '[',
'}': '{',
'»': '«',
'›': '‹'
},

default_options = {
callback: function( text, href ) {
return href ? '<a href="' + encodeURI(href) + '" title="' + encodeURI(href) + '">' + text + '</a>' : text;
},
punct_regexp: /(?:[!?.,:;'"]|(?:&|&amp;)(?:lt|gt|quot|apos|raquo|laquo|rsaquo|lsaquo);)$/
};

return function( txt, options ) {
options = options || {};
(function() {

var linkify = (function(){

// Temp variables.
var arr,
i,
link,
href,
var
SCHEME = "[a-z\\d.-]+://",
IPV4 = "(?:(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.){3}(?:[0-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])",
HOSTNAME = "(?:(?:[^\\s!@#$%^&*()_=+[\\]{}\\\\|;:'\",.<>/?]+)\\.)+",
TLD = "(?:ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|coop|com|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp|xn--kgbechtv|xn--zckzah|ye|yt|yu|za|zm|zw)",
HOST_OR_IP = "(?:" + HOSTNAME + TLD + "|" + IPV4 + ")",
PATH = "(?:[;/][^#?<>\\s]*)?",
QUERY_FRAG = "(?:\\?[^#<>\\s]*)?(?:#[^<>\\s]*)?",
URI1 = "\\b" + SCHEME + "[^<>\\s]+",
URI2 = "\\b" + HOST_OR_IP + PATH + QUERY_FRAG + "(?!\\w)",

// Output HTML.
html = '',
MAILTO = "mailto:",
EMAIL = "(?:" + MAILTO + ")?[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@" + HOST_OR_IP + QUERY_FRAG + "(?!\\w)",

// Store text / link parts, in order, for re-combination.
parts = [],
URI_RE_LIST = [URI1, URI2, EMAIL],
SCHEME_RE = new RegExp( "^" + SCHEME, "i" ),

// Used for keeping track of indices in the text.
idx_prev,
idx_last,
idx,
link_last,
quotes = {
"'": "`",
'>': '<',
')': '(',
']': '[',
'}': '{',
'»': '«',
'›': '‹'
},

// Used for trimming trailing punctuation and quotes from links.
matches_begin,
matches_end,
quote_begin,
quote_end;

// Initialize options.
for ( i in default_options ) {
if ( options[ i ] === undefined ) {
options[ i ] = default_options[ i ];
}
}
default_options = {
callback: function( text, href, options ) {
return href ? '<a href="' + encodeURI(href) + '" title="' + encodeURI(href) + '"' +
buildAttribsString(options ? options.attribs : '') + '>' + text + '</a>' : text;
},
punct_regexp: /(?:[!?.,:;'"]|(?:&|&amp;)(?:lt|gt|quot|apos|raquo|laquo|rsaquo|lsaquo);)$/,
twitter: false,
attribs: {}
};

// Find links.
while ( arr = URI_RE.exec( txt ) ) {
return function( txt, options ) {
options = options || {};

if (options.twitter) URI_RE_LIST.push("@[a-zA-Z0-9_]+");

URI_RE = new RegExp( "(?:" + URI_RE_LIST.join("|") + ")", "ig" );

link = arr[0];
idx_last = URI_RE.lastIndex;
idx = idx_last - link.length;
// Temp variables.
var arr,
i,
link,
href,

// Output HTML.
html = '',

// Store text / link parts, in order, for re-combination.
parts = [],

// Used for keeping track of indices in the text.
idx_prev,
idx_last,
idx,
link_last,

// Used for trimming trailing punctuation and quotes from links.
matches_begin,
matches_end,
quote_begin,
quote_end;

// Not a link if preceded by certain characters.
if ( /[\/:]/.test( txt.charAt( idx - 1 ) ) ) {
continue;
// Initialize options.
for ( i in default_options ) {
if ( options[ i ] === undefined ) {
options[ i ] = default_options[ i ];
}
}

// Trim trailing punctuation.
do {
// If no changes are made, we don't want to loop forever!
link_last = link;
// Find links.
while ( arr = URI_RE.exec( txt ) ) {

quote_end = link.substr( -1 )
quote_begin = quotes[ quote_end ];
link = arr[0];
idx_last = URI_RE.lastIndex;
idx = idx_last - link.length;

// Ending quote character?
if ( quote_begin ) {
matches_begin = link.match( new RegExp( '\\' + quote_begin + '(?!$)', 'g' ) );
matches_end = link.match( new RegExp( '\\' + quote_end, 'g' ) );
// Not a link if preceded by certain characters.
if ( /[\/:]/.test( txt.charAt( idx - 1 ) ) ) {
continue;
}

// Trim trailing punctuation.
do {
// If no changes are made, we don't want to loop forever!
link_last = link;

quote_end = link.substr( -1 )
quote_begin = quotes[ quote_end ];

// Ending quote character?
if ( quote_begin ) {
matches_begin = link.match( new RegExp( '\\' + quote_begin + '(?!$)', 'g' ) );
matches_end = link.match( new RegExp( '\\' + quote_end, 'g' ) );

// If quotes are unbalanced, remove trailing quote character.
if ( ( matches_begin ? matches_begin.length : 0 ) < ( matches_end ? matches_end.length : 0 ) ) {
link = link.substr( 0, link.length - 1 );
idx_last--;
}
}

// If quotes are unbalanced, remove trailing quote character.
if ( ( matches_begin ? matches_begin.length : 0 ) < ( matches_end ? matches_end.length : 0 ) ) {
link = link.substr( 0, link.length - 1 );
idx_last--;
// Ending non-quote punctuation character?
if ( options.punct_regexp ) {
link = link.replace( options.punct_regexp, function(a){
idx_last -= a.length;
return '';
});
}
} while ( link.length && link !== link_last );

href = link;

// Add appropriate protocol to naked links.
if (options.twitter && href.indexOf( '@' ) == 0)
href = 'http://twitter.com/' + href.substr(1);
else if ( !SCHEME_RE.test( href ) ) {
href = ( href.indexOf( '@' ) !== -1 ? ( !href.indexOf( MAILTO ) ? '' : MAILTO )
: !href.indexOf( 'irc.' ) ? 'irc://'
: !href.indexOf( 'ftp.' ) ? 'ftp://'
: 'http://' )
+ href;
}

// Ending non-quote punctuation character?
if ( options.punct_regexp ) {
link = link.replace( options.punct_regexp, function(a){
idx_last -= a.length;
return '';
});
// Push preceding non-link text onto the array.
if ( idx_prev != idx ) {
parts.push([ txt.slice( idx_prev, idx ), null ]);
idx_prev = idx_last;
}
} while ( link.length && link !== link_last );

href = link;

// Add appropriate protocol to naked links.
if ( !SCHEME_RE.test( href ) ) {
href = ( href.indexOf( '@' ) !== -1 ? ( !href.indexOf( MAILTO ) ? '' : MAILTO )
: !href.indexOf( 'irc.' ) ? 'irc://'
: !href.indexOf( 'ftp.' ) ? 'ftp://'
: 'http://' )
+ href;
}

// Push massaged link onto the array
parts.push([ link, href ]);
};

// Push preceding non-link text onto the array.
if ( idx_prev != idx ) {
parts.push([ txt.slice( idx_prev, idx ) ]);
idx_prev = idx_last;
// Push remaining non-link text onto the array.
parts.push([ txt.substr( idx_prev ), null ]);

// Process the array items.
for ( i = 0; i < parts.length; i++ ) {
html += options.callback.apply( null, parts[i].concat(options) );
}

// Push massaged link onto the array
parts.push([ link, href ]);
// In case of catastrophic failure, return the original text;
return html || txt;
};

// Push remaining non-link text onto the array.
parts.push([ txt.substr( idx_prev ) ]);

// Process the array items.
for ( i = 0; i < parts.length; i++ ) {
html += options.callback.apply( window, parts[i] );

/**
 * Serialize a map of extra HTML attributes into a string that can be appended
 * directly after the last attribute of an opening tag.
 *
 * Each own enumerable property becomes ` key="value"`. The result therefore
 * starts with a single space when at least one attribute is emitted and is the
 * empty string otherwise.
 *
 * @param {Object} attribs Attribute name -> value map. Anything that is not a
 *     non-null object (e.g. '', undefined) yields ''.
 * @returns {string} e.g. ' target="_blank" rel="nofollow"', or ''.
 */
function buildAttribsString(attribs) {
  var pieces = [];
  var key;
  var value;

  // Strings and other primitives would otherwise be walked index by index.
  if (attribs === null || typeof attribs !== 'object') {
    return '';
  }

  for (key in attribs) {
    // Ignore enumerable properties inherited through the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(attribs, key)) {
      continue;
    }

    value = attribs[key];

    // A missing value would otherwise render as the text "undefined"/"null".
    if (value === undefined || value === null) {
      continue;
    }

    // Values sit inside double quotes, so a raw '"' would terminate the
    // attribute early. Only '"' is escaped so that callers passing values with
    // pre-encoded entities (e.g. '&amp;') are not double-encoded.
    pieces.push(key + '="' + String(value).replace(/"/g, '&quot;') + '"');
  }

  return pieces.length ? ' ' + pieces.join(' ') : '';
}

// In case of catastrophic failure, return the original text;
return html || txt;
};
})();

(typeof(window)=='undefined') ? module.exports = linkify : window.linkify = linkify;

})();
6 changes: 6 additions & 0 deletions test/test.coffee
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Smoke test: prints linkify's output for a few sample inputs for manual inspection.
linkify = require('../ba-linkify.js')

# No options passed.
console.log(linkify('abc http://def.com @cowboy ghijk good.com @mahemoff'))

# twitter option enabled.
console.log(linkify('@mahemoff', { twitter: true }))
console.log(linkify('abc http://def.com @cowboy ghijk irc.freenode.org good.com @mahemoff', { twitter: true }))

# attribs option: extra attributes for the generated links.
console.log(linkify('http://google.com', { attribs: { target: '_blank' } }))
console.log(linkify('a.com b.com', { attribs: { target: '_blank' } }))