// xregexp 1.5.1 // (c) 2007-2012 steven levithan // mit license // // provides an augmented, extensible, cross-browser implementation of regular expressions, // including support for additional syntax, flags, and methods var xregexp; if (xregexp) { // avoid running twice, since that would break references to native globals throw error("can't load xregexp twice in the same frame"); } // run within an anonymous function to protect variables and avoid new globals (function (undefined) { //--------------------------------- // constructor //--------------------------------- // accepts a pattern and flags; returns a new, extended `regexp` object. differs from a native // regular expression in that additional syntax and flags are supported and cross-browser // syntax inconsistencies are ameliorated. `xregexp(/regex/)` clones an existing regex and // converts to type xregexp xregexp = function (pattern, flags) { var output = [], currscope = xregexp.outside_class, pos = 0, context, tokenresult, match, chr, regex; if (xregexp.isregexp(pattern)) { if (flags !== undefined) throw typeerror("can't supply flags when constructing one regexp from another"); return clone(pattern); } // tokens become part of the regex construction process, so protect against infinite // recursion when an xregexp is constructed within a token handler or trigger if (isinsideconstructor) throw error("can't call the xregexp constructor within token definition functions"); flags = flags || ""; context = { // `this` object for custom tokens hasnamedcapture: false, capturenames: [], hasflag: function (flag) {return flags.indexof(flag) > -1;}, setflag: function (flag) {flags += flag;} }; while (pos < pattern.length) { // check for custom tokens at the current position tokenresult = runtokens(pattern, pos, currscope, context); if (tokenresult) { output.push(tokenresult.output); pos += (tokenresult.match[0].length || 1); } else { // check for native multicharacter metasequences (excluding character classes) 
at // the current position if (match = nativ.exec.call(nativetokens[currscope], pattern.slice(pos))) { output.push(match[0]); pos += match[0].length; } else { chr = pattern.charat(pos); if (chr === "[") currscope = xregexp.inside_class; else if (chr === "]") currscope = xregexp.outside_class; // advance position one character output.push(chr); pos++; } } } regex = regexp(output.join(""), nativ.replace.call(flags, flagclip, "")); regex._xregexp = { source: pattern, capturenames: context.hasnamedcapture ? context.capturenames : null }; return regex; }; //--------------------------------- // public properties //--------------------------------- xregexp.version = "1.5.1"; // token scope bitflags xregexp.inside_class = 1; xregexp.outside_class = 2; //--------------------------------- // private variables //--------------------------------- var replacementtoken = /\$(?:(\d\d?|[$&`'])|{([$\w]+)})/g, flagclip = /[^gimy]+|([\s\s])(?=[\s\s]*\1)/g, // nonnative and duplicate flags quantifier = /^(?:[?*+]|{\d+(?:,\d*)?})\??/, isinsideconstructor = false, tokens = [], // copy native globals for reference ("native" is an es3 reserved keyword) nativ = { exec: regexp.prototype.exec, test: regexp.prototype.test, match: string.prototype.match, replace: string.prototype.replace, split: string.prototype.split }, compliantexecnpcg = nativ.exec.call(/()??/, "")[1] === undefined, // check `exec` handling of nonparticipating capturing groups compliantlastindexincrement = function () { var x = /^/g; nativ.test.call(x, ""); return !x.lastindex; }(), hasnativey = regexp.prototype.sticky !== undefined, nativetokens = {}; // `nativetokens` match native multicharacter metasequences only (including deprecated octals, // excluding character classes) nativetokens[xregexp.inside_class] = /^(?:\\(?:[0-3][0-7]{0,2}|[4-7][0-7]?|x[\da-fa-f]{2}|u[\da-fa-f]{4}|c[a-za-z]|[\s\s]))/; nativetokens[xregexp.outside_class] = 
/^(?:\\(?:0(?:[0-3][0-7]{0,2}|[4-7][0-7]?)?|[1-9]\d*|x[\da-fa-f]{2}|u[\da-fa-f]{4}|c[a-za-z]|[\s\s])|\(\?[:=!]|[?*+]\?|{\d+(?:,\d*)?}\??)/; //--------------------------------- // public methods //--------------------------------- // lets you extend or change xregexp syntax and create custom flags. this is used internally by // the xregexp library and can be used to create xregexp plugins. this function is intended for // users with advanced knowledge of javascript's regular expression syntax and behavior. it can // be disabled by `xregexp.freezetokens` xregexp.addtoken = function (regex, handler, scope, trigger) { tokens.push({ pattern: clone(regex, "g" + (hasnativey ? "y" : "")), handler: handler, scope: scope || xregexp.outside_class, trigger: trigger || null }); }; // accepts a pattern and flags; returns an extended `regexp` object. if the pattern and flag // combination has previously been cached, the cached copy is returned; otherwise the newly // created regex is cached xregexp.cache = function (pattern, flags) { var key = pattern + "/" + (flags || ""); return xregexp.cache[key] || (xregexp.cache[key] = xregexp(pattern, flags)); }; // accepts a `regexp` instance; returns a copy with the `/g` flag set. the copy has a fresh // `lastindex` (set to zero). if you want to copy a regex without forcing the `global` // property, use `xregexp(regex)`. do not use `regexp(regex)` because it will not preserve // special properties required for named capture xregexp.copyasglobal = function (regex) { return clone(regex, "g"); }; // accepts a string; returns the string with regex metacharacters escaped. the returned string // can safely be used at any point within a regex to match the provided literal string. escaped // characters are [ ] { } ( ) * + ? - . 
// , \ ^ $ | # and whitespace
//
// `xregexp.escape(str)`: returns `str` with each regex metacharacter (and each whitespace
// character) prefixed by a backslash.
xregexp.escape = function (str) {
    return str.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
};

// Accepts a string to search, regex to search with, position to start the search within the
// string (default: 0), and an optional boolean indicating whether matches must start at-or-
// after the position or at the specified position only. This function ignores the `lastIndex`
// of the provided regex in its own handling, but updates the property for compatibility.
// Returns the match array, or `null` when there is no (suitably positioned) match.
xregexp.execat = function (str, regex, pos, anchored) {
    // Work on a global (and, where supported, sticky) copy so the caller's regex is not disturbed
    var r2 = clone(regex, "g" + ((anchored && hasnativey) ? "y" : "")),
        match;
    r2.lastIndex = pos = pos || 0;
    match = r2.exec(str); // Run the altered `exec` (required for `lastIndex` fix, etc.)
    // Without native sticky support, emulate anchoring by rejecting matches that start later
    if (anchored && match && match.index !== pos)
        match = null;
    if (regex.global)
        regex.lastIndex = match ? r2.lastIndex : 0;
    return match;
};

// Breaks the unrestorable link to xregexp's private list of tokens, thereby preventing
// syntax and flag changes. Should be run after xregexp and any plugins are loaded.
// After this call, `xregexp.addtoken` throws an `Error`.
xregexp.freezetokens = function () {
    xregexp.addtoken = function () {
        throw Error("can't run addtoken after freezetokens");
    };
};

// Accepts any value; returns a boolean indicating whether the argument is a `RegExp` object.
// Note that this is also `true` for regex literals and regexes created by the `xregexp`
// constructor. This works correctly for variables created in another frame, when `instanceof`
// and `constructor` checks would fail to work as intended.
xregexp.isregexp = function (o) {
    return Object.prototype.toString.call(o) === "[object RegExp]";
};

// Executes `callback` once per match within `str`.
// Provides a simpler and cleaner way to
// iterate over regex matches compared to the traditional approaches of subverting
// `String.prototype.replace` or repeatedly calling `exec` within a `while` loop.
// `callback` is invoked with `context` as `this` and the arguments
// (match, zero-based match index, str, regex).
xregexp.iterate = function (str, regex, callback, context) {
    var r2 = clone(regex, "g"),
        i = -1, match;
    while (match = r2.exec(str)) { // Run the altered `exec` (required for `lastIndex` fix, etc.)
        if (regex.global)
            regex.lastIndex = r2.lastIndex; // Doing this to follow expectations if `lastIndex` is checked within `callback`
        callback.call(context, match, ++i, str, regex);
        // Step past zero-length matches so the loop always terminates
        if (r2.lastIndex === match.index)
            r2.lastIndex++;
    }
    if (regex.global)
        regex.lastIndex = 0;
};

// Accepts a string and an array of regexes; returns the result of using each successive regex
// to search within the matches of the previous regex. The array of regexes can also contain
// objects with `regex` and `backref` properties, in which case the named or numbered back-
// references specified are passed forward to the next regex or returned. E.g.:
//     var xregexpimgfilenames = xregexp.matchchain(html, [
//         {regex: /<img\b([^>]+)>/i, backref: 1}, // <img> tag attributes
//         {regex: xregexp('(?ix) \\s src=" (?<src> [^"]+ )'), backref: "src"}, // src attribute values
//         {regex: xregexp("^http://xregexp\\.com(/[^#?]+)", "i"), backref: 1}, // xregexp.com paths
//         /[^\/]+$/ // filenames (strip directory paths)
//     ]);
xregexp.matchchain = function (str, chain) {
    return function recursechain (values, level) {
        // Normalize a bare regex into the `{regex: ...}` object form
        var item = chain[level].regex ? chain[level] : {regex: chain[level]},
            regex = clone(item.regex, "g"),
            matches = [], i;
        for (i = 0; i < values.length; i++) {
            xregexp.iterate(values[i], regex, function (match) {
                matches.push(item.backref ? (match[item.backref] || "") : match[0]);
            });
        }
        // Stop at the last chain level, or as soon as a level produces no matches
        return ((level === chain.length - 1) || !matches.length) ?
            matches : recursechain(matches, level + 1);
    }([str], 0);
};


//---------------------------------
//  new RegExp prototype methods
//---------------------------------

// Accepts a context object and arguments array; returns the result of calling `exec` with the
// first value in the arguments array. The context is ignored but is accepted for congruity
// with `Function.prototype.apply`.
RegExp.prototype.apply = function (context, args) {
    return this.exec(args[0]);
};

// Accepts a context object and string; returns the result of calling `exec` with the provided
// string. The context is ignored but is accepted for congruity with `Function.prototype.call`.
RegExp.prototype.call = function (context, str) {
    return this.exec(str);
};


//---------------------------------
//  overriden native methods
//---------------------------------

// Adds named capture support (with backreferences returned as `result.name`), and fixes two
// cross-browser issues per ES3:
// - Captured values for nonparticipating capturing groups should be returned as `undefined`,
//   rather than the empty string.
// - `lastIndex` should not be incremented after zero-length matches.
RegExp.prototype.exec = function (str) {
    // Replacement for the native `exec`: delegates to the saved native method, then attaches
    // named-capture properties and applies the cross-browser fixes below. Returns the match
    // array or `null`, like the native method.
    var match, name, r2, origlastindex;
    if (!this.global)
        origlastindex = this.lastIndex;
    match = nativ.exec.apply(this, arguments);
    if (match) {
        // Fix browsers whose `exec` methods don't consistently return `undefined` for
        // nonparticipating capturing groups
        if (!compliantexecnpcg && match.length > 1 && indexof(match, "") > -1) {
            r2 = RegExp(this.source, nativ.replace.call(getnativeflags(this), "g", ""));
            // Using `str.slice(match.index)` rather than `match[0]` in case lookahead allowed
            // matching due to characters outside the match
            nativ.replace.call((str + "").slice(match.index), r2, function () {
                for (var i = 1; i < arguments.length - 2; i++) {
                    if (arguments[i] === undefined)
                        match[i] = undefined;
                }
            });
        }
        // Attach named capture properties
        if (this._xregexp && this._xregexp.capturenames) {
            for (var i = 1; i < match.length; i++) {
                name = this._xregexp.capturenames[i - 1];
                if (name)
                   match[name] = match[i];
            }
        }
        // Fix browsers that increment `lastIndex` after zero-length matches
        if (!compliantlastindexincrement && this.global && !match[0].length && (this.lastIndex > match.index))
            this.lastIndex--;
    }
    if (!this.global)
        this.lastIndex = origlastindex; // Fix IE, Opera bug (last tested IE 9.0.5, Opera 11.61 on Windows)
    return match;
};

// Fix browser bugs in native method. Returns a boolean like the native `test`.
RegExp.prototype.test = function (str) {
    // Use the native `exec` to skip some processing overhead, even though the altered
    // `exec` would take care of the `lastIndex` fixes
    var match, origlastindex;
    if (!this.global)
        origlastindex = this.lastIndex;
    match = nativ.exec.call(this, str);
    // Fix browsers that increment `lastIndex` after zero-length matches
    if (match && !compliantlastindexincrement && this.global && !match[0].length && (this.lastIndex > match.index))
        this.lastIndex--;
    if (!this.global)
        this.lastIndex = origlastindex; // Fix IE, Opera bug (last tested IE 9.0.5, Opera 11.61 on Windows)
    return !!match;
};

// Adds named capture support and fixes
// browser bugs in native method
String.prototype.match = function (regex) {
    if (!xregexp.isregexp(regex))
        regex = RegExp(regex); // Native `RegExp`
    if (regex.global) {
        var result = nativ.match.apply(this, arguments);
        regex.lastIndex = 0; // Fix IE bug
        return result;
    }
    return regex.exec(this); // Run the altered `exec`
};

// Adds support for `${n}` tokens for named and numbered backreferences in replacement text,
// and provides named backreferences to replacement functions as `arguments[0].name`. Also
// fixes cross-browser differences in replacement text syntax when performing a replacement
// using a nonregex search value, and the value of replacement regexes' `lastIndex` property
// during replacement iterations. Note that this doesn't support SpiderMonkey's proprietary
// third (`flags`) parameter
String.prototype.replace = function (search, replacement) {
    var isregex = xregexp.isregexp(search),
        capturenames, result, str, origlastindex;

    // There are too many combinations of search/replacement types/values and browser bugs that
    // preclude passing to native `replace`, so don't try
    //if (...)
    //    return nativ.replace.apply(this, arguments);

    if (isregex) {
        if (search._xregexp)
            capturenames = search._xregexp.capturenames; // Array or `null`
        if (!search.global)
            origlastindex = search.lastIndex;
    } else {
        search = search + ""; // Type conversion
    }

    if (Object.prototype.toString.call(replacement) === "[object Function]") {
        result = nativ.replace.call(this + "", search, function () {
            if (capturenames) {
                // Change the `arguments[0]` string primitive to a String object which can store properties
                arguments[0] = new String(arguments[0]);
                // Store named backreferences on `arguments[0]`
                for (var i = 0; i < capturenames.length; i++) {
                    if (capturenames[i])
                        arguments[0][capturenames[i]] = arguments[i + 1];
                }
            }
            // Update `lastIndex` before calling `replacement` (fix browsers)
            if (isregex && search.global)
                search.lastIndex = arguments[arguments.length - 2] + arguments[0].length;
            return replacement.apply(null, arguments);
        });
    } else {
        str = this + ""; // Type conversion, so `args[args.length - 1]` will be a string (given nonstring `this`)
        result = nativ.replace.call(str, search, function () {
            var args = arguments; // Keep this function's `arguments` available through closure
            return nativ.replace.call(replacement + "", replacementtoken, function ($0, $1, $2) {
                // Numbered backreference (without delimiters) or special variable
                if ($1) {
                    switch ($1) {
                        case "$": return "$";
                        case "&": return args[0];
                        case "`": return args[args.length - 1].slice(0, args[args.length - 2]);
                        case "'": return args[args.length - 1].slice(args[args.length - 2] + args[0].length);
                        // Numbered backreference
                        default:
                            // What does "$10" mean?
                            // - Backreference 10, if 10 or more capturing groups exist
                            // - Backreference 1 followed by "0", if 1-9 capturing groups exist
                            // - Otherwise, it's the string "$10"
                            // Also note:
                            // - Backreferences cannot be more than two digits (enforced by `replacementtoken`)
                            // - "$01" is equivalent to "$1" if a capturing group exists, otherwise it's the string "$01"
                            // - There is no "$0" token ("$&" is the entire match)
                            var literalnumbers = "";
                            $1 = +$1; // Type conversion; drop leading zero
                            if (!$1) // `$1` was "0" or "00"
                                return $0;
                            while ($1 > args.length - 3) {
                                literalnumbers = String.prototype.slice.call($1, -1) + literalnumbers;
                                $1 = Math.floor($1 / 10); // Drop the last digit
                            }
                            return ($1 ? args[$1] || "" : "$") + literalnumbers;
                    }
                // Named backreference or delimited numbered backreference
                } else {
                    // What does "${n}" mean?
                    // - Backreference to numbered capture n. Two differences from "$n":
                    //   - n can be more than two digits
                    //   - Backreference 0 is allowed, and is the entire match
                    // - Backreference to named capture n, if it exists and is not a number overridden by numbered capture
                    // - Otherwise, it's the string "${n}"
                    var n = +$2; // Type conversion; drop leading zeros
                    if (n <= args.length - 3)
                        return args[n];
                    n = capturenames ? indexof(capturenames, $2) : -1;
                    return n > -1 ? args[n + 1] : $0;
                }
            });
        });
    }

    if (isregex) {
        if (search.global)
            search.lastIndex = 0; // Fix IE, Safari bug (last tested IE 9.0.5, Safari 5.1.2 on Windows)
        else
            search.lastIndex = origlastindex; // Fix IE, Opera bug (last tested IE 9.0.5, Opera 11.61 on Windows)
    }

    return result;
};

// A consistent cross-browser, ES3 compliant `split`
String.prototype.split = function (s /* separator */, limit) {
    // If separator `s` is not a regex, use the native `split`
    if (!xregexp.isregexp(s))
        return nativ.split.apply(this, arguments);

    var str = this + "", // Type conversion
        output = [],
        lastlastindex = 0,
        match, lastlength;

    // Behavior for `limit`: if it's...
    // - `undefined`: no limit
    // - `NaN` or zero: return an empty array
    // - A positive number: use `Math.floor(limit)`
    // - A negative number: no limit
    // - Other: type-convert, then use the above rules
    if (limit === undefined || +limit < 0) {
        limit = Infinity;
    } else {
        limit = Math.floor(+limit);
        if (!limit)
            return [];
    }

    // This is required if not `s.global`, and it avoids needing to set `s.lastIndex` to zero
    // and restore it to its original value when we're done using the regex
    s = xregexp.copyasglobal(s);

    while (match = s.exec(str)) { // Run the altered `exec` (required for `lastIndex` fix, etc.)
        if (s.lastIndex > lastlastindex) {
            output.push(str.slice(lastlastindex, match.index));

            // Captured groups are spliced into the output between the split pieces
            if (match.length > 1 && match.index < str.length)
                Array.prototype.push.apply(output, match.slice(1));

            lastlength = match[0].length;
            lastlastindex = s.lastIndex;

            if (output.length >= limit)
                break;
        }

        // Step past zero-length matches so the loop always terminates
        if (s.lastIndex === match.index)
            s.lastIndex++;
    }

    if (lastlastindex === str.length) {
        if (!nativ.test.call(s, "") || lastlength)
            output.push("");
    } else {
        output.push(str.slice(lastlastindex));
    }

    return output.length > limit ? output.slice(0, limit) : output;
};


//---------------------------------
//  private helper functions
//---------------------------------

// Supporting function for `xregexp`, `xregexp.copyasglobal`, etc. Returns a copy of a `RegExp`
// instance with a fresh `lastIndex` (set to zero), preserving properties required for named
// capture. Also allows adding new flags in the process of copying the regex.
// Throws `TypeError` when `regex` is not a `RegExp`.
function clone (regex, additionalflags) {
    if (!xregexp.isregexp(regex))
        throw TypeError("type regexp expected");
    var x = regex._xregexp;
    regex = xregexp(regex.source, getnativeflags(regex) + (additionalflags || ""));
    if (x) {
        regex._xregexp = {
            source: x.source,
            // Copy the array so the clone does not share it with the original
            capturenames: x.capturenames ? x.capturenames.slice(0) : null
        };
    }
    return regex;
}

// Returns the flags of `regex` as a string, built from its boolean flag properties.
function getnativeflags (regex) {
    return (regex.global     ? "g" : "") +
           (regex.ignoreCase ? "i" : "") +
           (regex.multiline  ? "m" : "") +
           (regex.extended   ? "x" : "") + // Proposed for ES4; included in AS3
           (regex.sticky     ? "y" : "");
}

// Tries every registered token (most recently added first) at `index` within `pattern`.
// Returns `{output, match}` for the first token that matches exactly at `index` and whose
// scope and trigger allow it; returns `undefined` when no token applies.
function runtokens (pattern, index, scope, context) {
    var i = tokens.length,
        result, match, t;
    // Protect against constructing xregexps within token handler and trigger functions
    isinsideconstructor = true;
    // Must reset `isinsideconstructor`, even if a `trigger` or `handler` throws
    try {
        while (i--) { // Run in reverse order
            t = tokens[i];
            if ((scope & t.scope) && (!t.trigger || t.trigger.call(context))) {
                t.pattern.lastIndex = index;
                match = t.pattern.exec(pattern); // Running the altered `exec` here allows use of named backreferences, etc.
                if (match && match.index === index) {
                    result = {
                        output: t.handler.call(context, match, scope),
                        match: match
                    };
                    break;
                }
            }
        }
    } catch (err) {
        throw err;
    } finally {
        isinsideconstructor = false;
    }
    return result;
}

// Returns the index of the first element of `array` strictly equal to `item`, searching
// from `from` (default 0), or -1 when there is none.
function indexof (array, item, from) {
    if (Array.prototype.indexOf) // Use the native array method if available
        return array.indexOf(item, from);
    for (var i = from || 0; i < array.length; i++) {
        if (array[i] === item)
            return i;
    }
    return -1;
}


//---------------------------------
//  built-in tokens
//---------------------------------

// Augment xregexp's regular expression syntax and flags. Note that when adding tokens, the
// third (`scope`) argument defaults to `xregexp.outside_class`

// Comment pattern: (?# )
xregexp.addtoken(
    /\(\?#[^)]*\)/,
    function (match) {
        // Keep tokens separated unless the following token is a quantifier
        return nativ.test.call(quantifier, match.input.slice(match.index + match[0].length)) ? "" : "(?:)";
    }
);

// Capturing group (match the opening parenthesis only).
// Required for support of named capturing groups
xregexp.addtoken(
    /\((?!\?)/,
    function () {
        this.capturenames.push(null);
        return "(";
    }
);

// Named capturing group (match the opening delimiter only): (?<name>
xregexp.addtoken( /\(\?<([$\w]+)>/, function (match) { this.capturenames.push(match[1]); this.hasnamedcapture = true; return "("; } ); // named backreference: \k xregexp.addtoken( /\\k<([\w$]+)>/, function (match) { var index = indexof(this.capturenames, match[1]); // keep backreferences separate from subsequent literal numbers. preserve back- // references to named groups that are undefined at this point as literal strings return index > -1 ? "\\" + (index + 1) + (isnan(match.input.charat(match.index + match[0].length)) ? "" : "(?:)") : match[0]; } ); // empty character class: [] or [^] xregexp.addtoken( /\[\^?]/, function (match) { // for cross-browser compatibility with es3, convert [] to \b\b and [^] to [\s\s]. // (?!) should work like \b\b, but is unreliable in firefox return match[0] === "[]" ? "\\b\\b" : "[\\s\\s]"; } ); // mode modifier at the start of the pattern only, with any combination of flags imsx: (?imsx) // does not support x(?i), (?-i), (?i-m), (?i: ), (?i)(?m), etc. xregexp.addtoken( /^\(\?([imsx]+)\)/, function (match) { this.setflag(match[1]); return ""; } ); // whitespace and comments, in free-spacing (aka extended) mode only xregexp.addtoken( /(?:\s+|#.*)+/, function (match) { // keep tokens separated unless the following token is a quantifier return nativ.test.call(quantifier, match.input.slice(match.index + match[0].length)) ? 
"" : "(?:)"; }, xregexp.outside_class, function () {return this.hasflag("x");} ); // dot, in dotall (aka singleline) mode only xregexp.addtoken( /\./, function () {return "[\\s\\s]";}, xregexp.outside_class, function () {return this.hasflag("s");} ); //--------------------------------- // backward compatibility //--------------------------------- // uncomment the following block for compatibility with xregexp 1.0-1.2: /* xregexp.matchwithinchain = xregexp.matchchain; regexp.prototype.addflags = function (s) {return clone(this, s);}; regexp.prototype.execall = function (s) {var r = []; xregexp.iterate(s, this, function (m) {r.push(m);}); return r;}; regexp.prototype.foreachexec = function (s, f, c) {return xregexp.iterate(s, this, f, c);}; regexp.prototype.validate = function (s) {var r = regexp("^(?:" + this.source + ")$(?!\\s)", getnativeflags(this)); if (this.global) this.lastindex = 0; return s.search(r) === 0;}; */ })(); // // begin anonymous function. this is used to contain local scope variables without polutting global scope. // if (typeof(syntaxhighlighter) == 'undefined') var syntaxhighlighter = function() { // commonjs if (typeof(require) != 'undefined' && typeof(xregexp) == 'undefined') { xregexp = require('xregexp').xregexp; } // shortcut object which will be assigned to the syntaxhighlighter variable. // this is a shorthand for local reference in order to avoid long namespace // references to syntaxhighlighter.whatever... var sh = { defaults : { /** additional css class names to be added to highlighter elements. */ 'class-name' : '', /** first line number. */ 'first-line' : 1, /** * pads line numbers. possible values are: * * false - don't pad line numbers. * true - automaticaly pad numbers with minimum required number of leading zeroes. * [int] - length up to which pad line numbers. */ 'pad-line-numbers' : false, /** lines to highlight. */ 'highlight' : false, /** title to be displayed above the code block. 
*/ 'title' : null, /** enables or disables smart tabs. */ 'smart-tabs' : true, /** gets or sets tab size. */ 'tab-size' : 4, /** enables or disables gutter. */ 'gutter' : true, /** enables or disables toolbar. */ 'toolbar' : true, /** enables quick code copy and paste from double click. */ 'quick-code' : true, /** forces code view to be collapsed. */ 'collapse' : false, /** enables or disables automatic links. */ 'auto-links' : false, /** gets or sets light mode. equavalent to turning off gutter and toolbar. */ 'light' : false, 'unindent' : true, 'html-script' : false }, config : { space : ' ', /** enables use of