--[[
A parser that approximates 8chan's markup:

Surround text with double single-quotes ('') to make text italic
Surround text with triple single-quotes (''') to make text bold
Surround text with double underscores (__) to make it underlined
Surround text with double equals signs (==) to make it a heading
Surround text with double asterisks (**) to make it spoilered
Surround text with double tildes (~~) to make it strike through
Begin a line with a greater-than (>) to make it >greentext
Begin a line with a less-than (<) to make it <pinktext
Surround text with forum-style [spoiler] and [/spoiler] tags as a second way to spoiler
Surround text with forum-style [code] and [/code] tags to make it preformatted and monospace
]]

local lpeg = require("lpeg")
lpeg.locale(lpeg)
local V,P,C,S,B,Cs = lpeg.V,lpeg.P,lpeg.C,lpeg.S,lpeg.B,lpeg.Cs

--Characters to escape in the body text, mapped to the HTML entity that
--replaces them. Each character must map to its entity (not to itself),
--otherwise user-supplied markup is copied into the output verbatim.
local escapes = {
  ["&"] = "&amp;",
  ["<"] = "&lt;",
  [">"] = "&gt;",
}

--Build a Lua pattern character class out of the keys of 'escapes'.
--NOTE: a key that is magic inside a character class (']', '^', '%', '-')
--would have to be %-escaped before being added here.
local esctbl = {}
for char,_ in pairs(escapes) do
  esctbl[#esctbl + 1] = char
end
local escapematch = string.format("([%s])",table.concat(esctbl))

--Replacement callback for gsub: returns the entity for one matched character,
--or the character itself when it has no entry in 'escapes'.
local function sanitize_item(capture)
  return escapes[capture] or capture
end

--Returns 'text' with every character listed in 'escapes' replaced by its
--entity. gsub makes a single pass, so the '&' of an entity that was just
--inserted is never escaped a second time.
local function sanitize(text)
  --The parentheses drop gsub's second result (the substitution count)
  return (string.gsub(text,escapematch,sanitize_item))
end

--Grammar

--Optional run of blanks, tabs and carriage returns
local space = S" \t\r"^0

--Every sequence that ends a run of plain text: the inline tag delimiters,
--the line break (alone or followed by a quote marker) and the opening
--forum-style tags.
local special =
  P"**" + P"''" + P"'''" + P"__" + P"==" + P"~~" +
  P"\n>" + P"\n<" + P"\n" +
  P"[code]" + P"[spoiler]"

--A run of plain text up to the next special sequence, HTML-escaped
local word = Cs((1 - special)^1) * space / sanitize

--Generates a pattern that formats text inside matching 'seq' tags with format
--ex wrap("^^",[[<sup>%s</sup>]])
--will wrap text "5^^3^^" as "5<sup>3</sup>"
--The third argument is necessary to stop exponential backtracking. This removes
--a DOS vulnerability: If tags are nested really deep, the parser can lock up,
--potentially locking up all processes.
--[[ Earlier version of wrap, kept for reference. It takes no third argument,
so nothing stops a tag from being tried inside itself:
local function wrap(seq,format)
  return P(seq) * Cs(((V"marked" + word + P"\n"))^1) * P(seq) / function(a)
    return string.format(format,a)
  end
end
]]

--- Builds the pattern for one symmetric inline tag.
-- @param seq the delimiter that both opens and closes the span, e.g. "**"
-- @param format a string.format template with a single %s that receives the
--   already-converted content of the span
-- @param s the grammar reference (V"name") of the rule being defined; it is
--   subtracted from V"marked" so the rule is never tried directly inside itself
-- @return an lpeg pattern whose capture is the formatted string
local function wrap(seq,format,s)
  local delimiter = P(seq)
  --Content may be other marked spans, plain words and line breaks
  local content = Cs(((V"marked" - s) + word + P"\n")^0)
  return delimiter * content * delimiter / function(a)
    return string.format(format,a)
  end
end

--- Builds the pattern for a BB forum style [name]...[/name] tag.
-- Everything up to the closing tag is taken verbatim (no markup is parsed
-- inside it) and is HTML-escaped before being formatted.
-- @param name the tag name, without brackets
-- @param format a string.format template with a single %s for the content
-- @return an lpeg pattern whose capture is the formatted string
local function tag(name,format)
  local start_tag = P(string.format("[%s]",name))
  local end_tag = P(string.format("[/%s]",name))
  return start_tag * Cs((1 - end_tag)^0) * end_tag / function(a)
    return string.format(format,sanitize(a))
  end
end

--- Builds the pattern for a quote line (greentext / pinktext).
-- No look-behind is needed: these rules are only reached through 'line',
-- which is only tried at the start of the input or right after a line break.
-- @param marker the character that starts the line
-- @param format a string.format template with a single %s; the marker itself
--   must appear in it as an HTML entity, never as a raw '<' or '>'
local function quoteline(marker,format)
  return P(marker) * Cs((V"marked" + word)^0) / function(a)
    return string.format(format,a)
  end
end

local grammar = P{
  "chunk";
  --regular
  spoiler = wrap("**",[[<span class="spoiler">%s</span>]],V"spoiler"),
  spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]]),
  italic = wrap("''",[[<i>%s</i>]], V"italic"),
  bold = wrap("'''",[[<b>%s</b>]], V"bold"),
  underline = wrap("__",[[<u>%s</u>]], V"underline"),
  heading = wrap("==",[[<h2>%s</h2>]], V"heading"),
  strike = wrap("~~",[[<s>%s</s>]], V"strike"),
  code = tag("code",[[<pre><code>%s</code></pre>]]),
  --The leading marker is written as an entity so that it can never join the
  --text after it to form an HTML tag
  greentext = quoteline(">",[[<span class="greentext">&gt;%s</span>]]),
  pinktext = quoteline("<",[[<span class="pinktext">&lt;%s</span>]]),
  --bold is tried before italic because ''' starts with ''
  marked = V"spoiler" + V"bold" + V"italic" + V"underline" + V"heading"
    + V"strike" + V"spoiler2" + V"code",
  --Always succeeds, possibly matching nothing
  plainline = (V"marked" + word)^0,
  --One complete line including its line break. Fails when the content stops
  --before the line break (for example on an unclosed tag).
  line = Cs(V"greentext" + V"pinktext" + V"plainline") * P"\n" / function(a)
    if a == "\r" then
      --A line holding only a carriage return is a blank CRLF line
      return "<br/>"
    end
    return string.format("<p>%s</p>",a)
  end,
  --Whatever could not be parsed is emitted as escaped plain text
  ending = C(P(1)^0) / sanitize,
  chunk = V"line"^0 * V"plainline" * V"ending"
}

--- Converts marked-up text to HTML.
-- A line break is appended so that the last line is terminated like all the
-- others; the captures produced by the grammar are joined with single spaces.
-- @param text the raw text to convert
-- @return the generated HTML
return function(text)
  return table.concat({grammar:match(text .. "\n")}," ")
end