2020-12-23 07:02:02 +01:00
|
|
|
--[[
|
|
|
|
A parser that approximates 8chan's markup:
|
|
|
|
|
|
|
|
Surround text with double single-quotes(') to make text italic
|
|
|
|
Surround text with triple single-quotes to make text bold
|
|
|
|
Surround text with double underscores(_) to make it underlined
|
|
|
|
Surround text with double asterisks(*) to make it spoilered
|
|
|
|
Surround text with double tildes(~) to make it strike through
|
|
|
|
Begin a line with a greater-than followed by a space to make it
|
|
|
|
>greentext
|
|
|
|
Begin a line with a less-than followed by a space to make it
|
|
|
|
<pinktext
|
|
|
|
Surround text with forum-style [spoiler] and [/spoiler] tags as a second way to spoiler
|
|
|
|
Surround text with forum-style [code] and [/code] tags to make it preformatted and monospace
|
|
|
|
|
|
|
|
]]
|
|
|
|
|
2020-05-16 01:10:11 +02:00
|
|
|
local lpeg = require("lpeg")
|
|
|
|
lpeg.locale(lpeg)
|
|
|
|
local V,P,C,S,B,Cs = lpeg.V,lpeg.P,lpeg.C,lpeg.S,lpeg.B,lpeg.Cs
|
|
|
|
--Characters to escape in the body text, mapped to the HTML entity that
--replaces them. Each character must map to its entity (not to itself),
--otherwise user-supplied "<", ">" and "&" reach the page unescaped.
local escapes = {
	["&"] = "&amp;",
	["<"] = "&lt;",
	[">"] = "&gt;",
}

--Build a Lua pattern character class, e.g. "([&<>])", from the table keys.
--pairs() order is unspecified, which is fine inside a character class.
local esctbl = {}
for char in pairs(escapes) do
	esctbl[#esctbl + 1] = char
end
local escapematch = string.format("([%s])",table.concat(esctbl))

--Replacement callback for string.gsub: returns the entity for a captured
--character, or the character itself if it has no entry in 'escapes'.
local function sanitize_item(capture)
	return escapes[capture] or capture
end

--Returns 'text' with every character listed in 'escapes' replaced by its
--HTML entity. Only the resulting string is returned (gsub's replacement
--count is dropped).
local function sanitize(text)
	local ret = string.gsub(text,escapematch,sanitize_item)
	return ret
end
|
|
|
|
|
2020-05-17 18:05:00 +02:00
|
|
|
--Grammar
|
2020-05-16 01:10:11 +02:00
|
|
|
--Zero or more spaces, tabs or carriage returns
local space = S(" \t\r")^0

--Every sequence that opens/closes markup or breaks a line. It is used
--below only as the stop condition for 'word'.
local special = P{
	P("**") + P("''") + P("'''")
	+ P("__") + P("==") + P("~~")
	+ P("\n>") + P("\n<") + P("\n")
	+ P("[code]") + P("[spoiler]")
}

--One or more characters that do not start a 'special' sequence, followed
--by optional whitespace; the captured run is passed through sanitize.
local word = (Cs((P(1) - special)^1) * space) / sanitize
|
|
|
|
|
|
|
|
--Generates a pattern that formats text inside matching 'seq' tags with format
|
|
|
|
--ex wrap("^^",[[<sup>%s</sup>]])
|
2020-05-17 18:05:00 +02:00
|
|
|
--will wrap text "5^^3^^" as "5<sup>3</sup>"
|
2020-12-29 21:19:05 +01:00
|
|
|
--The third argument is necessary to stop exponential backtracking. This removes
|
|
|
|
--a DOS vulnerability: If tags are nested really deep, the parser can lock up,
|
|
|
|
--potentially locking up all processes.
|
|
|
|
--[[
|
|
|
|
local function wrap(seq,format,V"sup")
|
2020-08-24 23:38:24 +02:00
|
|
|
return P(seq) * Cs(((V"marked" + word + P"\n"))^1) * P(seq) / function(a)
|
|
|
|
return string.format(format,a)
|
2020-05-16 01:10:11 +02:00
|
|
|
end
|
|
|
|
end
|
2020-12-29 21:19:05 +01:00
|
|
|
]]
|
2021-03-20 07:39:46 +01:00
|
|
|
|
2020-12-29 21:19:05 +01:00
|
|
|
--Builds a pattern for text enclosed between two identical 'seq' delimiters.
--  seq    : delimiter string, e.g. "**"
--  format : string.format template whose %s receives the enclosed text
--  s      : pattern that is refused as nested markup inside the delimiters
--           (callers pass the rule being defined)
--The enclosed text may be empty and may contain nested markup, words and
--newlines.
local function wrap(seq,format,s)
	local delimiter = P(seq)
	local nested = V"marked" - s
	local inner = Cs((nested + word + P"\n")^0)
	local function render(body)
		return string.format(format,body)
	end
	return delimiter * inner * delimiter / render
end
|
2020-05-16 01:10:11 +02:00
|
|
|
|
2020-05-17 18:05:00 +02:00
|
|
|
--Builds a pattern for BB-forum-style tags: "[name]" ... "[/name]".
--  name   : tag name placed between the square brackets
--  format : string.format template whose %s receives the enclosed text
--Everything up to the first closing tag is taken verbatim (no nested markup
--is parsed) and passed through sanitize before being formatted.
local function tag(name,format)
	local opener = P(string.format("[%s]",name))
	local closer = P(string.format("[/%s]",name))
	local body = Cs((1 - closer)^0)
	local function render(raw)
		return string.format(format,sanitize(raw))
	end
	return opener * body * closer / render
end
|
|
|
|
|
2020-05-17 18:05:00 +02:00
|
|
|
--The markup grammar. Matching yields one capture per parsed line plus a
--final capture for whatever text is left over.
local grammar = P{
	--Name of the initial rule
	"chunk";

	--Inline markup. Each wrap() rule passes itself as the third argument so
	--the same markup is not accepted directly inside itself.
	spoiler = wrap("**",[[<span class="spoiler">%s</span>]],V"spoiler"),
	spoiler2 = tag("spoiler",[[<span class="spoiler2">%s</span>]]),
	italic = wrap("''",[[<i>%s</i>]], V"italic"),
	bold = wrap("'''",[[<b>%s</b>]], V"bold"),
	underline = wrap("__",[[<u>%s</u>]], V"underline"),
	heading = wrap("==",[[<h2>%s</h2>]], V"heading"),
	strike = wrap("~~",[[<s>%s</s>]], V"strike"),
	code = tag("code",[[<pre><code>%s</code></pre>]]),

	--A line that starts with ">". The marker itself is emitted as an entity
	--so it can never be read as markup by the browser.
	--NOTE(review): B">" always holds right after P">" has matched, so the
	--lookbehind never rejects anything - confirm this is intended.
	greentext = P">" * (B"\n>" + B">") * Cs((V"marked" + word)^0) / function(a)
		return string.format([[<span class="greentext">&gt;%s</span>]],a)
	end,

	--A line that starts with "<". The marker must be emitted as "&lt;": a raw
	--"<" followed by the line's text would be parsed as an HTML tag.
	pinktext = P"<" * (B"\n<" + B"<") * Cs((V"marked" + word)^0) / function(a)
		return string.format([[<span class="pinktext">&lt;%s</span>]],a)
	end,

	--Any inline markup. bold is listed before italic, so "'''" is tried
	--before its prefix "''".
	marked = V"spoiler" + V"bold" + V"italic" + V"underline" + V"heading" + V"strike" + V"spoiler2" + V"code",

	--A run of markup and plain words; may match nothing
	plainline = (V"marked" + word)^0,

	--One newline-terminated line. A line holding only a carriage return
	--becomes a line break, anything else becomes a paragraph.
	line = Cs(V"greentext" + V"pinktext" + V"plainline" + P"") * P"\n" / function(a)
		if a == "\r" then
			return "<br/>"
		else
			return string.format("<p>%s</p>",a)
		end
	end,

	--Whatever the rules above could not consume, passed through sanitize
	ending = C(P(1)^0) / function(a) return sanitize(a) end,

	--Whole input: complete lines, a trailing partial line, then the rest
	chunk = V"line"^0 * V"plainline" * V"ending"
}
|
|
|
|
|
2020-05-17 19:17:15 +02:00
|
|
|
--Module value: a function that converts markup 'text' to HTML.
--A newline is appended before matching, and the captures returned by the
--grammar are joined with single spaces.
return function(text)
	local pieces = {grammar:match(text .. "\n")}
	return table.concat(pieces," ")
end
|