--[[
A parser that approximates 8chan's markup:
Surround text with double single-quotes ('') to make text italic
Surround text with triple single-quotes (''') to make text bold
Surround text with double underscores (__) to make it underlined
Surround text with double equals signs (==) to make it a heading
Surround text with double asterisks (**) to make it spoilered
Surround text with double tildes (~~) to make it strike through
Begin a line with a greater-than followed by a space to make it
> greentext
Begin a line with a less-than followed by a space to make it
< pinktext
Surround text with forum-style [spoiler] and [/spoiler] tags as a second way to spoiler
Surround text with forum-style [code] and [/code] tags to make it preformatted and monospace
]]
--LPeg supplies the pattern constructors the grammar below is built from
local lpeg = require("lpeg")
--Fills the lpeg table with the locale character-class patterns
lpeg.locale(lpeg)
--Short aliases for the LPeg constructors used in this file
local V, P, C, S, B, Cs = lpeg.V, lpeg.P, lpeg.C, lpeg.S, lpeg.B, lpeg.Cs
--Characters to escape in the body text, mapped to the HTML entity that
--replaces them
local escapes = {
  ["&"] = "&amp;",
  ["<"] = "&lt;",
  [">"] = "&gt;",
}

--Build a Lua pattern character class from the keys of `escapes`.
--Each key is prefixed with "%" so it is always taken literally inside the
--class, even if a pattern-magic character is added to the table later.
local esctbl = {}
for char, _ in pairs(escapes) do
  table.insert(esctbl, "%" .. char)
end
local escapematch = string.format("([%s])", table.concat(esctbl))

--Replacement callback for gsub: returns the entity for an escapable
--character, or the character itself if it has no entry.
local function sanitize_item(capture)
  return escapes[capture] or capture
end

--Returns `text` with every character listed in `escapes` replaced by its
--HTML entity. Only the string is returned; gsub's match count is dropped.
local function sanitize(text)
  local ret, _ = string.gsub(text, escapematch, sanitize_item)
  return ret
end
--Grammar

--Optional run of spaces, tabs and carriage returns following a word
local space = S(" \t\r") ^ 0

--Every sequence that can open markup or end a line; plain text stops in
--front of any of these. Longer markers come before their own prefixes
--(''' before '') so the ordered choice can reach them.
local special =
  P("**") + P("'''") + P("''") +
  P("__") + P("==") + P("~~") +
  P("\n>") + P("\n<") + P("\n") +
  P("[code]") + P("[spoiler]")

--One or more characters up to the next special sequence, followed by optional
--whitespace; the captured run is passed through sanitize.
local word = Cs((1 - special) ^ 1) * space / sanitize
--Generates a pattern that formats text inside matching 'seq' tags with format
--ex wrap("^^",[[<sup>%s</sup>]],V"sup")
--will wrap text "5^^3^^" as "5<sup>3</sup>"
--The third argument is necessary to stop exponential backtracking. This removes
--a DOS vulnerability: If tags are nested really deep, the parser can lock up,
--potentially locking up all processes.
--Earlier version without the guard, kept for reference:
--[=[
local function wrap(seq,format,V"sup")
  return P(seq) * Cs(((V"marked" + word + P"\n"))^1) * P(seq) / function(a)
    return string.format(format,a)
  end
end
]=]
--seq:    literal delimiter that both opens and closes the span
--format: string.format template with one %s for the converted contents
--s:      the pattern of the rule being defined (ex V"bold"); it is subtracted
--        from V"marked" so a span never tries to match itself directly inside
--        its own contents
local function wrap(seq, format, s)
  local delimiter = P(seq)
  --Contents may be other markup, plain words or newlines, at least one item
  local contents = Cs(((V"marked" - s) + word + P("\n")) ^ 1)
  return delimiter * contents * delimiter / function(a)
    return string.format(format, a)
  end
end
--Generates a pattern that formats text inside opening and closing "name" tags
--with a format, BB forum style
--name:   tag name, matched literally as [name] ... [/name]
--format: string.format template with one %s for the tag contents
--The contents are not parsed for further markup; they are passed through
--sanitize and inserted as-is. At least one character is required between
--the opening and closing tag.
local function tag(name, format)
  local start_tag = P(string.format("[%s]", name))
  local end_tag = P(string.format("[/%s]", name))
  return start_tag * Cs((1 - end_tag) ^ 1) * end_tag / function(a)
    return string.format(format, sanitize(a))
  end
end
--The full markup grammar. Matching starts at "chunk"; each rule yields the
--HTML for the text it consumed.
local grammar = P{
  "chunk";
  --regular
  --Each wrap() rule passes itself as the third argument so that it is
  --excluded from its own contents (see wrap).
  spoiler = wrap("**", [[<span class="spoiler">%s</span>]], V"spoiler"),
  spoiler2 = tag("spoiler", [[<span class="spoiler2">%s</span>]]),
  italic = wrap("''", [[<i>%s</i>]], V"italic"),
  bold = wrap("'''", [[<b>%s</b>]], V"bold"),
  underline = wrap("__", [[<u>%s</u>]], V"underline"),
  heading = wrap("==", [[<h2>%s</h2>]], V"heading"),
  strike = wrap("~~", [[<s>%s</s>]], V"strike"),
  code = tag("code", [[<pre><code>%s</code></pre>]]),
  --A ">" followed by the rest of the line. The look-behind accepts either
  --"\n>" or a bare ">" (the latter covers the very start of the input).
  --The marker itself is emitted as an entity, not as a raw character.
  greentext = P(">") * (B("\n>") + B(">")) * Cs((V"marked" + word) ^ 0) / function(a)
    return string.format([[<span class="greentext">&gt;%s</span>]], a)
  end,
  --Same as greentext, for "<". A raw "<" here would open a bogus HTML tag.
  pinktext = P("<") * (B("\n<") + B("<")) * Cs((V"marked" + word) ^ 0) / function(a)
    return string.format([[<span class="pinktext">&lt;%s</span>]], a)
  end,
  --Any inline markup. bold is tried before italic because ''' starts with ''.
  marked = V"spoiler" + V"bold" + V"italic" + V"underline" + V"heading" + V"strike" + V"spoiler2" + V"code",
  --Zero or more pieces of markup or plain words; can match the empty string
  plainline = (V"marked" + word) ^ 0,
  --One newline-terminated line. A line consisting only of a carriage return
  --becomes a line break, anything else becomes a paragraph.
  line = Cs(V"greentext" + V"pinktext" + V"plainline" + P("")) * P("\n") / function(a)
    if a == "\r" then
      return "<br/>"
    else
      return string.format("<p>%s</p>", a)
    end
  end,
  --Whatever the rules above could not consume is emitted escaped instead of
  --being dropped. This also matches the empty string.
  ending = C(P(1) ^ 0) / function(a)
    return sanitize(a)
  end,
  chunk = V"line" ^ 0 * V"plainline" * V"ending"
}
--A chunk of text that the parser chokes on:
--Kept as a regression sample; uncomment the print below to run it through
--the grammar.
local s = [=[
Minor update to the search function, also added a search bar to the front page.
Characters in '''bold''' are literal characters, things in ''<angle brackets and italics>'' are substitutions.
The search utility searches for stories on the site. At it's most simple, it searches stories based on tags, but it can also filter stories based on the fields: '''title''', '''author''', '''date''', and '''hits'''. In general, the syntax for search is {'''+-'''}''<field>''''<operator>''''<value>''
The first '''+''' or '''-''' specifies weather to include or exclude results based on this search, the ''<field>'' specifies what field to search for (or search based on tag if this is missing), and ''<operator>'' specifies how to search.
For title and author, the only allowed operator is '''='''. This operator will search for ''<value>'' appearing anywhere in the field, case insensitive. For '''hits''' and '''time''', the allowed operators are '''>''', '''<''', '''>=''', '''<=''', '''=''', which searches for greater than, less than, greater than or equal to, less than or equal to, and strictly equal to respectively. '''tag''' does not need a ''<field>'' or ''<operator>'', and only allows exact matches. As a quirk of this system, it is impossible to search for the tags "author", "title", "hits" or "date".
Examples:
[code]
+author=admin -meta
[/code]
Will return all stories by the users "admin" and "b'''admin'''ton_enthusiast" that do not include the "meta" tag.
[code]
+hits>20 -date>=1609459201
[/code]
Will return all stories with more than 20 hits that were posted before January 1, 2021 (unix timestamp 1609459201).
While the date field is a little hard to use for humans, it may be useful for robots.
]=]
--print(table.concat({grammar:match(s .. "\n")}," "))
--Module export: converts markup text to HTML.
--text:    the raw markup string
--returns: every capture produced by the grammar, joined with single spaces
return function(text)
  --A newline is appended so the last line of the input is terminated and
  --can be matched by the grammar's "line" rule.
  local pieces = { grammar:match(text .. "\n") }
  return table.concat(pieces, " ")
end