From 99cd9c84abf3c52970981499875b402d5fb68085 Mon Sep 17 00:00:00 2001 From: defanor Date: Thu, 8 Feb 2018 11:09:40 +0300 Subject: Introduce pandoc timeouts Sometimes pandoc takes unreasonably long time to parse a document (though it warns about that in the documentation), so a safeguard is needed. --- Pancake/Configuration.hs | 4 ++++ Pancake/Reading.hs | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Pancake/Configuration.hs b/Pancake/Configuration.hs index 9df236a..9b0f83d 100644 --- a/Pancake/Configuration.hs +++ b/Pancake/Configuration.hs @@ -69,6 +69,9 @@ data Config = Config { commands :: M.Map String String -- inside divs. , unclutter :: M.Map String String -- ^ XSLT file and URI regex. + , pandocTimeout :: Int + -- ^ Maximum amount of time (in seconds) allowed + -- for document parsing. } deriving (Generic, Show, Eq) -- | For configuration parsing. @@ -117,6 +120,7 @@ instance Default Config where "^https://news\\.ycombinator\\.com/((news|show|ask).*)?$") , ("mediawiki", "^https://en\\.(m.)?(wiktionary|wikipedia)\\.org/wiki/") , ("github", "^https://github\\.com/")] + , pandocTimeout = 60 } where curl = "curl -A \"pancake/${PANCAKE}\" --compressed -4 -L " ++ diff --git a/Pancake/Reading.hs b/Pancake/Reading.hs index 7d86219..3a83e46 100644 --- a/Pancake/Reading.hs +++ b/Pancake/Reading.hs @@ -54,6 +54,7 @@ import qualified Data.Map as M import Control.Monad.IO.Class import System.Directory import Control.Concurrent.STM.TVar +import System.Timeout import Text.Pandoc.Readers.Plain import Text.Pandoc.Readers.Gopher @@ -237,13 +238,14 @@ readDoc c rdfc out dt uri = do (parse pEmacsMode (uriToString id uri "") out) cols = fromMaybe 80 $ getCapability term termColumns opts = def { P.readerColumns = cols, P.readerExtensions = exts } - liftIO $ case reader of + r <- liftIO $ timeout (pandocTimeout c * 1000000) $ case reader of P.TextReader f -> case decodeUtf8' out of Left err -> do putErrLn $ show err P.runIO $ f opts $ decodeLatin1 out Right r -> P.runIO $ f opts r P.ByteStringReader f -> P.runIO $ f opts $ BL.fromStrict out + pure $ fromMaybe (Left (P.PandocSomeError "Timed out.")) r where http ext = byExtension' ext <|> html html = P.getReader "html" -- cgit v1.2.3