diff options
author | defanor <defanor@uberspace.net> | 2017-11-15 05:17:02 +0300 |
---|---|---|
committer | defanor <defanor@uberspace.net> | 2017-11-15 05:17:02 +0300 |
commit | ca120bde0cf023219d765f06c3c52720ead9a6a6 (patch) | |
tree | ee102caf705a7e2357adc7a942cf1626b328c6c2 /Text/Pandoc | |
parent | 266e0908e1ecc3f63b3f61b53a61fc5810d05ce3 (diff) |
Relax Gopher directory parser
Accept erroneous directory entries, but mark them as errors.
Diffstat (limited to 'Text/Pandoc')
-rw-r--r-- | Text/Pandoc/Readers/Gopher.hs | 40 |
1 files changed, 28 insertions, 12 deletions
diff --git a/Text/Pandoc/Readers/Gopher.hs b/Text/Pandoc/Readers/Gopher.hs index 7ece808..09dfa6c 100644 --- a/Text/Pandoc/Readers/Gopher.hs +++ b/Text/Pandoc/Readers/Gopher.hs @@ -18,6 +18,8 @@ import Text.Parsec.Text import Text.Pandoc.Readers.Plain import Text.Pandoc.Class import qualified Data.Text as T +import Control.Monad.Except (throwError) +import Text.Pandoc.Error -- | UNASCII ::= ASCII - [Tab CR-LF NUL]. @@ -51,19 +53,31 @@ pLink = do prefix = mkPrefix $ case t of '0' -> "(text)" '1' -> "(dir)" + '3' -> "(err)" 'h' -> "(html)" '9' -> "(bin)" + 'g' -> "(gif)" 'I' -> "(img)" 's' -> "(snd)" '7' -> "(srch)" _ -> "(?)" - pure [Link (name, [], []) (prefix ++ lineToInlines name) (uri, ""), LineBreak] + line = case t of + '3' -> prefix ++ lineToInlines name + _ -> [Link (name, [], []) (prefix ++ lineToInlines name) (uri, "")] + pure $ line ++ [LineBreak] + +-- | An erroneous directory entry. Still parsing it, since there is a +-- lot of broken directories out there -- but marking as an error. +pError :: Parser [Inline] +pError = do + line <- manyTill anyChar (lookAhead $ try pEOL) + pure $ [Strong $ mkPrefix "error"] ++ lineToInlines line ++ [LineBreak] -- | Parses last line, with adjustments for what's used in the wild. pLastLine :: Parser () --- Sometimes there's additional newline, sometimes there's no dot, and --- sometimes LF is used instead of CRLF. -pLastLine = optional (optional endOfLine *> char '.' *> endOfLine) *> eof +-- Sometimes there's additional newline, sometimes there's no dot or +-- multiple dots, and sometimes LF is used instead of CRLF. +pLastLine = optional (try $ optional endOfLine *> char '.' *> endOfLine) *> eof -- | Parses end-of-line, skipping Gopher+ extensions if present. pEOL :: Parser () @@ -72,14 +86,16 @@ pEOL = (endOfLine *> pure ()) -- | Parses a directory. pDirEntries :: Parser [Inline] -pDirEntries = concat <$> manyTill (choice [pInfo, pLink] <* pEOL) pLastLine +pDirEntries = concat <$> + manyTill (choice ([ try pInfo <?> "info entry" + , try pLink <?> "link entry" + , pError <?> "erroneous entry"]) + <* pEOL) + pLastLine --- | Reads Gopher directory entries, falls back to plain text on --- failure. +-- | Reads Gopher directory entries. readGopher :: PandocMonad m => T.Text -> m Pandoc -readGopher s = pure . Pandoc mempty . pure . Plain $ +readGopher s = case parse pDirEntries "directory entry" s of - -- fallback to plain text - Left _ -> - concatMap (\l -> (lineToInlines $ T.unpack l) ++ [LineBreak]) $ T.lines s - Right r -> r + Left err -> throwError $ PandocParseError $ show err + Right r -> pure . Pandoc mempty . pure $ Plain r |