-- From e9d5306f9df690803399f81bcc31d86e28084ab9 Mon Sep 17 00:00:00 2001
-- From: defanor
-- Date: Thu, 26 Oct 2017 01:27:09 +0300
-- Subject: Initial commit
--
--  Text/Pandoc/Readers/Gopher.hs | 70 +++++++++++++++++++++++++++++++++++++++++++
--  Text/Pandoc/Readers/Plain.hs  | 20 +++++++++++++
--  2 files changed, 90 insertions(+)
--  create mode 100644 Text/Pandoc/Readers/Gopher.hs
--  create mode 100644 Text/Pandoc/Readers/Plain.hs
--
-- (limited to 'Text/Pandoc')
--
-- diff --git a/Text/Pandoc/Readers/Gopher.hs b/Text/Pandoc/Readers/Gopher.hs
-- new file mode 100644
-- index 0000000..33dec3e

{- |
Module      : Text.Pandoc.Readers.Gopher
Maintainer  : defanor
Stability   : unstable
Portability : portable

Loosely based on RFC 1436 (The Internet Gopher Protocol), but since the
directories commonly found in the wild tend to differ from that, there
are some adjustments.
-}

{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.Gopher ( readGopher ) where

import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Parsec
import Text.Parsec.String

-- | Splits a text line on space characters: every single space yields
-- a 'Space', and every maximal run of other characters yields a 'Str'.
-- An empty line yields an empty list.
lineToInlines :: String -> [Inline]
lineToInlines line = case span (/= ' ') line of
  ([], [])       -> []
  -- 'span' stopped immediately, so the skipped character is a space.
  ([], _ : rest) -> Space : lineToInlines rest
  (word, rest)   -> Str word : lineToInlines rest

-- | UNASCII ::= ASCII - [Tab CR-LF NUL].
unascii :: Parser Char
unascii = satisfy (`notElem` ['\t', '\n', '\r', '\0'])

-- | An informational directory entry: the character @i@, the display
-- text and two further tab-terminated fields, then a numeric port.
-- Only the display text is kept; it is rendered with 'lineToInlines'
-- and followed by a 'LineBreak'.
pInfo :: Parser [Inline]
pInfo = render <$> (char 'i' *> field) <* field <* field <* many1 digit
  where
    -- A run of 'unascii' characters terminated (and consumed) by a tab.
    field = manyTill unascii tab
    render text = lineToInlines text ++ [LineBreak]

-- | A file\/link (i.e., any other than informational) directory
-- entry.
pLink :: Parser [Inline]
pLink = do
  -- Item type: a single alphanumeric character. It is reused below as
  -- the first character of the URI path.
  t <- alphaNum
  -- Display name, selector and host are each terminated by a tab.
  name <- manyTill unascii tab
  selector <- manyTill unascii tab
  host <- manyTill unascii tab
  port <- many1 digit
  -- gopher://<host>:<port>/<type><selector>
  let uri = concat ["gopher://", host, ":", port, "/", [t], selector]
  -- The display name doubles as the link identifier and the link text.
  pure [Link (name, [], []) (lineToInlines name) (uri, ""), LineBreak]

-- | Parses the end of a directory, with adjustments for what's used in
-- the wild: sometimes there's an additional newline, sometimes there's
-- no dot, sometimes the dot is not followed by a newline, and
-- sometimes LF is used instead of CRLF.
--
-- Every part is optional on its own, and the whole parser is wrapped
-- in 'try' so that a failed attempt consumes no input. Without that, a
-- partial match (e.g. a trailing newline that is not followed by a
-- dot) fails after consuming input, which makes the enclosing
-- 'manyTill' give up instead of accepting the directory.
pLastLine :: Parser ()
pLastLine =
  try (optional endOfLine *> optional (char '.' *> optional endOfLine) *> eof)

-- | Parses directory entries, each terminated by an end of line, until
-- 'pLastLine' succeeds, and concatenates the resulting inlines.
pDirEntries :: Parser [Inline]
pDirEntries = concat <$> manyTill (choice [pInfo, pLink] <* endOfLine) pLastLine

-- | Reads Gopher directory entries, falls back to plain text on
-- failure.
--
-- Always returns 'Right' with a single 'Plain' block. In the fallback
-- case every input line is rendered with 'lineToInlines' and followed
-- by a 'LineBreak'; trailing carriage returns are removed first, so
-- that CRLF-terminated input does not leave @\\r@ inside 'Str'
-- elements.
readGopher :: String -> Either PandocError Pandoc
readGopher s = Right . Pandoc mempty . pure . Plain $
  either (const plainText) id (parse pDirEntries "directory entry" s)
  where
    -- fallback to plain text
    plainText = concatMap plainLine (lines s)
    plainLine l = lineToInlines (stripTrailingCR l) ++ [LineBreak]
    -- 'lines' splits on LF only; drop the CRs it leaves at line ends.
    stripTrailingCR = foldr dropCR []
    dropCR '\r' [] = []
    dropCR c rest = c : rest

-- The remainder belongs to a separate file of the same commit:
--
-- diff --git a/Text/Pandoc/Readers/Plain.hs b/Text/Pandoc/Readers/Plain.hs
-- new file mode 100644
-- index 0000000..600e5f8

{- |
Module      : Text.Pandoc.Readers.Plain
Maintainer  : defanor
Stability   : unstable
Portability : portable
-}

{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.Plain ( readPlain ) where

import Text.Pandoc.Definition
import Text.Pandoc.Error
import Data.List

-- | Reads plain text, always succeeding and producing a single
-- 'Plain' block.
--
-- Each input line is split into words, the words are joined with
-- 'Space', and a 'LineBreak' is appended after every line.
readPlain :: String -> Either PandocError Pandoc
readPlain = Right . Pandoc mempty . pure . Plain . concatMap lineInlines . lines
  where
    lineInlines l = intersperse Space (map Str (words l)) ++ [LineBreak]
  -- or Right . Pandoc mempty . pure . RawBlock "plain"

-- cgit v1.2.3