summaryrefslogtreecommitdiff
path: root/Text/Pandoc
diff options
context:
space:
mode:
authordefanor <defanor@uberspace.net>2017-10-26 01:27:09 +0300
committerdefanor <defanor@uberspace.net>2017-10-26 01:27:09 +0300
commite9d5306f9df690803399f81bcc31d86e28084ab9 (patch)
treed1dc013cc00b348d5c71dae9d8eef052f1ad34aa /Text/Pandoc
Initial commit
Diffstat (limited to 'Text/Pandoc')
-rw-r--r--Text/Pandoc/Readers/Gopher.hs70
-rw-r--r--Text/Pandoc/Readers/Plain.hs20
2 files changed, 90 insertions, 0 deletions
diff --git a/Text/Pandoc/Readers/Gopher.hs b/Text/Pandoc/Readers/Gopher.hs
new file mode 100644
index 0000000..33dec3e
--- /dev/null
+++ b/Text/Pandoc/Readers/Gopher.hs
@@ -0,0 +1,70 @@
+{- |
+Module : Text.Pandoc.Readers.Gopher
+Maintainer : defanor <defanor@uberspace.net>
+Stability : unstable
+Portability : portable
+
+Loosely based on <https://www.ietf.org/rfc/rfc1436.txt RFC 1436>, but
+since directories commonly found in the wild tend to deviate from it,
+some adjustments have been made.
+-}
+
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.Gopher ( readGopher ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Parsec
+import Text.Parsec.String
+
+-- | Splits a single line of text into 'Inline' elements: every space
+-- character becomes a 'Space', and every maximal run of non-space
+-- characters becomes a 'Str'.
+lineToInlines :: String -> [Inline]
+lineToInlines line = case line of
+  [] -> []
+  ' ' : others -> Space : lineToInlines others
+  _ -> Str word : lineToInlines remainder
+    where
+      -- 'word' is non-empty here: the first character is not a space.
+      (word, remainder) = span (/= ' ') line
+
+-- | A single character allowed inside a directory-entry field.
+--
+-- RFC 1436 defines @UNASCII ::= ASCII - [Tab CR-LF NUL]@; this parser
+-- accepts any character other than tab, LF, CR and NUL.
+unascii :: Parser Char
+unascii = satisfy (`notElem` fieldBreakers)
+  where
+    fieldBreakers = ['\t', '\n', '\r', '\0']
+
+-- | An informational directory entry: the type character @i@, the
+-- display text, two further tab-terminated fields that are parsed and
+-- discarded, and a numeric port.
+--
+-- Yields the display text as inlines, followed by a 'LineBreak'.
+pInfo :: Parser [Inline]
+pInfo = render <$> (char 'i' *> field) <* field <* field <* many1 digit
+  where
+    -- A field runs up to (and consumes) the next tab.
+    field = manyTill unascii tab
+    render text = lineToInlines text ++ [LineBreak]
+
+-- | A file\/link (i.e., any other than informational) directory
+-- entry: an alphanumeric type character, then tab-terminated display
+-- name, selector and host fields, then a numeric port.
+--
+-- Yields a 'Link' whose target is
+-- @gopher:\/\/host:port\/\<type\>\<selector\>@, followed by a
+-- 'LineBreak'. The display name is also used as the link identifier.
+pLink :: Parser [Inline]
+pLink = mkLink <$> alphaNum <*> field <*> field <*> field <*> many1 digit
+  where
+    -- A field runs up to (and consumes) the next tab.
+    field = manyTill unascii tab
+    mkLink itemType label path server portNo =
+      [Link (label, [], []) (lineToInlines label) (target, ""), LineBreak]
+      where
+        target =
+          "gopher://" ++ server ++ ":" ++ portNo ++ "/" ++ [itemType] ++ path
+
+-- | Parses the end of a directory listing, with adjustments for
+-- what's used in the wild.
+--
+-- Accepts any number of blank lines, then an optional terminating
+-- @.@ (itself optionally followed by line breaks), then end of input.
+-- Both LF and CRLF line endings are recognised. Succeeds only at the
+-- very end of the input.
+pLastLine :: Parser ()
+-- Sometimes there's additional newline, sometimes there's no dot,
+-- sometimes the dot is not followed by a newline, and sometimes LF is
+-- used instead of CRLF.
+--
+-- The whole terminator is wrapped in 'try': it serves as the end
+-- parser of 'manyTill', where a failure after consuming input (e.g. a
+-- newline that is not followed by a dot) would otherwise abort the
+-- entire parse instead of letting the next entry be tried.
+pLastLine = try $
+  skipMany endOfLine *> optional (char '.' *> skipMany endOfLine) *> eof
+
+-- | A whole directory listing: newline-terminated entries (either
+-- informational or link entries) up to the terminator accepted by
+-- 'pLastLine'. The inlines of all entries are concatenated.
+pDirEntries :: Parser [Inline]
+pDirEntries = concat <$> manyTill entry pLastLine
+  where
+    entry = (pInfo <|> pLink) <* endOfLine
+
+-- | Reads Gopher directory entries, falling back to plain text on
+-- failure.
+--
+-- Always returns 'Right' with a single 'Plain' block. When the input
+-- does not parse as a directory, every line is converted with
+-- 'lineToInlines' and followed by a 'LineBreak'.
+readGopher :: String -> Either PandocError Pandoc
+readGopher s = Right . Pandoc mempty . pure . Plain $
+  case parse pDirEntries "directory entry" s of
+    -- fallback to plain text
+    Left _ -> concatMap (\l -> lineToInlines (stripCR l) ++ [LineBreak]) $ lines s
+    Right r -> r
+  where
+    -- 'lines' only splits on LF, so CRLF-terminated input would
+    -- otherwise leave a stray CR inside the last 'Str' of every line.
+    stripCR = foldr dropTrailingCR []
+    dropTrailingCR c rest
+      | c == '\r' && null rest = []
+      | otherwise = c : rest
diff --git a/Text/Pandoc/Readers/Plain.hs b/Text/Pandoc/Readers/Plain.hs
new file mode 100644
index 0000000..600e5f8
--- /dev/null
+++ b/Text/Pandoc/Readers/Plain.hs
@@ -0,0 +1,20 @@
+{- |
+Module : Text.Pandoc.Readers.Plain
+Maintainer : defanor <defanor@uberspace.net>
+Stability : unstable
+Portability : portable
+-}
+
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.Plain ( readPlain ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Data.List
+
+-- | Reads plain text. Never fails: the result is always a 'Right'
+-- holding one 'Plain' block, in which the words of each input line
+-- are separated by 'Space' and each line ends with a 'LineBreak'.
+readPlain :: String -> Either PandocError Pandoc
+readPlain = Right . Pandoc mempty . pure . Plain . concatMap renderLine . lines
+  where
+    renderLine l = intercalate [Space] [[Str w] | w <- words l] ++ [LineBreak]
+  -- or Right . Pandoc mempty . pure . RawBlock "plain"