From 09e4bdddc7415117cc27047ad0d7afe523f064c2 Mon Sep 17 00:00:00 2001 From: defanor Date: Sun, 5 Nov 2017 11:16:46 +0300 Subject: Read metadata Reading effective URI and content type (if those are available) now, aiming `curl -w` or similar commands. Not a particularly nice way, and complicates both the program and the configuration, but sometimes file name extensions are deceptive. --- Pancake/Command.hs | 3 ++- Pancake/Configuration.hs | 7 ++++-- Pancake/Reading.hs | 55 ++++++++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 16 deletions(-) (limited to 'Pancake') diff --git a/Pancake/Command.hs b/Pancake/Command.hs index a1a0b09..6adc51d 100644 --- a/Pancake/Command.hs +++ b/Pancake/Command.hs @@ -84,5 +84,6 @@ command c = [ basicCommand "basic command" , followRef "follow ref" , showRef "show ref" + , shortcut (shortcuts c) "shortcut" , goTo "go to" - , shortcut (shortcuts c) "shortcut"]) + ]) diff --git a/Pancake/Configuration.hs b/Pancake/Configuration.hs index fa33d52..ff2e748 100644 --- a/Pancake/Configuration.hs +++ b/Pancake/Configuration.hs @@ -56,9 +56,12 @@ instance ToJSON Config instance Default Config where def = Config { commands = M.fromList - [ ("ssh", "scp \"${URI_REGNAME}:${URI_PATH}\" /dev/stdout") - , ("gopher", "curl \"${URI}\"")] + [ ("ssh", "scp \"${URI_REGNAME}:${URI_PATH}\" /dev/stdout" + ++ " && echo -e '\n-pancake-'") + , ("gopher", "curl \"${URI}\"" + ++ " -w \"\n-pancake-\n\"")] , defaultCommand = "curl -4 -L \"${URI}\"" + ++ " -w \"\n-pancake-\nuri: %{url_effective}\ntype: %{content_type}\n\"" , externalViewers = M.fromList $ map (flip (,) "emacsclient -n \"${FILE}\"") ["hs", "cabal", "c", "h", "el", "scm", "idr"] diff --git a/Pancake/Reading.hs b/Pancake/Reading.hs index d0a6ef4..2069f35 100644 --- a/Pancake/Reading.hs +++ b/Pancake/Reading.hs @@ -18,29 +18,56 @@ import qualified Data.ByteString.Char8 as BS import Network.URI import qualified Text.Pandoc as P import System.Process -import Control.Exception -import Control.Applicative +import Control.Exception (handle, SomeException) +import Control.Applicative ((<|>)) import Data.Text.Encoding (decodeUtf8', decodeLatin1) import Data.Default -import System.Console.Terminfo +import System.Console.Terminfo (setupTermFromEnv, getCapability, termColumns) import System.FilePath import Data.Char import System.Exit import System.Environment import GHC.IO.Handle +import Text.Parsec hiding ((<|>)) +import Text.Parsec.ByteString import Text.Pandoc.Readers.Plain import Text.Pandoc.Readers.Gopher import Pancake.Common +-- | Metadata (header, URI, document type) parser. +pMeta :: Parser (Maybe URI, Maybe String) +pMeta = do + _ <- newline + _ <- string "-pancake-" + _ <- newline + u <- optionMaybe $ do + _ <- string "uri: " + u <- manyTill anyToken newline + maybe (fail "Failed to parse URI") pure $ parseURI u + t <- option Nothing $ do + _ <- string "type: " + optional $ try $ manyTill alphaNum (char '/') + t <- optionMaybe $ many1 alphaNum + _ <- manyTill anyToken newline + pure t + eof + pure (u, t) + +-- | Document body + metadata parser. +pWithMeta :: Parser (BS.ByteString, (Maybe URI, Maybe String)) +pWithMeta = (,) + <$> BS.pack <$> manyTill anyToken (try $ lookAhead pMeta) + <*> pMeta + -- | Retrieves a document. Prints an error message and returns an -- empty string on failure. retrieve :: String -- ^ Shell command to use for retrieval. -> URI -- ^ Document URI. - -> IO BS.ByteString + -> IO (Maybe (BS.ByteString, Maybe URI, Maybe String)) -- ^ Document contents. retrieve cmd uri = do putErrLn $ "Retrieving " ++ show uri @@ -58,13 +85,13 @@ retrieve cmd uri = do ++ envAuthority handle (\(e :: SomeException) -> putErrLn (concat ["Failed to run `", cmd, "`: ", show e]) - >> pure BS.empty) $ + >> pure Nothing) $ withCreateProcess ((shell cmd) { env = Just environment , std_out = CreatePipe , std_err = CreatePipe , delegate_ctlc = True }) $ \_ stdout stderr ph -> case stdout of - Nothing -> putErrLn "No stdout" >> pure BS.empty + Nothing -> putErrLn "No stdout" >> pure Nothing Just stdout' -> do hSetBinaryMode stdout' True out <- BS.hGetContents stdout' @@ -78,20 +105,21 @@ retrieve cmd uri = do err <- BS.hGetContents stderr' putErrLn $ "stderr:\n" ++ BS.unpack err else putErrLn $ show uri - pure out + case parse pWithMeta (uriToString id uri "") out of + Left _ -> pure $ Just (out, Nothing, Nothing) + Right (bs, (u, t)) -> pure $ Just (bs, u, t) --- | Reads a document: retrieves it and parses into a Pandoc --- structure. The parser is chosen depending on the URI. -readDoc :: String - -- ^ Shell command to use for retrieval. +-- | Parses a document into a Pandoc structure. The parser is chosen +-- depending on the document type (if one is provided) or its URI. +readDoc :: BS.ByteString + -- ^ Raw document data. -> Maybe String -- ^ Document type. -> URI -- ^ Document URI. -> IO (Either P.PandocError P.Pandoc) -- ^ A parsed document. -readDoc cmd dt uri = do - out <- retrieve cmd uri +readDoc out dt uri = do term <- setupTermFromEnv let reader = either (const plain) id $ maybe (Left "no type suggestions") (byExtension . ('.':)) dt @@ -132,4 +160,5 @@ readDoc cmd dt uri = do byExtension ".ltx" = P.getReader "latex" byExtension ".tex" = P.getReader "latex" byExtension ".txt" = pure plain + byExtension ".plain" = pure plain byExtension ext = P.getReader $ tail ext -- cgit v1.2.3