From 09e4bdddc7415117cc27047ad0d7afe523f064c2 Mon Sep 17 00:00:00 2001 From: defanor Date: Sun, 5 Nov 2017 11:16:46 +0300 Subject: Read metadata Reading effective URI and content type (if those are available) now, aiming `curl -w` or similar commands. Not a particularly nice way, and complicates both the program and the configuration, but sometimes file name extensions are deceptive. --- Pancake.hs | 89 +++++++++++++++++++++++++----------------------- Pancake/Command.hs | 3 +- Pancake/Configuration.hs | 7 ++-- Pancake/Reading.hs | 55 +++++++++++++++++++++++------- README.org | 22 +++++------- 5 files changed, 105 insertions(+), 71 deletions(-) diff --git a/Pancake.hs b/Pancake.hs index e346d0c..619b0f8 100644 --- a/Pancake.hs +++ b/Pancake.hs @@ -31,6 +31,7 @@ import System.Exit import Control.Exception import Data.Char import System.IO.Error +import Control.Applicative import Pancake.Common import Pancake.Configuration @@ -82,52 +83,56 @@ loadDocument :: MonadIO m -> URI -- ^ Document URI. -> StateT LoopState m (URI, Maybe P.Pandoc) -loadDocument t u' = do +loadDocument sType rawURI = do st <- get - let ddg = isPrefixOf "/l/?kh=-1&uddg=" $ uriToString id u' "" - u = case (ddg, uriIsAbsolute u', history st) of - -- fix DDG links (that's rather hacky, todo: improve) - (True, _, _) -> maybe u' id $ - parseAbsoluteURI (unEscapeString $ drop 12 (uriQuery u')) - -- handle relative URIs - (_, False, ((cur, _):_, _)) -> relativeTo u' cur - _ -> u' - uScheme = case uriScheme u of + let ddg = isPrefixOf "/l/?kh=-1&uddg=" $ uriToString id rawURI "" + adjustedURI = case (ddg, uriIsAbsolute rawURI, history st) of + -- fix DDG links (that's rather hacky, todo: improve) + (True, _, _) -> maybe rawURI id $ + parseAbsoluteURI (unEscapeString $ drop 12 (uriQuery rawURI)) + -- handle relative URIs + (_, False, ((cur, _):_, _)) -> relativeTo rawURI cur + _ -> rawURI + uScheme = case uriScheme adjustedURI of [] -> "unknown" - (_:s) -> s + s -> init s cmd = maybe (defaultCommand $ conf st) id $ M.lookup uScheme (commands $ conf st) - ext = case (t, takeExtension $ uriPath u) of - (Just x, _) -> x - (_, '.':xs) -> map toLower xs - (_, other) -> other - d <- liftIO $ do - case M.lookup ext (externalViewers $ conf st) of - Nothing -> do - doc <- readDoc cmd t u - case doc of - Left err -> do - putErrLn $ show err - pure mzero - Right r -> pure $ pure r - Just ev -> do - d <- retrieve cmd u - dir <- getXdgDirectory XdgCache "pancake" - let tmpPath = dir (takeFileName $ uriPath u) - handle - (\(e :: SomeException) -> - putErrLn (concat ["Failed to open `", tmpPath, "` with `" - , cmd, "`: ", show e])) $ do - createDirectoryIfMissing True dir - BS.writeFile tmpPath d - curEnv <- getEnvironment - ec <- withCreateProcess - ((shell ev) { env = Just (("FILE", tmpPath) : curEnv) }) $ - \_ _ _ p -> waitForProcess p - when (ec /= ExitSuccess) $ - putErrLn $ concat ["An error occured. Exit code: ", show ec] - pure mzero - pure (u, d) + liftIO $ do + docData <- retrieve cmd adjustedURI + case docData of + Nothing -> pure (adjustedURI, mzero) + Just (rawDoc, mdURI, mdType) -> do + let effectiveURI = maybe adjustedURI id mdURI + fType = sType <|> mdType + ext = case (fType, takeExtension $ uriPath effectiveURI) of + (Just x, _) -> x + (_, '.':xs) -> map toLower xs + (_, other) -> other + case M.lookup ext (externalViewers $ conf st) of + Nothing -> do + doc <- readDoc rawDoc fType effectiveURI + case doc of + Left err -> do + putErrLn $ show err + pure (effectiveURI, mzero) + Right r -> pure (effectiveURI, pure r) + Just ev -> do + dir <- getXdgDirectory XdgCache "pancake" + let tmpPath = dir (takeFileName $ uriPath effectiveURI) + handle + (\(e :: SomeException) -> + putErrLn (concat ["Failed to open `", tmpPath, "` with `" + , cmd, "`: ", show e])) $ do + createDirectoryIfMissing True dir + BS.writeFile tmpPath rawDoc + curEnv <- getEnvironment + ec <- withCreateProcess + ((shell ev) { env = Just (("FILE", tmpPath) : curEnv) }) $ + \_ _ _ p -> waitForProcess p + when (ec /= ExitSuccess) $ + putErrLn $ concat ["An error occured. Exit code: ", show ec] + pure (effectiveURI, mzero) -- | Visits an URI, updates history accordingly. goTo :: MonadIO m => Maybe String -> URI -> StateT LoopState m () diff --git a/Pancake/Command.hs b/Pancake/Command.hs index a1a0b09..6adc51d 100644 --- a/Pancake/Command.hs +++ b/Pancake/Command.hs @@ -84,5 +84,6 @@ command c = [ basicCommand "basic command" , followRef "follow ref" , showRef "show ref" + , shortcut (shortcuts c) "shortcut" , goTo "go to" - , shortcut (shortcuts c) "shortcut"]) + ]) diff --git a/Pancake/Configuration.hs b/Pancake/Configuration.hs index fa33d52..ff2e748 100644 --- a/Pancake/Configuration.hs +++ b/Pancake/Configuration.hs @@ -56,9 +56,12 @@ instance ToJSON Config instance Default Config where def = Config { commands = M.fromList - [ ("ssh", "scp \"${URI_REGNAME}:${URI_PATH}\" /dev/stdout") - , ("gopher", "curl \"${URI}\"")] + [ ("ssh", "scp \"${URI_REGNAME}:${URI_PATH}\" /dev/stdout" + ++ " && echo -e '\n-pancake-'") + , ("gopher", "curl \"${URI}\"" + ++ " -w \"\n-pancake-\n\"")] , defaultCommand = "curl -4 -L \"${URI}\"" + ++ " -w \"\n-pancake-\nuri: %{url_effective}\ntype: %{content_type}\n\"" , externalViewers = M.fromList $ map (flip (,) "emacsclient -n \"${FILE}\"") ["hs", "cabal", "c", "h", "el", "scm", "idr"] diff --git a/Pancake/Reading.hs b/Pancake/Reading.hs index d0a6ef4..2069f35 100644 --- a/Pancake/Reading.hs +++ b/Pancake/Reading.hs @@ -18,29 +18,56 @@ import qualified Data.ByteString.Char8 as BS import Network.URI import qualified Text.Pandoc as P import System.Process -import Control.Exception -import Control.Applicative +import Control.Exception (handle, SomeException) +import Control.Applicative ((<|>)) import Data.Text.Encoding (decodeUtf8', decodeLatin1) import Data.Default -import System.Console.Terminfo +import System.Console.Terminfo (setupTermFromEnv, getCapability, termColumns) import System.FilePath import Data.Char import System.Exit import System.Environment import GHC.IO.Handle +import Text.Parsec hiding ((<|>)) +import Text.Parsec.ByteString import Text.Pandoc.Readers.Plain import Text.Pandoc.Readers.Gopher import Pancake.Common +-- | Metadata (header, URI, document type) parser. +pMeta :: Parser (Maybe URI, Maybe String) +pMeta = do + _ <- newline + _ <- string "-pancake-" + _ <- newline + u <- optionMaybe $ do + _ <- string "uri: " + u <- manyTill anyToken newline + maybe (fail "Failed to parse URI") pure $ parseURI u + t <- option Nothing $ do + _ <- string "type: " + optional $ try $ manyTill alphaNum (char '/') + t <- optionMaybe $ many1 alphaNum + _ <- manyTill anyToken newline + pure t + eof + pure (u, t) + +-- | Document body + metadata parser. +pWithMeta :: Parser (BS.ByteString, (Maybe URI, Maybe String)) +pWithMeta = (,) + <$> BS.pack <$> manyTill anyToken (try $ lookAhead pMeta) + <*> pMeta + -- | Retrieves a document. Prints an error message and returns an -- empty string on failure. retrieve :: String -- ^ Shell command to use for retrieval. -> URI -- ^ Document URI. - -> IO BS.ByteString + -> IO (Maybe (BS.ByteString, Maybe URI, Maybe String)) -- ^ Document contents. retrieve cmd uri = do putErrLn $ "Retrieving " ++ show uri @@ -58,13 +85,13 @@ retrieve cmd uri = do ++ envAuthority handle (\(e :: SomeException) -> putErrLn (concat ["Failed to run `", cmd, "`: ", show e]) - >> pure BS.empty) $ + >> pure Nothing) $ withCreateProcess ((shell cmd) { env = Just environment , std_out = CreatePipe , std_err = CreatePipe , delegate_ctlc = True }) $ \_ stdout stderr ph -> case stdout of - Nothing -> putErrLn "No stdout" >> pure BS.empty + Nothing -> putErrLn "No stdout" >> pure Nothing Just stdout' -> do hSetBinaryMode stdout' True out <- BS.hGetContents stdout' @@ -78,20 +105,21 @@ retrieve cmd uri = do err <- BS.hGetContents stderr' putErrLn $ "stderr:\n" ++ BS.unpack err else putErrLn $ show uri - pure out + case parse pWithMeta (uriToString id uri "") out of + Left _ -> pure $ Just (out, Nothing, Nothing) + Right (bs, (u, t)) -> pure $ Just (bs, u, t) --- | Reads a document: retrieves it and parses into a Pandoc --- structure. The parser is chosen depending on the URI. -readDoc :: String - -- ^ Shell command to use for retrieval. +-- | Parses a document into a Pandoc structure. The parser is chosen +-- depending on the document type (if one is provided) or its URI. +readDoc :: BS.ByteString + -- ^ Raw document data. -> Maybe String -- ^ Document type. -> URI -- ^ Document URI. -> IO (Either P.PandocError P.Pandoc) -- ^ A parsed document. -readDoc cmd dt uri = do - out <- retrieve cmd uri +readDoc out dt uri = do term <- setupTermFromEnv let reader = either (const plain) id $ maybe (Left "no type suggestions") (byExtension . ('.':)) dt @@ -132,4 +160,5 @@ readDoc cmd dt uri = do byExtension ".ltx" = P.getReader "latex" byExtension ".tex" = P.getReader "latex" byExtension ".txt" = pure plain + byExtension ".plain" = pure plain byExtension ext = P.getReader $ tail ext diff --git a/README.org b/README.org index 393cc92..aa78a3f 100644 --- a/README.org +++ b/README.org @@ -46,7 +46,15 @@ externalViewers: commands: gopher: torify curl "${URI}" ssh: scp "${URI_REGNAME}:${URI_PATH}" /dev/stdout -defaultCommand: curl -4 -L "${URI}" +defaultCommand: ! 'curl -4 -L "${URI}" -w " + + -pancake- + + uri: %{url_effective} + + type: %{content_type} + + "' shortcuts: ddg: https://duckduckgo.com/lite/?q= wt: https://en.m.wiktionary.org/w/index.php?search= @@ -57,18 +65,6 @@ historyDepth: 100 paginate: true #+END_SRC -* Excluded features - -The following features were not implemented, since the complication -may be not worth the benefits, but maybe they will be in the future: - -- Usage of content types reported by network protocols: pancake relies - on URIs for format detection, what usually works, but can be - improved. -- Semantic markup for embedding: it might be useful to provide - denotations to ~pancake-mode~ (and for embedding in general) in - order to buttonize links, render TeX math, embed images, etc. - * Screenshots [[https://defanor.uberspace.net/projects/pancake/gopher.png]] -- cgit v1.2.3