summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordefanor <defanor@uberspace.net>2018-01-23 15:28:05 +0300
committerdefanor <defanor@uberspace.net>2018-01-23 15:28:05 +0300
commit11e83528df0f4e48d9324b2e4dd82dc58792fb16 (patch)
tree28b256af09b52717552f41c98ccfb32626ca0c3b
parent9e58afe6f706aa4db06e82b35abf329368a0de98 (diff)
downloadpancake-11e83528df0f4e48d9324b2e4dd82dc58792fb16.zip
pancake-11e83528df0f4e48d9324b2e4dd82dc58792fb16.tar.gz
pancake-11e83528df0f4e48d9324b2e4dd82dc58792fb16.tar.bz2
Add initial RDF support
With two types of caching: file-based for retrieved documents, and memory-based for predicate labels extracted from those.
-rw-r--r--Pancake.hs40
-rw-r--r--Pancake/Common.hs11
-rw-r--r--Pancake/Reading.hs73
-rw-r--r--README8
-rw-r--r--Text/Pandoc/Readers/RDF.hs188
-rw-r--r--pancake.12
-rw-r--r--pancake.cabal16
7 files changed, 296 insertions, 42 deletions
diff --git a/Pancake.hs b/Pancake.hs
index c59a6e1..38b6a7d 100644
--- a/Pancake.hs
+++ b/Pancake.hs
@@ -52,6 +52,7 @@ import Data.Version
import System.Console.GetOpt
import Text.Regex.TDFA
import qualified System.Console.Haskeline as HL
+import Control.Concurrent.STM.TVar
import Pancake.Common
import Pancake.Configuration
@@ -81,6 +82,7 @@ data LoopState = LS { history :: Sliding HistoryEntry
, interrupted :: Bool
, unclutterRegexps :: [(Regex, String)]
, columns :: Maybe Int
+ , rdfCache :: TVar (M.Map String String)
}
-- | Main event loop's type.
@@ -111,25 +113,20 @@ updateConfig mp = do
u <- prepareUnclutter c
modify $ \s -> s { conf = c, unclutterRegexps = u }
--- | A wrapper around 'retrieve' that adjusts the URI.
+-- | A wrapper around 'retrieve''.
loadRaw :: URI -> Pancake (URI, Maybe (BS.ByteString, Maybe URI, Maybe String))
loadRaw rawURI = do
st <- get
- let ddg = isInfixOf "/l/?kh=-1&uddg=" $ uriToString id rawURI ""
- adjustedURI = case (ddg, uriIsAbsolute rawURI, history st) of
+ let adjustedURI
-- fix DDG links (that's rather hacky, todo: improve)
- (True, _, _) -> fromMaybe rawURI $
+ | isInfixOf "/l/?kh=-1&uddg=" $ uriToString id rawURI "" =
+ fromMaybe rawURI $
parseAbsoluteURI (unEscapeString $ drop 12 (uriQuery rawURI))
- -- handle relative URIs
- (_, False, (h:_, _)) -> relativeTo rawURI (hURI h)
- _ -> rawURI
- uScheme = case uriScheme adjustedURI of
- [] -> "unknown"
- s -> init s
- cmd = fromMaybe (defaultCommand $ conf st) $
- M.lookup uScheme (commands $ conf st)
- doc <- liftIO $ retrieve cmd adjustedURI
- pure (adjustedURI, doc)
+ | otherwise = rawURI
+ currentURI = case history st of
+ (h:_, _) -> pure (hURI h)
+ _ -> mzero
+ retrieve' (conf st) False currentURI adjustedURI
-- | Decides what to do with a given URI; either returns a document or
-- runs an external viewer. Used by both 'GoTo' and 'Reload'.
@@ -153,7 +150,7 @@ loadDocument sType rawURI = do
case M.lookup ext (externalViewers $ conf st) of
Nothing -> do
uDoc <- tryUnclutter (unclutterRegexps st) effectiveURI rawDoc
- doc <- readDoc uDoc fType effectiveURI
+ doc <- readDoc (conf st) (rdfCache st) uDoc fType effectiveURI
case doc of
Left err -> do
putErrLn $ show err
@@ -246,7 +243,7 @@ command (Save (RURI uri') p) = do
remoteURIStr = uriToString id remoteURI ""
remoteFileName' = takeFileName $ uriPath remoteURI
remoteFileName = if remoteFileName' `elem` [".", "..", ""]
- then map escapeURI remoteURIStr
+ then escapeURI remoteURI
else remoteFileName'
targetFileName = fromMaybe remoteFileName mTargetName
targetPath = targetDir </> targetFileName
@@ -260,10 +257,6 @@ command (Save (RURI uri') p) = do
, encodeSexpStr $ uriToString id uri' ""
, encodeSexpStr targetPath]
putErrLn $ unwords ["Saved", remoteURIStr, "as", targetPath]
- where
- escapeURI c
- | isPathSeparator c = '-'
- | otherwise = c
command (Save (RNumber i) p) = do
st <- get
if length (rLinks $ rendered st) > i
@@ -347,10 +340,7 @@ command ShowCurrent = do
command (Shortcut u q) = command . GoTo Nothing . RURI . fromJust . parseURI $
u ++ escapeURIString isUnreserved q
command (LoadConfig p) = updateConfig p
-command Quit = liftIO $ do
- dir <- getXdgDirectory XdgCache "pancake"
- exists <- doesDirectoryExist dir
- when exists $ removeDirectoryRecursive dir
+command Quit = pure ()
command Interrupt =
putErrLn "Received SIGINT. Interrupt twice in a row to quit."
command (SetWidth w) = modify $ \s -> s { columns = w }
@@ -441,6 +431,7 @@ main = do
>>= \st -> command (parseCommand (conf st) (unwords cmd))
dir <- getXdgDirectory XdgConfig "pancake"
let hfn = dir </> "command_history"
+ rdfc <- newTVarIO M.empty
_ <- runStateT
(HL.runInputT
(HL.setComplete complete HL.defaultSettings)
@@ -457,6 +448,7 @@ main = do
, interrupted = False
, unclutterRegexps = []
, columns = Nothing
+ , rdfCache = rdfc
}
pure ()
run
diff --git a/Pancake/Common.hs b/Pancake/Common.hs
index ee21ab3..d107a02 100644
--- a/Pancake/Common.hs
+++ b/Pancake/Common.hs
@@ -24,9 +24,11 @@ Portability : portable
Utility functions.
-}
-module Pancake.Common ( putErrLn ) where
+module Pancake.Common ( putErrLn, escapeURI ) where
import System.IO
import Control.Monad.IO.Class
+import Network.URI
+import System.FilePath
-- | Prints a line into stderr.
@@ -34,3 +36,10 @@ putErrLn :: MonadIO m => String -> m ()
putErrLn s = liftIO $ do
hPutStrLn stderr s
hFlush stderr
+
+-- | Escapes an URI for use as a file name.
+escapeURI :: URI -> FilePath
+escapeURI u = map escapeChar $ uriToString id u ""
+ where escapeChar c
+ | isPathSeparator c = '-'
+ | otherwise = c
diff --git a/Pancake/Reading.hs b/Pancake/Reading.hs
index 52b7c28..7d86219 100644
--- a/Pancake/Reading.hs
+++ b/Pancake/Reading.hs
@@ -27,6 +27,7 @@ Document retrieval and parsing.
{-# LANGUAGE ScopedTypeVariables #-}
module Pancake.Reading ( retrieve
+ , retrieve'
, readDoc
) where
@@ -49,9 +50,15 @@ import Text.Parsec hiding ((<|>))
import Text.Parsec.ByteString
import Data.Maybe
import Data.Version
+import qualified Data.Map as M
+import Control.Monad.IO.Class
+import System.Directory
+import Control.Concurrent.STM.TVar
import Text.Pandoc.Readers.Plain
import Text.Pandoc.Readers.Gopher
+import Text.Pandoc.Readers.RDF
+import Pancake.Configuration
import Pancake.Common
import Paths_pancake
@@ -80,6 +87,46 @@ pWithMeta :: Parser (BS.ByteString, (Maybe URI, Maybe String))
pWithMeta = (,) . BS.pack <$> manyTill anyToken (try $ lookAhead pMeta)
<*> pMeta
+-- | A wrapper around 'retrieve' that adjusts the URI.
+retrieve' :: MonadIO m
+ => Config
+ -- ^ Configuration.
+ -> Bool
+ -- ^ Cache.
+ -> Maybe URI
+ -- ^ Current URI.
+ -> URI
+ -- ^ Target URI.
+ -> m (URI, Maybe (BS.ByteString, Maybe URI, Maybe String))
+retrieve' c cache cu tu' = do
+ let tu = tu' { uriFragment = "" }
+ adjustedURI = case (cu, uriIsAbsolute tu) of
+ (Just cu', False) -> relativeTo tu cu'
+ _ -> tu
+ uScheme = case uriScheme adjustedURI of
+ [] -> "unknown"
+ s -> init s
+ cmd = fromMaybe (defaultCommand c) $ M.lookup uScheme (commands c)
+ doc <- liftIO $ if cache
+ then do
+ cacheDir <- getXdgDirectory XdgCache "pancake"
+ createDirectoryIfMissing True cacheDir
+ let fp = cacheDir </> escapeURI tu
+ exists <- doesFileExist fp
+ if exists
+ then do
+ fc <- BS.readFile fp
+ pure $ Just (fc, Nothing, Nothing)
+ else do
+ d <- retrieve cmd adjustedURI
+ case d of
+ (Just (fc, _, _)) -> BS.writeFile fp fc
+ _ -> pure ()
+ pure d
+ else retrieve cmd adjustedURI
+ pure (adjustedURI, doc)
+
+
-- | Retrieves a document. Prints an error message and returns an
-- empty string on failure.
retrieve :: String
@@ -152,16 +199,21 @@ pEmacsMode = do
-- | Parses a document into a Pandoc structure. The parser is chosen
-- depending on the document type (if one is provided) or its URI.
-readDoc :: BS.ByteString
+readDoc :: MonadIO m
+ => Config
+ -- ^ Configuration.
+ -> TVar (M.Map String String)
+ -- ^ RDF cache
+ -> BS.ByteString
-- ^ Raw document data.
-> Maybe String
-- ^ Document type.
-> URI
-- ^ Document URI.
- -> IO (Either P.PandocError P.Pandoc)
+ -> m (Either P.PandocError P.Pandoc)
-- ^ A parsed document.
-readDoc out dt uri = do
- term <- setupTermFromEnv
+readDoc c rdfc out dt uri = do
+ term <- liftIO setupTermFromEnv
let (reader, exts) = either (const plain) id $
maybe (Left "no type suggestions") byExtension dt
<|> case (uriScheme uri, map toLower $ takeExtension $ uriPath uri) of
@@ -185,7 +237,7 @@ readDoc out dt uri = do
(parse pEmacsMode (uriToString id uri "") out)
cols = fromMaybe 80 $ getCapability term termColumns
opts = def { P.readerColumns = cols, P.readerExtensions = exts }
- case reader of
+ liftIO $ case reader of
P.TextReader f -> case decodeUtf8' out of
Left err -> do
putErrLn $ show err
@@ -197,6 +249,8 @@ readDoc out dt uri = do
html = P.getReader "html"
plain = (P.TextReader . const $ readPlain, P.emptyExtensions)
gopher = pure (P.TextReader . const $ readGopher, P.emptyExtensions)
+ rdf = ( P.TextReader . const $ readRDF rdfc uri retrieveRelative
+ , P.emptyExtensions)
byExtension' ext = byExtension $ dropWhile (== '.') ext
byExtension "md" = P.getReader "markdown"
byExtension "htm" = html
@@ -204,4 +258,13 @@ readDoc out dt uri = do
byExtension "tex" = P.getReader "latex"
byExtension "txt" = pure plain
byExtension "plain" = pure plain
+ byExtension "rdf" = pure rdf
+ byExtension "turtle" = pure rdf
byExtension ext = P.getReader ext
+ retrieveRelative u = do
+ x <- retrieve' c True (Just uri) u
+ case x of
+ (_, Just (bs, _, _)) -> case decodeUtf8' bs of
+ Right t -> pure t
+ _ -> fail "Failed to decode as UTF-8"
+ _ -> fail "Failed to retrieve a document"
diff --git a/README b/README
index 662e9e9..a7ecec9 100644
--- a/README
+++ b/README
@@ -5,9 +5,9 @@ Pancake
This is a CLI/Emacs web/gopher/file browser.
It utilizes pandoc and external downloaders such as curl, adding
-support for Gopher directories and plain text files, and invoking
-external applications (e.g., image and PDF viewers) depending on its
-configuration.
+support for Gopher directories, plain text files, and RDF, and
+invoking external applications (e.g., image and PDF viewers) depending
+on its configuration.
User interaction capabilities are rather basic, as it is intended to
be combined with software that provides better user interfaces – such
@@ -56,7 +56,7 @@ specific websites, e.g. webcomics, and perhaps specific images only.
Commands
--------
-:quit or EOF: quit pancake, cleaning the cache
+:quit or EOF: quit pancake
:[: back
:]: forward
:load config[ <path>]: load configuration from a specified file or
diff --git a/Text/Pandoc/Readers/RDF.hs b/Text/Pandoc/Readers/RDF.hs
new file mode 100644
index 0000000..9a7b6de
--- /dev/null
+++ b/Text/Pandoc/Readers/RDF.hs
@@ -0,0 +1,188 @@
+{-
+Copyright (C) 2017-2018 defanor <defanor@uberspace.net>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+-}
+
+{- |
+Module : Text.Pandoc.Readers.RDF
+Maintainer : defanor <defanor@uberspace.net>
+Stability : unstable
+
+This module is for RDF documents reading. It's not strictly a parser,
+since it requests additional external documents to read predicate
+labels from, and generally controls how those would be rendered.
+-}
+
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module Text.Pandoc.Readers.RDF ( readRDF ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Class (PandocMonad)
+import Data.RDF
+import qualified Data.Text as T
+import Network.URI (URI, uriFragment, parseURIReference, relativeFrom,
+ uriToString)
+import Data.List (intersperse)
+import Control.Monad.IO.Class (liftIO, MonadIO)
+import Text.XML.HXT.Core ( readString, withParseHTML, withWarnings,
+ runX, (>>>), yes, no, XNode(..))
+import Data.Tree.NTree.TypeDefs (NTree(..))
+import Text.XML.HXT.XPath.Arrows (getXPathTreesInDoc)
+import Control.Monad.Error.Class (throwError, catchError)
+import Text.Pandoc.Error (PandocError(..))
+import Control.Concurrent.STM.TVar (TVar, readTVarIO, modifyTVar)
+import Control.Monad.STM (atomically)
+import qualified Data.Map as M
+
+
+-- | Reads a literal value.
+readLiteral :: LValue -> [Inline]
+readLiteral (PlainL t) = pure . Str $ T.unpack t
+readLiteral (PlainLL t _) = pure . Str $ T.unpack t
+readLiteral (TypedL t _) = pure . Str $ T.unpack t
+
+-- | Reads an arbitrary 'Node', used mostly for objects and subjects.
+readNode :: URI -> Node -> [Inline]
+readNode _ (LNode l) = readLiteral l
+readNode bu (UNode u) = case parseURIReference (T.unpack u) of
+ Just u' -> let u'' = relativeFrom u' bu
+ us = uriToString id u'' ""
+ in [Link (us, [], []) [] (us, us)]
+ Nothing -> let s = (T.unpack u) in [Link (s, [], []) [] (s, s)]
+readNode _ (BNode t) = let s = T.unpack t
+ in [Link (s, [], []) [] (s, s)]
+readNode _ (BNodeGen i) = let s = show i
+ in [Link (s, [], []) [] (s, s)]
+
+-- | Reads a predicate, looks up its label in external documents using
+-- the provided retrieval function.
+readPredicate :: (MonadIO m, PandocMonad m)
+ => TVar (M.Map String String)
+ -- ^ RDF cache
+ -> URI
+ -- ^ Base URI.
+ -> (URI -> m T.Text)
+ -- ^ Retrieval function.
+ -> Node
+ -- ^ A node to read.
+ -> m [Inline]
+readPredicate rdfc _ rf (UNode u) = do
+ rdfc' <- liftIO $ readTVarIO rdfc
+ l <- case M.lookup uriStr rdfc' of
+ Just cl -> pure cl
+ Nothing -> do
+ (u', doc) <- case parseURIReference uriStr of
+ Just r -> do
+ ret <- rf r
+ pure (r, ret)
+ Nothing -> throwError $ PandocParseError $
+ "Failed to parse an URI reference"
+ -- use URIs when failing to read a label
+ catchError (labelFromRDF u' doc) $ const $ pure uriStr
+ pure [Link (uriStr, [], []) [] (uriStr, l)]
+ where
+ uriStr = T.unpack u
+ labelFromRDF u' doc = do
+ rdf <- parseRDF u' rf doc
+ let label = (UNode "http://www.w3.org/2000/01/rdf-schema#label")
+ -- 'query' doesn't expand triples, so filtering manually
+ l <- case filter
+ (\x -> subjectOf x == (UNode u) && predicateOf x == label)
+ (expandTriples rdf) of
+ -- todo: there could be multiple labels in different
+ -- languages, handle that.
+ [sl] -> case objectOf sl of
+ (LNode (PlainL sl')) -> pure $ T.unpack sl'
+ _ -> pure $ show sl
+ _ -> pure uriStr
+ liftIO $ atomically $ modifyTVar rdfc $ M.insert uriStr l
+ pure l
+readPredicate _ bu _ n = pure $ readNode bu n
+
+-- | Reads a triple.
+readTriple :: (MonadIO m, PandocMonad m)
+ => TVar (M.Map String String)
+ -- ^ RDF cache
+ -> URI
+ -- ^ Base (source) URI.
+ -> (URI -> m T.Text)
+ -- ^ Retrieval function.
+ -> Triple
+ -- ^ A triple to read.
+ -> m [Inline]
+readTriple rdfc bu rf t = do
+ p <- readPredicate rdfc bu rf $ predicateOf t
+ pure $ intersperse Space $
+ concat [readNode bu $ subjectOf t, p, readNode bu $ objectOf t]
+
+-- | Parses an RDF (XML/RDF or Turtle). The provided document may also
+-- be an HTML document with an alternate version that is RDF;
+-- retrieves it with the provided retrieval function in such a case.
+parseRDF :: (MonadIO m, PandocMonad m)
+ => URI
+ -- ^ Base (source) URI.
+ -> (URI -> m T.Text)
+ -- ^ Retrieval function.
+ -> T.Text
+ -- ^ Document to parse.
+ -> m (RDF AdjHashMap)
+parseRDF bu rf t = do
+ let baseURI = uriToString id bu { uriFragment = "" } ""
+ -- check link rel
+ let doc = readString [withParseHTML yes, withWarnings no] (T.unpack t)
+ rc <- liftIO $ runX $ doc
+ >>> getXPathTreesInDoc
+ "//link[@rel=\"alternate\" and @type=\"application/rdf+xml\"]/@href/text()"
+ case rc of
+ [NTree (XText uri) []] -> case parseURIReference uri of
+ Nothing -> throwError $ PandocParseError $
+ "Failed to parse an alternate URI"
+ Just u' ->
+ if u' /= bu
+ then do
+ t' <- rf u'
+ parseRDF u' rf t'
+ else throwError $ PandocSomeError $
+ "A loop is detected in alternate document versions."
+ _ -> do
+ let burl = T.pack baseURI
+ tryParse :: (Rdf a, RdfParser p) => p -> Either ParseFailure (RDF a)
+ tryParse p = parseString p t
+ parsed :: Either ParseFailure (RDF AdjHashMap)
+ parsed = -- todo: alternatives should be used here
+ either
+ (const $ either (const $ tryParse NTriplesParser) pure
+ (tryParse $ TurtleParser (Just (BaseUrl burl)) (Just burl)))
+ pure
+ (tryParse $ XmlParser (Just (BaseUrl burl)) (Just burl))
+ case parsed of
+ Left err -> throwError $ PandocParseError $ show err
+ Right x -> pure x
+
+readRDF :: (MonadIO m, PandocMonad m)
+ => TVar (M.Map String String)
+ -- ^ RDF cache
+ -> URI
+ -- ^ Base (source) URI.
+ -> (URI -> m T.Text)
+ -- ^ Retrieval function.
+ -> T.Text
+ -- ^ Document to parse.
+ -> m Pandoc
+readRDF rdfc bu rf t = do
+ rdf <- parseRDF bu rf t
+ Pandoc mempty . pure . LineBlock
+ <$> mapM (readTriple rdfc bu rf) (expandTriples rdf)
diff --git a/pancake.1 b/pancake.1
index d82bca7..09cdcc6 100644
--- a/pancake.1
+++ b/pancake.1
@@ -32,7 +32,7 @@ Load configuration from a specified file.
.SH COMMANDS
.IP "\fBquit\fR or EOF"
-quit pancake, cleaning the cache
+quit pancake
.IP "\fB[\fR"
back
.IP "\fB]\fR"
diff --git a/pancake.cabal b/pancake.cabal
index dbd0f78..0cc2f41 100644
--- a/pancake.cabal
+++ b/pancake.cabal
@@ -1,11 +1,10 @@
name: pancake
version: 0.1.9
-synopsis: A CLI web/gopher/file browser
-description: Pancake is a CLI web/gopher/file browser inspired
- by Line Mode Browser. It relies on pandoc for
- parsing, on curl and other external applications
- for data loading, on emacs and other applications
- to provide user interfaces.
+synopsis: A CLI/Emacs web/gopher/file browser
+description: Pancake is a CLI/Emacs web browser. It relies on
+ pandoc for parsing, on curl and other external
+ applications for data loading, on emacs and other
+ applications to provide user interfaces.
tested-with: GHC == 8.0.1, GHC == 8.0.2, GHC == 8.2.1, GHC == 8.2.2
license: GPL-3
license-file: LICENSE
@@ -50,7 +49,9 @@ executable pancake
, data-default >= 0.7.1.1 && < 1
, directory >= 1.2.6.2 && < 2
, filepath >= 1.4.1.0 && < 2
+ , haskeline >= 0.7 && < 1
, hxt >= 9.3.1 && < 10
+ , hxt-xpath >= 9.1.2.2
, hxt-xslt >= 9.1 && < 10
, mtl >= 2.2.1 && < 3
, network-uri >= 2.6.1.0 && < 3
@@ -58,13 +59,14 @@ executable pancake
, pandoc-types >= 1.17.0.5 && < 2
, parsec >= 3.1.11 && < 4
, process >= 1.6 && < 2
+ , rdf4h >= 3 && < 4
, regex-base >= 0.93.2 && < 1
, regex-tdfa >= 1.2.2 && < 2
+ , stm >= 2.4 && < 3
, terminfo >= 0.4.0.2 && < 1
, text >= 1.2.2.2 && < 2
, unix >= 2.7.2.0 && < 3
, yaml >= 0.8.23.3 && < 1
- , haskeline >= 0.7 && < 1
-- hs-source-dirs:
default-language: Haskell2010
ghc-options: -Wall