summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordefanor <defanor@uberspace.net>2018-02-21 18:55:51 +0300
committerdefanor <defanor@uberspace.net>2018-02-21 18:55:51 +0300
commit9939aa23bb11b22be9e570c972be6a898b9df8eb (patch)
tree07bc44208b4337b91cc64ec96a3e422b888cd604
parent99cd9c84abf3c52970981499875b402d5fb68085 (diff)
downloadpancake-9939aa23bb11b22be9e570c972be6a898b9df8eb.zip
pancake-9939aa23bb11b22be9e570c972be6a898b9df8eb.tar.gz
pancake-9939aa23bb11b22be9e570c972be6a898b9df8eb.tar.bz2
Use Redland bindings instead of rdf4h
It's faster, includes a proper triple store with querying, and generally should be more useful.
-rw-r--r--Pancake.hs6
-rw-r--r--Pancake/Reading.hs7
-rw-r--r--Text/Pandoc/Readers/RDF.hs211
-rw-r--r--pancake.cabal3
4 files changed, 82 insertions, 145 deletions
diff --git a/Pancake.hs b/Pancake.hs
index 38b6a7d..5420530 100644
--- a/Pancake.hs
+++ b/Pancake.hs
@@ -52,7 +52,6 @@ import Data.Version
import System.Console.GetOpt
import Text.Regex.TDFA
import qualified System.Console.Haskeline as HL
-import Control.Concurrent.STM.TVar
import Pancake.Common
import Pancake.Configuration
@@ -82,7 +81,6 @@ data LoopState = LS { history :: Sliding HistoryEntry
, interrupted :: Bool
, unclutterRegexps :: [(Regex, String)]
, columns :: Maybe Int
- , rdfCache :: TVar (M.Map String String)
}
-- | Main event loop's type.
@@ -150,7 +148,7 @@ loadDocument sType rawURI = do
case M.lookup ext (externalViewers $ conf st) of
Nothing -> do
uDoc <- tryUnclutter (unclutterRegexps st) effectiveURI rawDoc
- doc <- readDoc (conf st) (rdfCache st) uDoc fType effectiveURI
+ doc <- readDoc (conf st) uDoc fType effectiveURI
case doc of
Left err -> do
putErrLn $ show err
@@ -431,7 +429,6 @@ main = do
>>= \st -> command (parseCommand (conf st) (unwords cmd))
dir <- getXdgDirectory XdgConfig "pancake"
let hfn = dir </> "command_history"
- rdfc <- newTVarIO M.empty
_ <- runStateT
(HL.runInputT
(HL.setComplete complete HL.defaultSettings)
@@ -448,7 +445,6 @@ main = do
, interrupted = False
, unclutterRegexps = []
, columns = Nothing
- , rdfCache = rdfc
}
pure ()
run
diff --git a/Pancake/Reading.hs b/Pancake/Reading.hs
index 3a83e46..0124ec9 100644
--- a/Pancake/Reading.hs
+++ b/Pancake/Reading.hs
@@ -53,7 +53,6 @@ import Data.Version
import qualified Data.Map as M
import Control.Monad.IO.Class
import System.Directory
-import Control.Concurrent.STM.TVar
import System.Timeout
import Text.Pandoc.Readers.Plain
@@ -203,8 +202,6 @@ pEmacsMode = do
readDoc :: MonadIO m
=> Config
-- ^ Configuration.
- -> TVar (M.Map String String)
- -- ^ RDF cache
-> BS.ByteString
-- ^ Raw document data.
-> Maybe String
@@ -213,7 +210,7 @@ readDoc :: MonadIO m
-- ^ Document URI.
-> m (Either P.PandocError P.Pandoc)
-- ^ A parsed document.
-readDoc c rdfc out dt uri = do
+readDoc c out dt uri = do
term <- liftIO setupTermFromEnv
let (reader, exts) = either (const plain) id $
maybe (Left "no type suggestions") byExtension dt
@@ -251,7 +248,7 @@ readDoc c rdfc out dt uri = do
html = P.getReader "html"
plain = (P.TextReader . const $ readPlain, P.emptyExtensions)
gopher = pure (P.TextReader . const $ readGopher, P.emptyExtensions)
- rdf = ( P.TextReader . const $ readRDF rdfc uri retrieveRelative
+ rdf = ( P.TextReader . const $ readRDF uri retrieveRelative
, P.emptyExtensions)
byExtension' ext = byExtension $ dropWhile (== '.') ext
byExtension "md" = P.getReader "markdown"
diff --git a/Text/Pandoc/Readers/RDF.hs b/Text/Pandoc/Readers/RDF.hs
index 9a7b6de..081b71e 100644
--- a/Text/Pandoc/Readers/RDF.hs
+++ b/Text/Pandoc/Readers/RDF.hs
@@ -22,7 +22,8 @@ Stability : unstable
This module is for RDF documents reading. It's not strictly a parser,
since it requests additional external documents to read predicate
-labels from, and generally controls how those would be rendered.
+labels from, and looks up for alternate versions, and decides how to
+render those.
-}
{-# LANGUAGE OverloadedStrings #-}
@@ -31,158 +32,102 @@ module Text.Pandoc.Readers.RDF ( readRDF ) where
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad)
-import Data.RDF
+-- import Data.RDF
import qualified Data.Text as T
-import Network.URI (URI, uriFragment, parseURIReference, relativeFrom,
- uriToString)
+import Network.URI (URI, parseURIReference, relativeFrom, uriToString)
import Data.List (intersperse)
import Control.Monad.IO.Class (liftIO, MonadIO)
import Text.XML.HXT.Core ( readString, withParseHTML, withWarnings,
runX, (>>>), yes, no, XNode(..))
import Data.Tree.NTree.TypeDefs (NTree(..))
import Text.XML.HXT.XPath.Arrows (getXPathTreesInDoc)
-import Control.Monad.Error.Class (throwError, catchError)
+import Control.Monad.Error.Class (throwError)
import Text.Pandoc.Error (PandocError(..))
-import Control.Concurrent.STM.TVar (TVar, readTVarIO, modifyTVar)
-import Control.Monad.STM (atomically)
-import qualified Data.Map as M
+import Control.Exception
+import System.Directory
+import Foreign
+import Redland.MidLevel
+import Redland.LowLevel
+import Redland.Util
--- | Reads a literal value.
-readLiteral :: LValue -> [Inline]
-readLiteral (PlainL t) = pure . Str $ T.unpack t
-readLiteral (PlainLL t _) = pure . Str $ T.unpack t
-readLiteral (TypedL t _) = pure . Str $ T.unpack t
--- | Reads an arbitrary 'Node', used mostly for objects and subjects.
-readNode :: URI -> Node -> [Inline]
-readNode _ (LNode l) = readLiteral l
-readNode bu (UNode u) = case parseURIReference (T.unpack u) of
- Just u' -> let u'' = relativeFrom u' bu
- us = uriToString id u'' ""
- in [Link (us, [], []) [] (us, us)]
- Nothing -> let s = (T.unpack u) in [Link (s, [], []) [] (s, s)]
-readNode _ (BNode t) = let s = T.unpack t
- in [Link (s, [], []) [] (s, s)]
-readNode _ (BNodeGen i) = let s = show i
- in [Link (s, [], []) [] (s, s)]
-
--- | Reads a predicate, looks up its label in external documents using
--- the provided retrieval function.
-readPredicate :: (MonadIO m, PandocMonad m)
- => TVar (M.Map String String)
- -- ^ RDF cache
- -> URI
- -- ^ Base URI.
- -> (URI -> m T.Text)
- -- ^ Retrieval function.
- -> Node
- -- ^ A node to read.
- -> m [Inline]
-readPredicate rdfc _ rf (UNode u) = do
- rdfc' <- liftIO $ readTVarIO rdfc
- l <- case M.lookup uriStr rdfc' of
- Just cl -> pure cl
- Nothing -> do
- (u', doc) <- case parseURIReference uriStr of
- Just r -> do
- ret <- rf r
- pure (r, ret)
- Nothing -> throwError $ PandocParseError $
- "Failed to parse an URI reference"
- -- use URIs when failing to read a label
- catchError (labelFromRDF u' doc) $ const $ pure uriStr
- pure [Link (uriStr, [], []) [] (uriStr, l)]
- where
- uriStr = T.unpack u
- labelFromRDF u' doc = do
- rdf <- parseRDF u' rf doc
- let label = (UNode "http://www.w3.org/2000/01/rdf-schema#label")
- -- 'query' doesn't expand triples, so filtering manually
- l <- case filter
- (\x -> subjectOf x == (UNode u) && predicateOf x == label)
- (expandTriples rdf) of
- -- todo: there could be multiple labels in different
- -- languages, handle that.
- [sl] -> case objectOf sl of
- (LNode (PlainL sl')) -> pure $ T.unpack sl'
- _ -> pure $ show sl
- _ -> pure uriStr
- liftIO $ atomically $ modifyTVar rdfc $ M.insert uriStr l
- pure l
-readPredicate _ bu _ n = pure $ readNode bu n
-
--- | Reads a triple.
-readTriple :: (MonadIO m, PandocMonad m)
- => TVar (M.Map String String)
- -- ^ RDF cache
- -> URI
- -- ^ Base (source) URI.
- -> (URI -> m T.Text)
- -- ^ Retrieval function.
- -> Triple
- -- ^ A triple to read.
- -> m [Inline]
-readTriple rdfc bu rf t = do
- p <- readPredicate rdfc bu rf $ predicateOf t
- pure $ intersperse Space $
- concat [readNode bu $ subjectOf t, p, readNode bu $ objectOf t]
-
--- | Parses an RDF (XML/RDF or Turtle). The provided document may also
--- be an HTML document with an alternate version that is RDF;
--- retrieves it with the provided retrieval function in such a case.
-parseRDF :: (MonadIO m, PandocMonad m)
- => URI
- -- ^ Base (source) URI.
- -> (URI -> m T.Text)
- -- ^ Retrieval function.
- -> T.Text
- -- ^ Document to parse.
- -> m (RDF AdjHashMap)
-parseRDF bu rf t = do
- let baseURI = uriToString id bu { uriFragment = "" } ""
- -- check link rel
+detectAlternateVersion :: T.Text -> IO (Maybe URI)
+detectAlternateVersion t = do
let doc = readString [withParseHTML yes, withWarnings no] (T.unpack t)
rc <- liftIO $ runX $ doc
>>> getXPathTreesInDoc
"//link[@rel=\"alternate\" and @type=\"application/rdf+xml\"]/@href/text()"
- case rc of
- [NTree (XText uri) []] -> case parseURIReference uri of
- Nothing -> throwError $ PandocParseError $
- "Failed to parse an alternate URI"
- Just u' ->
- if u' /= bu
- then do
- t' <- rf u'
- parseRDF u' rf t'
- else throwError $ PandocSomeError $
- "A loop is detected in alternate document versions."
- _ -> do
- let burl = T.pack baseURI
- tryParse :: (Rdf a, RdfParser p) => p -> Either ParseFailure (RDF a)
- tryParse p = parseString p t
- parsed :: Either ParseFailure (RDF AdjHashMap)
- parsed = -- todo: alternatives should be used here
- either
- (const $ either (const $ tryParse NTriplesParser) pure
- (tryParse $ TurtleParser (Just (BaseUrl burl)) (Just burl)))
- pure
- (tryParse $ XmlParser (Just (BaseUrl burl)) (Just burl))
- case parsed of
- Left err -> throwError $ PandocParseError $ show err
- Right x -> pure x
+ pure $ case rc of
+ [NTree (XText uri) []] -> parseURIReference uri
+ _ -> Nothing
+
readRDF :: (MonadIO m, PandocMonad m)
- => TVar (M.Map String String)
- -- ^ RDF cache
- -> URI
+ => URI
-- ^ Base (source) URI.
-> (URI -> m T.Text)
-- ^ Retrieval function.
-> T.Text
-- ^ Document to parse.
-> m Pandoc
-readRDF rdfc bu rf t = do
- rdf <- parseRDF bu rf t
- Pandoc mempty . pure . LineBlock
- <$> mapM (readTriple rdfc bu rf) (expandTriples rdf)
+readRDF bu rf t = do
+ alt <- liftIO $ detectAlternateVersion t
+ case alt of
+ Nothing -> do
+ r <- liftIO $ handle handleRE parseDoc
+ case r of
+ Left e -> throwError $ PandocParseError e
+ Right r' -> pure r'
+ Just alt' -> do
+ newDoc <- rf alt'
+ readRDF alt' rf newDoc
+ where
+ handleRE :: RedlandException -> IO (Either String Pandoc)
+ handleRE e = pure $ Left $ show e
+ parseDoc :: IO (Either String Pandoc)
+ parseDoc = Right . Pandoc mempty . pure . LineBlock <$> do
+ withWSMU "memory" [] "temporary" "" (uriToString id bu "") $
+ \world _ model uri -> do
+ tryParseStringIntoModel world ["turtle", "rdfxml", "n-triples"]
+ model uri (T.unpack t)
+ withQuery world model "sparql" "SELECT ?s ?p ?o WHERE { ?s ?p ?o }"
+ (Just uri) $ \results -> do
+ cacheDir <- getXdgDirectory XdgCache "pancake"
+ createDirectoryIfMissing True cacheDir
+ withWSMU "hashes" [("hash-type", "bdb"), ("dir", cacheDir)]
+ "rdf-cache" "" (uriToString id bu "") $ \world' _ model' uri' ->
+ mapM (printTriple (world', model', uri')) results
+ -- todo: should probably use statements instead, need to complete
+ -- that part of the bindings.
+ printTriple :: ( ForeignPtr RedlandWorld
+ , ForeignPtr RedlandModel
+ , ForeignPtr RedlandURI)
+ -> [(String, Node)]
+ -> IO [Inline]
+ printTriple wmu l =
+ concat . intersperse [Space] <$> mapM (printNode wmu . snd) l
+ printNode :: ( ForeignPtr RedlandWorld
+ , ForeignPtr RedlandModel
+ , ForeignPtr RedlandURI)
+ -> Node
+ -> IO [Inline]
+ printNode _ (BlankNode s) = pure [Str s]
+ printNode _ (LiteralNode s) = pure [Str s]
+ printNode (w, m, u) (ResourceNode s) =
+ let su = showURI s
+ q = "SELECT ?label WHERE { <" ++ s ++
+ "> <http://www.w3.org/2000/01/rdf-schema#label> ?label }"
+ in do
+ l <- withQuery w m "sparql" q (Just u) $ \r ->
+ case r of
+ ([("label", LiteralNode label)]:_) -> pure label
+ _ -> pure su
+ pure [Link (su, [], []) [] (su, l)]
+ showURI :: String -> String
+ showURI u = case parseURIReference u of
+ Just u' -> uriToString id (relativeFrom u' bu) ""
+ Nothing -> u
+
+-- rdfproc rdf-cache parse http://xmlns.com/foaf/0.1/
+-- rdfproc rdf-cache parse http://www.w3.org/1999/02/22-rdf-syntax-ns
diff --git a/pancake.cabal b/pancake.cabal
index 0cc2f41..a1455c0 100644
--- a/pancake.cabal
+++ b/pancake.cabal
@@ -59,14 +59,13 @@ executable pancake
, pandoc-types >= 1.17.0.5 && < 2
, parsec >= 3.1.11 && < 4
, process >= 1.6 && < 2
- , rdf4h >= 3 && < 4
, regex-base >= 0.93.2 && < 1
, regex-tdfa >= 1.2.2 && < 2
- , stm >= 2.4 && < 3
, terminfo >= 0.4.0.2 && < 1
, text >= 1.2.2.2 && < 2
, unix >= 2.7.2.0 && < 3
, yaml >= 0.8.23.3 && < 1
+ , redland == 0.1.0.0
-- hs-source-dirs:
default-language: Haskell2010
ghc-options: -Wall