summaryrefslogtreecommitdiff
path: root/Text/Pandoc/Readers/RDF.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Text/Pandoc/Readers/RDF.hs')
-rw-r--r--Text/Pandoc/Readers/RDF.hs211
1 files changed, 78 insertions, 133 deletions
diff --git a/Text/Pandoc/Readers/RDF.hs b/Text/Pandoc/Readers/RDF.hs
index 9a7b6de..081b71e 100644
--- a/Text/Pandoc/Readers/RDF.hs
+++ b/Text/Pandoc/Readers/RDF.hs
@@ -22,7 +22,8 @@ Stability : unstable
This module is for RDF documents reading. It's not strictly a parser,
since it requests additional external documents to read predicate
-labels from, and generally controls how those would be rendered.
+labels from, and looks up for alternate versions, and decides how to
+render those.
-}
{-# LANGUAGE OverloadedStrings #-}
@@ -31,158 +32,102 @@ module Text.Pandoc.Readers.RDF ( readRDF ) where
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad)
-import Data.RDF
+-- import Data.RDF
import qualified Data.Text as T
-import Network.URI (URI, uriFragment, parseURIReference, relativeFrom,
- uriToString)
+import Network.URI (URI, parseURIReference, relativeFrom, uriToString)
import Data.List (intersperse)
import Control.Monad.IO.Class (liftIO, MonadIO)
import Text.XML.HXT.Core ( readString, withParseHTML, withWarnings,
runX, (>>>), yes, no, XNode(..))
import Data.Tree.NTree.TypeDefs (NTree(..))
import Text.XML.HXT.XPath.Arrows (getXPathTreesInDoc)
-import Control.Monad.Error.Class (throwError, catchError)
+import Control.Monad.Error.Class (throwError)
import Text.Pandoc.Error (PandocError(..))
-import Control.Concurrent.STM.TVar (TVar, readTVarIO, modifyTVar)
-import Control.Monad.STM (atomically)
-import qualified Data.Map as M
+import Control.Exception
+import System.Directory
+import Foreign
+import Redland.MidLevel
+import Redland.LowLevel
+import Redland.Util
--- | Reads a literal value.
-readLiteral :: LValue -> [Inline]
-readLiteral (PlainL t) = pure . Str $ T.unpack t
-readLiteral (PlainLL t _) = pure . Str $ T.unpack t
-readLiteral (TypedL t _) = pure . Str $ T.unpack t
--- | Reads an arbitrary 'Node', used mostly for objects and subjects.
-readNode :: URI -> Node -> [Inline]
-readNode _ (LNode l) = readLiteral l
-readNode bu (UNode u) = case parseURIReference (T.unpack u) of
- Just u' -> let u'' = relativeFrom u' bu
- us = uriToString id u'' ""
- in [Link (us, [], []) [] (us, us)]
- Nothing -> let s = (T.unpack u) in [Link (s, [], []) [] (s, s)]
-readNode _ (BNode t) = let s = T.unpack t
- in [Link (s, [], []) [] (s, s)]
-readNode _ (BNodeGen i) = let s = show i
- in [Link (s, [], []) [] (s, s)]
-
--- | Reads a predicate, looks up its label in external documents using
--- the provided retrieval function.
-readPredicate :: (MonadIO m, PandocMonad m)
- => TVar (M.Map String String)
- -- ^ RDF cache
- -> URI
- -- ^ Base URI.
- -> (URI -> m T.Text)
- -- ^ Retrieval function.
- -> Node
- -- ^ A node to read.
- -> m [Inline]
-readPredicate rdfc _ rf (UNode u) = do
- rdfc' <- liftIO $ readTVarIO rdfc
- l <- case M.lookup uriStr rdfc' of
- Just cl -> pure cl
- Nothing -> do
- (u', doc) <- case parseURIReference uriStr of
- Just r -> do
- ret <- rf r
- pure (r, ret)
- Nothing -> throwError $ PandocParseError $
- "Failed to parse an URI reference"
- -- use URIs when failing to read a label
- catchError (labelFromRDF u' doc) $ const $ pure uriStr
- pure [Link (uriStr, [], []) [] (uriStr, l)]
- where
- uriStr = T.unpack u
- labelFromRDF u' doc = do
- rdf <- parseRDF u' rf doc
- let label = (UNode "http://www.w3.org/2000/01/rdf-schema#label")
- -- 'query' doesn't expand triples, so filtering manually
- l <- case filter
- (\x -> subjectOf x == (UNode u) && predicateOf x == label)
- (expandTriples rdf) of
- -- todo: there could be multiple labels in different
- -- languages, handle that.
- [sl] -> case objectOf sl of
- (LNode (PlainL sl')) -> pure $ T.unpack sl'
- _ -> pure $ show sl
- _ -> pure uriStr
- liftIO $ atomically $ modifyTVar rdfc $ M.insert uriStr l
- pure l
-readPredicate _ bu _ n = pure $ readNode bu n
-
--- | Reads a triple.
-readTriple :: (MonadIO m, PandocMonad m)
- => TVar (M.Map String String)
- -- ^ RDF cache
- -> URI
- -- ^ Base (source) URI.
- -> (URI -> m T.Text)
- -- ^ Retrieval function.
- -> Triple
- -- ^ A triple to read.
- -> m [Inline]
-readTriple rdfc bu rf t = do
- p <- readPredicate rdfc bu rf $ predicateOf t
- pure $ intersperse Space $
- concat [readNode bu $ subjectOf t, p, readNode bu $ objectOf t]
-
--- | Parses an RDF (XML/RDF or Turtle). The provided document may also
--- be an HTML document with an alternate version that is RDF;
--- retrieves it with the provided retrieval function in such a case.
-parseRDF :: (MonadIO m, PandocMonad m)
- => URI
- -- ^ Base (source) URI.
- -> (URI -> m T.Text)
- -- ^ Retrieval function.
- -> T.Text
- -- ^ Document to parse.
- -> m (RDF AdjHashMap)
-parseRDF bu rf t = do
- let baseURI = uriToString id bu { uriFragment = "" } ""
- -- check link rel
+detectAlternateVersion :: T.Text -> IO (Maybe URI)
+detectAlternateVersion t = do
let doc = readString [withParseHTML yes, withWarnings no] (T.unpack t)
rc <- liftIO $ runX $ doc
>>> getXPathTreesInDoc
"//link[@rel=\"alternate\" and @type=\"application/rdf+xml\"]/@href/text()"
- case rc of
- [NTree (XText uri) []] -> case parseURIReference uri of
- Nothing -> throwError $ PandocParseError $
- "Failed to parse an alternate URI"
- Just u' ->
- if u' /= bu
- then do
- t' <- rf u'
- parseRDF u' rf t'
- else throwError $ PandocSomeError $
- "A loop is detected in alternate document versions."
- _ -> do
- let burl = T.pack baseURI
- tryParse :: (Rdf a, RdfParser p) => p -> Either ParseFailure (RDF a)
- tryParse p = parseString p t
- parsed :: Either ParseFailure (RDF AdjHashMap)
- parsed = -- todo: alternatives should be used here
- either
- (const $ either (const $ tryParse NTriplesParser) pure
- (tryParse $ TurtleParser (Just (BaseUrl burl)) (Just burl)))
- pure
- (tryParse $ XmlParser (Just (BaseUrl burl)) (Just burl))
- case parsed of
- Left err -> throwError $ PandocParseError $ show err
- Right x -> pure x
+ pure $ case rc of
+ [NTree (XText uri) []] -> parseURIReference uri
+ _ -> Nothing
+
readRDF :: (MonadIO m, PandocMonad m)
- => TVar (M.Map String String)
- -- ^ RDF cache
- -> URI
+ => URI
-- ^ Base (source) URI.
-> (URI -> m T.Text)
-- ^ Retrieval function.
-> T.Text
-- ^ Document to parse.
-> m Pandoc
-readRDF rdfc bu rf t = do
- rdf <- parseRDF bu rf t
- Pandoc mempty . pure . LineBlock
- <$> mapM (readTriple rdfc bu rf) (expandTriples rdf)
+readRDF bu rf t = do
+ alt <- liftIO $ detectAlternateVersion t
+ case alt of
+ Nothing -> do
+ r <- liftIO $ handle handleRE parseDoc
+ case r of
+ Left e -> throwError $ PandocParseError e
+ Right r' -> pure r'
+ Just alt' -> do
+ newDoc <- rf alt'
+ readRDF alt' rf newDoc
+ where
+ handleRE :: RedlandException -> IO (Either String Pandoc)
+ handleRE e = pure $ Left $ show e
+ parseDoc :: IO (Either String Pandoc)
+ parseDoc = Right . Pandoc mempty . pure . LineBlock <$> do
+ withWSMU "memory" [] "temporary" "" (uriToString id bu "") $
+ \world _ model uri -> do
+ tryParseStringIntoModel world ["turtle", "rdfxml", "n-triples"]
+ model uri (T.unpack t)
+ withQuery world model "sparql" "SELECT ?s ?p ?o WHERE { ?s ?p ?o }"
+ (Just uri) $ \results -> do
+ cacheDir <- getXdgDirectory XdgCache "pancake"
+ createDirectoryIfMissing True cacheDir
+ withWSMU "hashes" [("hash-type", "bdb"), ("dir", cacheDir)]
+ "rdf-cache" "" (uriToString id bu "") $ \world' _ model' uri' ->
+ mapM (printTriple (world', model', uri')) results
+ -- todo: should probably use statements instead, need to complete
+ -- that part of the bindings.
+ printTriple :: ( ForeignPtr RedlandWorld
+ , ForeignPtr RedlandModel
+ , ForeignPtr RedlandURI)
+ -> [(String, Node)]
+ -> IO [Inline]
+ printTriple wmu l =
+ concat . intersperse [Space] <$> mapM (printNode wmu . snd) l
+ printNode :: ( ForeignPtr RedlandWorld
+ , ForeignPtr RedlandModel
+ , ForeignPtr RedlandURI)
+ -> Node
+ -> IO [Inline]
+ printNode _ (BlankNode s) = pure [Str s]
+ printNode _ (LiteralNode s) = pure [Str s]
+ printNode (w, m, u) (ResourceNode s) =
+ let su = showURI s
+ q = "SELECT ?label WHERE { <" ++ s ++
+ "> <http://www.w3.org/2000/01/rdf-schema#label> ?label }"
+ in do
+ l <- withQuery w m "sparql" q (Just u) $ \r ->
+ case r of
+ ([("label", LiteralNode label)]:_) -> pure label
+ _ -> pure su
+ pure [Link (su, [], []) [] (su, l)]
+ showURI :: String -> String
+ showURI u = case parseURIReference u of
+ Just u' -> uriToString id (relativeFrom u' bu) ""
+ Nothing -> u
+
+-- rdfproc rdf-cache parse http://xmlns.com/foaf/0.1/
+-- rdfproc rdf-cache parse http://www.w3.org/1999/02/22-rdf-syntax-ns