summaryrefslogtreecommitdiff
path: root/Pancake
diff options
context:
space:
mode:
authordefanor <defanor@uberspace.net>2017-12-13 07:46:46 +0300
committerdefanor <defanor@uberspace.net>2017-12-13 07:46:46 +0300
commitf735c69d6d0d13858588c3c35317ef294f2382d6 (patch)
tree6d5e69b76a5d082b44d25a81db24e061043a3107 /Pancake
parent3c03b674fbec2dddce744365ddc6b96daf398864 (diff)
Add uncluttering/XSLT support
Diffstat (limited to 'Pancake')
-rw-r--r--Pancake/Configuration.hs3
-rw-r--r--Pancake/Unclutter.hs92
2 files changed, 95 insertions, 0 deletions
diff --git a/Pancake/Configuration.hs b/Pancake/Configuration.hs
index 2748907..ae42f65 100644
--- a/Pancake/Configuration.hs
+++ b/Pancake/Configuration.hs
@@ -67,6 +67,8 @@ data Config = Config { commands :: M.Map String String
, indentDivs :: Bool
-- ^ Whether to add indentation for elements
-- inside divs.
+ , unclutter :: M.Map String String
+ -- ^ XSLT file and URI regex.
} deriving (Generic, Show, Eq)
-- | For configuration parsing.
@@ -107,6 +109,7 @@ instance Default Config where
, historyDepth = 100
, referenceDigits = "0123456789"
, indentDivs = False
+ , unclutter = M.empty
}
where
curl = "curl --compressed -4 -L " ++
diff --git a/Pancake/Unclutter.hs b/Pancake/Unclutter.hs
new file mode 100644
index 0000000..9cf2331
--- /dev/null
+++ b/Pancake/Unclutter.hs
@@ -0,0 +1,92 @@
+{-
+Copyright (C) 2017 defanor <defanor@uberspace.net>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+-}
+
+{- |
+Module : Pancake.Unclutter
+Maintainer : defanor <defanor@uberspace.net>
+Stability : unstable
+Portability : non-portable (GHC extensions are used)
+
+An XSLT-based data extraction module.
+-}
+
+{-# LANGUAGE TupleSections #-}
+
+module Pancake.Unclutter ( tryUnclutter
+ , prepareUnclutter
+ ) where
+
+import qualified Data.ByteString.Char8 as BS
+import Control.Monad.IO.Class
+import qualified Data.Map as M
+import Network.URI
+import Data.Either
+import System.Directory
+import System.FilePath
+import Data.List
+import Text.Regex.TDFA.String
+import Text.Regex.Base.RegexLike
+import Text.XML.HXT.Core (readString, withParseHTML, runX, XmlTree, (>>>), yes)
+import Text.XML.HXT.DOM.ShowXml (xshow)
+import Text.XML.HXT.XSLT (xsltApplyStylesheetFromURI)
+import Data.Text.Encoding (decodeUtf8', decodeLatin1, encodeUtf8)
+import qualified Data.Text as T
+
+
+import Pancake.Common
+import Pancake.Configuration
+
+-- | Tries to unclutter a document.
+tryUnclutter :: MonadIO m
+ => [(Regex, String)]
+ -- ^ Obtained with 'prepareUnclutter'.
+ -> URI
+ -- ^ Document URI.
+ -> BS.ByteString
+ -- ^ Raw document.
+ -> m BS.ByteString
+tryUnclutter rs u d = do
+ let matches (r, _) = case execute r uStr of
+ Right (Just _) -> True
+ _ -> False
+ case find matches rs of
+ Just (_, fn) -> liftIO $ do
+ dir <- getXdgDirectory XdgConfig "pancake"
+ let src = dir </> "unclutter" </> fn <.> "xsl"
+ exists <- doesFileExist src
+ if exists
+ then do
+ let txt = T.unpack $ either (const $ decodeLatin1 d) id $ decodeUtf8' d
+ doc = readString [withParseHTML yes] txt
+ rc <- runX (doc >>> xsltApplyStylesheetFromURI src) :: IO [XmlTree]
+ pure $ case rc of
+ [] -> d
+ _ -> encodeUtf8 $ T.pack $ xshow rc
+ else pure d
+ Nothing -> pure d
+ where uStr = uriToString id u ""
+
+-- | Compiles regexps for uncluttering.
+prepareUnclutter :: MonadIO m => Config -> m [(Regex, String)]
+prepareUnclutter c = do
+ let re = map
+ (\(f, r) -> fmap (, f) (compile defaultCompOpt defaultExecOpt r))
+ $ M.toList $ unclutter c
+ errs = lefts re
+ compiled = rights re
+ mapM_ putErrLn errs
+ pure compiled