From e76c1f7206a730b474b840032d8617f2eaaa1bf9 Mon Sep 17 00:00:00 2001 From: fr33domlover Date: Tue, 17 May 2016 20:34:22 +0000 Subject: [PATCH] Compressed inventory parser and DarcsRev TH utils --- config/models | 9 + src/Darcs/Local/Hash/Codec.hs | 49 ++++ src/Darcs/Local/Hash/Types.hs | 50 +++++ .../Local/{PatchInfo => Inventory}/Parser.hs | 209 +++++++++++------- src/Darcs/Local/Inventory/Read.hs | 96 ++++++++ src/Darcs/Local/Inventory/Types.hs | 74 +++++++ src/Darcs/Local/Patch.hs | 107 +++++++++ src/Darcs/Local/Patch/Types.hs | 72 ++++++ src/Darcs/Local/PatchInfo/Types.hs | 119 ---------- src/Data/Attoparsec/ByteString/Local.hs | 37 +++- src/Development/DarcsRev.hs | 186 ++++++++++++++++ src/Vervis/Darcs.hs | 15 +- vervis.cabal | 13 +- 13 files changed, 827 insertions(+), 209 deletions(-) create mode 100644 src/Darcs/Local/Hash/Codec.hs create mode 100644 src/Darcs/Local/Hash/Types.hs rename src/Darcs/Local/{PatchInfo => Inventory}/Parser.hs (59%) create mode 100644 src/Darcs/Local/Inventory/Read.hs create mode 100644 src/Darcs/Local/Inventory/Types.hs create mode 100644 src/Darcs/Local/Patch.hs create mode 100644 src/Darcs/Local/Patch/Types.hs delete mode 100644 src/Darcs/Local/PatchInfo/Types.hs create mode 100644 src/Development/DarcsRev.hs diff --git a/config/models b/config/models index 4ed4d12..72bdb1e 100644 --- a/config/models +++ b/config/models @@ -77,3 +77,12 @@ Ticket closer PersonId UniqueTicket project number + +Discussion + +Message + author PersonId + created UTCTime + content Text -- Assume this is Pandoc Markdown + parent MessageId Maybe + root DiscussionId diff --git a/src/Darcs/Local/Hash/Codec.hs b/src/Darcs/Local/Hash/Codec.hs new file mode 100644 index 0000000..dc787f2 --- /dev/null +++ b/src/Darcs/Local/Hash/Codec.hs @@ -0,0 +1,49 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +module Darcs.Local.Hash.Codec + ( encodePatchHash + , encodeInventoryHash + ) +where + +import Prelude + +import Data.ByteString (ByteString) +import Data.Monoid ((<>)) + +import qualified Data.ByteString as B (length, replicate) +import qualified Data.ByteString.Base16 as B16 (encode) +import qualified Data.ByteString.Lex.Integral as BX (packDecimal) + +import Darcs.Local.Hash.Types + +encodeHash :: ByteString -> ByteString +encodeHash = B16.encode + +encodeSize :: Int -> ByteString +encodeSize n = + case BX.packDecimal n of + Nothing -> error "negative size in sizehash" + Just b -> + if B.length b < 10 + then B.replicate (10 - B.length b) 0x30 <> b + else b + +encodePatchHash :: PatchHash -> ByteString +encodePatchHash (PatchHash h) = encodeHash h + +encodeInventoryHash :: InventoryHash -> ByteString +encodeInventoryHash (InventoryHash s h) = encodeSize s <> "-" <> encodeHash h diff --git a/src/Darcs/Local/Hash/Types.hs b/src/Darcs/Local/Hash/Types.hs new file mode 100644 index 0000000..30d5215 --- /dev/null +++ b/src/Darcs/Local/Hash/Types.hs @@ -0,0 +1,50 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +module Darcs.Local.Hash.Types + ( PatchHash (..) + , ContentHash (..) + , InventoryHash (..) + , PristineHash (..) + ) +where + +import Prelude + +import Data.ByteString (ByteString) + +-- | A SHA1 hash of the patch info (author, title, description including junk, +-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20 +-- bytes. +newtype PatchHash = PatchHash { unPatchHash :: ByteString } + +-- | Content size and SHA256 hash of a patch's info and content. The hash is in +-- binary form, not hex, i.e. its size is always 32 bytes. +data ContentHash = ContentHash + { chSize :: Int + , chHash :: ByteString + } + +-- | Content size and SHA256 hash of an inventory (a patch set in a single +-- invetory file). The hash is in binary form, not hex, i.e. its size is always +-- 32 bytes. +data InventoryHash = InventoryHash + { ihSize :: Int + , ihHash :: ByteString + } + +-- | A SHA256 hash of the entire recorded state of the repo. The hash is in +-- binary form, not hex, i.e. its size is always 32 bytes. +newtype PristineHash = PristineHash { unPristineHash :: ByteString } diff --git a/src/Darcs/Local/PatchInfo/Parser.hs b/src/Darcs/Local/Inventory/Parser.hs similarity index 59% rename from src/Darcs/Local/PatchInfo/Parser.hs rename to src/Darcs/Local/Inventory/Parser.hs index 41645b8..50c0827 100644 --- a/src/Darcs/Local/PatchInfo/Parser.hs +++ b/src/Darcs/Local/Inventory/Parser.hs @@ -20,10 +20,16 @@ -- doesn't result with the exact original text, we'll have the wrong hash. To -- make sure it's exactly the right content, we use ByteString first and then -- later decode to Text. -module Darcs.Local.PatchInfo.Parser - ( readPatchInfoCount - , readPatchInfoAll - , readPatchInfoPage +module Darcs.Local.Inventory.Parser + ( latestInventoryPristineP + , latestInventorySizeP + , latestInventoryPrevSizeP + , latestInventoryPageP + , latestInventoryAllP + , earlyInventorySizeP + , earlyInventoryPrevSizeP + , earlyInventoryPageP + , earlyInventoryAllP ) where @@ -47,7 +53,10 @@ import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString.Lex.Integral as BX import Control.Applicative.Local -import Darcs.Local.PatchInfo.Types +import Darcs.Local.Hash.Types +import Darcs.Local.Inventory.Types +import Darcs.Local.Patch +import Darcs.Local.Patch.Types import Data.Attoparsec.ByteString.Local import Data.ByteString.Local (stripPrefix) import Data.Text.UTF8.Local (decodeStrict) @@ -193,37 +202,11 @@ patchInfoRawP = do , pirInverted = inverted } -hashPatchInfo :: HashAlgorithm a => a -> PatchInfoRaw -> Digest a -hashPatchInfo _algo pir = - let add = flip hashUpdate - adds = flip hashUpdates - in hashFinalize $ - add (if pirInverted pir then "t" else "f" :: ByteString) $ - adds (pirDescription pir) $ - add (pirJunkContent pir) $ - add (pirJunkPrefix pir) $ - add (fst $ pirTime pir) $ - add (pirAuthor pir) $ - add (pirTitle pir) - hashInit - -refinePatchInfo :: PatchInfoRaw -> PatchInfo -refinePatchInfo pir = - let rtitle = pirTitle pir - (title, tag) = case stripPrefix "TAG " rtitle of - Nothing -> (rtitle, False) - Just rest -> (rest, True) - description = case pirDescription pir of - [] -> Nothing - l -> Just $ BC.unlines l - in PatchInfo - { piAuthor = decodeStrict $ pirAuthor pir - , piHash = uncurry ContentHash $ pirHash pir - , piTitle = decodeStrict title - , piDescription = decodeStrict <$> description - , piTag = tag - , piTime = snd $ pirTime pir - } +-- TODO +-- +-- * Finish DarcsRev code, make it build +-- * Update darcs change view code to work correctly in the case of previous +-- inventories, test vervis against libravatar for that -- | Parse patch metadata and compute the metadata's hash, which can be used as -- a patch identifier for lookup and matching. @@ -232,26 +215,119 @@ patchInfoP = do pir <- patchInfoRawP return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir) -patchInfosCountP :: Parser Int -patchInfosCountP = do - -- pristine hash - skipLine - -- previous inventory - optional $ - eol *> string "Starting" *> skipRestOfLine *> eol *> - skipLine - -- patch info - n <- length <$> (many $ eol *> skipPatchP) - eol - return n +tagInfoP :: Parser (TagInfo, PatchHash) +tagInfoP = do + (pi, ph) <- patchInfoP + case patchToTag pi of + Nothing -> fail "Expected a tag, got a patch that isn't a tag" + Just ti -> return (ti, ph) -patchInfosAllP :: Parser PatchSeq -patchInfosAllP = PatchSeq +------------------------------------------------------------------------------- +-- Latest inventory +------------------------------------------------------------------------------- + +latestInventoryPristineP :: Parser PristineHash +latestInventoryPristineP = pristineP + +latestInventorySizeP :: Parser Int +latestInventorySizeP = + -- pristine hash + skipLine *> + -- previous inventory + optional + ( eol *> string "Starting" *> skipRestOfLine *> + eol *> skipLine + ) *> + -- patch info + (length <$> many (eol *> skipPatchP)) <* + eol + +latestInventoryPrevSizeP :: Parser (Maybe InventoryHash, Int) +latestInventoryPrevSizeP = + liftA2 (,) + ( -- pristine hash + skipLine *> + -- previous inventory + optional (eol *> prevInvP) + ) + ( -- patch info + (length <$> many (eol *> skipPatchP)) <* + eol + ) + +latestInventoryPageP + :: Int -> Int -> Parser (Maybe InventoryHash, [(PatchInfo, PatchHash)]) +latestInventoryPageP off lim = + let f mPrevTag pis = + case mPrevTag of + Nothing -> (Nothing, pis) + Just (ih, pi) -> (Just ih, pi : pis) + in liftA2 f + -- pristine + ( skipLine *> + -- previous inventory and clean tag + optional (liftA2 (,) (eol *> prevInvP) (eol *> patchInfoP)) <* + -- skip offset + replicateM_ off (eol *> skipPatchP) + ) + -- take limit + (atMost lim $ eol *> patchInfoP) + +latestInventoryAllP :: Parser LatestInventory +latestInventoryAllP = LatestInventory <$> pristineP - <*> optional (eol *> prevInvP) + <*> optional (liftA2 (,) (eol *> prevInvP) (eol *> tagInfoP)) <*> many (eol *> patchInfoP) <* eol +------------------------------------------------------------------------------- +-- Early inventory +------------------------------------------------------------------------------- + +earlyInventorySizeP :: Parser Int +earlyInventorySizeP = + -- previous inventory + optional + ( string "Starting" *> skipRestOfLine *> + eol *> skipLine + ) *> + -- patch info + (length <$> many (eol *> skipPatchP)) <* + eol + +earlyInventoryPrevSizeP :: Parser (Maybe InventoryHash, Int) +earlyInventoryPrevSizeP = + liftA2 (,) + -- previous inventory + (optional $ prevInvP <* eol) + -- patch info + (length <$> many (skipPatchP *> eol)) + +earlyInventoryPageP + :: Int -> Int -> Parser (Maybe InventoryHash, [(PatchInfo, PatchHash)]) +earlyInventoryPageP off lim = + let f mPrevTag pis = + case mPrevTag of + Nothing -> (Nothing, pis) + Just (ih, pi) -> (Just ih, pi : pis) + in liftA2 f + -- previous inventory and clean tag + ( optional (liftA2 (,) (prevInvP <* eol) (patchInfoP <* eol)) <* + -- skip offset + replicateM_ off (skipPatchP *> eol) + ) + -- take limit + (atMost lim $ patchInfoP <* eol) + +earlyInventoryAllP :: Parser (Either EarliestInventory MiddleInventory) +earlyInventoryAllP = + let f Nothing pis = Left $ EarliestInventory pis + f (Just (prev, ti)) pis = Right $ MiddleInventory prev ti pis + in liftA2 f + (optional $ liftA2 (,) (prevInvP <* eol) (tagInfoP <* eol)) + (many (patchInfoP <* eol)) + +{- patchInfosOffsetP :: Int -> Parser PatchSeq patchInfosOffsetP off = PatchSeq <$> pristineP @@ -261,38 +337,9 @@ patchInfosOffsetP off = PatchSeq ) <* eol - patchInfosLimitP :: Int -> Parser PatchSeq patchInfosLimitP lim = PatchSeq <$> pristineP <*> optional (eol *> prevInvP) <*> atMost lim (eol *> patchInfoP) - -patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq -patchInfosOffsetLimitP off lim = PatchSeq - <$> pristineP - <*> optional (eol *> prevInvP) - <*> ( replicateM_ off (eol *> skipPatchP) *> - atMost lim (eol *> patchInfoP) - ) - -darcsDir :: FilePath -darcsDir = "_darcs" - -inventoryFile :: FilePath -inventoryFile = "hashed_inventory" - -readPatchInfoCount :: FilePath -> IO (Either String Int) -readPatchInfoCount repoPath = do - let invPath = repoPath darcsDir inventoryFile - parseFileIncremental invPath $ patchInfosCountP <* endOfInput - -readPatchInfoAll :: FilePath -> IO (Either String PatchSeq) -readPatchInfoAll repoPath = do - let invPath = repoPath darcsDir inventoryFile - parseFileIncremental invPath $ patchInfosAllP <* endOfInput - -readPatchInfoPage :: Int -> Int -> FilePath -> IO (Either String PatchSeq) -readPatchInfoPage off lim repoPath = do - let invPath = repoPath darcsDir inventoryFile - parseFileIncremental invPath $ patchInfosOffsetLimitP off lim +-} diff --git a/src/Darcs/Local/Inventory/Read.hs b/src/Darcs/Local/Inventory/Read.hs new file mode 100644 index 0000000..5ab243e --- /dev/null +++ b/src/Darcs/Local/Inventory/Read.hs @@ -0,0 +1,96 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +module Darcs.Local.Inventory.Read + ( readLatestInventory + , readCompressedInventory + ) +where + +import Prelude + +import Codec.Compression.Zlib.Internal +import Control.Applicative (many, optional, liftA2) +import Control.Arrow (second) +import Control.Monad (replicateM_) +import Crypto.Hash +import Data.Attoparsec.ByteString +import Data.ByteArray (convert) +import Data.ByteString (ByteString) +import Data.Time.Calendar (fromGregorianValid) +import Data.Time.Clock (UTCTime (..), secondsToDiffTime) +import Data.Word (Word8) +import System.FilePath (()) + +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as BC +import qualified Data.ByteString.Base16 as B16 +import qualified Data.ByteString.Lex.Integral as BX + +import Control.Applicative.Local +import Darcs.Local.Hash.Codec +import Darcs.Local.Hash.Types +import Darcs.Local.Inventory.Parser +import Darcs.Local.Inventory.Types +import Data.Attoparsec.ByteString.Local +import Data.ByteString.Local (stripPrefix) +import Data.Text.UTF8.Local (decodeStrict) + +darcsDir :: FilePath +darcsDir = "_darcs" + +inventoryDir :: FilePath +inventoryDir = "inventories" + +inventoryFile :: FilePath +inventoryFile = "hashed_inventory" + +readLatestInventory :: FilePath -> Parser a -> IO (Either String a) +readLatestInventory repo = + parseFileIncremental $ repo darcsDir inventoryFile + +readCompressedInventory + :: FilePath -> InventoryHash -> Parser a -> IO (Either String a) +readCompressedInventory repo ih = + let invFile = BC.unpack $ encodeInventoryHash ih + invPath = repo darcsDir inventoryDir invFile + defParams = defaultDecompressParams + bufSize = min (decompressBufferSize defParams) (ihSize ih) + params = defParams { decompressBufferSize = bufSize } + in parseCompressedFileIncremental gzipFormat params invPath + +{- +readLatestInventorySize :: FilePath -> IO (Either String Int) + +readLatestInventoryAll :: FilePath -> IO (Either String LatestInventory) + +readLatestInventoryPage + :: Int -> Int -> FilePath -> IO (Either String LatestInventory) + +readInventorySize :: FilePath -> IO (Either String Int) +readInventorySize repoPath = do + let invPath = repoPath darcsDir inventoryFile + parseFileIncremental invPath $ patchInfosCountP <* endOfInput + +readPatchInfoAll :: FilePath -> IO (Either String PatchSeq) +readPatchInfoAll repoPath = do + let invPath = repoPath darcsDir inventoryFile + parseFileIncremental invPath $ patchInfosAllP <* endOfInput + +readPatchInfoPage :: Int -> Int -> FilePath -> IO (Either String PatchSeq) +readPatchInfoPage off lim repoPath = do + let invPath = repoPath darcsDir inventoryFile + parseFileIncremental invPath $ patchInfosOffsetLimitP off lim +-} diff --git a/src/Darcs/Local/Inventory/Types.hs b/src/Darcs/Local/Inventory/Types.hs new file mode 100644 index 0000000..ebb4a7f --- /dev/null +++ b/src/Darcs/Local/Inventory/Types.hs @@ -0,0 +1,74 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +module Darcs.Local.Inventory.Types + ( LatestInventory (..) + , MiddleInventory (..) + , EarliestInventory (..) + ) +where + +-- TODO +-- +-- Apparently, after a while, some of the patches are moved from +-- hashed_inventory into the inventories/ dir. So the patch set contains more +-- than one group. This means I need to extend my parser to cover this case. +-- Sources for info about this thing: +-- +-- * Darcs source code +-- * Darcs wiki +-- * Local Darcs repos I have +-- +-- From Darcs source code: +-- +-- > The patches in a repository are stored in chunks broken up at \"clean\" +-- > tags. A tag is clean if the only patches before it in the current +-- > repository ordering are ones that the tag depends on (either directly +-- > or indirectly). Each chunk is stored in a separate inventory file on disk. +-- > +-- > A 'PatchSet' represents a repo's history as the list of patches since the +-- > last clean tag, and then a list of patch lists each delimited by clean tags. +-- > +-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo' +-- > representing a clean tag, the hash of the previous inventory (if it exists), +-- > and the list of patches since that previous inventory. +-- +-- Let's start with finding out the format of the binary inventories and +-- parsing them. + +import Prelude + +import Data.ByteString (ByteString) +import Data.Text (Text) +import Data.Time.Clock (UTCTime) + +import Darcs.Local.Hash.Types +import Darcs.Local.Patch.Types + +data LatestInventory = LatestInventory + { liPristineHash :: PristineHash + , liPrevTag :: Maybe (InventoryHash, (TagInfo, PatchHash)) + , liPatches :: [(PatchInfo, PatchHash)] + } + +data MiddleInventory = MiddleInventory + { miPrevious :: InventoryHash + , miTag :: (TagInfo, PatchHash) + , miPatches :: [(PatchInfo, PatchHash)] + } + +newtype EarliestInventory = EarliestInventory + { eiPatches :: [(PatchInfo, PatchHash)] + } diff --git a/src/Darcs/Local/Patch.hs b/src/Darcs/Local/Patch.hs new file mode 100644 index 0000000..a1bd938 --- /dev/null +++ b/src/Darcs/Local/Patch.hs @@ -0,0 +1,107 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +module Darcs.Local.Patch + ( hashPatchInfo + , refinePatchInfo + , tagToPatch + , patchToTag + , patchToTag_ + ) +where + +import Prelude hiding (take, takeWhile) + +import Control.Applicative (many, optional, liftA2) +import Control.Arrow (second) +import Control.Monad (replicateM_) +import Crypto.Hash +import Data.Attoparsec.ByteString +import Data.ByteArray (convert) +import Data.ByteString (ByteString) +import Data.Time.Calendar (fromGregorianValid) +import Data.Time.Clock (UTCTime (..), secondsToDiffTime) +import Data.Word (Word8) +import System.FilePath (()) + +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as BC +import qualified Data.ByteString.Base16 as B16 +import qualified Data.ByteString.Lex.Integral as BX + +import Control.Applicative.Local +import Darcs.Local.Hash.Types +import Darcs.Local.Inventory.Types +import Darcs.Local.Patch.Types +import Data.Attoparsec.ByteString.Local +import Data.ByteString.Local (stripPrefix) +import Data.Text.UTF8.Local (decodeStrict) + +hashPatchInfo :: HashAlgorithm a => a -> PatchInfoRaw -> Digest a +hashPatchInfo _algo pir = + let add = flip hashUpdate + adds = flip hashUpdates + in hashFinalize $ + add (if pirInverted pir then "t" else "f" :: ByteString) $ + adds (pirDescription pir) $ + add (pirJunkContent pir) $ + add (pirJunkPrefix pir) $ + add (fst $ pirTime pir) $ + add (pirAuthor pir) $ + add (pirTitle pir) + hashInit + +refinePatchInfo :: PatchInfoRaw -> PatchInfo +refinePatchInfo pir = + let rtitle = pirTitle pir + (title, tag) = case stripPrefix "TAG " rtitle of + Nothing -> (rtitle, False) + Just rest -> (rest, True) + description = case pirDescription pir of + [] -> Nothing + l -> Just $ BC.unlines l + in PatchInfo + { piAuthor = decodeStrict $ pirAuthor pir + , piHash = uncurry ContentHash $ pirHash pir + , piTitle = decodeStrict title + , piDescription = decodeStrict <$> description + , piTag = tag + , piTime = snd $ pirTime pir + } + +tagToPatch :: TagInfo -> PatchInfo +tagToPatch tag = PatchInfo + { piAuthor = tiAuthor tag + , piHash = tiHash tag + , piTitle = tiTitle tag + , piDescription = tiDescription tag + , piTag = True + , piTime = tiTime tag + } + +patchToTag :: PatchInfo -> Maybe TagInfo +patchToTag pi = + if piTag pi + then Just $ patchToTag_ pi + else Nothing + +patchToTag_ :: PatchInfo -> TagInfo +patchToTag_ patch = TagInfo + { tiAuthor = piAuthor patch + , tiHash = piHash patch + , tiTitle = piTitle patch + , tiDescription = piDescription patch + , tiTime = piTime patch + } diff --git a/src/Darcs/Local/Patch/Types.hs b/src/Darcs/Local/Patch/Types.hs new file mode 100644 index 0000000..8cafe46 --- /dev/null +++ b/src/Darcs/Local/Patch/Types.hs @@ -0,0 +1,72 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +module Darcs.Local.Patch.Types + ( PatchInfoRaw (..) + , PatchInfo (..) + , TagInfo (..) + ) +where + +import Prelude + +import Data.ByteString (ByteString) +import Data.Text (Text) +import Data.Time.Clock (UTCTime) + +import Darcs.Local.Hash.Types (ContentHash) + +-- | Patch metadata in raw form. This is intended for accurate hashing of the +-- patch info. +data PatchInfoRaw = PatchInfoRaw + { pirAuthor :: ByteString + , pirHash :: (Int, ByteString) + , pirTitle :: ByteString + , pirDescription :: [ByteString] + , pirJunkPrefix :: ByteString + , pirJunkContent :: ByteString + , pirTime :: (ByteString, UTCTime) + , pirInverted :: Bool + } + +-- | Patch metadata read from the inventory file. +data PatchInfo = PatchInfo + { -- | Author name and email + piAuthor :: Text + -- | Patch content hash + , piHash :: ContentHash + -- | Single message line + , piTitle :: Text + -- | Optional description, may contain several lines + , piDescription :: Maybe Text + -- | Whether this is a tag + , piTag :: Bool + -- | When the patch was recorded + , piTime :: UTCTime + } + +-- | Tag metadata read from the inventory file. +data TagInfo = TagInfo + { -- | Author name and email + tiAuthor :: Text + -- | Tag content hash + , tiHash :: ContentHash + -- | Single message line + , tiTitle :: Text + -- | Optional description, may contain several lines + , tiDescription :: Maybe Text + -- | When the tag was recorded + , tiTime :: UTCTime + } diff --git a/src/Darcs/Local/PatchInfo/Types.hs b/src/Darcs/Local/PatchInfo/Types.hs deleted file mode 100644 index e9291f4..0000000 --- a/src/Darcs/Local/PatchInfo/Types.hs +++ /dev/null @@ -1,119 +0,0 @@ -{- This file is part of Vervis. - - - - Written in 2016 by fr33domlover . - - - - ♡ Copying is an act of love. Please copy, reuse and share. - - - - The author(s) have dedicated all copyright and related and neighboring - - rights to this software to the public domain worldwide. This software is - - distributed without any warranty. - - - - You should have received a copy of the CC0 Public Domain Dedication along - - with this software. If not, see - - . - -} - -module Darcs.Local.PatchInfo.Types - ( PatchInfoRaw (..) - , PatchHash (..) - , ContentHash (..) - , InventoryHash (..) - , PristineHash (..) - , PatchInfo (..) - , PatchSeq (..) - ) -where - --- TODO --- --- Apparently, after a while, some of the patches are moved from --- hashed_inventory into the inventories/ dir. So the patch set contains more --- than one group. This means I need to extend my parser to cover this case. --- Sources for info about this thing: --- --- * Darcs source code --- * Darcs wiki --- * Local Darcs repos I have --- --- From Darcs source code: --- --- > The patches in a repository are stored in chunks broken up at \"clean\" --- > tags. A tag is clean if the only patches before it in the current --- > repository ordering are ones that the tag depends on (either directly --- > or indirectly). Each chunk is stored in a separate inventory file on disk. --- > --- > A 'PatchSet' represents a repo's history as the list of patches since the --- > last clean tag, and then a list of patch lists each delimited by clean tags. --- > --- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo' --- > representing a clean tag, the hash of the previous inventory (if it exists), --- > and the list of patches since that previous inventory. --- --- Let's start with finding out the format of the binary inventories and --- parsing them. - -import Prelude - -import Data.ByteString (ByteString) -import Data.Text (Text) -import Data.Time.Clock (UTCTime) - --- | Patch metadata in raw form. This is intended for accurate hashing of the --- patch info. -data PatchInfoRaw = PatchInfoRaw - { pirAuthor :: ByteString - , pirHash :: (Int, ByteString) - , pirTitle :: ByteString - , pirDescription :: [ByteString] - , pirJunkPrefix :: ByteString - , pirJunkContent :: ByteString - , pirTime :: (ByteString, UTCTime) - , pirInverted :: Bool - } - --- | A SHA1 hash of the patch info (author, title, description including junk, --- timestamp). The hash is in binary form, not hex, i.e. its size is always 20 --- bytes. -newtype PatchHash = PatchHash { unPatchHash :: ByteString } - --- | Content size and SHA256 hash of a patch's info and content. The hash is in --- binary form, not hex, i.e. its size is always 32 bytes. -data ContentHash = ContentHash - { chSize :: Int - , chHash :: ByteString - } - --- | Content size and SHA256 hash of an inventory (a patch set in a single --- invetory file). The hash is in binary form, not hex, i.e. its size is always --- 32 bytes. -data InventoryHash = InventoryHash - { ihSize :: Int - , ihHash :: ByteString - } - --- | A SHA256 hash of the entire recorded state of the repo. The hash is in --- binary form, not hex, i.e. its size is always 32 bytes. -newtype PristineHash = PristineHash { unPristineHash :: ByteString } - --- | Patch metadata read from the inventory file. -data PatchInfo = PatchInfo - { -- | Author name and email - piAuthor :: Text - -- | Patch content hash - , piHash :: ContentHash - -- | Single message line - , piTitle :: Text - -- | Optional description, may contain several lines - , piDescription :: Maybe Text - -- | Whether this is a tag - , piTag :: Bool - -- | When the patch was recorded - , piTime :: UTCTime - } - --- | The information from the hashed inventory file. -data PatchSeq = PatchSeq - { psPristineHash :: PristineHash - , psPrevious :: Maybe InventoryHash - , psPatches :: [(PatchInfo, PatchHash)] - } diff --git a/src/Data/Attoparsec/ByteString/Local.hs b/src/Data/Attoparsec/ByteString/Local.hs index 448e013..d376376 100644 --- a/src/Data/Attoparsec/ByteString/Local.hs +++ b/src/Data/Attoparsec/ByteString/Local.hs @@ -15,15 +15,17 @@ module Data.Attoparsec.ByteString.Local ( parseFileIncremental + , parseCompressedFileIncremental ) where import Prelude +import Codec.Compression.Zlib.Internal import Data.Attoparsec.ByteString import System.IO -import qualified Data.ByteString as B (hGet) +import qualified Data.ByteString as B (null, hGet) import qualified Data.ByteString.Lazy.Internal as BLI (defaultChunkSize) parseFileIncremental :: FilePath -> Parser a -> IO (Either String a) @@ -36,3 +38,36 @@ parseFileIncremental file parser = firstChunk <- getChunk let firstResult = parse parser firstChunk go firstResult + +parseCompressedFileIncremental + :: Format + -> DecompressParams + -> FilePath + -> Parser a + -> IO (Either String a) +parseCompressedFileIncremental format params file parser = + withBinaryFile file ReadMode $ \ h -> do + let getChunk = B.hGet h BLI.defaultChunkSize + + pGo _ (Fail _remainder _contexts msg) = return $ Left msg + pGo f (Partial cont) = f cont + pGo _ (Done _remainder value) = return $ Right value + + dGo pCont (DecompressInputRequired dCont) = + getChunk >>= dCont >>= dGo pCont + dGo pCont (DecompressOutputAvailable output next) = + pGo (\ c -> next >>= dGo c) (pCont output) + dGo pCont (DecompressStreamEnd remainder) = + if B.null remainder + then + pGo + ( const $ + return $ + Left "Parser wants input but there's none" + ) + (pCont remainder) + else return $ Left "Decompression ended with remainder" + dGo pCont (DecompressStreamError err) = + return $ Left $ show err + + dGo (parse parser) (decompressIO format params) diff --git a/src/Development/DarcsRev.hs b/src/Development/DarcsRev.hs new file mode 100644 index 0000000..cb0b24b --- /dev/null +++ b/src/Development/DarcsRev.hs @@ -0,0 +1,186 @@ +{- This file is part of Vervis. + - + - Written in 2016 by fr33domlover . + - + - ♡ Copying is an act of love. Please copy, reuse and share. + - + - The author(s) have dedicated all copyright and related and neighboring + - rights to this software to the public domain worldwide. This software is + - distributed without any warranty. + - + - You should have received a copy of the CC0 Public Domain Dedication along + - with this software. If not, see + - . + -} + +{-# LANGUAGE CPP #-} + +module Development.DarcsRev + ( darcsLastPatchHash + , darcsLastPatchTime + , darcsLastPatchTitle + , darcsLastPatchIsTag + , darcsLastTagHash + , darcsLastTagTime + , darcsLastTagName + , darcsPatchesSinceLastTag + , darcsBranchSharer + , darcsBranchRepo + , darcsTotalPatches + , darcsTreeDirty + ) +where + +import Prelude + +import Data.Fixed +import Data.Foldable (find) +import Data.Time +import Language.Haskell.TH + +import qualified Data.ByteString.Char8 as BC (unpack) +import qualified Data.Text as T (unpack) + +import Darcs.Local.Hash.Codec +import Darcs.Local.Hash.Types +import Darcs.Local.Patch +import Darcs.Local.Patch.Types +import Darcs.Local.Inventory.Parser +import Darcs.Local.Inventory.Read +import Darcs.Local.Inventory.Types + +-- TODO +-- +-- * Number of patches since latest tag +-- * Branch name, basically the / thing. This allows to identify +-- which "darcs branch" is being used +-- * Whether there are unrecorded changes to tracked files + +repoPath :: FilePath +repoPath = "." + +readLatestInv :: IO [(PatchInfo, PatchHash)] +readLatestInv = do + einv <- readLatestInventory repoPath latestInventoryAllP + case einv of + Left err -> error $ "Failed to parse Darcs inventory file: " ++ err + Right inv -> return $ + case snd <$> liPrevTag inv of + Nothing -> liPatches inv + Just (tag, h) -> (tagToPatch tag, h) : liPatches inv + +readLastPatch :: IO (PatchInfo, PatchHash) +readLastPatch = do + pis <- readLatestInv + if null pis + then error "No patches found" + else return $ last pis + +readLastTag :: IO (PatchInfo, PatchHash) +readLastTag = do + pis <- readLatestInv + if null pis + then error "No patches found" + else case find (piTag . fst) $ reverse pis of + Nothing -> error "No tags found" + Just tag -> return tag + +darcsHash :: IO PatchHash -> Q Exp +darcsHash readHash = runIO readHash >>= stringE . BC.unpack . encodePatchHash + +darcsTime :: IO PatchInfo -> Q Exp +darcsTime readPI = do + pi <- runIO readPI + let UTCTime (ModifiedJulianDay day) diff' = piTime pi +#if MIN_VERSION_time(1,6,0) + diff = diffTimeToPicoseconds diff' +#else + diff = + let MkFixed pico = realToFrac diff' :: Pico + in pico +#endif + return $ RecConE 'UTCTime + [ ( 'utctDay + , AppE (VarE 'ModifiedJulianDay) (LitE $ IntegerL day) + ) + , ( 'utctDayTime + , AppE (VarE 'picosecondsToDiffTime) (LitE $ IntegerL diff) + ) + ] + +darcsTitle :: IO PatchInfo -> Q Exp +darcsTitle readPI = runIO readPI >>= stringE . T.unpack . piTitle + +darcsIsTag :: IO PatchInfo -> Q Exp +darcsIsTag readPI = do + pi <- runIO readPI + return $ ConE $ if piTag pi then 'True else 'False + +-- | The ASCII lowercase hexadecimal representation of the hash of the last +-- recorded patch, as a string literal. This the SHA256 patch hash, i.e. a hash +-- of the patch info, not content. This is what @darcs log@ displays. +darcsLastPatchHash :: Q Exp +darcsLastPatchHash = darcsHash $ snd <$> readLastPatch + +-- | The time of the last recorded patch, as a 'UTCTime' value. +darcsLastPatchTime :: Q Exp +darcsLastPatchTime = darcsTime $ fst <$> readLastPatch + +-- | The title of the last recorded patch, as a string literal. +darcsLastPatchTitle :: Q Exp +darcsLastPatchTitle = darcsTitle $ fst <$> readLastPatch + +-- | A 'Bool' saying whether the last recorded patch is actually a tag. +darcsLastPatchIsTag :: Q Exp +darcsLastPatchIsTag = darcsIsTag $ fst <$> readLastPatch + +-- | The ASCII lowercase hexadecimal representation of the hash of the last +-- recorded tag (i.e. the last patch that is a tag). +darcsLastTagHash :: Q Exp +darcsLastTagHash = darcsHash $ snd <$> readLastTag + +-- | The time of the last recorded tag, as a 'UTCTime' value. +darcsLastTagTime :: Q Exp +darcsLastTagTime = darcsTime $ fst <$> readLastTag + +-- | The name of the last recorded tag, as a string literal. This is a result +-- of taking the title of the tag and dropping the @"TAG "@ prefix. +darcsLastTagName :: Q Exp +darcsLastTagName = darcsTitle $ fst <$> readLastTag + +-- | Number of patches recorded after the last tag, as a number literal. +darcsPatchesSinceLastTag :: Q Exp +darcsPatchesSinceLastTag = do + pisAll <- runIO readLatestInv + case break (piTag . fst) $ reverse pisAll of + (_, []) -> fail "No tag found" + (pisAfter, (_tag : _pisBefore)) -> + litE $ integerL $ toInteger $ length pisAfter + +-- | Not implemented yet +darcsBranchSharer :: Q Exp +darcsBranchSharer = undefined + +-- | Not implemented yet +darcsBranchRepo :: Q Exp +darcsBranchRepo = undefined + +-- | Total number of recorded patches. +darcsTotalPatches :: Q Exp +darcsTotalPatches = do + let go Nothing n = return n + go (Just ih) n = do + einv <- readCompressedInventory repoPath ih earlyInventoryPrevSizeP + case einv of + Left err -> error $ "Failed to parse inventory: " ++ err + Right (mih, m) -> go mih $ n + m + nPatches <- runIO $ do + einv <- readLatestInventory repoPath latestInventoryPrevSizeP + case einv of + Left err -> error $ "Failed to parse latest inventory: " ++ err + Right (mih, n) -> go mih n + litE $ integerL $ toInteger nPatches + +-- | Not implemented yet +darcsTreeDirty :: Q Exp +darcsTreeDirty = undefined diff --git a/src/Vervis/Darcs.hs b/src/Vervis/Darcs.hs index 9633007..82acab6 100644 --- a/src/Vervis/Darcs.hs +++ b/src/Vervis/Darcs.hs @@ -38,8 +38,11 @@ import qualified Data.ByteString.Base16 as B16 (encode) import qualified Data.Foldable as F (find) import qualified Data.Text as T (takeWhile, stripEnd) -import Darcs.Local.PatchInfo.Parser -import Darcs.Local.PatchInfo.Types +import Darcs.Local.Hash.Codec +import Darcs.Local.Inventory.Parser +import Darcs.Local.Inventory.Read +import Darcs.Local.Inventory.Types +import Darcs.Local.Patch.Types import Darcs.Local.Repository import Data.Either.Local (maybeRight) import Data.EventTime.Local @@ -125,18 +128,18 @@ readChangesView -> IO (Maybe (Int, [LogEntry])) -- ^ Total number of changes, and view of the chosen subset readChangesView path off lim = fmap maybeRight $ runExceptT $ do - total <- ExceptT $ readPatchInfoCount path + total <- ExceptT $ readLatestInventory path latestInventorySizeP let off' = total - off - lim - ps <- ExceptT $ readPatchInfoPage off' lim path + ps <- ExceptT $ readLatestInventory path $ latestInventoryPageP off' lim now <- lift getCurrentTime let toLE pi h = LogEntry { leAuthor = T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi - , leHash = decodeStrict $ B16.encode $ unPatchHash h + , leHash = decodeStrict $ encodePatchHash h , leMessage = piTitle pi , leTime = intervalToEventTime $ FriendlyConvert $ now `diffUTCTime` piTime pi } - return (total, map (uncurry toLE) $ reverse $ psPatches ps) + return (total, map (uncurry toLE) $ reverse $ snd ps) diff --git a/vervis.cabal b/vervis.cabal index 649c1bf..f265390 100644 --- a/vervis.cabal +++ b/vervis.cabal @@ -35,8 +35,13 @@ flag library-only library exposed-modules: Control.Applicative.Local - Darcs.Local.PatchInfo.Parser - Darcs.Local.PatchInfo.Types + Darcs.Local.Hash.Codec + Darcs.Local.Hash.Types + Darcs.Local.Inventory.Parser + Darcs.Local.Inventory.Read + Darcs.Local.Inventory.Types + Darcs.Local.Patch + Darcs.Local.Patch.Types Darcs.Local.Repository Data.Attoparsec.ByteString.Local Data.Binary.Local @@ -53,6 +58,7 @@ library Data.Text.UTF8.Local Data.Text.Lazy.UTF8.Local Data.Time.Clock.Local + Development.DarcsRev Network.SSH.Local Text.FilePath.Local Text.Jasmine.Local @@ -205,6 +211,9 @@ library , yesod-form , yesod-static , yesod-persistent + -- for reading gzipped darcs inventory via utils in + -- Data.Attoparsec.ByteString.Local + , zlib hs-source-dirs: src default-language: Haskell2010