Compressed inventory parser and DarcsRev TH utils

This commit is contained in:
fr33domlover 2016-05-17 20:34:22 +00:00
parent 9ba6761459
commit e76c1f7206
13 changed files with 827 additions and 209 deletions

View file

@ -77,3 +77,12 @@ Ticket
closer PersonId closer PersonId
UniqueTicket project number UniqueTicket project number
Discussion
Message
author PersonId
created UTCTime
content Text -- Assume this is Pandoc Markdown
parent MessageId Maybe
root DiscussionId

View file

@ -0,0 +1,49 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.Hash.Codec
( encodePatchHash
, encodeInventoryHash
)
where
import Prelude
import Data.ByteString (ByteString)
import Data.Monoid ((<>))
import qualified Data.ByteString as B (length, replicate)
import qualified Data.ByteString.Base16 as B16 (encode)
import qualified Data.ByteString.Lex.Integral as BX (packDecimal)
import Darcs.Local.Hash.Types
encodeHash :: ByteString -> ByteString
encodeHash = B16.encode
encodeSize :: Int -> ByteString
encodeSize n =
case BX.packDecimal n of
Nothing -> error "negative size in sizehash"
Just b ->
if B.length b < 10
then B.replicate (10 - B.length b) 0x30 <> b
else b
encodePatchHash :: PatchHash -> ByteString
encodePatchHash (PatchHash h) = encodeHash h
encodeInventoryHash :: InventoryHash -> ByteString
encodeInventoryHash (InventoryHash s h) = encodeSize s <> "-" <> encodeHash h

View file

@ -0,0 +1,50 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.Hash.Types
( PatchHash (..)
, ContentHash (..)
, InventoryHash (..)
, PristineHash (..)
)
where
import Prelude
import Data.ByteString (ByteString)
-- | A SHA1 hash of the patch info (author, title, description including junk,
-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
-- bytes.
newtype PatchHash = PatchHash { unPatchHash :: ByteString }
-- | Content size and SHA256 hash of a patch's info and content. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
data ContentHash = ContentHash
{ chSize :: Int
, chHash :: ByteString
}
-- | Content size and SHA256 hash of an inventory (a patch set in a single
-- invetory file). The hash is in binary form, not hex, i.e. its size is always
-- 32 bytes.
data InventoryHash = InventoryHash
{ ihSize :: Int
, ihHash :: ByteString
}
-- | A SHA256 hash of the entire recorded state of the repo. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
newtype PristineHash = PristineHash { unPristineHash :: ByteString }

View file

@ -20,10 +20,16 @@
-- doesn't result with the exact original text, we'll have the wrong hash. To -- doesn't result with the exact original text, we'll have the wrong hash. To
-- make sure it's exactly the right content, we use ByteString first and then -- make sure it's exactly the right content, we use ByteString first and then
-- later decode to Text. -- later decode to Text.
module Darcs.Local.PatchInfo.Parser module Darcs.Local.Inventory.Parser
( readPatchInfoCount ( latestInventoryPristineP
, readPatchInfoAll , latestInventorySizeP
, readPatchInfoPage , latestInventoryPrevSizeP
, latestInventoryPageP
, latestInventoryAllP
, earlyInventorySizeP
, earlyInventoryPrevSizeP
, earlyInventoryPageP
, earlyInventoryAllP
) )
where where
@ -47,7 +53,10 @@ import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Lex.Integral as BX import qualified Data.ByteString.Lex.Integral as BX
import Control.Applicative.Local import Control.Applicative.Local
import Darcs.Local.PatchInfo.Types import Darcs.Local.Hash.Types
import Darcs.Local.Inventory.Types
import Darcs.Local.Patch
import Darcs.Local.Patch.Types
import Data.Attoparsec.ByteString.Local import Data.Attoparsec.ByteString.Local
import Data.ByteString.Local (stripPrefix) import Data.ByteString.Local (stripPrefix)
import Data.Text.UTF8.Local (decodeStrict) import Data.Text.UTF8.Local (decodeStrict)
@ -193,37 +202,11 @@ patchInfoRawP = do
, pirInverted = inverted , pirInverted = inverted
} }
hashPatchInfo :: HashAlgorithm a => a -> PatchInfoRaw -> Digest a -- TODO
hashPatchInfo _algo pir = --
let add = flip hashUpdate -- * Finish DarcsRev code, make it build
adds = flip hashUpdates -- * Update darcs change view code to work correctly in the case of previous
in hashFinalize $ -- inventories, test vervis against libravatar for that
add (if pirInverted pir then "t" else "f" :: ByteString) $
adds (pirDescription pir) $
add (pirJunkContent pir) $
add (pirJunkPrefix pir) $
add (fst $ pirTime pir) $
add (pirAuthor pir) $
add (pirTitle pir)
hashInit
refinePatchInfo :: PatchInfoRaw -> PatchInfo
refinePatchInfo pir =
let rtitle = pirTitle pir
(title, tag) = case stripPrefix "TAG " rtitle of
Nothing -> (rtitle, False)
Just rest -> (rest, True)
description = case pirDescription pir of
[] -> Nothing
l -> Just $ BC.unlines l
in PatchInfo
{ piAuthor = decodeStrict $ pirAuthor pir
, piHash = uncurry ContentHash $ pirHash pir
, piTitle = decodeStrict title
, piDescription = decodeStrict <$> description
, piTag = tag
, piTime = snd $ pirTime pir
}
-- | Parse patch metadata and compute the metadata's hash, which can be used as -- | Parse patch metadata and compute the metadata's hash, which can be used as
-- a patch identifier for lookup and matching. -- a patch identifier for lookup and matching.
@ -232,26 +215,119 @@ patchInfoP = do
pir <- patchInfoRawP pir <- patchInfoRawP
return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir) return (refinePatchInfo pir, PatchHash $ convert $ hashPatchInfo SHA1 pir)
patchInfosCountP :: Parser Int tagInfoP :: Parser (TagInfo, PatchHash)
patchInfosCountP = do tagInfoP = do
-- pristine hash (pi, ph) <- patchInfoP
skipLine case patchToTag pi of
-- previous inventory Nothing -> fail "Expected a tag, got a patch that isn't a tag"
optional $ Just ti -> return (ti, ph)
eol *> string "Starting" *> skipRestOfLine *> eol *>
skipLine
-- patch info
n <- length <$> (many $ eol *> skipPatchP)
eol
return n
patchInfosAllP :: Parser PatchSeq -------------------------------------------------------------------------------
patchInfosAllP = PatchSeq -- Latest inventory
-------------------------------------------------------------------------------
latestInventoryPristineP :: Parser PristineHash
latestInventoryPristineP = pristineP
latestInventorySizeP :: Parser Int
latestInventorySizeP =
-- pristine hash
skipLine *>
-- previous inventory
optional
( eol *> string "Starting" *> skipRestOfLine *>
eol *> skipLine
) *>
-- patch info
(length <$> many (eol *> skipPatchP)) <*
eol
latestInventoryPrevSizeP :: Parser (Maybe InventoryHash, Int)
latestInventoryPrevSizeP =
liftA2 (,)
( -- pristine hash
skipLine *>
-- previous inventory
optional (eol *> prevInvP)
)
( -- patch info
(length <$> many (eol *> skipPatchP)) <*
eol
)
latestInventoryPageP
:: Int -> Int -> Parser (Maybe InventoryHash, [(PatchInfo, PatchHash)])
latestInventoryPageP off lim =
let f mPrevTag pis =
case mPrevTag of
Nothing -> (Nothing, pis)
Just (ih, pi) -> (Just ih, pi : pis)
in liftA2 f
-- pristine
( skipLine *>
-- previous inventory and clean tag
optional (liftA2 (,) (eol *> prevInvP) (eol *> patchInfoP)) <*
-- skip offset
replicateM_ off (eol *> skipPatchP)
)
-- take limit
(atMost lim $ eol *> patchInfoP)
latestInventoryAllP :: Parser LatestInventory
latestInventoryAllP = LatestInventory
<$> pristineP <$> pristineP
<*> optional (eol *> prevInvP) <*> optional (liftA2 (,) (eol *> prevInvP) (eol *> tagInfoP))
<*> many (eol *> patchInfoP) <*> many (eol *> patchInfoP)
<* eol <* eol
-------------------------------------------------------------------------------
-- Early inventory
-------------------------------------------------------------------------------
earlyInventorySizeP :: Parser Int
earlyInventorySizeP =
-- previous inventory
optional
( string "Starting" *> skipRestOfLine *>
eol *> skipLine
) *>
-- patch info
(length <$> many (eol *> skipPatchP)) <*
eol
earlyInventoryPrevSizeP :: Parser (Maybe InventoryHash, Int)
earlyInventoryPrevSizeP =
liftA2 (,)
-- previous inventory
(optional $ prevInvP <* eol)
-- patch info
(length <$> many (skipPatchP *> eol))
earlyInventoryPageP
:: Int -> Int -> Parser (Maybe InventoryHash, [(PatchInfo, PatchHash)])
earlyInventoryPageP off lim =
let f mPrevTag pis =
case mPrevTag of
Nothing -> (Nothing, pis)
Just (ih, pi) -> (Just ih, pi : pis)
in liftA2 f
-- previous inventory and clean tag
( optional (liftA2 (,) (prevInvP <* eol) (patchInfoP <* eol)) <*
-- skip offset
replicateM_ off (skipPatchP *> eol)
)
-- take limit
(atMost lim $ patchInfoP <* eol)
earlyInventoryAllP :: Parser (Either EarliestInventory MiddleInventory)
earlyInventoryAllP =
let f Nothing pis = Left $ EarliestInventory pis
f (Just (prev, ti)) pis = Right $ MiddleInventory prev ti pis
in liftA2 f
(optional $ liftA2 (,) (prevInvP <* eol) (tagInfoP <* eol))
(many (patchInfoP <* eol))
{-
patchInfosOffsetP :: Int -> Parser PatchSeq patchInfosOffsetP :: Int -> Parser PatchSeq
patchInfosOffsetP off = PatchSeq patchInfosOffsetP off = PatchSeq
<$> pristineP <$> pristineP
@ -261,38 +337,9 @@ patchInfosOffsetP off = PatchSeq
) )
<* eol <* eol
patchInfosLimitP :: Int -> Parser PatchSeq patchInfosLimitP :: Int -> Parser PatchSeq
patchInfosLimitP lim = PatchSeq patchInfosLimitP lim = PatchSeq
<$> pristineP <$> pristineP
<*> optional (eol *> prevInvP) <*> optional (eol *> prevInvP)
<*> atMost lim (eol *> patchInfoP) <*> atMost lim (eol *> patchInfoP)
-}
patchInfosOffsetLimitP :: Int -> Int -> Parser PatchSeq
patchInfosOffsetLimitP off lim = PatchSeq
<$> pristineP
<*> optional (eol *> prevInvP)
<*> ( replicateM_ off (eol *> skipPatchP) *>
atMost lim (eol *> patchInfoP)
)
darcsDir :: FilePath
darcsDir = "_darcs"
inventoryFile :: FilePath
inventoryFile = "hashed_inventory"
readPatchInfoCount :: FilePath -> IO (Either String Int)
readPatchInfoCount repoPath = do
let invPath = repoPath </> darcsDir </> inventoryFile
parseFileIncremental invPath $ patchInfosCountP <* endOfInput
readPatchInfoAll :: FilePath -> IO (Either String PatchSeq)
readPatchInfoAll repoPath = do
let invPath = repoPath </> darcsDir </> inventoryFile
parseFileIncremental invPath $ patchInfosAllP <* endOfInput
readPatchInfoPage :: Int -> Int -> FilePath -> IO (Either String PatchSeq)
readPatchInfoPage off lim repoPath = do
let invPath = repoPath </> darcsDir </> inventoryFile
parseFileIncremental invPath $ patchInfosOffsetLimitP off lim

View file

@ -0,0 +1,96 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.Inventory.Read
( readLatestInventory
, readCompressedInventory
)
where
import Prelude
import Codec.Compression.Zlib.Internal
import Control.Applicative (many, optional, liftA2)
import Control.Arrow (second)
import Control.Monad (replicateM_)
import Crypto.Hash
import Data.Attoparsec.ByteString
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
import Data.Time.Calendar (fromGregorianValid)
import Data.Time.Clock (UTCTime (..), secondsToDiffTime)
import Data.Word (Word8)
import System.FilePath ((</>))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Lex.Integral as BX
import Control.Applicative.Local
import Darcs.Local.Hash.Codec
import Darcs.Local.Hash.Types
import Darcs.Local.Inventory.Parser
import Darcs.Local.Inventory.Types
import Data.Attoparsec.ByteString.Local
import Data.ByteString.Local (stripPrefix)
import Data.Text.UTF8.Local (decodeStrict)
darcsDir :: FilePath
darcsDir = "_darcs"
inventoryDir :: FilePath
inventoryDir = "inventories"
inventoryFile :: FilePath
inventoryFile = "hashed_inventory"
readLatestInventory :: FilePath -> Parser a -> IO (Either String a)
readLatestInventory repo =
parseFileIncremental $ repo </> darcsDir </> inventoryFile
readCompressedInventory
:: FilePath -> InventoryHash -> Parser a -> IO (Either String a)
readCompressedInventory repo ih =
let invFile = BC.unpack $ encodeInventoryHash ih
invPath = repo </> darcsDir </> inventoryDir </> invFile
defParams = defaultDecompressParams
bufSize = min (decompressBufferSize defParams) (ihSize ih)
params = defParams { decompressBufferSize = bufSize }
in parseCompressedFileIncremental gzipFormat params invPath
{-
readLatestInventorySize :: FilePath -> IO (Either String Int)
readLatestInventoryAll :: FilePath -> IO (Either String LatestInventory)
readLatestInventoryPage
:: Int -> Int -> FilePath -> IO (Either String LatestInventory)
readInventorySize :: FilePath -> IO (Either String Int)
readInventorySize repoPath = do
let invPath = repoPath </> darcsDir </> inventoryFile
parseFileIncremental invPath $ patchInfosCountP <* endOfInput
readPatchInfoAll :: FilePath -> IO (Either String PatchSeq)
readPatchInfoAll repoPath = do
let invPath = repoPath </> darcsDir </> inventoryFile
parseFileIncremental invPath $ patchInfosAllP <* endOfInput
readPatchInfoPage :: Int -> Int -> FilePath -> IO (Either String PatchSeq)
readPatchInfoPage off lim repoPath = do
let invPath = repoPath </> darcsDir </> inventoryFile
parseFileIncremental invPath $ patchInfosOffsetLimitP off lim
-}

View file

@ -0,0 +1,74 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.Inventory.Types
( LatestInventory (..)
, MiddleInventory (..)
, EarliestInventory (..)
)
where
-- TODO
--
-- Apparently, after a while, some of the patches are moved from
-- hashed_inventory into the inventories/ dir. So the patch set contains more
-- than one group. This means I need to extend my parser to cover this case.
-- Sources for info about this thing:
--
-- * Darcs source code
-- * Darcs wiki
-- * Local Darcs repos I have
--
-- From Darcs source code:
--
-- > The patches in a repository are stored in chunks broken up at \"clean\"
-- > tags. A tag is clean if the only patches before it in the current
-- > repository ordering are ones that the tag depends on (either directly
-- > or indirectly). Each chunk is stored in a separate inventory file on disk.
-- >
-- > A 'PatchSet' represents a repo's history as the list of patches since the
-- > last clean tag, and then a list of patch lists each delimited by clean tags.
-- >
-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo'
-- > representing a clean tag, the hash of the previous inventory (if it exists),
-- > and the list of patches since that previous inventory.
--
-- Let's start with finding out the format of the binary inventories and
-- parsing them.
import Prelude
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Time.Clock (UTCTime)
import Darcs.Local.Hash.Types
import Darcs.Local.Patch.Types
data LatestInventory = LatestInventory
{ liPristineHash :: PristineHash
, liPrevTag :: Maybe (InventoryHash, (TagInfo, PatchHash))
, liPatches :: [(PatchInfo, PatchHash)]
}
data MiddleInventory = MiddleInventory
{ miPrevious :: InventoryHash
, miTag :: (TagInfo, PatchHash)
, miPatches :: [(PatchInfo, PatchHash)]
}
newtype EarliestInventory = EarliestInventory
{ eiPatches :: [(PatchInfo, PatchHash)]
}

107
src/Darcs/Local/Patch.hs Normal file
View file

@ -0,0 +1,107 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.Patch
( hashPatchInfo
, refinePatchInfo
, tagToPatch
, patchToTag
, patchToTag_
)
where
import Prelude hiding (take, takeWhile)
import Control.Applicative (many, optional, liftA2)
import Control.Arrow (second)
import Control.Monad (replicateM_)
import Crypto.Hash
import Data.Attoparsec.ByteString
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
import Data.Time.Calendar (fromGregorianValid)
import Data.Time.Clock (UTCTime (..), secondsToDiffTime)
import Data.Word (Word8)
import System.FilePath ((</>))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Lex.Integral as BX
import Control.Applicative.Local
import Darcs.Local.Hash.Types
import Darcs.Local.Inventory.Types
import Darcs.Local.Patch.Types
import Data.Attoparsec.ByteString.Local
import Data.ByteString.Local (stripPrefix)
import Data.Text.UTF8.Local (decodeStrict)
hashPatchInfo :: HashAlgorithm a => a -> PatchInfoRaw -> Digest a
hashPatchInfo _algo pir =
let add = flip hashUpdate
adds = flip hashUpdates
in hashFinalize $
add (if pirInverted pir then "t" else "f" :: ByteString) $
adds (pirDescription pir) $
add (pirJunkContent pir) $
add (pirJunkPrefix pir) $
add (fst $ pirTime pir) $
add (pirAuthor pir) $
add (pirTitle pir)
hashInit
refinePatchInfo :: PatchInfoRaw -> PatchInfo
refinePatchInfo pir =
let rtitle = pirTitle pir
(title, tag) = case stripPrefix "TAG " rtitle of
Nothing -> (rtitle, False)
Just rest -> (rest, True)
description = case pirDescription pir of
[] -> Nothing
l -> Just $ BC.unlines l
in PatchInfo
{ piAuthor = decodeStrict $ pirAuthor pir
, piHash = uncurry ContentHash $ pirHash pir
, piTitle = decodeStrict title
, piDescription = decodeStrict <$> description
, piTag = tag
, piTime = snd $ pirTime pir
}
tagToPatch :: TagInfo -> PatchInfo
tagToPatch tag = PatchInfo
{ piAuthor = tiAuthor tag
, piHash = tiHash tag
, piTitle = tiTitle tag
, piDescription = tiDescription tag
, piTag = True
, piTime = tiTime tag
}
patchToTag :: PatchInfo -> Maybe TagInfo
patchToTag pi =
if piTag pi
then Just $ patchToTag_ pi
else Nothing
patchToTag_ :: PatchInfo -> TagInfo
patchToTag_ patch = TagInfo
{ tiAuthor = piAuthor patch
, tiHash = piHash patch
, tiTitle = piTitle patch
, tiDescription = piDescription patch
, tiTime = piTime patch
}

View file

@ -0,0 +1,72 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.Patch.Types
( PatchInfoRaw (..)
, PatchInfo (..)
, TagInfo (..)
)
where
import Prelude
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Time.Clock (UTCTime)
import Darcs.Local.Hash.Types (ContentHash)
-- | Patch metadata in raw form. This is intended for accurate hashing of the
-- patch info.
data PatchInfoRaw = PatchInfoRaw
{ pirAuthor :: ByteString
, pirHash :: (Int, ByteString)
, pirTitle :: ByteString
, pirDescription :: [ByteString]
, pirJunkPrefix :: ByteString
, pirJunkContent :: ByteString
, pirTime :: (ByteString, UTCTime)
, pirInverted :: Bool
}
-- | Patch metadata read from the inventory file.
data PatchInfo = PatchInfo
{ -- | Author name and email
piAuthor :: Text
-- | Patch content hash
, piHash :: ContentHash
-- | Single message line
, piTitle :: Text
-- | Optional description, may contain several lines
, piDescription :: Maybe Text
-- | Whether this is a tag
, piTag :: Bool
-- | When the patch was recorded
, piTime :: UTCTime
}
-- | Tag metadata read from the inventory file.
data TagInfo = TagInfo
{ -- | Author name and email
tiAuthor :: Text
-- | Tag content hash
, tiHash :: ContentHash
-- | Single message line
, tiTitle :: Text
-- | Optional description, may contain several lines
, tiDescription :: Maybe Text
-- | When the tag was recorded
, tiTime :: UTCTime
}

View file

@ -1,119 +0,0 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
module Darcs.Local.PatchInfo.Types
( PatchInfoRaw (..)
, PatchHash (..)
, ContentHash (..)
, InventoryHash (..)
, PristineHash (..)
, PatchInfo (..)
, PatchSeq (..)
)
where
-- TODO
--
-- Apparently, after a while, some of the patches are moved from
-- hashed_inventory into the inventories/ dir. So the patch set contains more
-- than one group. This means I need to extend my parser to cover this case.
-- Sources for info about this thing:
--
-- * Darcs source code
-- * Darcs wiki
-- * Local Darcs repos I have
--
-- From Darcs source code:
--
-- > The patches in a repository are stored in chunks broken up at \"clean\"
-- > tags. A tag is clean if the only patches before it in the current
-- > repository ordering are ones that the tag depends on (either directly
-- > or indirectly). Each chunk is stored in a separate inventory file on disk.
-- >
-- > A 'PatchSet' represents a repo's history as the list of patches since the
-- > last clean tag, and then a list of patch lists each delimited by clean tags.
-- >
-- > A 'Tagged' is a single chunk of a 'PatchSet'. It has a 'PatchInfo'
-- > representing a clean tag, the hash of the previous inventory (if it exists),
-- > and the list of patches since that previous inventory.
--
-- Let's start with finding out the format of the binary inventories and
-- parsing them.
import Prelude
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Time.Clock (UTCTime)
-- | Patch metadata in raw form. This is intended for accurate hashing of the
-- patch info.
data PatchInfoRaw = PatchInfoRaw
{ pirAuthor :: ByteString
, pirHash :: (Int, ByteString)
, pirTitle :: ByteString
, pirDescription :: [ByteString]
, pirJunkPrefix :: ByteString
, pirJunkContent :: ByteString
, pirTime :: (ByteString, UTCTime)
, pirInverted :: Bool
}
-- | A SHA1 hash of the patch info (author, title, description including junk,
-- timestamp). The hash is in binary form, not hex, i.e. its size is always 20
-- bytes.
newtype PatchHash = PatchHash { unPatchHash :: ByteString }
-- | Content size and SHA256 hash of a patch's info and content. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
data ContentHash = ContentHash
{ chSize :: Int
, chHash :: ByteString
}
-- | Content size and SHA256 hash of an inventory (a patch set in a single
-- invetory file). The hash is in binary form, not hex, i.e. its size is always
-- 32 bytes.
data InventoryHash = InventoryHash
{ ihSize :: Int
, ihHash :: ByteString
}
-- | A SHA256 hash of the entire recorded state of the repo. The hash is in
-- binary form, not hex, i.e. its size is always 32 bytes.
newtype PristineHash = PristineHash { unPristineHash :: ByteString }
-- | Patch metadata read from the inventory file.
data PatchInfo = PatchInfo
{ -- | Author name and email
piAuthor :: Text
-- | Patch content hash
, piHash :: ContentHash
-- | Single message line
, piTitle :: Text
-- | Optional description, may contain several lines
, piDescription :: Maybe Text
-- | Whether this is a tag
, piTag :: Bool
-- | When the patch was recorded
, piTime :: UTCTime
}
-- | The information from the hashed inventory file.
data PatchSeq = PatchSeq
{ psPristineHash :: PristineHash
, psPrevious :: Maybe InventoryHash
, psPatches :: [(PatchInfo, PatchHash)]
}

View file

@ -15,15 +15,17 @@
module Data.Attoparsec.ByteString.Local module Data.Attoparsec.ByteString.Local
( parseFileIncremental ( parseFileIncremental
, parseCompressedFileIncremental
) )
where where
import Prelude import Prelude
import Codec.Compression.Zlib.Internal
import Data.Attoparsec.ByteString import Data.Attoparsec.ByteString
import System.IO import System.IO
import qualified Data.ByteString as B (hGet) import qualified Data.ByteString as B (null, hGet)
import qualified Data.ByteString.Lazy.Internal as BLI (defaultChunkSize) import qualified Data.ByteString.Lazy.Internal as BLI (defaultChunkSize)
parseFileIncremental :: FilePath -> Parser a -> IO (Either String a) parseFileIncremental :: FilePath -> Parser a -> IO (Either String a)
@ -36,3 +38,36 @@ parseFileIncremental file parser =
firstChunk <- getChunk firstChunk <- getChunk
let firstResult = parse parser firstChunk let firstResult = parse parser firstChunk
go firstResult go firstResult
parseCompressedFileIncremental
:: Format
-> DecompressParams
-> FilePath
-> Parser a
-> IO (Either String a)
parseCompressedFileIncremental format params file parser =
withBinaryFile file ReadMode $ \ h -> do
let getChunk = B.hGet h BLI.defaultChunkSize
pGo _ (Fail _remainder _contexts msg) = return $ Left msg
pGo f (Partial cont) = f cont
pGo _ (Done _remainder value) = return $ Right value
dGo pCont (DecompressInputRequired dCont) =
getChunk >>= dCont >>= dGo pCont
dGo pCont (DecompressOutputAvailable output next) =
pGo (\ c -> next >>= dGo c) (pCont output)
dGo pCont (DecompressStreamEnd remainder) =
if B.null remainder
then
pGo
( const $
return $
Left "Parser wants input but there's none"
)
(pCont remainder)
else return $ Left "Decompression ended with remainder"
dGo pCont (DecompressStreamError err) =
return $ Left $ show err
dGo (parse parser) (decompressIO format params)

186
src/Development/DarcsRev.hs Normal file
View file

@ -0,0 +1,186 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
{-# LANGUAGE CPP #-}
module Development.DarcsRev
( darcsLastPatchHash
, darcsLastPatchTime
, darcsLastPatchTitle
, darcsLastPatchIsTag
, darcsLastTagHash
, darcsLastTagTime
, darcsLastTagName
, darcsPatchesSinceLastTag
, darcsBranchSharer
, darcsBranchRepo
, darcsTotalPatches
, darcsTreeDirty
)
where
import Prelude
import Data.Fixed
import Data.Foldable (find)
import Data.Time
import Language.Haskell.TH
import qualified Data.ByteString.Char8 as BC (unpack)
import qualified Data.Text as T (unpack)
import Darcs.Local.Hash.Codec
import Darcs.Local.Hash.Types
import Darcs.Local.Patch
import Darcs.Local.Patch.Types
import Darcs.Local.Inventory.Parser
import Darcs.Local.Inventory.Read
import Darcs.Local.Inventory.Types
-- TODO
--
-- * Number of patches since latest tag
-- * Branch name, basically the <user>/<repo> thing. This allows to identify
-- which "darcs branch" is being used
-- * Whether there are unrecorded changes to tracked files
repoPath :: FilePath
repoPath = "."
readLatestInv :: IO [(PatchInfo, PatchHash)]
readLatestInv = do
einv <- readLatestInventory repoPath latestInventoryAllP
case einv of
Left err -> error $ "Failed to parse Darcs inventory file: " ++ err
Right inv -> return $
case snd <$> liPrevTag inv of
Nothing -> liPatches inv
Just (tag, h) -> (tagToPatch tag, h) : liPatches inv
readLastPatch :: IO (PatchInfo, PatchHash)
readLastPatch = do
pis <- readLatestInv
if null pis
then error "No patches found"
else return $ last pis
readLastTag :: IO (PatchInfo, PatchHash)
readLastTag = do
pis <- readLatestInv
if null pis
then error "No patches found"
else case find (piTag . fst) $ reverse pis of
Nothing -> error "No tags found"
Just tag -> return tag
darcsHash :: IO PatchHash -> Q Exp
darcsHash readHash = runIO readHash >>= stringE . BC.unpack . encodePatchHash
darcsTime :: IO PatchInfo -> Q Exp
darcsTime readPI = do
pi <- runIO readPI
let UTCTime (ModifiedJulianDay day) diff' = piTime pi
#if MIN_VERSION_time(1,6,0)
diff = diffTimeToPicoseconds diff'
#else
diff =
let MkFixed pico = realToFrac diff' :: Pico
in pico
#endif
return $ RecConE 'UTCTime
[ ( 'utctDay
, AppE (VarE 'ModifiedJulianDay) (LitE $ IntegerL day)
)
, ( 'utctDayTime
, AppE (VarE 'picosecondsToDiffTime) (LitE $ IntegerL diff)
)
]
darcsTitle :: IO PatchInfo -> Q Exp
darcsTitle readPI = runIO readPI >>= stringE . T.unpack . piTitle
darcsIsTag :: IO PatchInfo -> Q Exp
darcsIsTag readPI = do
pi <- runIO readPI
return $ ConE $ if piTag pi then 'True else 'False
-- | The ASCII lowercase hexadecimal representation of the hash of the last
-- recorded patch, as a string literal. This the SHA256 patch hash, i.e. a hash
-- of the patch info, not content. This is what @darcs log@ displays.
darcsLastPatchHash :: Q Exp
darcsLastPatchHash = darcsHash $ snd <$> readLastPatch
-- | The time of the last recorded patch, as a 'UTCTime' value.
darcsLastPatchTime :: Q Exp
darcsLastPatchTime = darcsTime $ fst <$> readLastPatch
-- | The title of the last recorded patch, as a string literal.
darcsLastPatchTitle :: Q Exp
darcsLastPatchTitle = darcsTitle $ fst <$> readLastPatch
-- | A 'Bool' saying whether the last recorded patch is actually a tag.
darcsLastPatchIsTag :: Q Exp
darcsLastPatchIsTag = darcsIsTag $ fst <$> readLastPatch
-- | The ASCII lowercase hexadecimal representation of the hash of the last
-- recorded tag (i.e. the last patch that is a tag).
darcsLastTagHash :: Q Exp
darcsLastTagHash = darcsHash $ snd <$> readLastTag
-- | The time of the last recorded tag, as a 'UTCTime' value.
darcsLastTagTime :: Q Exp
darcsLastTagTime = darcsTime $ fst <$> readLastTag
-- | The name of the last recorded tag, as a string literal. This is a result
-- of taking the title of the tag and dropping the @"TAG "@ prefix.
darcsLastTagName :: Q Exp
darcsLastTagName = darcsTitle $ fst <$> readLastTag
-- | Number of patches recorded after the last tag, as a number literal.
darcsPatchesSinceLastTag :: Q Exp
darcsPatchesSinceLastTag = do
pisAll <- runIO readLatestInv
case break (piTag . fst) $ reverse pisAll of
(_, []) -> fail "No tag found"
(pisAfter, (_tag : _pisBefore)) ->
litE $ integerL $ toInteger $ length pisAfter
-- | Not implemented yet
darcsBranchSharer :: Q Exp
darcsBranchSharer = undefined
-- | Not implemented yet
darcsBranchRepo :: Q Exp
darcsBranchRepo = undefined
-- | Total number of recorded patches.
darcsTotalPatches :: Q Exp
darcsTotalPatches = do
let go Nothing n = return n
go (Just ih) n = do
einv <- readCompressedInventory repoPath ih earlyInventoryPrevSizeP
case einv of
Left err -> error $ "Failed to parse inventory: " ++ err
Right (mih, m) -> go mih $ n + m
nPatches <- runIO $ do
einv <- readLatestInventory repoPath latestInventoryPrevSizeP
case einv of
Left err -> error $ "Failed to parse latest inventory: " ++ err
Right (mih, n) -> go mih n
litE $ integerL $ toInteger nPatches
-- | Not implemented yet
darcsTreeDirty :: Q Exp
darcsTreeDirty = undefined

View file

@ -38,8 +38,11 @@ import qualified Data.ByteString.Base16 as B16 (encode)
import qualified Data.Foldable as F (find) import qualified Data.Foldable as F (find)
import qualified Data.Text as T (takeWhile, stripEnd) import qualified Data.Text as T (takeWhile, stripEnd)
import Darcs.Local.PatchInfo.Parser import Darcs.Local.Hash.Codec
import Darcs.Local.PatchInfo.Types import Darcs.Local.Inventory.Parser
import Darcs.Local.Inventory.Read
import Darcs.Local.Inventory.Types
import Darcs.Local.Patch.Types
import Darcs.Local.Repository import Darcs.Local.Repository
import Data.Either.Local (maybeRight) import Data.Either.Local (maybeRight)
import Data.EventTime.Local import Data.EventTime.Local
@ -125,18 +128,18 @@ readChangesView
-> IO (Maybe (Int, [LogEntry])) -> IO (Maybe (Int, [LogEntry]))
-- ^ Total number of changes, and view of the chosen subset -- ^ Total number of changes, and view of the chosen subset
readChangesView path off lim = fmap maybeRight $ runExceptT $ do readChangesView path off lim = fmap maybeRight $ runExceptT $ do
total <- ExceptT $ readPatchInfoCount path total <- ExceptT $ readLatestInventory path latestInventorySizeP
let off' = total - off - lim let off' = total - off - lim
ps <- ExceptT $ readPatchInfoPage off' lim path ps <- ExceptT $ readLatestInventory path $ latestInventoryPageP off' lim
now <- lift getCurrentTime now <- lift getCurrentTime
let toLE pi h = LogEntry let toLE pi h = LogEntry
{ leAuthor = { leAuthor =
T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi T.stripEnd $ T.takeWhile (/= '<') $ piAuthor pi
, leHash = decodeStrict $ B16.encode $ unPatchHash h , leHash = decodeStrict $ encodePatchHash h
, leMessage = piTitle pi , leMessage = piTitle pi
, leTime = , leTime =
intervalToEventTime $ intervalToEventTime $
FriendlyConvert $ FriendlyConvert $
now `diffUTCTime` piTime pi now `diffUTCTime` piTime pi
} }
return (total, map (uncurry toLE) $ reverse $ psPatches ps) return (total, map (uncurry toLE) $ reverse $ snd ps)

View file

@ -35,8 +35,13 @@ flag library-only
library library
exposed-modules: Control.Applicative.Local exposed-modules: Control.Applicative.Local
Darcs.Local.PatchInfo.Parser Darcs.Local.Hash.Codec
Darcs.Local.PatchInfo.Types Darcs.Local.Hash.Types
Darcs.Local.Inventory.Parser
Darcs.Local.Inventory.Read
Darcs.Local.Inventory.Types
Darcs.Local.Patch
Darcs.Local.Patch.Types
Darcs.Local.Repository Darcs.Local.Repository
Data.Attoparsec.ByteString.Local Data.Attoparsec.ByteString.Local
Data.Binary.Local Data.Binary.Local
@ -53,6 +58,7 @@ library
Data.Text.UTF8.Local Data.Text.UTF8.Local
Data.Text.Lazy.UTF8.Local Data.Text.Lazy.UTF8.Local
Data.Time.Clock.Local Data.Time.Clock.Local
Development.DarcsRev
Network.SSH.Local Network.SSH.Local
Text.FilePath.Local Text.FilePath.Local
Text.Jasmine.Local Text.Jasmine.Local
@ -205,6 +211,9 @@ library
, yesod-form , yesod-form
, yesod-static , yesod-static
, yesod-persistent , yesod-persistent
-- for reading gzipped darcs inventory via utils in
-- Data.Attoparsec.ByteString.Local
, zlib
hs-source-dirs: src hs-source-dirs: src
default-language: Haskell2010 default-language: Haskell2010