repos / line-indexed-cursor.hs.git


commit
117fc9d
parent
117fc9d
author
Evgenii Akentev
date
2023-07-09 18:10:38 +0400 +04
Create list based version.
7 files changed,  +274, -0
A .gitignore
+27, -0
 1@@ -0,0 +1,27 @@
 2+dist
 3+dist-*
 4+cabal-dev
 5+*.o
 6+*.hi
 7+*.hie
 8+*.chi
 9+*.chs.h
10+*.dyn_o
11+*.dyn_hi
12+.hpc
13+.hsenv
14+.cabal-sandbox/
15+cabal.sandbox.config
16+*.prof
17+*.aux
18+*.hp
19+*.eventlog
20+.stack-work/
21+cabal.project.local
22+cabal.project.local~
23+.HTF/
24+.ghc.environment.*
25+*.output
26+packagedb
27+cache
28+build
A CHANGELOG.md
+5, -0
1@@ -0,0 +1,5 @@
2+# Revision history for line-indexed-file-cursor
3+
4+## 0.1.0.0 -- YYYY-mm-dd
5+
6+* First version. Released on an unsuspecting world.
A LICENSE
+20, -0
 1@@ -0,0 +1,20 @@
 2+Copyright (c) 2023 Evgenii Akentev
 3+
 4+Permission is hereby granted, free of charge, to any person obtaining
 5+a copy of this software and associated documentation files (the
 6+"Software"), to deal in the Software without restriction, including
 7+without limitation the rights to use, copy, modify, merge, publish,
 8+distribute, sublicense, and/or sell copies of the Software, and to
 9+permit persons to whom the Software is furnished to do so, subject to
10+the following conditions:
11+
12+The above copyright notice and this permission notice shall be included
13+in all copies or substantial portions of the Software.
14+
15+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
A line-indexed-cursor.cabal
+35, -0
 1@@ -0,0 +1,35 @@
 2+cabal-version:      3.0
 3+name:               line-indexed-cursor
 4+version:            0.1.0.0
 5+license:            MIT
 6+license-file:       LICENSE
 7+author:             Evgenii Akentev
 8+maintainer:         i@ak3n.com
 9+category:           Development
10+build-type:         Simple
11+extra-doc-files:    CHANGELOG.md
12+
13+common warnings
14+    ghc-options:
15+      -Wall -Wnoncanonical-monad-instances -Wincomplete-uni-patterns
16+      -Wincomplete-record-updates -Wredundant-constraints -Widentities
17+      -Wunused-packages -Wmissing-deriving-strategies
18+
19+
20+library
21+    import:           warnings
22+    exposed-modules:  System.IO.LineIndexedCursor
23+    build-depends:    base ^>=4.18.0.0, bytestring ^>= 0.11
24+    hs-source-dirs:   src
25+    default-language: Haskell2010
26+
27+test-suite line-indexed-cursor-test
28+    import:           warnings
29+    default-language: Haskell2010
30+    type:             exitcode-stdio-1.0
31+    hs-source-dirs:   test
32+    main-is:          Main.hs
33+    build-depends:
34+        base >= 4.7 && < 5,
35+        hspec >= 2.10 && < 3,
36+        line-indexed-cursor
A src/System/IO/LineIndexedCursor.hs
+96, -0
 1@@ -0,0 +1,96 @@
 2+{-# LANGUAGE DuplicateRecordFields #-}
 3+{-# LANGUAGE RecordWildCards #-}
 4+
 5+-----------------------------------------------------------------------------
 6+-- |
 7+-- Module      :  System.IO.LineIndexedCursor
 8+-- Maintainer  :  i@ak3n.com
 9+--
10+-- Line-indexed file reader.
11+--
12+-- Lazily builds the index of line numbers while reading the file
13+-- making it possible to rewind to them quickly later.
14+-----------------------------------------------------------------------------
15+
16+module System.IO.LineIndexedCursor (
17+  LineIndexedCursor(..), mkLineIndexedCursor
18+  ) where
19+
20+import Data.ByteString (ByteString, hGetLine)
21+import Control.Concurrent.MVar
22+import System.IO (Handle, hTell, hSeek, SeekMode(..), hIsEOF)
23+
24+-- | ADT with methods, hiding the internal state.
25+data LineIndexedCursor = LineIndexedCursor
26+  {
27+  -- | Same as 'hGetLine' but safe.
28+  getCurrentLine :: IO (Maybe ByteString)
29+
30+
31+  -- | Rewinds to the requsted line number. Stops at EOF if it's too big.
32+  -- Returns the reached line number.
33+  , goToLine :: Integer -> IO Integer
34+
35+  -- | Returns the file 'Handle'.
36+  , getHandle :: Handle
37+  }
38+
39+data CursorHandle = CursorHandle
40+  { fileHandle :: Handle
41+  , linesIdx :: MVar ([Integer], Integer)
42+  }
43+
44+{- |
45+
46+Builds 'LineIndexedCursor'.
47+
48+Resets the file handle's ofsset to the beginning.
49+
50+Use 'System.IO.hSetNewlineMode' if you want to configure 'System.IO.NewlineMode'.
51+
52+-}
53+mkLineIndexedCursor :: Handle -> IO LineIndexedCursor
54+mkLineIndexedCursor fileHandle = do
55+  -- reset the handle's offset to the beginning
56+  hSeek fileHandle AbsoluteSeek 0
57+
58+  linesIdx <- newMVar ([0], 0)
59+
60+  let cursorHandle = CursorHandle fileHandle linesIdx
61+  pure $ LineIndexedCursor
62+    { getCurrentLine = getCurrentLine' cursorHandle
63+    , goToLine = goToLine' cursorHandle
64+    , getHandle = fileHandle
65+    }
66+
67+getCurrentLine' :: CursorHandle -> IO (Maybe ByteString)
68+getCurrentLine' CursorHandle{..} =
69+  hIsEOF fileHandle >>= \isEOF -> if isEOF then pure Nothing else do
70+    line <- hGetLine fileHandle
71+    offset <- hTell fileHandle
72+    modifyMVar_ linesIdx $ \(idx, size) -> pure $
73+      if (not $ offset `elem` idx)
74+      then (offset : idx, size + 1)
75+      else (idx, size)
76+    pure $ Just line
77+
78+goToLine' :: CursorHandle -> Integer -> IO Integer
79+goToLine' CursorHandle{..} ln = do
80+  modifyMVar linesIdx $ \(idx, size) -> do
81+    if ln > size then do
82+      hSeek fileHandle AbsoluteSeek (idx !! 0)
83+      -- try to read until the requested line number
84+      idxTail <- readUntil (ln - size) []
85+      let newSize = size + (fromIntegral $ length idxTail)
86+      pure ((idxTail ++ idx, newSize), newSize)
87+    else do
88+      let nextSeekIndex = fromIntegral $ size - ln
89+      hSeek fileHandle AbsoluteSeek (idx !! nextSeekIndex)
90+      pure ((idx, size), ln)
91+  where
92+    readUntil 0 idx = pure idx
93+    readUntil counter idx =
94+      hIsEOF fileHandle >>= \isEOF -> if isEOF then pure idx else do
95+        _ <- hGetLine fileHandle
96+        offset <- hTell fileHandle
97+        readUntil (counter - 1) (fromInteger offset : idx)
A test/Main.hs
+71, -0
 1@@ -0,0 +1,71 @@
 2+{-# LANGUAGE OverloadedStrings #-}
 3+
 4+module Main (main) where
 5+
 6+import System.IO
 7+import System.IO.LineIndexedCursor
 8+
 9+import Test.Hspec
10+
11+main :: IO ()
12+main = hspec $ do
13+  let
14+    mkCursor = do
15+      h <- openFile "test/testdata" ReadMode
16+      c <- mkLineIndexedCursor h
17+      pure (h, c)
18+
19+  before mkCursor . after (\(h, _) -> hClose h)
20+  $ describe "System.IO.LineIndexedCursor" $ do
21+
22+      it "getCurrentLine works" $ \(_, c) -> do
23+        l <- getCurrentLine c
24+        l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
25+
26+      it "goToLine works" $ \(_, c) -> do
27+        ln <- goToLine c 10
28+        ln `shouldBe` 10
29+
30+        l <- getCurrentLine c
31+        l `shouldBe` Just "Sed elementum velit sit amet orci mollis tincidunt."
32+
33+      it "goToLine is too big" $ \(_, c) -> do
34+        l <- getCurrentLine c
35+        l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
36+
37+        ln <- goToLine c 30
38+        ln `shouldBe` 20
39+
40+        l' <- getCurrentLine c
41+        l' `shouldBe` Nothing
42+
43+      it "read line, then go to beginning and forth" $ \(_, c) -> do
44+        l <- getCurrentLine c
45+        l `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
46+
47+        _ <- getCurrentLine c
48+        _ <- getCurrentLine c
49+        _ <- getCurrentLine c
50+        _ <- getCurrentLine c
51+        _ <- getCurrentLine c
52+
53+        ln <- goToLine c 0
54+        ln `shouldBe` 0
55+
56+        l' <- getCurrentLine c
57+        l' `shouldBe` Just "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
58+
59+        ln' <- goToLine c 5
60+        ln' `shouldBe` 5
61+
62+        l'' <- getCurrentLine c
63+        l'' `shouldBe` Just "Curabitur nec dui posuere, tincidunt turpis vitae, tincidunt magna."
64+
65+        ln'' <- goToLine c 6
66+        ln'' `shouldBe` 6
67+
68+        ln''' <- goToLine c 7
69+        ln''' `shouldBe` 7
70+
71+        ln'''' <- goToLine c 10
72+        ln'''' `shouldBe` 10
A test/testdata
+20, -0
 1@@ -0,0 +1,20 @@
 2+Lorem ipsum dolor sit amet, consectetur adipiscing elit.
 3+Sed eget lacus id nulla sagittis interdum sit amet ac quam.
 4+Vestibulum vehicula felis ac massa tincidunt, a elementum elit malesuada.
 5+Curabitur nec mi sit amet justo condimentum gravida.
 6+Pellentesque accumsan dolor at nisl pulvinar, ut bibendum diam egestas.
 7+Curabitur nec dui posuere, tincidunt turpis vitae, tincidunt magna.
 8+Duis fringilla orci vitae finibus fermentum.
 9+Pellentesque facilisis nisi sit amet urna elementum, nec blandit neque tincidunt.
10+Duis efficitur odio non ipsum consequat lobortis.
11+Curabitur faucibus tortor quis leo ultricies volutpat.
12+Sed elementum velit sit amet orci mollis tincidunt.
13+Aliquam vitae est vel odio pharetra fermentum.
14+Morbi facilisis sem id scelerisque fermentum.
15+Sed placerat lorem at commodo ornare.
16+Suspendisse bibendum ex non eros bibendum molestie.
17+Nullam sed elit quis arcu dapibus aliquam nec sed nunc.
18+Sed sodales ex a dapibus lacinia.
19+Duis at nunc et est maximus vestibulum.
20+Fusce ut justo id ante vehicula dignissim.
21+Praesent tincidunt eros vel viverra posuere.