You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
117 lines
3.4 KiB
Haskell
117 lines
3.4 KiB
Haskell
module Main where
|
|
import System.Environment (getArgs)
|
|
import qualified Data.ByteString.Char8 as B
|
|
import Data.Set (Set, delete, toList, fromList)
|
|
import qualified Data.Set as Set
|
|
import Data.Maybe (catMaybes)
|
|
|
|
{-|
|
|
Taille minimum pour le recouvrement des `Reads`
|
|
-}
|
|
minbase :: Int
|
|
minbase = 3
|
|
|
|
{-|
|
|
A `Reads` is a bit of a sequence
|
|
-}
|
|
type Reads = B.ByteString
|
|
|
|
{-|
|
|
A `Sequence` contains a `List` of `Reads` and a `Set` of available `Reads` that
|
|
have not yet been included in the sequence. Optimization: inits is a reverse
|
|
`List`, thus adding an element or getting the last element is O(1).
|
|
-}
|
|
data Sequence = Sequence { inits :: [Reads], remains :: Set Reads }
|
|
deriving (Eq, Show)
|
|
|
|
{-|
|
|
Assemble a `List` of `Reads` into a `Text`.
|
|
|
|
assemble' ["abcd", "cdefg", "fghi"] = "abcdefghi"
|
|
-}
|
|
assemble' :: [Reads] -> B.ByteString
|
|
assemble' [] = B.empty
|
|
assemble' [b] = b
|
|
assemble' (a:b:rs) = B.append (B.take (B.length a - coincide a b) a)
|
|
(assemble' (b:rs))
|
|
|
|
{-|
|
|
Assemble the `List` of `Reads` of a `Sequence` into a `Text`.
|
|
-}
|
|
assemble :: Sequence -> B.ByteString
|
|
assemble = assemble' . reverse . inits
|
|
|
|
{-|
|
|
Search for the longest suffix of a first `Reads` which is also the prefix of a
|
|
second `Reads` and returns its length.
|
|
|
|
coincide "abcde" "cdefgh" = 3
|
|
-}
|
|
coincide :: Reads -> Reads -> Int
|
|
coincide a b | a `B.isPrefixOf` b = B.length a
|
|
| B.null a = 0
|
|
| otherwise = coincide (B.tail a) b
|
|
|
|
{-|
|
|
Returns `True` if the `Sequence` is complete. A `Sequence` is complete if there
|
|
is no more `Reads` remaining.
|
|
-}
|
|
complete :: Sequence -> Bool
|
|
complete = Set.null . remains
|
|
|
|
{-|
|
|
Returns `True` if the `Sequence` is valid. A `Sequence` is valid if it is
|
|
complete or if there is `Reads` from the remaining ones that can be added.
|
|
-}
|
|
valid :: Sequence -> Bool
|
|
valid s = complete s || next s /= []
|
|
|
|
{-|
|
|
Initializes a `List` of `Sequence` with each `Reads` from a `Set`.
|
|
-}
|
|
initSequences :: Set Reads -> [Sequence]
|
|
initSequences sr = [ Sequence [r] (delete r sr) | r <- toList sr ]
|
|
|
|
{-|
|
|
Try to add a `Reads` to a `Sequence`. The `Reads` must be a member of remaining
|
|
`Reads`. If the `Reads` does not coincide with the last `Reads` of the
|
|
`Sequence`, it returns `Nothing`. The `Reads` must with at least 3 characters.
|
|
-}
|
|
(-+-) :: Sequence -> Reads -> Maybe Sequence
|
|
(-+-) (Sequence is sr) r
|
|
| coincide (head is) r >= minbase = Just (Sequence (r:is) (delete r sr))
|
|
| otherwise = Nothing
|
|
|
|
{-|
|
|
Given a `Sequence`, returns a `List` of `Sequence` which can be constructed
|
|
from each remaining `Reads`.
|
|
-}
|
|
next :: Sequence -> [Sequence]
|
|
next s@(Sequence _ sr) = catMaybes [ s -+- r | r <- toList sr ]
|
|
|
|
{-|
|
|
Given a `List` of `Sequence`, returns a `List` of `Sequence` with one more
|
|
`Reads` added from the remaining `Reads`.
|
|
-}
|
|
nexts :: [Sequence] -> [Sequence]
|
|
nexts = concatMap next
|
|
|
|
{-|
|
|
Repeat the `nexts` action until there is a complete `Sequence`.
|
|
-}
|
|
loopUntilComplete :: ([Sequence] -> [Sequence]) -> [Sequence] -> [Sequence]
|
|
loopUntilComplete _ [] = []
|
|
loopUntilComplete f ss | any complete ss' = ss'
|
|
| otherwise = loopUntilComplete f ss'
|
|
where ss' = f ss
|
|
|
|
main :: IO ()
|
|
main = do
|
|
(seqFile:_) <- getArgs
|
|
content <- B.readFile seqFile
|
|
let rs = B.lines content
|
|
starts = initSequences (fromList rs)
|
|
solutions = loopUntilComplete nexts starts
|
|
|
|
putStrLn . B.unpack . assemble . head $ solutions
|