-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Setup.hs
76 lines (68 loc) · 2.56 KB
/
Setup.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{-# LANGUAGE OverloadedStrings #-}
import Control.Monad
import Prelude hiding (concat)
import System.IO (Handle, IOMode (..), hClose, hSetEncoding, utf8, withFile)
import Codec.Archive.Zip
import Data.ByteString.Lazy (ByteString, hPut)
import Data.Text
import Data.Text.IO (hGetLine, hPutStrLn)
import Distribution.Simple
import Network.HTTP.Client
import System.Directory
import System.FilePath
import System.IO.Temp
unihanUrl :: String
unihanUrl = "http://ftp.unicode.org/Public/11.0.0/ucd/Unihan.zip"
kHangulPath :: FilePath
kHangulPath = "src" </> "Text" </> "Seonbi" </> "kHangul.txt"
main :: IO ()
main = do
exist <- doesFileExist kHangulPath
unless exist $ do
data' <- downloadUnihan
extractUnihanReadings data' $ \ txtPath -> do
values <- withFile txtPath ReadMode (extractProp "kHangul")
withFile kHangulPath WriteMode $ \ handle -> do
hSetEncoding handle utf8
forM_ values $ \ (char, value) ->
hPutStrLn handle $ concat [char, "\t", value]
defaultMain
extractProp :: Text -> Handle -> IO [(Text, Text)]
extractProp property handle = do
hSetEncoding handle utf8
line <- hGetLine handle
case line of
"" ->
return []
line' ->
case breakOn "\t" line' of
(_, "") ->
extractProp property handle
(char, rest)
| "U+" `isPrefixOf` char && "\t" `isPrefixOf` rest ->
case breakOn "\t" $ Data.Text.tail rest of
(_, "") ->
extractProp property handle
(prop, value) | prop == property ->
((char, value) :) <$> extractProp property handle
_ ->
extractProp property handle
_ ->
extractProp property handle
extractUnihanReadings :: ByteString -> (FilePath -> IO a) -> IO a
extractUnihanReadings data' callback =
withSystemTempFile "Unihan.zip" $ \ zipPath handle -> do
hPut handle data'
hClose handle
let entryName = "Unihan_Readings.txt"
withSystemTempFile entryName $ \ txtPath handle' -> do
hClose handle'
sel <- mkEntrySelector entryName
withArchive zipPath (saveEntry sel txtPath)
callback txtPath
downloadUnihan :: IO ByteString
downloadUnihan = do
mgr <- newManager defaultManagerSettings
req <- parseRequest unihanUrl
res <- httpLbs req mgr
return $ responseBody res