vlambda博客
学习文章列表

MySQL导出CSV文件(Haskell实现)

为什么要写这个工具


2015年3月份写的一个Haskell的程序,实现和mysql命令select into outfile差不多的功能。mysql的命令只能把文件导出到本机,另一个工具mydumper也能实现差不多的功能。


因为我需要简单地远程使用,又要比Navicat等界面工具使用更灵活。效率比mysql命令差一些,毕竟mysql的命令是C语言实现的,比Navicat好在可以自定义读取的字段和范围限制。


最后吐槽一下Haskell,这个文件编译后居然达到18M多。跟py2exe打包的Python程序一样大了,那个程序用到了PySide,打包了Python、Qt、libsvn的运行库。


代码 hsdump.hs

{-# LANGUAGE OverloadedStrings #-}-- cabal update && cabal install cabal-install-- cabal install cassava && cabal install yaml-config && cabal install hdbc-mysql && cabal install missingH-- ghc -threaded -O2 -optc-O3 -funfolding-use-threshold=16 -fforce-recomp --make hsdump.hs
module Main whereimport Control.Monadimport Data.Char (ord)import Data.Csvimport Data.Convertibleimport Data.Maybe (fromJust)import qualified Data.ByteString.Lazy as Bytes (appendFile)import qualified Data.List.Utils as Utils (join)import qualified Data.Yaml.Config as Yamlimport Database.HDBCimport Database.HDBC.MySQLimport System.Directory (createDirectoryIfMissing)import System.Environment (getArgs)

data Task = Task { sql :: String, params :: [SqlValue], outname :: String }
{- 拼接SQL语句,创建任务 -}createTask :: Yaml.Config -> String -> String -> TaskcreateTask sqlconf start_day stop_day = Task { sql = "SELECT " ++ (Utils.join "," fields) ++ " FROM " ++ table ++ " WHERE " ++ condition ++ " ORDER BY " ++ order, params = params, outname = dayname ++ ".txt" } where (table:_) = Yaml.lookup "table" sqlconf :: [String] (index:_) = Yaml.lookup "index" sqlconf :: [String] (order:_) = Yaml.lookup "order" sqlconf :: [String] fields = Yaml.lookupDefault "fields" ["*"] sqlconf :: [String] dayname = case start_day of "" -> "20000000" otherwise -> filter (/='-') start_day -- 去掉日期中间的横杠 condition = case (start_day, stop_day) of ("", _) -> index ++ "<? OR " ++ index ++ " IS NULL" (_, "") -> index ++ ">=?" otherwise -> index ++ ">=? AND " ++ index ++ "<?" params = map toSql $ filter (/="") [start_day, stop_day]

{- 连接MySQL数据库 -}connectDB :: Yaml.Config -> IO ConnectionconnectDB dbconf = do conn <- connectMySQL defaultMySQLConnectInfo { mysqlHost = host, mysqlPort = port, mysqlUser = user, mysqlPassword = password, mysqlDatabase = database, mysqlUnixSocket = socket } runRaw conn "SET NAMES 'utf8'" -- 设置字符集 return conn where host = Yaml.lookupDefault "host" "localhost" dbconf :: String port = Yaml.lookupDefault "port" 3306 dbconf :: Int user = Yaml.lookupDefault "user" "root" dbconf :: String password = Yaml.lookupDefault "password" "" dbconf :: String database = Yaml.lookupDefault "database" "test" dbconf :: String socket = Yaml.lookupDefault "socket" "/var/lib/mysql.sock" dbconf :: String

{- 连接MS SQL SERVER数据库 -}{-import Database.HDBC.ODBCconnectDB :: Yaml.Config -> IO ConnectionconnectDB dbconf = do connectODBC conn_string where dsn = Yaml.lookupDefault "dsn" "" dbconf :: String host = Yaml.lookupDefault "host" "localhost" dbconf :: String port = Yaml.lookupDefault "port" 1433 dbconf :: Int user = Yaml.lookupDefault "user" "root" dbconf :: String password = Yaml.lookupDefault "password" "" dbconf :: String database = Yaml.lookupDefault "database" "test" dbconf :: String servername = case dsn of "" -> "Server=" ++ host ++ ";Port=" ++ (show port) otherwise -> "DSN=" ++ dsn conn_string = "Driver=FreeTDS;TDS_Version=8.0;" ++ servername ++ ";UID=" ++ user ++ ";PWD=" ++ password ++ ";Database=" ++ database ++ ";Options=262144"-}

{- 将转义字符再转义,即将\n变成\\n -}escape :: String -> Stringescape [] = []escape (x:xs) | x `elem` ['\\', '\b', '\t', '\n', '\r'] = '\\' : x : escape xs | otherwise = x : escape xs

{- 将字段值转为字符串,其中NULL转为\N并转义 -}fromMysql :: SqlValue -> StringfromMysql SqlNull = "\\N"fromMysql val = fromJust $ fromSql val :: String

{- 将字段值转为字符串,同时将原本为字符串类型的值中的转义字符再转义 -}escapeFromMysql :: SqlValue -> StringescapeFromMysql val@(SqlString _) = escape $ fromMysql valescapeFromMysql val@(SqlByteString _) = escape $ fromMysql valescapeFromMysql val@(SqlWord32 _) = escape $ fromMysql valescapeFromMysql val@(SqlWord64 _) = escape $ fromMysql valescapeFromMysql val = fromMysql val

dumpRecord :: [SqlValue] -> [String]dumpRecord row = map escapeFromMysql row

{- 输出结果到Tab分隔的CSV文件 -}outputRecords :: String -> Connection -> Task -> IO ()outputRecords outpath conn task = do stmt <- prepare conn (sql task) _ <- execute stmt (params task) rows <- fetchAllRows stmt Bytes.appendFile outfile $ encodeWith encOpts $ map dumpRecord rows where encOpts = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t'), encQuoting = QuoteNone } outfile = outpath ++ (outname task)

main :: IO ()main = do args <- getArgs -- 读命令行参数,1个参数:yaml文件名 conf <- Yaml.load (head args) -- 加载yaml中的配置 dbconf <- Yaml.subconfig "db" conf sqlconf <- Yaml.subconfig "sql" conf outconf <- Yaml.subconfig "out" conf let path = Yaml.lookupDefault "path" "./data" outconf :: String pre = Yaml.lookupDefault "pre" "records-" outconf :: String outpath = path ++ "/" ++ pre (days:_) = Yaml.lookup "days" conf :: [[String]] tasks = zipWith (createTask sqlconf) days (tail days) -- 以相邻两个日期作为最后两个参数 createDirectoryIfMissing True path -- 如果目录不存在,就创建 conn <- connectDB dbconf mapM_ (outputRecords outpath conn) tasks -- 调用查询输出结果命令 disconnect conn


配置文件

db: host: localhost port: 3306 user: dba password: password database: test socket: /var/lib/mysql/mysql.sock
sql: table: users index: created_at order: id fields: - id - username - password - created_at - modified_at - is_active
days: - "" - "2015-01-01" - "2015-02-01" - "2015-03-01" - "2015-04-01" - "2015-05-01"
out: path: ./data pre: users-