Collapse SQL Whitespace (Part 2)
In the Collapse
SQL Whitespace blog entry, I presented my implementation of
sqlCollapseWhitespace
, a function that collapses whitespace
and removes comments from PostgreSQL queries. My motivation
is to take advantage of Linus’s law and
get feedback. I generally try to avoid parsing SQL because there are so
many edge cases, as well as functionality that I am (blissfully?)
unaware of. This blog entry presents an updated version of the function
that supports formatting of statements that include semicolons.
I learned that some PostgreSQL statements make use of the semicolon,
so a semicolon does not necessarily mark the end of a
statement. For example, a CREATE RULE
statement may be defined with multiple commands that are separated by
semicolons. It is interesting that dollar-quoted strings are not used
for this.
CREATE [ OR REPLACE ] RULE name AS ON event
TO table_name [ WHERE condition ]
DO [ ALSO | INSTEAD ] { NOTHING | command | ( command ; command ... ) }
where event can be one of:
SELECT | INSERT | UPDATE | DELETE
My previous implementation collapsed whitespace after such a semicolon delimiter to a newline. The result is still valid, but it does not meet my goal of only using newlines in between statements.
Thanks go to Marcelo Zabani, the author of Codd.
This issue is easily resolved by parsing parenthesis and keeping track of the number of open parenthesis. A semicolon ends a statement only when there are no open parenthesis. The updated function is included below, and the full source is available on GitHub.
Code
sqlCollapseWhitespace :: String -> String
= goSpace 0 Nothing
sqlCollapseWhitespace where
dropStdComment :: String -> String
= dropWhile (/= '\n')
dropStdComment
dropCStyleComment :: Int -> String -> String
!count = \case
dropCStyleComment '*':'/':xs)
(| count < 1 -> xs
| otherwise -> dropCStyleComment (count - 1) xs
'/':'*':xs) -> dropCStyleComment (count + 1) xs
(:xs) -> dropCStyleComment count xs
(_x-> []
[]
goSpace :: Int -> Maybe Char -> String -> String
!numParens mC xs = case dropWhile isSpace xs of
goSpace '-':'-':ys) -> goSpace numParens mC $ dropStdComment ys
('/':'*':ys) -> goSpace numParens mC $ dropCStyleComment 0 ys
(-> []
[] -> case mC of
ys Just c -> c : goSql numParens ys
Nothing -> goSql numParens ys
goSql :: Int -> String -> String
!numParens = \case
goSql '"':xs) -> '"' : goIdentDQ numParens xs
('\'':xs) -> '\'' : goStringSQ numParens xs
('$':xs) -> '$' : goStringDSBegin numParens "" xs
('(':xs) -> '(' : goSql (numParens + 1) xs
(')':xs) -> ')' : goSql (max 0 $ numParens - 1) xs
(';':xs)
(| numParens > 0 -> ';' : goSql numParens xs
| otherwise -> ';' : goSpace numParens (Just '\n') xs
'-':'-':xs) -> goSpace numParens (Just ' ') $ dropStdComment xs
('/':'*':xs) -> goSpace numParens (Just ' ') $ dropCStyleComment 0 xs
('E':'\'':xs) -> 'E' : '\'' : goStringE numParens xs
(:xs)
(x| isSpace x -> goSpace numParens (Just ' ') xs
| otherwise -> x : goSql numParens xs
-> []
[]
goIdentDQ :: Int -> String -> String
!numParens = \case
goIdentDQ '"':'"':xs) -> '"' : '"' : goIdentDQ numParens xs
('"':xs) -> '"' : goSql numParens xs
(:xs) -> x : goIdentDQ numParens xs
(x-> []
[]
goStringSQ :: Int -> String -> String
!numParens = \case
goStringSQ '\'':'\'':xs) -> '\'' : '\'' : goStringSQ numParens xs
('\'':xs) -> goStringSQEnd numParens False xs
(:xs) -> x : goStringSQ numParens xs
(x-> []
[]
goStringSQEnd :: Int -> Bool -> String -> String
!numParens hasSpace = \case
goStringSQEnd '\n':xs) -> goStringSQEndNewline numParens xs
('-':'-':xs) ->
(-- avoid unused @hasSpace@
$ dropStdComment xs
goStringSQEndNewline numParens '/':'*':xs) -> goStringSQEnd numParens True $ dropCStyleComment 0 xs
(@(y:ys)
xs| isSpace y -> goStringSQEnd numParens True ys
| hasSpace -> '\'' : ' ' : goSql numParens xs
| otherwise -> '\'' : goSql numParens xs
-> "'"
[]
goStringSQEndNewline :: Int -> String -> String
!numParens xs = case dropWhile isSpace xs of
goStringSQEndNewline '\'':ys) -> goStringSQ numParens ys
('-':'-':ys) -> goStringSQEndNewline numParens $ dropStdComment ys
('/':'*':ys) -> goStringSQEndNewline numParens $ dropCStyleComment 0 ys
(-> "'"
[] -> '\'' : ' ' : goSql numParens ys
ys
goStringE :: Int -> String -> String
!numParens = \case
goStringE '\\':x:xs) -> '\\' : x : goStringE numParens xs
('\'':'\'':xs) -> '\'' : '\'' : goStringE numParens xs
('\'':xs) -> '\'' : goSql numParens xs
(:xs) -> x : goStringE numParens xs
(x-> []
[]
goStringDSBegin :: Int -> String -> String -> String
!numParens acc = \case
goStringDSBegin '$':xs) -> '$' : goStringDS numParens (reverse acc) xs
(:xs) -> x : goStringDSBegin numParens (x : acc) xs
(x-> []
[]
goStringDS :: Int -> String -> String -> String
!numParens tag = \case
goStringDS '$':xs) -> '$' : goStringDSEnd numParens tag (tag, xs)
(:xs) -> x : goStringDS numParens tag xs
(x-> []
[]
goStringDSEnd :: Int -> String -> (String, String) -> String
!numParens tag = \case
goStringDSEnd :ts, x:xs)
(t| x == t -> x : goStringDSEnd numParens tag (ts, xs)
| x == '$' -> x : goStringDSEnd numParens tag (tag, xs)
| otherwise -> x : goStringDS numParens tag xs
'$':xs) -> '$' : goSql numParens xs
([], -> goStringDS numParens tag xs (_ts, xs)