Skip to content

Commit 45478c2

Browse files
committed
Tidying up: represent PureScript strings as sequence of Word16
Changes: * Expand tests for string edge-cases * Remove use of `show` while printing errors * Remove `codePoints` from the export list of Language.PureScript.PSString * Fix an issue with derived Generic instances where colliding Idents were being generated * Change CoreFn/ToJSON so that invalid JSON strings (i.e. invalid UTF-16) will not be generated, since relatively few JSON parsers can cope with it (e.g. aeson) * Various function renaming and rearranging to better match existing conventions inside the compiler. Unfortunately we are forced to break the CoreFn JSON format with this change, as there is no way of generating strings that reliably parse to the value we want if strings are allowed to include invalid UTF-16. The CoreFn JSON changes in the following ways: * String literals are now generated as arrays of integers, where each integer is between 0 and 0xFFFF and represents one UTF-16 code unit (were previously generated as JSON strings). * Record literals are now generated as an array of pairs (two-element arrays), where the first element is the key, generated as an array of code units just like string literals, and the second element is the value.
1 parent 773d778 commit 45478c2

File tree

22 files changed

+271
-132
lines changed

22 files changed

+271
-132
lines changed

examples/passing/RecordLabels.purs

Lines changed: 0 additions & 5 deletions
This file was deleted.

examples/passing/RecordLabels/RecordLabels.purs

Lines changed: 0 additions & 25 deletions
This file was deleted.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module Main where
2+
3+
import Prelude
4+
import Records as Records
5+
import Symbols as Symbols
6+
7+
main = do
8+
Records.main
9+
Symbols.main
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
module Records where
2+
3+
import Prelude
4+
import Data.Generic (class Generic, toSpine, GenericSpine(..))
5+
import Control.Monad.Eff.Console (log)
6+
import Test.Assert (assert')
7+
8+
newtype AstralKeys = AstralKeys { "💡" :: Int, "💢" :: Int }
9+
newtype LoneSurrogateKeys = LoneSurrogateKeys { "\xdf06" :: Int, "\xd834" :: Int }
10+
11+
derive instance genericAstralKeys :: Generic AstralKeys
12+
derive instance genericLoneSurrogateKeys :: Generic LoneSurrogateKeys
13+
14+
spineOf :: forall a. Generic a => a -> Unit -> GenericSpine
15+
spineOf x _ = toSpine x
16+
17+
testLoneSurrogateKeys =
18+
let
19+
expected = 5
20+
actual = (_."\xd801" <<< helper) { "\xd800": 5 }
21+
in
22+
assert' ("lone surrogate keys: " <> show actual) (expected == actual)
23+
24+
where
25+
helper :: { "\xd800" :: Int } -> { "\xd801" :: Int }
26+
helper o =
27+
case o."\xd800" of
28+
x -> { "\xd801": x }
29+
30+
testAstralKeys =
31+
let
32+
expected = 5
33+
actual = (_."💢" <<< helper) { "💡": 5 }
34+
in
35+
assert' ("astral keys: " <> show actual) (expected == actual)
36+
37+
where
38+
helper :: { "💡" :: Int } -> { "💢" :: Int }
39+
helper o =
40+
case o."💡" of
41+
x -> { "💢": x }
42+
43+
testGenericLoneSurrogateKeys = do
44+
let expected = SProd "Records.LoneSurrogateKeys"
45+
[ \_ -> SRecord [ {recLabel: "\xd834", recValue: spineOf 1}
46+
, {recLabel: "\xdf06", recValue: spineOf 0}
47+
]
48+
]
49+
actual = toSpine (LoneSurrogateKeys { "\xdf06": 0, "\xd834": 1 })
50+
assert' ("generic lone surrogate keys: " <> show actual) (expected == actual)
51+
52+
testGenericAstralKeys = do
53+
let expected = SProd "Records.AstralKeys"
54+
[ \_ -> SRecord [ {recLabel: "💡", recValue: spineOf 0}
55+
, {recLabel: "💢", recValue: spineOf 1}
56+
]
57+
]
58+
actual = toSpine (AstralKeys { "💡": 0, "💢": 1 })
59+
assert' ("generic astral keys: " <> show actual) (expected == actual)
60+
61+
main = do
62+
testLoneSurrogateKeys
63+
testAstralKeys
64+
testGenericLoneSurrogateKeys
65+
testGenericAstralKeys
66+
log "Done"
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
-- This is similar to StringEscapes except we are performing the same tests
2+
-- with Symbols (at the type level).
3+
4+
module Symbols where
5+
6+
import Prelude
7+
import Control.Monad.Eff.Console (log)
8+
import Type.Data.Symbol (SProxy(..), class AppendSymbol, appendSymbol, reflectSymbol)
9+
import Test.Assert (assert')
10+
11+
highS :: SProxy "\xd834"
12+
highS = SProxy
13+
14+
lowS :: SProxy "\xdf06"
15+
lowS = SProxy
16+
17+
loneSurrogates :: Boolean
18+
loneSurrogates = reflectSymbol (appendSymbol highS lowS) == "\x1d306"
19+
20+
outOfOrderSurrogates :: Boolean
21+
outOfOrderSurrogates = reflectSymbol (appendSymbol lowS highS) == "\xdf06\xd834"
22+
23+
notReplacing :: Boolean
24+
notReplacing = reflectSymbol lowS /= "\xfffd"
25+
26+
main = do
27+
assert' "lone surrogates may be combined into a surrogate pair" loneSurrogates
28+
assert' "lone surrogates may be combined out of order to remain lone surrogates" outOfOrderSurrogates
29+
assert' "lone surrogates are not replaced with the Unicode replacement character U+FFFD" notReplacing
30+
log "Done"

src/Language/PureScript/CodeGen/JS.hs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import Language.PureScript.Errors (ErrorMessageHint(..), SimpleErrorMessage(..),
3535
errorMessage, rethrowWithPosition, addHint)
3636
import Language.PureScript.Names
3737
import Language.PureScript.Options
38-
import Language.PureScript.PSString (PSString, mkString, codePoints)
38+
import Language.PureScript.PSString (PSString, mkString, decodeString)
3939
import Language.PureScript.Traversals (sndM)
4040
import qualified Language.PureScript.Constants as C
4141

@@ -183,8 +183,11 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
183183

184184
accessorString :: PSString -> JS -> JS
185185
accessorString prop =
186-
let quoted = JSIndexer Nothing (JSStringLiteral Nothing prop) in
187-
either (const quoted) (\t -> if identNeedsEscaping t then quoted else JSAccessor Nothing prop) $ codePoints prop
186+
case decodeString prop of
187+
Just s | not (identNeedsEscaping s) ->
188+
JSAccessor Nothing s
189+
_ ->
190+
JSIndexer Nothing (JSStringLiteral Nothing prop)
188191

189192
-- |
190193
-- Generate code in the simplified Javascript intermediate representation for a value or expression.
@@ -259,7 +262,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
259262
(JSUnary Nothing JSNew $ JSApp Nothing (JSVar Nothing (properToJs ctor)) []) ]
260263
valueToJs' (Constructor _ _ (ProperName ctor) fields) =
261264
let constructor =
262-
let body = [ JSAssignment Nothing (JSAccessor Nothing (mkString $ identToJs f) (JSVar Nothing "this")) (var f) | f <- fields ]
265+
let body = [ JSAssignment Nothing (JSAccessor Nothing (identToJs f) (JSVar Nothing "this")) (var f) | f <- fields ]
263266
in JSFunction Nothing (Just (properToJs ctor)) (identToJs `map` fields) (JSBlock Nothing body)
264267
createFn =
265268
let body = JSUnary Nothing JSNew $ JSApp Nothing (JSVar Nothing (properToJs ctor)) (var `map` fields)
@@ -394,7 +397,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
394397
argVar <- freshName
395398
done'' <- go remain done'
396399
js <- binderToJs argVar done'' binder
397-
return (JSVariableIntroduction Nothing argVar (Just (JSAccessor Nothing (mkString $ identToJs field) (JSVar Nothing varName))) : js)
400+
return (JSVariableIntroduction Nothing argVar (Just (JSAccessor Nothing (identToJs field) (JSVar Nothing varName))) : js)
398401
binderToJs' _ _ ConstructorBinder{} =
399402
internalError "binderToJs: Invalid ConstructorBinder in binderToJs"
400403
binderToJs' varName done (NamedBinder _ ident binder) = do

src/Language/PureScript/CodeGen/JS/AST.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ data JS
161161
-- |
162162
-- An object property accessor expression
163163
--
164-
| JSAccessor (Maybe SourceSpan) PSString JS
164+
| JSAccessor (Maybe SourceSpan) Text JS
165165
-- |
166166
-- A function introduction (optional name, arguments, body)
167167
--

src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,13 @@ removeFromBlock _ js = js
7272

7373
isFn :: (Text, Text) -> JS -> Bool
7474
isFn (moduleName, fnName) (JSAccessor _ x (JSVar _ y)) =
75-
x == mkString fnName && y == moduleName
75+
x == fnName && y == moduleName
7676
isFn (moduleName, fnName) (JSIndexer _ (JSStringLiteral _ x) (JSVar _ y)) =
7777
x == mkString fnName && y == moduleName
7878
isFn _ _ = False
7979

8080
isDict :: (Text, Text) -> JS -> Bool
81-
isDict (moduleName, dictName) (JSAccessor _ x (JSVar _ y)) = x == mkString dictName && y == moduleName
81+
isDict (moduleName, dictName) (JSAccessor _ x (JSVar _ y)) = x == dictName && y == moduleName
8282
isDict _ _ = False
8383

8484
isDict' :: [(Text, Text)] -> JS -> Bool

src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import qualified Data.Text as T
2323
import Language.PureScript.CodeGen.JS.AST
2424
import Language.PureScript.CodeGen.JS.Optimizer.Common
2525
import qualified Language.PureScript.Constants as C
26-
import Language.PureScript.PSString (mkString)
2726

2827
-- TODO: Potential bug:
2928
-- Shouldn't just inline this case: { var x = 0; x.toFixed(10); }
@@ -214,7 +213,7 @@ inlineCommonOperators = applyAll $
214213

215214
isNFn :: Text -> Int -> JS -> Bool
216215
isNFn prefix n (JSVar _ name) = name == (prefix <> T.pack (show n))
217-
isNFn prefix n (JSAccessor _ name (JSVar _ dataFunctionUncurried)) | dataFunctionUncurried == C.dataFunctionUncurried = name == mkString (prefix <> T.pack (show n))
216+
isNFn prefix n (JSAccessor _ name (JSVar _ dataFunctionUncurried)) | dataFunctionUncurried == C.dataFunctionUncurried = name == (prefix <> T.pack (show n))
218217
isNFn _ _ _ = False
219218

220219
runFn :: Int -> JS -> JS
@@ -236,11 +235,11 @@ inlineCommonOperators = applyAll $
236235
convert other = other
237236

238237
isModFn :: (Text, Text) -> JS -> Bool
239-
isModFn (m, op) (JSAccessor _ op' (JSVar _ m')) = m == m' && mkString op == op'
238+
isModFn (m, op) (JSAccessor _ op' (JSVar _ m')) = m == m' && op == op'
240239
isModFn _ _ = False
241240

242241
isModFnWithDict :: (Text, Text) -> JS -> Bool
243-
isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [JSVar _ _]) = m == m' && mkString op == op'
242+
isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [JSVar _ _]) = m == m' && op == op'
244243
isModFnWithDict _ _ = False
245244

246245
-- (f <<< g $ x) = f (g x)

src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ magicDo' = everywhereOnJS undo . everywhereOnJSTopDown convert
6868
-- Check if an expression represents the polymorphic pure or return function
6969
isPurePoly = isFn (C.controlApplicative, C.pure')
7070
-- Check if an expression represents a function in the Eff module
71-
isEffFunc name (JSAccessor _ name' (JSVar _ eff)) = eff == C.eff && mkString name == name'
71+
isEffFunc name (JSAccessor _ name' (JSVar _ eff)) = eff == C.eff && name == name'
7272
isEffFunc _ _ = False
7373

7474
-- Remove __do function applications which remain after desugaring
@@ -107,14 +107,14 @@ inlineST = everywhereOnJS convertBlock
107107
convert agg (JSApp s1 f [arg]) | isSTFunc C.newSTRef f =
108108
JSFunction s1 Nothing [] (JSBlock s1 [JSReturn s1 $ if agg then arg else JSObjectLiteral s1 [(mkString C.stRefValue, arg)]])
109109
convert agg (JSApp _ (JSApp s1 f [ref]) []) | isSTFunc C.readSTRef f =
110-
if agg then ref else JSAccessor s1 (mkString C.stRefValue) ref
110+
if agg then ref else JSAccessor s1 C.stRefValue ref
111111
convert agg (JSApp _ (JSApp _ (JSApp s1 f [ref]) [arg]) []) | isSTFunc C.writeSTRef f =
112-
if agg then JSAssignment s1 ref arg else JSAssignment s1 (JSAccessor s1 (mkString C.stRefValue) ref) arg
112+
if agg then JSAssignment s1 ref arg else JSAssignment s1 (JSAccessor s1 C.stRefValue ref) arg
113113
convert agg (JSApp _ (JSApp _ (JSApp s1 f [ref]) [func]) []) | isSTFunc C.modifySTRef f =
114-
if agg then JSAssignment s1 ref (JSApp s1 func [ref]) else JSAssignment s1 (JSAccessor s1 (mkString C.stRefValue) ref) (JSApp s1 func [JSAccessor s1 (mkString C.stRefValue) ref])
114+
if agg then JSAssignment s1 ref (JSApp s1 func [ref]) else JSAssignment s1 (JSAccessor s1 C.stRefValue ref) (JSApp s1 func [JSAccessor s1 C.stRefValue ref])
115115
convert _ other = other
116116
-- Check if an expression represents a function in the ST module
117-
isSTFunc name (JSAccessor _ name' (JSVar _ st)) = st == C.st && mkString name == name'
117+
isSTFunc name (JSAccessor _ name' (JSVar _ st)) = st == C.st && name == name'
118118
isSTFunc _ _ = False
119119
-- Find all ST Refs initialized in this block
120120
findSTRefsIn = everythingOnJS (++) isSTRef

0 commit comments

Comments
 (0)