From 30e3ffad758c2bb41b5e5fbfbf7027ef3535e34d Mon Sep 17 00:00:00 2001 From: Kirill Trofimov Date: Fri, 30 Sep 2011 16:10:19 +0400 Subject: [PATCH] Fix for issues #56, #64 (Add proper unicode support to (de)serialization) --- src/socketio_data.erl | 20 ++++++++++++++------ test/prop_transport.erl | 12 ++++++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/socketio_data.erl b/src/socketio_data.erl index e125586..084667a 100644 --- a/src/socketio_data.erl +++ b/src/socketio_data.erl @@ -20,19 +20,25 @@ encode(#msg{ content = Content, json = false }) when is_list(Content) -> Length = integer_to_list(length(Content)), - ?FRAME ++ Length ++ ?FRAME ++ Content; + convert_to_latin1_list(?FRAME ++ Length ++ ?FRAME ++ Content); encode(#msg{ content = Content, json = true }) -> - JSON = binary_to_list(jsx:term_to_json(Content)), + JSON = unicode:characters_to_list(jsx:term_to_json(Content)), Length = integer_to_list(length(JSON) + ?JSON_FRAME_LENGTH), - ?FRAME ++ Length ++ ?FRAME ++ ?JSON_FRAME ++ JSON; + convert_to_latin1_list(?FRAME ++ Length ++ ?FRAME ++ ?JSON_FRAME ++ JSON); encode(#heartbeat{ index = Index }) -> String = integer_to_list(Index), Length = integer_to_list(length(String) + ?HEARTBEAT_FRAME_LENGTH), - ?FRAME ++ Length ++ ?FRAME ++ ?HEARTBEAT_FRAME ++ String. + convert_to_latin1_list(?FRAME ++ Length ++ ?FRAME ++ ?HEARTBEAT_FRAME ++ String). + +convert_to_latin1_list(Message) -> + binary_to_list(unicode:characters_to_binary(Message)). decode(#msg{content=Str}) when is_list(Str) -> + header(unicode:characters_to_list(list_to_binary(Str))). + +plain_decode(#msg{content=Str}) when is_list(Str) -> header(Str). header(?FRAME ++ Rest) -> @@ -54,7 +60,7 @@ body(Length, Body) -> json(Length, Body) -> {Object, Rest} = lists:split(Length, Body), - [#msg{content=jsx:json_to_term(list_to_binary(Object), [{strict,false}]), json=true} | + [#msg{content=jsx:json_to_term(unicode:characters_to_binary(Object), [{strict,false}]), json=true} | handle_rest(Rest)]. heartbeat(Length, Body) -> @@ -62,7 +68,7 @@ heartbeat(Length, Body) -> [#heartbeat{index=list_to_integer(Heart)} | handle_rest(Rest)]. handle_rest([]) -> []; -handle_rest(X) -> decode(#msg{content=X}). +handle_rest(X) -> plain_decode(#msg{content=X}). %% TESTS -include_lib("eunit/include/eunit.hrl"). @@ -92,4 +98,6 @@ json_encoding_test() -> [X] = decode(#msg{content=Data}), ?assertMatch(#msg{content=JSON, json=true}, X). + + -endif. diff --git a/test/prop_transport.erl b/test/prop_transport.erl index 85fd1b3..f35fe17 100644 --- a/test/prop_transport.erl +++ b/test/prop_transport.erl @@ -82,10 +82,18 @@ gen_encoded({N, Encoded}) -> gen_string() -> ?LAZY(weighted_union([ {1, []}, - {1, [$~|string()]}, - {10, [char()|gen_string()]} + {1, [$~|utf_string()]}, + {10, [utf_char()|gen_string()]} ])). +%% UTF8 includes integers from 0x00000000 to 0x001FFFFF +%% But there is many exceptions higher #7FF (skip other values) +utf_char() -> + integer(0, 16#7FF). + +utf_string() -> + list(utf_char()). + heartbeat() -> ?LET(N, int(), abs(N)). json() ->