Skip to content

Commit

Permalink
Merge pull request #64 from smoogipoo/fix-newline-escape
Browse files Browse the repository at this point in the history
Implement PHP string escaping semantics
  • Loading branch information
peppy authored Aug 1, 2024
2 parents 07256c6 + eb1cd1f commit d4c40d2
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,69 @@ public void TestEmptyString(string input)
[InlineData("😊😊", "\"😊😊\"")]
[InlineData("hello\nworld", "'hello\nworld'")]
[InlineData("hello\nworld", "\"hello\nworld\"")]
[InlineData("hello\\nworld", "'hello\\nworld'")]
[InlineData("hello\nworld", "\"hello\\nworld\"")]
public void TestBasicString(string expected, string input)
{
    // Run the quoted PHP literal through the parser, then compare against the expected unquoted text.
    string actual = parse(input);

    Assert.Equal(expected, actual);
}

[Theory]
// \'
[InlineData("\\'", "\"\\'\"")]
[InlineData("'", "'\\''")]
// \\
[InlineData("\\", "\"\\\\\"")]
[InlineData("\\", "'\\\\'")]
// \n
[InlineData("\n", "\"\\n\"")]
[InlineData("\\n", "'\\n'")]
// \r
[InlineData("\r", "\"\\r\"")]
[InlineData("\\r", "'\\r'")]
// \t
[InlineData("\t", "\"\\t\"")]
[InlineData("\\t", "'\\t'")]
// \v
[InlineData("\v", "\"\\v\"")]
[InlineData("\\v", "'\\v'")]
// \e
[InlineData("\x1B", "\"\\e\"")]
[InlineData("\\e", "'\\e'")]
// \f
[InlineData("\f", "\"\\f\"")]
[InlineData("\\f", "'\\f'")]
// \$
[InlineData("$", "\"\\$\"")]
[InlineData("\\$", "'\\$'")]
// \"
[InlineData("\"", "\"\\\"\"")]
[InlineData("\\\"", "'\\\"'")]
// \[0-7]{1,3}
[InlineData("A", "\"\\101\"")]
[InlineData("\\101", "'\\101'")]
[InlineData("AB", "\"\\101\\102\"")]
[InlineData("\\101\\102", "'\\101\\102'")]
[InlineData("\0", "\"\\400\"")]
[InlineData("\\400", "'\\400'")]
[InlineData("\\800", "\"\\800\"")]
[InlineData("\\800", "'\\800'")]
// \x[0-9A-Fa-f]{1,2}
[InlineData("A", "\"\\x41\"")]
[InlineData("\\x41", "'\\x41'")]
[InlineData("AB", "\"\\x41\\x42\"")]
[InlineData("\\x41\\x42", "'\\x41\\x42'")]
// \u{[0-9A-Fa-f]+}
[InlineData("A", "\"\\u{41}\"")]
[InlineData("\\u{41}", "'\\u{41}'")]
[InlineData("AB", "\"\\u{41}\\u{42}\"")]
[InlineData("\\u{41}\\u{42}", "'\\u{41}\\u{42}'")]
// Invalid escape sequence
[InlineData("\\g", "\"\\g\"")]
[InlineData("\\g", "'\\g'")]
// Other escaped strings
[InlineData(":username's data", "':username\\'s data'")]
[InlineData(":username's data", "\":username\\'s data\"")]
[InlineData(":username\\'s data", "\":username\\'s data\"")]
[InlineData("\"escaped\" quotes", "\"\\\"escaped\\\" quotes\"")]
public void TestEscapedString(string expected, string input)
{
Expand Down
115 changes: 106 additions & 9 deletions LocalisationAnalyser.Tools/Php/PhpStringLiteralSyntaxNode.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
// Copyright (c) ppy Pty Ltd <contact@ppy.sh>. Licensed under the MIT Licence.
// See the LICENCE file in the repository root for full licence text.

using System;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

namespace LocalisationAnalyser.Tools.Php
{
Expand All @@ -10,6 +13,10 @@ namespace LocalisationAnalyser.Tools.Php
/// </summary>
public class PhpStringLiteralSyntaxNode : PhpLiteralSyntaxNode
{
private static readonly Regex oct_pattern = new Regex("^([0-7]{1,3})", RegexOptions.Compiled);
private static readonly Regex hex_pattern = new Regex("^(x[0-9A-Fa-f]{1,2})", RegexOptions.Compiled);
private static readonly Regex uni_pattern = new Regex("^(u{[0-9A-Fa-f]+})", RegexOptions.Compiled);

public readonly string Text;

public PhpStringLiteralSyntaxNode(string text)
Expand All @@ -21,27 +28,23 @@ public static PhpStringLiteralSyntaxNode Parse(PhpTokeniser tokeniser)
{
tokeniser.SkipWhitespace();

char trivia = tokeniser.GetTrivia();

// Skip leading trivia.
char leader = tokeniser.GetTrivia();
tokeniser.Advance();

var stringBuilder = new StringBuilder();
bool isEscaping = false;

while (isEscaping || tokeniser.GetTrivia() != trivia)
while (tokeniser.GetTrivia() != leader)
{
var token = tokeniser.GetTrivia();
char token = tokeniser.GetTrivia();
tokeniser.Advance();

if (token == '\\' && !isEscaping)
if (token == '\\')
{
isEscaping = true;
stringBuilder.Append(processEscapeSequence(leader, tokeniser));
continue;
}

stringBuilder.Append(token);
isEscaping = false;
}

// Skip trailing trivia.
Expand All @@ -50,5 +53,99 @@ public static PhpStringLiteralSyntaxNode Parse(PhpTokeniser tokeniser)

return new PhpStringLiteralSyntaxNode(stringBuilder.ToString());
}

/// <summary>
/// Resolves the escape sequence that follows a backslash inside a PHP string literal.
/// The tokeniser is expected to be positioned on the character immediately after the backslash.
/// </summary>
/// <remarks>
/// When the sequence is recognised, every character belonging to it is consumed from the tokeniser.
/// When it is not recognised, nothing is consumed and a single backslash is returned.
/// </remarks>
/// <param name="leader">The quote character that opened the string (<c>'</c> or <c>"</c>).</param>
/// <param name="tokeniser">The tokeniser to read the escape sequence from.</param>
/// <returns>The text the escape sequence stands for, or a literal backslash if it is not a supported sequence.</returns>
private static string processEscapeSequence(char leader, PhpTokeniser tokeniser)
{
    char trivia = tokeniser.GetTrivia();

    // Base cases for \{leader} and \\, supported by both single- and double-quoted strings.
    if (trivia == leader || trivia == '\\')
    {
        tokeniser.Advance();
        return trivia.ToString();
    }

    // No other escape sequences are supported for single-quoted strings.
    if (leader == '\'')
        return @"\";

    // Double-quoted strings have a few more cases...
    switch (trivia)
    {
        case 'n':
            tokeniser.Advance();
            return "\n";

        case 'r':
            tokeniser.Advance();
            return "\r";

        case 't':
            tokeniser.Advance();
            return "\t";

        case 'v':
            tokeniser.Advance();
            return "\v";

        case 'e':
            tokeniser.Advance();
            return "\x1B";

        case 'f':
            tokeniser.Advance();
            return "\f";

        case '$':
            tokeniser.Advance();
            return "$";

        case >= '0' and <= '7':
        {
            // \[0-7]{1,3}: the current digit plus up to two more.
            Match match = oct_pattern.Match($"{trivia}{tokeniser.PeekNext(2)}");

            if (match.Success)
            {
                tokeniser.Advance(match.Length);

                unchecked
                {
                    // Three octal digits can exceed 255; the cast keeps only the low 8 bits.
                    byte octValue = (byte)Convert.ToInt32(match.Value, 8);
                    return ((char)octValue).ToString();
                }
            }

            break;
        }

        case 'x':
        {
            // \x[0-9A-Fa-f]{1,2}: at most two hex digits, so the value always fits in a byte.
            Match match = hex_pattern.Match($"{trivia}{tokeniser.PeekNext(2)}");

            if (match.Success)
            {
                tokeniser.Advance(match.Length);
                return ((char)byte.Parse(match.Value[1..], NumberStyles.HexNumber, CultureInfo.InvariantCulture)).ToString();
            }

            break;
        }

        case 'u':
        {
            // \u{[0-9A-Fa-f]+}: the 16-character look-ahead leaves room for at most 14 hex digits.
            Match match = uni_pattern.Match($"{trivia}{tokeniser.PeekNext(16)}");

            // Validate before consuming anything. char.ConvertFromUtf32 throws for values above 0x10FFFF
            // and for the surrogate range, and a 32-bit parse would overflow on long digit runs.
            // At most 14 hex digits always fit in a long, so the parse itself cannot overflow.
            if (match.Success
                && long.TryParse(match.Value[2..^1], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out long codePoint)
                && codePoint <= 0x10FFFF
                && (codePoint < 0xD800 || codePoint > 0xDFFF))
            {
                tokeniser.Advance(match.Length);
                return char.ConvertFromUtf32((int)codePoint);
            }

            // Not a representable code point: fall through and treat it like any other unrecognised escape.
            break;
        }
    }

    // Unrecognised escape: keep the backslash and leave the following characters unconsumed.
    return @"\";
}
}
}
23 changes: 23 additions & 0 deletions LocalisationAnalyser.Tools/Php/PhpTokeniser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ public void TryAdvance()
Advance();
}

/// <summary>
/// Steps forward over several trivia by calling <see cref="Advance()"/> once per trivia.
/// </summary>
/// <param name="length">How many trivia to step over. Zero or a negative value advances nothing.</param>
public void Advance(int length)
{
    int remaining = length;

    while (remaining > 0)
    {
        Advance();
        remaining--;
    }
}

/// <summary>
/// Advances to the next trivia.
/// </summary>
Expand Down Expand Up @@ -104,6 +114,19 @@ public bool TryPeekNext(out char trivia)
return true;
}

/// <summary>
/// Looks ahead at upcoming trivia without moving the tokeniser.
/// </summary>
/// <param name="length">The maximum number of trivia to read, starting just after the current one.</param>
/// <returns>The peeked trivia as a string; shorter than <paramref name="length"/> (possibly empty) when the content ends first.</returns>
public string PeekNext(int length)
{
    int total = content.Length;

    // Both bounds are clamped to the content length so peeking near the end never reads past it.
    int from = Math.Min(total, currentIndex + 1);
    int to = Math.Min(total, from + length);

    return content.AsSpan().Slice(from, to - from).ToString();
}

/// <summary>
/// Skips all current whitespace and comments.
/// </summary>
Expand Down

0 comments on commit d4c40d2

Please sign in to comment.