Skip to content

Commit

Permalink
Implement PHP string escaping semantics
Browse files Browse the repository at this point in the history
  • Loading branch information
smoogipoo committed Aug 1, 2024
1 parent b3f05be commit d3a00b7
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 9 deletions.
115 changes: 106 additions & 9 deletions LocalisationAnalyser.Tools/Php/PhpStringLiteralSyntaxNode.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
// Copyright (c) ppy Pty Ltd <[email protected]>. Licensed under the MIT Licence.
// See the LICENCE file in the repository root for full licence text.

using System;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

namespace LocalisationAnalyser.Tools.Php
{
Expand All @@ -10,6 +13,10 @@ namespace LocalisationAnalyser.Tools.Php
/// </summary>
public class PhpStringLiteralSyntaxNode : PhpLiteralSyntaxNode
{
// Body of an octal escape: one to three octal digits anchored at the start of the input.
private static readonly Regex oct_pattern = new Regex("^([0-7]{1,3})", RegexOptions.Compiled);
// Body of a hexadecimal escape: 'x' followed by one or two hex digits, anchored at the start of the input.
private static readonly Regex hex_pattern = new Regex("^(x[0-9A-Fa-f]{1,2})", RegexOptions.Compiled);
// Body of a unicode escape: 'u', an opening brace, one or more hex digits and a closing brace, anchored at the start of the input.
private static readonly Regex uni_pattern = new Regex("^(u{[0-9A-Fa-f]+})", RegexOptions.Compiled);

public readonly string Text;

public PhpStringLiteralSyntaxNode(string text)
Expand All @@ -21,27 +28,23 @@ public static PhpStringLiteralSyntaxNode Parse(PhpTokeniser tokeniser)
{
tokeniser.SkipWhitespace();

char trivia = tokeniser.GetTrivia();

// Skip leading trivia.
char leader = tokeniser.GetTrivia();
tokeniser.Advance();

var stringBuilder = new StringBuilder();
bool isEscaping = false;

while (isEscaping || tokeniser.GetTrivia() != trivia)
while (tokeniser.GetTrivia() != leader)
{
var token = tokeniser.GetTrivia();
char token = tokeniser.GetTrivia();
tokeniser.Advance();

if (token == '\\' && !isEscaping)
if (token == '\\')
{
isEscaping = true;
stringBuilder.Append(processEscapeSequence(leader, tokeniser));
continue;
}

stringBuilder.Append(token);
isEscaping = false;
}

// Skip trailing trivia.
Expand All @@ -50,5 +53,99 @@ public static PhpStringLiteralSyntaxNode Parse(PhpTokeniser tokeniser)

return new PhpStringLiteralSyntaxNode(stringBuilder.ToString());
}

/// <summary>
/// Resolves the escape sequence that follows a backslash which has already been consumed from the tokeniser.
/// </summary>
/// <remarks>
/// The tokeniser is only advanced past characters that form part of a recognised escape sequence.
/// When nothing is recognised, a lone backslash is returned and the tokeniser is left where it was,
/// so the characters after the backslash are read as ordinary text by the caller.
/// </remarks>
/// <param name="leader">The quote character which opened the string literal.</param>
/// <param name="tokeniser">The tokeniser, positioned on the trivia immediately after the backslash.</param>
/// <returns>The text which the escape sequence resolves to, or a single backslash if it is not an escape sequence.</returns>
private static string processEscapeSequence(char leader, PhpTokeniser tokeniser)
{
    char trivia = tokeniser.GetTrivia();

    // Base cases for \{leader} and \\, supported by both single- and double-quoted strings.
    if (trivia == leader || trivia == '\\')
    {
        tokeniser.Advance();
        return trivia.ToString();
    }

    // No other escape sequences are supported for single-quoted strings.
    if (leader == '\'')
        return @"\";

    // Double-quoted strings have a few more cases...
    switch (trivia)
    {
        case 'n':
            tokeniser.Advance();
            return "\n";

        case 'r':
            tokeniser.Advance();
            return "\r";

        case 't':
            tokeniser.Advance();
            return "\t";

        case 'v':
            tokeniser.Advance();
            return "\v";

        case 'e':
            tokeniser.Advance();
            return "\x1B";

        case 'f':
            tokeniser.Advance();
            return "\f";

        case '$':
            tokeniser.Advance();
            return "$";

        case >= '0' and <= '7':
        {
            // Up to three octal digits: the current trivia plus at most two more.
            Match match = oct_pattern.Match($"{trivia}{tokeniser.PeekNext(2)}");

            if (match.Success)
            {
                tokeniser.Advance(match.Length);

                unchecked
                {
                    // Values above 0xFF are deliberately truncated to a single byte.
                    // The byte is then mapped directly to the char with the same numeric value.
                    byte octValue = (byte)Convert.ToInt32(match.Value, 8);
                    return ((char)octValue).ToString();
                }
            }

            break;
        }

        case 'x':
        {
            // 'x' followed by one or two hex digits; without any digit this is not an escape sequence.
            Match match = hex_pattern.Match($"{trivia}{tokeniser.PeekNext(2)}");

            if (match.Success)
            {
                tokeniser.Advance(match.Length);
                return ((char)byte.Parse(match.Value[1..], NumberStyles.HexNumber, CultureInfo.InvariantCulture)).ToString();
            }

            break;
        }

        case 'u':
        {
            Match match = uni_pattern.Match($"{trivia}{tokeniser.PeekNext(16)}");

            // Parse into a long so that a long run of hex digits cannot overflow, then make sure the value is a
            // code point that char.ConvertFromUtf32() accepts (at most 0x10FFFF and not a surrogate). Anything
            // else is left unconsumed and falls through to the literal-backslash path instead of throwing.
            if (match.Success
                && long.TryParse(match.Value[2..^1], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out long codePoint)
                && codePoint >= 0
                && codePoint <= 0x10FFFF
                && (codePoint < 0xD800 || codePoint > 0xDFFF))
            {
                tokeniser.Advance(match.Length);
                return char.ConvertFromUtf32((int)codePoint);
            }

            break;
        }
    }

    // Not a recognised escape sequence: keep the backslash and let the caller read what follows verbatim.
    return @"\";
}
}
}
23 changes: 23 additions & 0 deletions LocalisationAnalyser.Tools/Php/PhpTokeniser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ public void TryAdvance()
Advance();
}

/// <summary>
/// Moves forward over several trivia by repeatedly advancing one trivia at a time.
/// </summary>
/// <param name="length">How many trivia to move past. Values of zero or less leave the position unchanged.</param>
public void Advance(int length)
{
    int remaining = length;

    while (remaining > 0)
    {
        Advance();
        remaining--;
    }
}

/// <summary>
/// Advances to the next trivia.
/// </summary>
Expand Down Expand Up @@ -104,6 +114,19 @@ public bool TryPeekNext(out char trivia)
return true;
}

/// <summary>
/// Reads ahead, without advancing, the trivia that follow the current one.
/// </summary>
/// <param name="length">The maximum number of trivia to read.</param>
/// <returns>The trivia after the current one, truncated at the end of the content.</returns>
public string PeekNext(int length)
{
    int limit = content.Length;

    // Both bounds are clamped so that peeking at or near the end yields a shorter (possibly empty) string.
    int from = Math.Min(limit, currentIndex + 1);
    int to = Math.Min(limit, from + length);

    return content.AsSpan().Slice(from, to - from).ToString();
}

/// <summary>
/// Skips all current whitespace and comments.
/// </summary>
Expand Down

0 comments on commit d3a00b7

Please sign in to comment.