Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions src/System.Management.Automation/engine/parser/tokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1661,6 +1661,9 @@ private void ScanToEndOfCommentLine(out bool sawBeginSig, out bool matchedRequir
case '\n':
UngetChar();

// Detect a line comment that disguises itself to look like the beginning of a signature block.
// This could be used to hide code at the bottom of a script, since people might assume there is nothing else after the signature.
//
// The token similarity threshold was chosen by instrumenting the tokenizer and
// analyzing every comment from PoshCode, Technet Script Center, and Windows.
//
Expand All @@ -1677,15 +1680,16 @@ private void ScanToEndOfCommentLine(out bool sawBeginSig, out bool matchedRequir
//
// There were only 279 (out of 269,387) comments with a similarity of 11,12,13,14, or 15.
// At a similarity of 16-77, there were thousands of comments per similarity bucket.
//
// System.IO.File.AppendAllText(@"c:\temp\signature_similarity.txt", "" + sawBeginTokenSimilarity + ":" + commentLineComparison);

const string beginSignatureTextNoSpace = "sig#beginsignatureblock\n";
const int beginTokenSimilarityThreshold = 10;

// Quick exit - the comment line is more than 'threshold' longer. Therefore,
const int beginTokenSimilarityUpperBound = 34; // beginSignatureTextNoSpace.Length + beginTokenSimilarityThreshold
const int beginTokenSimilarityLowerBound = 14; // beginSignatureTextNoSpace.Length - beginTokenSimilarityThreshold

// Quick exit - the comment line is more than 'threshold' characters longer, or more than 'threshold'
// characters shorter, than the signature text. Therefore, its edit distance is guaranteed to exceed the threshold.
if (commentLine.Length > (beginSignatureTextNoSpace.Length + beginTokenSimilarityThreshold))
if (commentLine.Length > beginTokenSimilarityUpperBound || commentLine.Length < beginTokenSimilarityLowerBound)
{
sawBeginSig = false;
}
Expand All @@ -1697,10 +1701,20 @@ private void ScanToEndOfCommentLine(out bool sawBeginSig, out bool matchedRequir
//
// The average script is 14% comments and parses in about 5.05 ms with this algorithm,
// about 4.45 ms with the more simplistic algorithm.
//

string commentLineComparison = commentLine.ToString().ToLowerInvariant();
if (_beginTokenSimilarity2dArray == null)
{
// Create the 2 dimensional array for edit distance calculation if it hasn't been created yet.
_beginTokenSimilarity2dArray = new int[beginTokenSimilarityUpperBound + 1, beginSignatureTextNoSpace.Length + 1];
}
else
{
// Zero out the 2 dimensional array before using it.
Array.Clear(_beginTokenSimilarity2dArray, 0, _beginTokenSimilarity2dArray.Length);
}

int sawBeginTokenSimilarity = GetStringSimilarity(commentLineComparison, beginSignatureTextNoSpace);
int sawBeginTokenSimilarity = GetStringSimilarity(commentLineComparison, beginSignatureTextNoSpace, _beginTokenSimilarity2dArray);
sawBeginSig = sawBeginTokenSimilarity < beginTokenSimilarityThreshold;
}

Expand All @@ -1717,6 +1731,9 @@ private void ScanToEndOfCommentLine(out bool sawBeginSig, out bool matchedRequir

#region Object reuse

// A two-dimensional integer array reused for calculating string similarity.
private int[,] _beginTokenSimilarity2dArray;

private readonly Queue<StringBuilder> _stringBuilders = new Queue<StringBuilder>();

private StringBuilder GetStringBuilder()
Expand Down Expand Up @@ -1796,14 +1813,14 @@ private void ScanBlockComment()

// Implementation of the Levenshtein Distance algorithm
// https://en.wikipedia.org/wiki/Levenshtein_distance
private static int GetStringSimilarity(string first, string second)
private static int GetStringSimilarity(string first, string second, int[,] distanceMap = null)
{
Diagnostics.Assert(!string.IsNullOrEmpty(first) && !string.IsNullOrEmpty(second), "Caller never calls us with empty strings");

// Use a distance map to hold the number of edits required to
// convert the first <row> letters of 'first' to the first <column>
// letters of 'second'.
int[,] distanceMap = new int[first.Length + 1, second.Length + 1];
distanceMap ??= new int[first.Length + 1, second.Length + 1];

// Initialize the first row and column of the matrix - the number
// of edits required when one of the strings is empty is just
Expand Down
77 changes: 46 additions & 31 deletions src/System.Management.Automation/engine/regex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#pragma warning disable 1634, 1691

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Globalization;
Expand Down Expand Up @@ -914,43 +915,51 @@ internal bool IsMatch(string str)
var patternPositionsForNextStringPosition =
new PatternPositionsVisitor(_patternElements.Length);

for (int currentStringPosition = 0;
currentStringPosition < str.Length;
currentStringPosition++)
try
{
char currentStringCharacter = _characterNormalizer.Normalize(str[currentStringPosition]);
patternPositionsForCurrentStringPosition.StringPosition = currentStringPosition;
patternPositionsForNextStringPosition.StringPosition = currentStringPosition + 1;
for (int currentStringPosition = 0;
currentStringPosition < str.Length;
currentStringPosition++)
{
char currentStringCharacter = _characterNormalizer.Normalize(str[currentStringPosition]);
patternPositionsForCurrentStringPosition.StringPosition = currentStringPosition;
patternPositionsForNextStringPosition.StringPosition = currentStringPosition + 1;

int patternPosition;
while (patternPositionsForCurrentStringPosition.MoveNext(out patternPosition))
{
_patternElements[patternPosition].ProcessStringCharacter(
currentStringCharacter,
patternPosition,
patternPositionsForCurrentStringPosition,
patternPositionsForNextStringPosition);
}

int patternPosition;
while (patternPositionsForCurrentStringPosition.MoveNext(out patternPosition))
// swap patternPositionsForCurrentStringPosition
// with patternPositionsForNextStringPosition
var tmp = patternPositionsForCurrentStringPosition;
patternPositionsForCurrentStringPosition = patternPositionsForNextStringPosition;
patternPositionsForNextStringPosition = tmp;
}

int patternPosition2;
while (patternPositionsForCurrentStringPosition.MoveNext(out patternPosition2))
{
_patternElements[patternPosition].ProcessStringCharacter(
currentStringCharacter,
patternPosition,
patternPositionsForCurrentStringPosition,
patternPositionsForNextStringPosition);
_patternElements[patternPosition2].ProcessEndOfString(
patternPosition2,
patternPositionsForCurrentStringPosition);
}

// swap patternPositionsForCurrentStringPosition
// with patternPositionsForNextStringPosition
var tmp = patternPositionsForCurrentStringPosition;
patternPositionsForCurrentStringPosition = patternPositionsForNextStringPosition;
patternPositionsForNextStringPosition = tmp;
return patternPositionsForCurrentStringPosition.ReachedEndOfPattern;
}

int patternPosition2;
while (patternPositionsForCurrentStringPosition.MoveNext(out patternPosition2))
finally
{
_patternElements[patternPosition2].ProcessEndOfString(
patternPosition2,
patternPositionsForCurrentStringPosition);
patternPositionsForCurrentStringPosition.Dispose();
patternPositionsForNextStringPosition.Dispose();
}

return patternPositionsForCurrentStringPosition.ReachedEndOfPattern;
}

private class PatternPositionsVisitor
private class PatternPositionsVisitor : IDisposable
{
private readonly int _lengthOfPattern;

Expand All @@ -965,16 +974,22 @@ public PatternPositionsVisitor(int lengthOfPattern)

_lengthOfPattern = lengthOfPattern;

_isPatternPositionVisitedMarker = new int[lengthOfPattern + 1];
for (int i = 0; i < _isPatternPositionVisitedMarker.Length; i++)
_isPatternPositionVisitedMarker = ArrayPool<int>.Shared.Rent(_lengthOfPattern + 1);
for (int i = 0; i <= _lengthOfPattern; i++)
{
_isPatternPositionVisitedMarker[i] = -1;
}

_patternPositionsForFurtherProcessing = new int[lengthOfPattern];
_patternPositionsForFurtherProcessing = ArrayPool<int>.Shared.Rent(_lengthOfPattern);
_patternPositionsForFurtherProcessingCount = 0;
}

/// <summary>
/// Hands the two buffers rented in the constructor back to the shared
/// <see cref="ArrayPool{T}"/>, asking the pool to clear each one on return.
/// </summary>
public void Dispose()
{
    ArrayPool<int> sharedPool = ArrayPool<int>.Shared;

    // Return order matches the order in which the buffers were rented.
    sharedPool.Return(_isPatternPositionVisitedMarker, clearArray: true);
    sharedPool.Return(_patternPositionsForFurtherProcessing, clearArray: true);
}

public int StringPosition { private get; set; }

public void Add(int patternPosition)
Expand All @@ -984,7 +999,7 @@ public void Add(int patternPosition)
patternPosition <= _lengthOfPattern,
"Caller should verify patternPosition <= this._lengthOfPattern");

// is patternPosition already visited?);
// is patternPosition already visited?
if (_isPatternPositionVisitedMarker[patternPosition] == this.StringPosition)
{
return;
Expand Down