Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 144 additions & 58 deletions src/System.Management.Automation/engine/lang/parserutils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ internal static object SplitOperator(ExecutionContext context, IScriptExtent err
return SplitOperatorImpl(context, errorPosition, lval, rval, SplitImplOptions.None, ignoreCase);
}

private static object SplitOperatorImpl(ExecutionContext context, IScriptExtent errorPosition, object lval, object rval, SplitImplOptions implOptions, bool ignoreCase)
private static IReadOnlyList<string> SplitOperatorImpl(ExecutionContext context, IScriptExtent errorPosition, object lval, object rval, SplitImplOptions implOptions, bool ignoreCase)
{
IEnumerable<string> content = enumerateContent(context, errorPosition, implOptions, lval);

Expand Down Expand Up @@ -634,84 +634,166 @@ private static object SplitOperatorImpl(ExecutionContext context, IScriptExtent
options |= SplitOptions.IgnoreCase;
}

if (predicate != null)
if (predicate == null)
{
return SplitWithPattern(context, errorPosition, content, separatorPattern, limit, options);
}
else if (limit >= 0)
{
return SplitWithPredicate(context, errorPosition, content, predicate, limit);
}
else
{
return SplitWithPattern(context, errorPosition, content, separatorPattern, limit, options);
return NegativeSplitWithPredicate(context, errorPosition, content, predicate, limit);
}
}

private static object SplitWithPredicate(ExecutionContext context, IScriptExtent errorPosition, IEnumerable<string> content, ScriptBlock predicate, int limit)
private static IReadOnlyList<string> NegativeSplitWithPredicate(ExecutionContext context, IScriptExtent errorPosition, IEnumerable<string> content, ScriptBlock predicate, int limit)
{
List<string> results = new List<string>();
var results = new List<string>();

if (limit == -1)
{
// If the user just wants 1 string
// then just return the content
return new List<string>(content);
}

foreach (string item in content)
{
List<string> split = new List<string>();
var split = new List<string>();

// Used to traverse through the item
int cursor = item.Length - 1;

int subStringLength = 0;

for (int charCount = 0; charCount < item.Length; charCount++) {
// Evaluate the predicate using the character at cursor.
object predicateResult = predicate.DoInvokeReturnAsIs(
useLocalScope: true,
errorHandlingBehavior: ScriptBlock.ErrorHandlingBehavior.WriteToExternalErrorPipe,
dollarUnder: CharToString(item[cursor]),
input: AutomationNull.Value,
scriptThis: AutomationNull.Value,
args: new object[] { item, cursor });

if (!LanguagePrimitives.IsTrue(predicateResult))
{
subStringLength++;
cursor -= 1;
continue;
}

split.Add(item.Substring(cursor + 1, subStringLength));

subStringLength = 0;

if (limit == 1)
cursor -= 1;

if (System.Math.Abs(limit) == (split.Count + 1))
{
break;
}
}

if (cursor == -1)
{
// Don't bother with looking for any delimiters,
// just return the original string.
results.Add(item);
continue;
// Used when the limit is negative
// and the cursor was allowed to go
// all the way to the start of the
// string.
split.Add(item.Substring(0, subStringLength));
}
else
{
// Used to get the rest of the string
// when using a negative limit and
// the cursor doesn't reach the start
// of the string.
split.Add(item.Substring(0, cursor + 1));
}

split.Reverse();

results.AddRange(split);
}

return results.ToArray();
}

private static IReadOnlyList<string> SplitWithPredicate(ExecutionContext context, IScriptExtent errorPosition, IEnumerable<string> content, ScriptBlock predicate, int limit)
{
var results = new List<string>();

if (limit == 1)
{
// If the user just wants 1 string
// then just return the content
return new List<string>(content);
}

foreach (string item in content)
{
var split = new List<string>();

StringBuilder buf = new StringBuilder();
for (int strIndex = 0; strIndex < item.Length; strIndex++)
// Used to traverse through the item
int cursor = 0;

// This is used to calculate how much to split from item.
int subStringLength = 0;

for (int charCount = 0; charCount < item.Length; charCount++)
{
object isDelimChar = predicate.DoInvokeReturnAsIs(
// Evaluate the predicate using the character at cursor.
object predicateResult = predicate.DoInvokeReturnAsIs(
useLocalScope: true,
errorHandlingBehavior: ScriptBlock.ErrorHandlingBehavior.WriteToExternalErrorPipe,
dollarUnder: CharToString(item[strIndex]),
dollarUnder: CharToString(item[cursor]),
input: AutomationNull.Value,
scriptThis: AutomationNull.Value,
args: new object[] { item, strIndex });
if (LanguagePrimitives.IsTrue(isDelimChar))
{
split.Add(buf.ToString());
buf = new StringBuilder();
args: new object[] { item, cursor });

if (limit > 0 && split.Count >= (limit - 1))
{
// We're one item below the limit. If
// we have any string left, go ahead
// and add it as the last item, otherwise
// add an empty string if there was
// a delimiter at the end.
if ((strIndex + 1) < item.Length)
{
split.Add(item.Substring(strIndex + 1));
}
else
{
split.Add(string.Empty);
}
// If the current character is not a delimiter
// then it must be included into a substring.
if (!LanguagePrimitives.IsTrue(predicateResult))
{
subStringLength++;

break;
}
cursor += 1;

// If this delimiter is at the end of the string,
// add an empty string to denote the item "after"
// it.
if (strIndex == (item.Length - 1))
{
split.Add(string.Empty);
}
continue;
}
else

// Else, if the character is a delimiter
// then add a substring to the split list.
split.Add(item.Substring(cursor - subStringLength, subStringLength));

subStringLength = 0;

cursor += 1;

if (limit == (split.Count + 1))
{
buf.Append(item[strIndex]);
break;
}
}

// Add any remainder, if we're under the limit.
if (buf.Length > 0 &&
(limit <= 0 || split.Count < limit))
if (cursor == item.Length)
{
split.Add(buf.ToString());
// Used to get the rest of the string
// when the limit is not negative and
// the cursor is allowed to make it to
// the end of the string.
split.Add(item.Substring(cursor - subStringLength, subStringLength));
}
else
{
// Used to get the rest of the string
// when the limit is not negative and
// the cursor is not at the end of the
// string.
split.Add(item.Substring(cursor, item.Length - cursor));
}

results.AddRange(split);
Expand All @@ -720,7 +802,7 @@ private static object SplitWithPredicate(ExecutionContext context, IScriptExtent
return results.ToArray();
}

private static object SplitWithPattern(ExecutionContext context, IScriptExtent errorPosition, IEnumerable<string> content, string separatorPattern, int limit, SplitOptions options)
private static IReadOnlyList<string> SplitWithPattern(ExecutionContext context, IScriptExtent errorPosition, IEnumerable<string> content, string separatorPattern, int limit, SplitOptions options)
{
// Default to Regex matching if no match specified.
if ((options & SplitOptions.SimpleMatch) == 0 &&
Expand All @@ -743,20 +825,24 @@ private static object SplitWithPattern(ExecutionContext context, IScriptExtent e
separatorPattern = Regex.Escape(separatorPattern);
}

if (limit < 0)
RegexOptions regexOptions = parseRegexOptions(options);

int calculatedLimit = limit;

// If the limit is negative then set Regex to read from right to left
if (calculatedLimit < 0)
{
// Regex only allows 0 to signify "no limit", whereas
// we allow any integer <= 0.
limit = 0;
regexOptions |= RegexOptions.RightToLeft;
calculatedLimit *= -1;
}

RegexOptions regexOptions = parseRegexOptions(options);
Regex regex = NewRegex(separatorPattern, regexOptions);

List<string> results = new List<string>();
var results = new List<string>();

foreach (string item in content)
{
string[] split = regex.Split(item, limit, 0);
string[] split = regex.Split(item, calculatedLimit);
results.AddRange(split);
}

Expand Down
66 changes: 66 additions & 0 deletions test/powershell/Language/Operators/SplitOperator.Tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,78 @@ Describe "Split Operator" -Tags CI {
$res[2] | Should -Be "c"
$res[3] | Should -Be "d"

$res = "a b c d" -split " ", -2
$res.count | Should -Be 2
$res[0] | Should -Be "a b c"
$res[1] | Should -Be "d"

$res = "a b c d" -split " ", -1
$res.count | Should -Be 1
$res[0] | Should -Be "a b c d"
}

It "Binary split operator can work with different delimeter than split string" {
$res = "a b c d" -split " ",8
$res.count | Should -Be 4
$res[0] | Should -Be "a"
$res[1] | Should -Be "b"
$res[2] | Should -Be "c"
$res[3] | Should -Be "d"

$res = "a b c d" -split " ",-8
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not reverse order, like the test in line 80?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, this is something that I noticed today while showing off my changes to the judges. I will probably come back and add another commit fixing this functionality.

Which way do you think is better? Personally, I think that the first item should be at array index 0,
i.e.:

> $r = 'a b c d' -split ' ',-2
> $r[0]
d

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any formatting or implementation details would be greatly appreciated. Thank you! :)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that splitting from right to left might be considered orthogonal to the iteration from left to right.

@mklement0's original example in #4765 is:

PS> 'a b c d' -split ' ', -2  # split into (at most) 2 strings from the end
a b c  # prefix
d        # requested token

PS> 'a b c d' -split ' ', -3  # split into (at most) 3 strings from the end
a b   # prefix
c       
d 

PS> 'a b' -split ' ', -2  # 2 resulting strings - complete split; same as 0 in this case
a
b

This implementation currently accords with that.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also worth noting that the suggestion was approved by the @PowerShell/powershell-committee.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test in line 80 is wrong, yes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@iSazonov Yes, you were correct; it is fixed in the most recent commits.
@rjmholt Good point, I don't have a good response for that, haha.

$res.count | Should -Be 4
$res[0] | Should -Be "a"
$res[1] | Should -Be "b"
$res[2] | Should -Be "c"
$res[3] | Should -Be "d"

$res = " " -split " ",-2
$res.count | Should -Be 2
$res[0] | Should -Be ""
$res[1] | Should -Be ""
}

It "Binary split operator with predicate can work with negative numbers" {
$res = "a b c d" -split {$_ -like ' '},-2
$res.count | Should -Be 2
$res[0] | Should -Be "a b c"
$res[1] | Should -Be "d"

$res = "a b c d" -split {$_ -like ' '},-4
$res.count | Should -Be 4
$res[0] | Should -Be "a"
$res[1] | Should -Be "b"
$res[2] | Should -Be "c"
$res[3] | Should -Be "d"

$res = "a b c d" -split {$_ -like ' '},-8
$res.count | Should -Be 4
$res[0] | Should -Be "a"
$res[1] | Should -Be "b"
$res[2] | Should -Be "c"
$res[3] | Should -Be "d"

$res = " " -split {$_ -like ' '},-4
$res.count | Should -Be 2
$res[0] | Should -Be ""
$res[1] | Should -Be ""

$res = "folder/path/to/file" -split {$_ -like '/'}, -2
$res.count | Should -Be 2
$res[0] | Should -Be "folder/path/to"
$res[1] | Should -Be "file"
}

It "Binary split operator can work with regex expression" {
$res = "a2b3c4d" -split '\d+',2
$res.count | Should -Be 2
$res[0] | Should -Be "a"
$res[1] | Should -Be "b3c4d"

$res = "a2b3c4d" -split '\d+',-2
$res.count | Should -Be 2
$res[0] | Should -Be "a2b3c"
$res[1] | Should -Be "d"
}

It "Binary split operator can works with freeform delimiter" {
Expand Down