Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
<PackageReference Include="System.DirectoryServices" Version="4.5.0" />
<PackageReference Include="System.IO.FileSystem.AccessControl" Version="4.5.0" />
<PackageReference Include="System.Management" Version="4.5.0" />
<PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="4.5.2" />
<PackageReference Include="System.Security.AccessControl" Version="4.5.0" />
<PackageReference Include="System.Security.Cryptography.Pkcs" Version="4.5.1" />
<PackageReference Include="System.Security.Permissions" Version="4.5.0" />
Expand Down
271 changes: 150 additions & 121 deletions src/System.Management.Automation/namespaces/FileSystemContentStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Management.Automation;
using System.Management.Automation.Internal;
using System.Management.Automation.Provider;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using Dbg = System.Management.Automation;

Expand Down Expand Up @@ -54,8 +55,11 @@ internal class FileSystemContentReaderWriter : IContentReader, IContentWriter
private StreamReader _reader;
private StreamWriter _writer;
private bool _usingByteEncoding;
private string _delimiter = "\n";
private const char DefaultDelimiter = '\n';
private string _delimiter = $"{DefaultDelimiter}";
private int[] _offsetDictionary;
private bool _usingDelimiter;
private StringBuilder _currentLineContent;
private bool _waitForChanges;
private bool _isRawStream;
private long _fileOffset;
Expand Down Expand Up @@ -222,50 +226,6 @@ public FileSystemContentReaderWriter(
_suppressNewline = suppressNewline;
}

/// <summary>
/// Constructor for the content stream
/// </summary>
/// <param name="path">
/// The path to the file to get the content from.
/// </param>
/// <param name="mode">
/// The file mode to open the file with.
/// </param>
/// <param name="access">
/// The file access requested in the file.
/// </param>
/// <param name="share">
/// The file share to open the file with
/// </param>
/// <param name="delimiter">
/// The delimiter to use when reading strings. Each time read is called, all content up to and including
/// the delimiter is read.
/// </param>
/// <param name="encoding">
/// The encoding of the file to be read or written.
/// </param>
/// <param name="waitForChanges">
/// If true, we will perform blocking reads on the file, waiting for new content to be appended
/// </param>
/// <param name="provider">
/// The CmdletProvider invoking this stream
/// </param>
/// <param name="isRawStream">
/// Indicates raw stream.
/// </param>
public FileSystemContentReaderWriter(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this removed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is an unneeded and unused internal constructor, so we can safely remove it. (Otherwise we would have to fix it to follow our changes in the other constructor with the delimiter parameter.)

string path,
FileMode mode,
FileAccess access,
FileShare share,
string delimiter,
Encoding encoding,
bool waitForChanges,
CmdletProvider provider,
bool isRawStream) : this(path, null, mode, access, share, encoding, false, waitForChanges, provider, isRawStream)
{
}

/// <summary>
/// Constructor for the content stream
/// </summary>
Expand Down Expand Up @@ -314,8 +274,50 @@ public FileSystemContentReaderWriter(
bool isRawStream)
: this(path, streamName, mode, access, share, encoding, false, waitForChanges, provider, isRawStream)
{
_delimiter = delimiter;
_usingDelimiter = true;
// If the delimiter is default ('\n') we'll use ReadLine() method.
// Otherwise allocate temporary structures for ReadDelimited() method.
if (!(delimiter.Length == 1 && delimiter[0] == DefaultDelimiter))
{
_delimiter = delimiter;
_usingDelimiter = true;

// We expect that we are parsing files where line lengths can be relatively long.
const int DefaultLineLength = 256;
_currentLineContent = new StringBuilder(DefaultLineLength);

// For Boyer-Moore string search algorithm.
// Populate the offset lookups.
// These will tell us the maximum number of characters
// we can read to generate another possible match (safe shift).
// If we read more characters than this, we risk consuming
// more of the stream than we need.
//
// Because a Unicode character is 2 bytes in size, we would have to use
// a very large array with 65536 entries to keep these safe offsets.
// One solution is to pack the Unicode character into a byte.
// The workaround is to use the low byte of the Unicode character.
// This allows us to use a small array of size 256.
// This workaround is the fastest and provides excellent results
// in regular search scenarios when the file contains
// mostly characters from the same alphabet.
_offsetDictionary = new int[256];

// If the next char from the file is not in the search pattern, the safe shift is the search pattern length.
for (var n = 0; n < _offsetDictionary.Length; n++)
{
_offsetDictionary[n] = _delimiter.Length;
}

// If the next char from the file is in the search pattern, we should calculate a safe shift.
char currentChar;
byte lowByte;
for (var i = 0; i < _delimiter.Length; i++)
{
currentChar = _delimiter[i];
lowByte = Unsafe.As<char, byte>(ref currentChar);
_offsetDictionary[lowByte] = _delimiter.Length - i - 1;
}
}
}

/// <summary>
Expand Down Expand Up @@ -578,103 +580,111 @@ private bool ReadByLine(bool waitChanges, ArrayList blocks, bool readBackward)

private bool ReadDelimited(bool waitChanges, ArrayList blocks, bool readBackward, string actualDelimiter)
{
if (_isRawStream)
{
// when -Raw is used we want to anyway read the whole thing
// so avoiding the while loop by reading the entire content.
string contentRead = _reader.ReadToEnd();
if (contentRead.Length > 0)
{
blocks.Add(contentRead);
}

// We already read whole file so return EOF.
return false;
}

// Since the delimiter is a string, we're essentially
// dealing with a "find the substring" algorithm, but with
// the additional restriction that we cannot read past the
// end of the delimiter. If we read past the end of the delimiter,
// end of the delimiter. If we read past the end of the delimiter,
// then we'll eat up bytes that we need from the filestream.
// The solution is a modified Boyer-Moore string search algorithm.
// This version retains the sub-linear search performance (via the
// lookup tables,) but offloads much of the dirty work to the
// very efficient BCL String.IndexOf(, StringComparison.Ordinal) method.
// lookup tables).
int numRead = 0;
int currentOffset = actualDelimiter.Length;
StringBuilder content = new StringBuilder();

// Populate the offset lookups
// These will tell us the maximum number of characters
// we can read to generate another possible match.
// If we read more characters than this, we risk consuming
// more of the stream than we need.
Dictionary<char, int> offsetDictionary = new Dictionary<char, int>();
foreach (char currentChar in actualDelimiter)
offsetDictionary[currentChar] = actualDelimiter.Length - actualDelimiter.LastIndexOf(currentChar) - 1;
Span<char> readBuffer = stackalloc char[currentOffset];
bool delimiterNotFound = true;
_currentLineContent.Clear();

do
{
if (_isRawStream)
// Read in the required batch of characters
numRead = readBackward
? _backReader.Read(readBuffer.Slice(0, currentOffset))
: _reader.Read(readBuffer.Slice(0, currentOffset));

// If we want to wait for changes, then we'll keep on attempting to read
// until we fill the buffer.
if (numRead == 0)
{
// when -Raw is used we want to anyway read the whole thing
// so avoiding the while loop by reading the entire content.
string contentRead = _reader.ReadToEnd();
numRead = contentRead.Length;
content.Append(contentRead);
if (waitChanges)
{
// But stop reading if the provider is stopping
while ((numRead < currentOffset) && (!_provider.Stopping))
{
// Get the change, and try to read more characters
// We only wait for changes when read forwards, so here we don't need to check if 'readBackward' is
// true or false, we only use 'reader'. The member 'reader' will be updated by WaitForChanges.
WaitForChanges(_path, _mode, _access, _share, _reader.CurrentEncoding);
numRead += _reader.Read(readBuffer.Slice(0, (currentOffset - numRead)));
}
}
}
else

if (numRead > 0)
{
// Read in the required batch of characters
var readBuffer = new char[currentOffset];
numRead = readBackward
? _backReader.Read(readBuffer, 0, currentOffset)
: _reader.Read(readBuffer, 0, currentOffset);

// If we want to wait for changes, then we'll keep on attempting to read
// until we fill the buffer.
if (numRead == 0)
_currentLineContent.Append(readBuffer.Slice(0, numRead));

// Look up the final character in our offset table.
// If the character doesn't exist in the lookup table, then it's not in
// our search key. That means the match must happen strictly /after/ the
// current position. Because of that, we can feel confident reading in the
// number of characters in the search key, without the risk of reading too many.
var currentChar = _currentLineContent[_currentLineContent.Length - 1];
currentOffset = _offsetDictionary[Unsafe.As<char, byte>(ref currentChar)];

// We want to keep reading if delimiter not found and we haven't hit the end of file
delimiterNotFound = true;

// If the final letters matched, then we will get an offset of "0".
// In that case, we'll either have a match (and break from the while loop,)
// or we need to move the scan forward one position.
if (currentOffset == 0)
{
if (waitChanges)
currentOffset = 1;

if (actualDelimiter.Length <= _currentLineContent.Length)
{
// But stop reading if the provider is stopping
while ((numRead < currentOffset) && (!_provider.Stopping))
delimiterNotFound = false;
int i = 0;
int j = _currentLineContent.Length - actualDelimiter.Length;
for (; i < actualDelimiter.Length; i++, j++)
{
// Get the change, and try to read more characters
// We only wait for changes when read forwards, so here we don't need to check if 'readBackward' is
// true or false, we only use 'reader'. The member 'reader' will be updated by WaitForChanges.
WaitForChanges(_path, _mode, _access, _share, _reader.CurrentEncoding);
numRead += _reader.Read(readBuffer, 0, (currentOffset - numRead));
if (actualDelimiter[i] != _currentLineContent[j])
{
delimiterNotFound = true;
break;
}
}
}
}

if (numRead > 0)
{
content.Append(readBuffer, 0, numRead);

// Look up the final character in our offset table.
// If the character doesn't exist in the lookup table, then it's not in
// our search key. That means the match must happen strictly /after/ the
// current position. Because of that, we can feel confident reading in the
// number of characters in the search key, without the risk of reading too many.
if (!offsetDictionary.TryGetValue(content[content.Length - 1], out currentOffset))
currentOffset = actualDelimiter.Length;

// If the final letters matched, then we will get an offset of "0".
// In that case, we'll either have a match (and break from the while loop,)
// or we need to move the scan forward one position.
if (currentOffset == 0)
currentOffset = 1;
}
}

// Two cases where we want to keep reading:
// 1. Raw stream and we haven't hit the end of file
// 2. Delimiter not found and we haven't hit the end of file
} while ((_isRawStream && (numRead != 0)) ||
((content.ToString().IndexOf(actualDelimiter, StringComparison.Ordinal) < 0) && (numRead != 0)));
} while (delimiterNotFound && (numRead != 0));

// We've reached the end of file or end of line.
if (content.Length > 0)
if (_currentLineContent.Length > 0)
{
// Add the block read to the output array list, trimming a trailing delimiter, if present.
// Note: If -Tail was specified, we get here in the course of 2 distinct passes:
// - Once while reading backward simply to determine the appropriate *start position* for later forward reading, ignoring the content of the blocks read (in reverse).
// - Then again during forward reading, for regular output processing; it is only then that trimming the delimiter is necessary.
// (Trimming it during backward reading would not only be unnecessary, but could interfere with determining the correct start position.)
string contentString = content.ToString();
blocks.Add(
!readBackward && contentString.EndsWith(actualDelimiter, StringComparison.Ordinal) && !_isRawStream
? contentString.Substring(0, content.Length - actualDelimiter.Length)
: contentString
!readBackward && !delimiterNotFound
? _currentLineContent.ToString(0, _currentLineContent.Length - actualDelimiter.Length)
: _currentLineContent.ToString()
);
}

Expand All @@ -683,7 +693,7 @@ private bool ReadDelimited(bool waitChanges, ArrayList blocks, bool readBackward
return true;
else
{
if (readBackward && content.Length > 0)
if (readBackward && _currentLineContent.Length > 0)
{
return true;
}
Expand Down Expand Up @@ -1259,14 +1269,32 @@ public override int Read()
/// <summary>
/// Read a specific maximum of characters from the current stream into a buffer
/// </summary>
/// <param name="buffer"></param>
/// <param name="index"></param>
/// <param name="count"></param>
/// <param name="buffer">Output buffer.</param>
/// <param name="index">Index in the buffer at which to begin writing.</param>
/// <param name="count">Maximum number of characters to read.</param>
/// <returns>Return the number of characters read, or -1 if we reach the head of the file</returns>
/// <returns>Return the number of characters read, or -1 if we reach the head of the file</returns>
public override int Read(char[] buffer, int index, int count)
{
    // Wrap the requested window of the array in a span and let the
    // shared span-based routine fill it.
    Span<char> destination = new Span<char>(buffer, index, count);
    return ReadSpan(destination);
}

/// <summary>
/// Reads characters from the current stream into the supplied span by delegating to ReadSpan.
/// </summary>
/// <param name="buffer">Span that receives the characters.</param>
/// <returns>The number of characters read, or -1 if we reach the head of the file.</returns>
public override int Read(Span<char> buffer) => ReadSpan(buffer);

private int ReadSpan(Span<char> buffer)
{
// deal with the argument validation
int charRead = 0;
int index = 0;
int count = buffer.Length;

do
{
Expand All @@ -1284,7 +1312,8 @@ public override int Read(char[] buffer, int index, int count)
{
buffer[index++] = _charBuff[--_charCount];
}
} while (count > 0);
}
while (count > 0);

return charRead;
}
Expand Down