-
Notifications
You must be signed in to change notification settings - Fork 8.1k
Use HTML meta charset attribute value, if present, when the Content-Type header does not specify it. #4338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use HTML meta charset attribute value, if present, when the Content-Type header does not specify it. #4338
Changes from all commits
2fa0f51
74d816c
e5cfbb1
d1a5b2b
4f4066b
09e7a46
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
|
|
||
| using System; | ||
| using System.Text; | ||
| using System.Text.RegularExpressions; | ||
| using System.IO; | ||
| using System.IO.Compression; | ||
| using System.Management.Automation; | ||
|
|
@@ -391,20 +392,8 @@ internal static void SaveStreamToFile(Stream stream, string filePath, PSCmdlet c | |
| } | ||
| } | ||
|
|
||
| internal static string DecodeStream(Stream stream, string characterSet) | ||
| private static string StreamToString(Stream stream, Encoding encoding) | ||
| { | ||
| Encoding encoding = ContentHelper.GetEncodingOrDefault(characterSet); | ||
| return DecodeStream(stream, encoding); | ||
| } | ||
|
|
||
| internal static string DecodeStream(Stream stream, Encoding encoding) | ||
| { | ||
| if (null == encoding) | ||
| { | ||
| // just use the default encoding if one wasn't provided | ||
| encoding = ContentHelper.GetDefaultEncoding(); | ||
| } | ||
|
|
||
| StringBuilder result = new StringBuilder(capacity: ChunkSize); | ||
| Decoder decoder = encoding.GetDecoder(); | ||
|
|
||
|
|
@@ -413,9 +402,8 @@ internal static string DecodeStream(Stream stream, Encoding encoding) | |
| { | ||
| useBufferSize = encoding.GetMaxCharCount(10); | ||
| } | ||
| char[] chars = new char[useBufferSize]; | ||
|
|
||
|
|
||
| char[] chars = new char[useBufferSize]; | ||
| byte[] bytes = new byte[useBufferSize * 4]; | ||
| int bytesRead = 0; | ||
| do | ||
|
|
@@ -444,12 +432,74 @@ internal static string DecodeStream(Stream stream, Encoding encoding) | |
| // Increment byteIndex to the next block of bytes in the input buffer, if any, to convert. | ||
| byteIndex += bytesUsed; | ||
| } | ||
| } | ||
| while (bytesRead != 0); | ||
| } while (bytesRead != 0); | ||
|
|
||
| return result.ToString(); | ||
| } | ||
|
|
||
/// <summary>
/// Decodes <paramref name="stream"/> to a string using the encoding named by
/// <paramref name="characterSet"/>.
/// </summary>
/// <param name="stream">The stream to decode.</param>
/// <param name="characterSet">
/// Name of the character set to look up. When the lookup fails, a null encoding is
/// handed to <see cref="DecodeStream(Stream, ref Encoding)"/>.
/// </param>
/// <param name="encoding">
/// On return, the encoding left in place by <see cref="DecodeStream(Stream, ref Encoding)"/>.
/// </param>
/// <returns>The decoded text.</returns>
internal static string DecodeStream(Stream stream, string characterSet, out Encoding encoding)
{
    // Reuse the shared lookup helper instead of repeating its try/catch here.
    // The boolean result is not needed: on failure the helper sets encoding to null,
    // which is exactly what the ref overload expects for "no encoding supplied".
    TryGetEncoding(characterSet, out encoding);
    return DecodeStream(stream, ref encoding);
}
|
|
||
/// <summary>
/// Attempts to resolve a character set name to an <see cref="Encoding"/>.
/// </summary>
/// <param name="characterSet">The character set name passed to <see cref="Encoding.GetEncoding(string)"/>.</param>
/// <param name="encoding">The resolved encoding on success; null when the lookup throws <see cref="ArgumentException"/>.</param>
/// <returns>true when the name was resolved; otherwise false.</returns>
static bool TryGetEncoding(string characterSet, out Encoding encoding)
{
    try
    {
        encoding = Encoding.GetEncoding(characterSet);
        return true;
    }
    catch (ArgumentException)
    {
        // The name is not a usable encoding name; report failure with a null encoding.
        encoding = null;
        return false;
    }
}
|
|
||
// Locates a charset declaration inside an HTML <meta ...> tag and captures its value
// in the named group "charset" (the value may be unquoted, single- or double-quoted).
// IgnoreCase is required because HTML tag and attribute names are case-insensitive,
// so documents written as <META CHARSET=...> or Charset=... must match as well.
// CultureInvariant keeps the case-insensitive comparison independent of the current culture.
private static readonly Regex s_metaexp = new Regex(
    @"<meta\s[.\n]*[^><]*charset\s*=\s*[""'\n]?(?<charset>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
|
|
||
/// <summary>
/// Decodes <paramref name="stream"/> to a string. When <paramref name="encoding"/> is null the
/// default from ContentHelper.GetDefaultEncoding() is used, and the decoded text is then searched
/// (via s_metaexp) for a meta charset declaration that can override that default.
/// </summary>
/// <param name="stream">The stream to decode; it is rewound with Seek when a meta charset override is applied.</param>
/// <param name="encoding">
/// The encoding to use, or null to use the default. On return it holds the encoding that produced
/// the returned text (the supplied one, the default, or the one named by the meta charset).
/// </param>
/// <returns>The decoded content.</returns>
| internal static string DecodeStream(Stream stream, ref Encoding encoding) | ||
| { | ||
| bool isDefaultEncoding = false; | ||
| if (null == encoding) | ||
| { | ||
| // Use the default encoding if one wasn't provided | ||
| encoding = ContentHelper.GetDefaultEncoding(); | ||
| isDefaultEncoding = true; | ||
| } | ||
|
|
||
// First pass: decode with the caller's encoding or the default chosen above.
| string content = StreamToString (stream, encoding); | ||
// NOTE(review): the do { ... } while (false) body runs exactly once and no break is visible
// inside it, so it behaves like a plain block guarded by the if.
| if (isDefaultEncoding) do | ||
| { | ||
| // check for a charset attribute on the meta element to override the default. | ||
| Match match = s_metaexp.Match(content); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In order to cache the regex, you need to use the static regex methods. See: https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex(v=vs.110).aspx#static_vs_instance
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TravisEz13 - According to my reading of the page, I'm doing exactly what it says... "To prevent recompilation, you should instantiate a single Regex object that is accessible to all code that requires it, as shown in the following rewritten example." Since s_metaexp is static/readonly, there is only one compilation. That appears to be the point of the of the caching; avoiding recompilation. Am I missing something?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Under the
Basically, they changed the behavior and only added a not.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My review of the corefx shows that constructing and reusing an instance (regex.Match) is equivalent to calling the static Match method. The constructor manages cache lookups/updates. The static Match method constructs a new instance and calls through the instance Match method.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sound good, thanks for looking into it. |
||
| if (match.Success) | ||
| { | ||
| Encoding localEncoding = null; | ||
| string characterSet = match.Groups["charset"].Value; | ||
|
|
||
| if (TryGetEncoding(characterSet, out localEncoding)) | ||
| { | ||
// Second pass: rewind to offset 0 and decode again with the declared charset.
// NOTE(review): Seek is called without a CanSeek check and always rewinds to the start of
// the stream, not to where the first pass began - confirm callers always pass a seekable
// stream positioned at 0.
| stream.Seek(0, SeekOrigin.Begin); | ||
| content = StreamToString(stream, localEncoding); | ||
| // report the encoding used. | ||
| encoding = localEncoding; | ||
| } | ||
| } | ||
| } while (false); | ||
|
|
||
| return content; | ||
| } | ||
|
|
||
| internal static Byte[] EncodeToBytes(String str, Encoding encoding) | ||
| { | ||
| if (null == encoding) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need doc change as you're adding to public api
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done