Skip to content

Commit 0f8a687

Browse files
committed
Add StringSHA1
1 parent ef75c4b commit 0f8a687

3 files changed

Lines changed: 274 additions & 18 deletions

File tree

src/vs/base/common/hash.ts

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55

6+
import * as strings from 'vs/base/common/strings';
7+
68
/**
79
* Return a hash value for an object.
810
*/
@@ -70,3 +72,235 @@ export class Hasher {
7072
return this._value;
7173
}
7274
}
75+
76+
/**
 * Numeric constants used by the SHA1 implementation below.
 */
const enum SHA1Constant {
	/** Size of one SHA1 input block in bytes (512 bits). */
	BLOCK_SIZE = 512 / 8,
	/** Code point hashed in place of an unpaired surrogate (U+FFFD). */
	UNICODE_REPLACEMENT = 0xFFFD,
}
80+
81+
/**
 * Rotate `value` to the left by `bits` positions inside a word that is
 * `totalBits` wide and return the result as a non-negative number.
 *
 * @param value The word to rotate. Only its lowest `totalBits` bits are used.
 * @param bits The number of positions to rotate by (taken modulo `totalBits`).
 * @param totalBits The width of the word in bits, from 1 to 32. Defaults to 32.
 * @returns The rotated word, always in the range `[0, 2^totalBits)`.
 */
function leftRotate(value: number, bits: number, totalBits: number = 32): number {
	if (totalBits === 32) {
		// JavaScript bit operators already work on 32-bit words and take shift
		// counts modulo 32, so the bits shifted out on the left are exactly the
		// ones that `>>>` brings back in on the right.
		return ((value << bits) | (value >>> (32 - bits))) >>> 0;
	}

	// All ones, except for the bits above the word width
	const wordMask = 0xFFFFFFFF >>> (32 - totalBits);

	// Normalize the rotation into [0, totalBits)
	const shift = ((bits % totalBits) + totalBits) % totalBits;

	// Drop anything outside of the word before rotating
	const word = value & wordMask;

	// Join (word left-shifted `shift` bits) with (word right-shifted by the rest)
	// and truncate what the left shift pushed above the word width.
	return (((word << shift) | (word >>> (totalBits - shift))) & wordMask) >>> 0;
}
91+
92+
/**
 * Write `value` into `count` consecutive entries of `dest`, starting at `index`.
 *
 * `count` defaults to the full byte length of `dest` (not the remaining length),
 * so with a non-zero `index` the loop runs past the end; such writes to a typed
 * array are ignored.
 */
function fill(dest: Uint8Array, index: number = 0, count: number = dest.byteLength, value: number = 0): void {
	const end = index + count;
	for (let pos = index; pos < end; pos++) {
		dest[pos] = value;
	}
}
97+
98+
/**
 * Prepend `char` to `value` until the result is at least `length` characters long.
 * A `value` that is already long enough is returned unchanged.
 */
function leftPad(value: string, length: number, char: string = '0'): string {
	let padding = '';
	for (let missing = length - value.length; missing > 0; missing -= char.length) {
		padding = char + padding;
	}
	return padding + value;
}
104+
105+
/**
 * Format `value` as an unsigned 32-bit integer in hexadecimal, left-padded
 * with zeros to `bitsize / 4` digits.
 */
function toHexString(value: number, bitsize: number = 32): string {
	const unsigned = value >>> 0;
	const digitCount = bitsize / 4;
	return leftPad(unsigned.toString(16), digitCount);
}
108+
109+
/**
 * A streaming SHA1 implementation that works with strings and reuses fixed
 * buffers instead of allocating while hashing.
 *
 * Strings passed to `update` are encoded as UTF-8 into a block buffer. Surrogate
 * pairs are combined into one code point and unpaired surrogates are hashed as
 * the unicode replacement character. `digest` returns 40 lowercase hex characters.
 */
export class StringSHA1 {
	// Scratch area for the 80 32-bit words of the message schedule (80 * 4 = 320 bytes).
	// It is shared by all instances and completely rewritten at the start of each `_step`.
	private static _bigBlock32 = new DataView(new ArrayBuffer(320));

	// Running hash state
	private _h0 = 0x67452301;
	private _h1 = 0xEFCDAB89;
	private _h2 = 0x98BADCFE;
	private _h3 = 0x10325476;
	private _h4 = 0xC3D2E1F0;

	// One block plus 3 spare bytes, so that a UTF-8 sequence of up to 4 bytes
	// that starts on the last byte of the block still fits.
	private readonly _buff: Uint8Array = new Uint8Array(SHA1Constant.BLOCK_SIZE + 3);
	private readonly _buffDV: DataView = new DataView(this._buff.buffer);
	// Number of bytes currently waiting in `_buff`
	private _buffLen = 0;
	// Number of bytes already consumed by `_step`
	private _totalLen = 0;
	// High surrogate seen at the very end of the last `update` call, or 0
	private _leftoverHighSurrogate = 0;
	private _finished = false;

	/**
	 * Feed another piece of text into the hash. A high surrogate at the very end
	 * of `str` is held back until the next call (or until `digest`), so a
	 * surrogate pair may be split across two calls.
	 */
	public update(str: string): void {
		const length = str.length;
		if (length === 0) {
			return;
		}

		const buffer = this._buff;
		let used = this._buffLen;
		let pendingHigh = this._leftoverHighSurrogate;

		// When a high surrogate is pending, resume with it as the current code
		// unit, positioned just before the start of `str`.
		const resuming = pendingHigh !== 0;
		let index = resuming ? -1 : 0;
		let unit = resuming ? pendingHigh : str.charCodeAt(0);
		pendingHigh = 0;

		for (; ;) {
			let codePoint = unit;

			if (strings.isHighSurrogate(unit)) {
				if (index + 1 >= length) {
					// the input ends with a lone high surrogate => keep it for later
					pendingHigh = unit;
					break;
				}
				const following = str.charCodeAt(index + 1);
				if (strings.isLowSurrogate(following)) {
					// valid pair => consume both code units
					index++;
					codePoint = strings.computeCodePoint(unit, following);
				} else {
					// high surrogate without a low one => unicode replacement character
					codePoint = SHA1Constant.UNICODE_REPLACEMENT;
				}
			} else if (strings.isLowSurrogate(unit)) {
				// low surrogate without a preceding high one => unicode replacement character
				codePoint = SHA1Constant.UNICODE_REPLACEMENT;
			}

			used = this._push(buffer, used, codePoint);

			index++;
			if (index >= length) {
				break;
			}
			unit = str.charCodeAt(index);
		}

		this._buffLen = used;
		this._leftoverHighSurrogate = pendingHigh;
	}

	/**
	 * Append the UTF-8 encoding of `codePoint` to `buff` at `buffLen`, hashing the
	 * buffer once a full block is available. Returns the new buffer length.
	 */
	private _push(buff: Uint8Array, buffLen: number, codePoint: number): number {
		if (codePoint < 0x0080) {
			// 1 byte: 0xxxxxxx
			buff[buffLen++] = codePoint;
		} else if (codePoint < 0x0800) {
			// 2 bytes: 110xxxxx 10xxxxxx
			buff[buffLen++] = 0xC0 | ((codePoint >>> 6) & 0x1F);
			buff[buffLen++] = 0x80 | (codePoint & 0x3F);
		} else if (codePoint < 0x10000) {
			// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			buff[buffLen++] = 0xE0 | ((codePoint >>> 12) & 0x0F);
			buff[buffLen++] = 0x80 | ((codePoint >>> 6) & 0x3F);
			buff[buffLen++] = 0x80 | (codePoint & 0x3F);
		} else {
			// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			buff[buffLen++] = 0xF0 | ((codePoint >>> 18) & 0x07);
			buff[buffLen++] = 0x80 | ((codePoint >>> 12) & 0x3F);
			buff[buffLen++] = 0x80 | ((codePoint >>> 6) & 0x3F);
			buff[buffLen++] = 0x80 | (codePoint & 0x3F);
		}

		if (buffLen < SHA1Constant.BLOCK_SIZE) {
			return buffLen;
		}

		// A full block is ready: hash it ...
		this._step();
		this._totalLen += SHA1Constant.BLOCK_SIZE;
		// ... and move the (up to 3) bytes that spilled past the block to the front.
		for (let i = 0; i < 3; i++) {
			buff[i] = buff[SHA1Constant.BLOCK_SIZE + i];
		}
		return buffLen - SHA1Constant.BLOCK_SIZE;
	}

	/**
	 * Finish the hash (on the first call only) and return it as a hex string.
	 */
	public digest(): string {
		if (!this._finished) {
			this._finished = true;

			if (this._leftoverHighSurrogate !== 0) {
				// the input ended with a lone high surrogate => unicode replacement character
				this._leftoverHighSurrogate = 0;
				this._buffLen = this._push(this._buff, this._buffLen, SHA1Constant.UNICODE_REPLACEMENT);
			}

			this._totalLen += this._buffLen;
			this._wrapUp();
		}

		const part0 = toHexString(this._h0);
		const part1 = toHexString(this._h1);
		const part2 = toHexString(this._h2);
		const part3 = toHexString(this._h3);
		const part4 = toHexString(this._h4);
		return part0 + part1 + part2 + part3 + part4;
	}

	/**
	 * Apply the SHA1 padding (a 0x80 byte, zeros, and the 64-bit big-endian
	 * message length in bits) and hash the final block(s).
	 */
	private _wrapUp(): void {
		const buffer = this._buff;

		buffer[this._buffLen] = 0x80;
		this._buffLen += 1;
		fill(buffer, this._buffLen);

		if (this._buffLen > 56) {
			// no room left for the 8 length bytes => they go into an extra block
			this._step();
			fill(buffer);
		}

		// the bit length is kept in a double, which represents it exactly
		// because the mantissa can cover up to 52 bits
		const bitLength = 8 * this._totalLen;
		const highWord = Math.floor(bitLength / 0x100000000);
		const lowWord = bitLength % 0x100000000;

		this._buffDV.setUint32(56, highWord, false);
		this._buffDV.setUint32(60, lowWord, false);

		this._step();
	}

	/**
	 * Hash the first 64 bytes of the buffer into the running state.
	 */
	private _step(): void {
		const schedule = StringSHA1._bigBlock32;
		const block = this._buffDV;

		// words 0..15 are the block itself, read as big-endian
		for (let w = 0; w < 16; w++) {
			schedule.setUint32(w * 4, block.getUint32(w * 4, false), false);
		}

		// words 16..79 are derived from earlier words
		for (let w = 16; w < 80; w++) {
			const mixed = schedule.getUint32((w - 3) * 4, false)
				^ schedule.getUint32((w - 8) * 4, false)
				^ schedule.getUint32((w - 14) * 4, false)
				^ schedule.getUint32((w - 16) * 4, false);
			schedule.setUint32(w * 4, leftRotate(mixed, 1), false);
		}

		let a = this._h0;
		let b = this._h1;
		let c = this._h2;
		let d = this._h3;
		let e = this._h4;

		for (let round = 0; round < 80; round++) {
			let f: number;
			let k: number;

			if (round >= 60) {
				f = b ^ c ^ d;
				k = 0xCA62C1D6;
			} else if (round >= 40) {
				f = (b & c) | (b & d) | (c & d);
				k = 0x8F1BBCDC;
			} else if (round >= 20) {
				f = b ^ c ^ d;
				k = 0x6ED9EBA1;
			} else {
				f = (b & c) | ((~b) & d);
				k = 0x5A827999;
			}

			// the sum stays an exact integer; `| 0` truncates it to 32 bits
			const next = (leftRotate(a, 5) + f + e + k + schedule.getUint32(round * 4, false)) | 0;
			e = d;
			d = c;
			c = leftRotate(b, 30);
			b = a;
			a = next;
		}

		this._h0 = (this._h0 + a) | 0;
		this._h1 = (this._h1 + b) | 0;
		this._h2 = (this._h2 + c) | 0;
		this._h3 = (this._h3 + d) | 0;
		this._h4 = (this._h4 + e) | 0;
	}
}

src/vs/base/common/strings.ts

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -428,29 +428,27 @@ export function commonSuffixLength(a: string, b: string): number {
428428
return len;
429429
}
430430

431-
// --- unicode
432-
// http://en.wikipedia.org/wiki/Surrogate_pair
433-
// Returns the code point starting at a specified index in a string
434-
// Code points U+0000 to U+D7FF and U+E000 to U+FFFF are represented on a single character
435-
// Code points U+10000 to U+10FFFF are represented on two consecutive characters
436-
//export function getUnicodePoint(str:string, index:number, len:number):number {
437-
// const chrCode = str.charCodeAt(index);
438-
// if (0xD800 <= chrCode && chrCode <= 0xDBFF && index + 1 < len) {
439-
// const nextChrCode = str.charCodeAt(index + 1);
440-
// if (0xDC00 <= nextChrCode && nextChrCode <= 0xDFFF) {
441-
// return (chrCode - 0xD800) << 10 + (nextChrCode - 0xDC00) + 0x10000;
442-
// }
443-
// }
444-
// return chrCode;
445-
//}
431+
/**
 * Tells whether `charCode` is a UTF-16 high (leading) surrogate, i.e. lies in
 * the range U+D800 to U+DBFF.
 * See http://en.wikipedia.org/wiki/Surrogate_pair
 */
export function isHighSurrogate(charCode: number): boolean {
	return charCode >= 0xD800 && charCode <= 0xDBFF;
}
449437

438+
/**
 * Tells whether `charCode` is a UTF-16 low (trailing) surrogate, i.e. lies in
 * the range U+DC00 to U+DFFF.
 * See http://en.wikipedia.org/wiki/Surrogate_pair
 */
export function isLowSurrogate(charCode: number): boolean {
	return charCode >= 0xDC00 && charCode <= 0xDFFF;
}
453444

445+
/**
 * Combine a high and a low surrogate into the code point they encode.
 * The arguments are not validated.
 * See http://en.wikipedia.org/wiki/Surrogate_pair
 */
export function computeCodePoint(highSurrogate: number, lowSurrogate: number): number {
	const upperBits = (highSurrogate - 0xD800) << 10;
	const lowerBits = lowSurrogate - 0xDC00;
	return upperBits + lowerBits + 0x10000;
}
451+
454452
/**
455453
* get the code point that begins at offset `offset`
456454
*/
@@ -459,7 +457,7 @@ export function getNextCodePoint(str: string, len: number, offset: number): numb
459457
if (isHighSurrogate(charCode) && offset + 1 < len) {
460458
const nextCharCode = str.charCodeAt(offset + 1);
461459
if (isLowSurrogate(nextCharCode)) {
462-
return ((charCode - 0xD800) << 10) + (nextCharCode - 0xDC00) + 0x10000;
460+
return computeCodePoint(charCode, nextCharCode);
463461
}
464462
}
465463
return charCode;
@@ -473,7 +471,7 @@ function getPrevCodePoint(str: string, offset: number): number {
473471
if (isLowSurrogate(charCode) && offset > 1) {
474472
const prevCharCode = str.charCodeAt(offset - 2);
475473
if (isHighSurrogate(prevCharCode)) {
476-
return ((prevCharCode - 0xD800) << 10) + (charCode - 0xDC00) + 0x10000;
474+
return computeCodePoint(prevCharCode, charCode);
477475
}
478476
}
479477
return charCode;

src/vs/base/test/common/hash.test.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55
import * as assert from 'assert';
6-
import { hash } from 'vs/base/common/hash';
6+
import { hash, StringSHA1 } from 'vs/base/common/hash';
77

88
suite('Hash', () => {
99
test('string', () => {
@@ -53,4 +53,28 @@ suite('Hash', () => {
5353
assert.notEqual(a, b);
5454
});
5555

56+
/**
 * Feed `strings` one after the other into a fresh `StringSHA1` and assert that
 * the resulting digest equals `expected`.
 */
function checkSHA1(strings: string[], expected: string) {
	const sha1 = new StringSHA1();
	strings.forEach(chunk => {
		sha1.update(chunk);
	});
	assert.equal(sha1.digest(), expected);
}
64+
65+
// Each entry is: test name, input chunks, expected SHA1 hex digest.
const sha1Cases: [string, string[], string][] = [
	// a lone low surrogate
	['sha1-1', ['\udd56'], '9bdb77276c1852e1fb067820472812fcf6084024'],
	// a lone high surrogate at the end of the input; same expected digest as above
	['sha1-2', ['\udb52'], '9bdb77276c1852e1fb067820472812fcf6084024'],
	// a high surrogate followed by a character that is not a low surrogate
	['sha1-3', ['\uda02ꑍ'], '9b483a471f22fe7e09d83f221871a987244bbd3f'],
	// plain ASCII
	['sha1-4', ['hello'], 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'],
];

for (const [name, chunks, expected] of sha1Cases) {
	test(name, () => {
		checkSHA1(chunks, expected);
	});
}
5680
});

0 commit comments

Comments
 (0)