Skip to content

Commit 0533a4c

Browse files
backes and Commit Bot
authored and committed
[utils] Improve {CopyChars} speed for Atom CPUs
Local testing shows that this switch for small counts (up to size 16) is significantly faster than the default {std::copy_n} (by up to 20%, e.g. for the "join-int" js perf test). It's also faster than just a loop covering all sizes up to 16. R=leszeks@chromium.org CC=jkummerow@chromium.org Bug: chromium:1006157 Change-Id: I4d179f064704261fa18f453c23c04ee0b351e942 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1864831 Reviewed-by: Leszek Swirski <leszeks@chromium.org> Commit-Queue: Clemens Backes <clemensb@chromium.org> Cr-Commit-Position: refs/heads/master@{#64397}
1 parent 7594feb commit 0533a4c

1 file changed

Lines changed: 34 additions & 4 deletions

File tree

src/utils/memcopy.h

Lines changed: 34 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,8 @@ template <typename SrcType, typename DstType>
208208
void CopyChars(DstType* dst, const SrcType* src, size_t count) {
209209
STATIC_ASSERT(std::is_integral<SrcType>::value);
210210
STATIC_ASSERT(std::is_integral<DstType>::value);
211+
using SrcTypeUnsigned = typename std::make_unsigned<SrcType>::type;
212+
using DstTypeUnsigned = typename std::make_unsigned<DstType>::type;
211213

212214
#ifdef DEBUG
213215
// Check for no overlap, otherwise {std::copy_n} cannot be used.
@@ -218,10 +220,38 @@ void CopyChars(DstType* dst, const SrcType* src, size_t count) {
218220
DCHECK(src_end <= dst_start || dst_end <= src_start);
219221
#endif
220222

221-
using SrcTypeUnsigned = typename std::make_unsigned<SrcType>::type;
222-
using DstTypeUnsigned = typename std::make_unsigned<DstType>::type;
223-
std::copy_n(reinterpret_cast<const SrcTypeUnsigned*>(src), count,
224-
reinterpret_cast<DstTypeUnsigned*>(dst));
223+
auto* dst_u = reinterpret_cast<DstTypeUnsigned*>(dst);
224+
auto* src_u = reinterpret_cast<const SrcTypeUnsigned*>(src);
225+
226+
// Especially Atom CPUs profit from this explicit instantiation for small
227+
// counts. This gives up to 20 percent improvement for microbenchmarks such as
228+
// joining an array of small integers (2019-10-16).
229+
switch (count) {
230+
#define CASE(N) \
231+
case N: \
232+
std::copy_n(src_u, N, dst_u); \
233+
return;
234+
CASE(1)
235+
CASE(2)
236+
CASE(3)
237+
CASE(4)
238+
CASE(5)
239+
CASE(6)
240+
CASE(7)
241+
CASE(8)
242+
CASE(9)
243+
CASE(10)
244+
CASE(11)
245+
CASE(12)
246+
CASE(13)
247+
CASE(14)
248+
CASE(15)
249+
CASE(16)
250+
#undef CASE
251+
default:
252+
std::copy_n(src_u, count, dst_u);
253+
return;
254+
}
225255
}
226256

227257
} // namespace internal

0 commit comments

Comments
 (0)