Skip to content

Commit d2dbf1e

Browse files
eerhardt authored and wesm committed
ARROW-4502: [C#] Add support for zero-copy reads
- Update to the latest Google FlatBuffers code to support Spans/Memory.
- Add a constructor for ArrowStreamReader that takes a ReadOnlyMemory<byte>.
- Add a synchronous ReadNextRecordBatch() method.
- Since we are now enabling Spans with FlatBuffers, we need to change the way we write to streams in the ArrowStreamWriter to use Memory<byte> instead of byte[]. This API is in netcoreapp2.1, but not in netstandard, so cross compile for netcoreapp2.1 and add a shim for netstandard.

~Unit tests are coming. I currently haven't found a great way to "read" arrow streams out of thin air. My initial thought is to use the writer to write some made up data, and then read it in using the reader and ensure the values coming back are the same. @wesm - does that sound like a good approach? I was using a binary file (that was written by PyArrow) locally to test this out.~

~I also plan on adding some benchmark tests to compare between the Stream and the ReadOnlyMemory implementations, but again am having trouble with "how to get the stream to read?".~

@stephentoub @pgovind @chutchinson

Author: Eric Erhardt <eric.erhardt@microsoft.com>

Closes apache#3736 from eerhardt/ZeroCopyReads and squashes the following commits:

21f41ba <Eric Erhardt> Add RAT exclude for csharp benchmark tests csproj
558ec56 <Eric Erhardt> Address PR feedback.
6ebc80e <Eric Erhardt> Add perf benchmarks for the ArrowStreamReader.
18db336 <Eric Erhardt> Respond to PR feedback.
98e1b11 <Eric Erhardt> Add more types to tests.
f6942cf <Eric Erhardt> Add initial unit tests for ArrowStreamReader.
f33e294 <Eric Erhardt> ARROW-4502: Add support for zero-copy reads
1 parent 09466ce commit d2dbf1e

22 files changed

Lines changed: 1479 additions & 598 deletions

csharp/Apache.Arrow.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow", "src\Apache.
77
EndProject
88
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Tests", "test\Apache.Arrow.Tests\Apache.Arrow.Tests.csproj", "{9CCEC01B-E67A-4726-BE72-7B514F76163F}"
99
EndProject
10+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Apache.Arrow.Benchmarks", "test\Apache.Arrow.Benchmarks\Apache.Arrow.Benchmarks.csproj", "{742DF47D-77C5-4B84-9E0C-69645F1161EA}"
11+
EndProject
1012
Global
1113
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1214
Debug|Any CPU = Debug|Any CPU
@@ -21,6 +23,10 @@ Global
2123
{9CCEC01B-E67A-4726-BE72-7B514F76163F}.Debug|Any CPU.Build.0 = Debug|Any CPU
2224
{9CCEC01B-E67A-4726-BE72-7B514F76163F}.Release|Any CPU.ActiveCfg = Release|Any CPU
2325
{9CCEC01B-E67A-4726-BE72-7B514F76163F}.Release|Any CPU.Build.0 = Release|Any CPU
26+
{742DF47D-77C5-4B84-9E0C-69645F1161EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
27+
{742DF47D-77C5-4B84-9E0C-69645F1161EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
28+
{742DF47D-77C5-4B84-9E0C-69645F1161EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
29+
{742DF47D-77C5-4B84-9E0C-69645F1161EA}.Release|Any CPU.Build.0 = Release|Any CPU
2430
EndGlobalSection
2531
GlobalSection(SolutionProperties) = preSolution
2632
HideSolutionNode = FALSE

csharp/src/Apache.Arrow/Apache.Arrow.csproj

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<Import Project="../../build/Common.props" />
44

55
<PropertyGroup>
6-
<TargetFramework>netstandard1.3</TargetFramework>
6+
<TargetFrameworks>netstandard1.3;netcoreapp2.1</TargetFrameworks>
77
<LangVersion>7.2</LangVersion>
88
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
99
<Authors>Apache</Authors>
@@ -15,6 +15,7 @@
1515
<PackageTags>apache arrow</PackageTags>
1616
<Company>Apache</Company>
1717
<Version>0.0.1</Version>
18+
<DefineConstants>$(DefineConstants);UNSAFE_BYTEBUFFER;BYTEBUFFER_NO_BOUNDS_CHECK;ENABLE_SPAN_T</DefineConstants>
1819
</PropertyGroup>
1920

2021
<ItemGroup>
@@ -39,4 +40,7 @@
3940
</EmbeddedResource>
4041
</ItemGroup>
4142

43+
<ItemGroup Condition="'$(TargetFramework)' == 'netcoreapp2.1'">
44+
<Compile Remove="Extensions\StreamExtensions.netstandard.cs" />
45+
</ItemGroup>
4246
</Project>

csharp/src/Apache.Arrow/ArrowBuffer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace Apache.Arrow
2323
{
2424
public static ArrowBuffer Empty => new ArrowBuffer(Memory<byte>.Empty);
2525

26-
private ArrowBuffer(Memory<byte> data)
26+
internal ArrowBuffer(ReadOnlyMemory<byte> data)
2727
{
2828
Memory = data;
2929
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one or more
2+
// contributor license agreements. See the NOTICE file distributed with
3+
// this work for additional information regarding copyright ownership.
4+
// The ASF licenses this file to You under the Apache License, Version 2.0
5+
// (the "License"); you may not use this file except in compliance with
6+
// the License. You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
using System;
17+
using System.Buffers;
18+
using System.IO;
19+
using System.Runtime.InteropServices;
20+
using System.Threading;
21+
using System.Threading.Tasks;
22+
23+
namespace Apache.Arrow
24+
{
25+
/// <summary>
/// Helpers to write <see cref="ReadOnlyMemory{T}"/> of bytes to a <see cref="Stream"/>
/// on netstandard, where <see cref="Stream"/> only exposes array-based write overloads.
/// </summary>
internal static class StreamExtensions
{
    /// <summary>
    /// Asynchronously writes the contents of <paramref name="buffer"/> to <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">The stream to write to.</param>
    /// <param name="buffer">The bytes to write.</param>
    /// <param name="cancellationToken">Token forwarded to the underlying array-based write.</param>
    /// <returns>A task that completes when the underlying write has completed.</returns>
    public static Task WriteAsync(this Stream stream, ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
    {
        // Fast path: the memory is backed by a managed array, so it can be written directly without copying.
        if (MemoryMarshal.TryGetArray(buffer, out ArraySegment<byte> array))
        {
            return stream.WriteAsync(array.Array, array.Offset, array.Count, cancellationToken);
        }

        // Slow path: copy the bytes into a pooled array, since only array-based overloads are used here.
        byte[] sharedBuffer = ArrayPool<byte>.Shared.Rent(buffer.Length);
        Task writeTask;
        try
        {
            buffer.Span.CopyTo(sharedBuffer);

            // Rent may return a larger array than requested; write only the bytes that were copied.
            writeTask = stream.WriteAsync(sharedBuffer, 0, buffer.Length, cancellationToken);
        }
        catch
        {
            // The write failed before producing a task (it threw synchronously), so
            // FinishWriteAsync will never run; hand the rented array back here instead.
            ArrayPool<byte>.Shared.Return(sharedBuffer);
            throw;
        }

        return FinishWriteAsync(writeTask, sharedBuffer);
    }

    /// <summary>
    /// Awaits <paramref name="writeTask"/> and then returns <paramref name="localBuffer"/>
    /// to the shared array pool, whether the write succeeded, faulted or was cancelled.
    /// </summary>
    /// <param name="writeTask">The in-flight write that is reading from <paramref name="localBuffer"/>.</param>
    /// <param name="localBuffer">The pooled array to return once the write has finished.</param>
    private static async Task FinishWriteAsync(Task writeTask, byte[] localBuffer)
    {
        try
        {
            await writeTask.ConfigureAwait(false);
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(localBuffer);
        }
    }
}
54+
}

0 commit comments

Comments
 (0)