1.1 环境准备
安装cmake等工具
sudo apt-get install build-essential cmake
1.2 下载源码
git clone https://github.com/apache/arrow.git
切换release分支
cd arrow
git tag
...
git checkout -b 1.0.0 apache-arrow-1.0.0
或直接下载
wget https://github.com/apache/arrow/archive/apache-arrow-1.0.0.tar.gz
1.3 编译
cd cpp
mkdir release
cd release
cmake ..
make
其他编译可选项,请参考文末“参考资料”中的 Arrow C++ 构建文档(building.html)。
查看编译结果
Scanning dependencies of target arrow_shared
[100%] Linking CXX shared library ../../release/libarrow.so
[100%] Linking CXX static library ../../release/libarrow.a
[100%] Built target arrow_static
[100%] Built target arrow_shared
lynn@lynn-ubuntu:/codes/database/arrow/cpp/release$ ll release/
total 53022
drwxrwxrwx 1 root root 4096 9月 18 14:20 ./
drwxrwxrwx 1 root root 4096 9月 18 14:20 ../
-rwxrwxrwx 1 root root 12468430 9月 18 14:20 libarrow.a*
-rwxrwxrwx 1 root root 20997166 9月 18 14:08 libarrow_bundled_dependencies.a*
lrwxrwxrwx 1 root root 15 9月 18 14:20 libarrow.so -> libarrow.so.100*
lrwxrwxrwx 1 root root 19 9月 18 14:20 libarrow.so.100 -> libarrow.so.100.0.0*
-rwxrwxrwx 1 root root 10259368 9月 18 14:20 libarrow.so.100.0.0*
可以看到动态链接库 libarrow.so.100.0.0(libarrow.so 和 libarrow.so.100 是指向它的符号链接),以及静态链接库 libarrow.a 和 libarrow_bundled_dependencies.a。
1.4 安装
lynn@lynn-ubuntu:/codes/database/arrow/cpp/release$ sudo make install
[sudo] password for lynn:
[ 7%] Built target jemalloc_ep
[ 7%] Built target toolchain
[ 7%] Built target arrow_dependencies
[ 99%] Built target arrow_objlib
[ 99%] Built target arrow_shared
[100%] Built target arrow_static
[100%] Built target arrow_bundled_dependencies
Install the project...
-- Install configuration: "RELEASE"
-- Installing: /usr/local/include/arrow/util/config.h
-- Installing: /usr/local/share/doc/arrow/LICENSE.txt
-- Installing: /usr/local/share/doc/arrow/NOTICE.txt
-- Installing: /usr/local/share/doc/arrow/README.md
-- Installing: /usr/local/lib/libarrow.so.100.0.0
-- Installing: /usr/local/lib/libarrow.so.100
-- Installing: /usr/local/lib/libarrow.so
-- Installing: /usr/local/lib/libarrow.a
-- Installing: /usr/local/lib/cmake/arrow/FindArrow.cmake
-- Installing: /usr/local/lib/cmake/arrow/ArrowTargets.cmake
-- Installing: /usr/local/lib/cmake/arrow/ArrowTargets-release.cmake
-- Installing: /usr/local/lib/cmake/arrow/ArrowConfig.cmake
-- Installing: /usr/local/lib/cmake/arrow/ArrowConfigVersion.cmake
-- Installing: /usr/local/lib/pkgconfig/arrow.pc
-- Installing: /usr/local/lib/libarrow_bundled_dependencies.a
-- Installing: /usr/local/include/arrow/api.h
-- Installing: /usr/local/include/arrow/array.h
-- Installing: /usr/local/include/arrow/buffer.h
-- Installing: /usr/local/include/arrow/buffer_builder.h
-- Installing: /usr/local/include/arrow/builder.h
-- Installing: /usr/local/include/arrow/chunked_array.h
-- Installing: /usr/local/include/arrow/compare.h
-- Installing: /usr/local/include/arrow/config.h
-- Installing: /usr/local/include/arrow/datum.h
-- Installing: /usr/local/include/arrow/device.h
-- Installing: /usr/local/include/arrow/extension_type.h
-- Installing: /usr/local/include/arrow/memory_pool.h
-- Installing: /usr/local/include/arrow/memory_pool_test.h
-- Installing: /usr/local/include/arrow/pch.h
-- Installing: /usr/local/include/arrow/pretty_print.h
-- Installing: /usr/local/include/arrow/record_batch.h
-- Installing: /usr/local/include/arrow/result.h
-- Installing: /usr/local/include/arrow/scalar.h
-- Installing: /usr/local/include/arrow/sparse_tensor.h
-- Installing: /usr/local/include/arrow/status.h
-- Installing: /usr/local/include/arrow/stl.h
-- Installing: /usr/local/include/arrow/stl_allocator.h
-- Installing: /usr/local/include/arrow/table.h
-- Installing: /usr/local/include/arrow/table_builder.h
-- Installing: /usr/local/include/arrow/tensor.h
-- Installing: /usr/local/include/arrow/type.h
-- Installing: /usr/local/include/arrow/type_fwd.h
-- Installing: /usr/local/include/arrow/type_traits.h
-- Installing: /usr/local/include/arrow/visitor.h
-- Installing: /usr/local/include/arrow/visitor_inline.h
-- Installing: /usr/local/lib/cmake/arrow/ArrowOptions.cmake
-- Installing: /usr/local/lib/cmake/arrow/arrow-config.cmake
-- Installing: /usr/local/include/arrow/testing/extension_type.h
-- Installing: /usr/local/include/arrow/testing/generator.h
-- Installing: /usr/local/include/arrow/testing/gtest_common.h
-- Installing: /usr/local/include/arrow/testing/gtest_compat.h
-- Installing: /usr/local/include/arrow/testing/gtest_util.h
-- Installing: /usr/local/include/arrow/testing/json_integration.h
-- Installing: /usr/local/include/arrow/testing/pch.h
-- Installing: /usr/local/include/arrow/testing/random.h
-- Installing: /usr/local/include/arrow/testing/util.h
-- Installing: /usr/local/include/arrow/testing/visibility.h
-- Installing: /usr/local/include/arrow/array/array_base.h
-- Installing: /usr/local/include/arrow/array/array_binary.h
-- Installing: /usr/local/include/arrow/array/array_decimal.h
-- Installing: /usr/local/include/arrow/array/array_dict.h
-- Installing: /usr/local/include/arrow/array/array_nested.h
-- Installing: /usr/local/include/arrow/array/array_primitive.h
-- Installing: /usr/local/include/arrow/array/builder_adaptive.h
-- Installing: /usr/local/include/arrow/array/builder_base.h
-- Installing: /usr/local/include/arrow/array/builder_binary.h
-- Installing: /usr/local/include/arrow/array/builder_decimal.h
-- Installing: /usr/local/include/arrow/array/builder_dict.h
-- Installing: /usr/local/include/arrow/array/builder_nested.h
-- Installing: /usr/local/include/arrow/array/builder_primitive.h
-- Installing: /usr/local/include/arrow/array/builder_time.h
-- Installing: /usr/local/include/arrow/array/builder_union.h
-- Installing: /usr/local/include/arrow/array/concatenate.h
-- Installing: /usr/local/include/arrow/array/data.h
-- Installing: /usr/local/include/arrow/array/diff.h
-- Installing: /usr/local/include/arrow/array/util.h
-- Installing: /usr/local/include/arrow/array/validate.h
-- Installing: /usr/local/include/arrow/c/abi.h
-- Installing: /usr/local/include/arrow/c/bridge.h
-- Installing: /usr/local/include/arrow/c/helpers.h
-- Installing: /usr/local/include/arrow/io/api.h
-- Installing: /usr/local/include/arrow/io/buffered.h
-- Installing: /usr/local/include/arrow/io/caching.h
-- Installing: /usr/local/include/arrow/io/compressed.h
-- Installing: /usr/local/include/arrow/io/concurrency.h
-- Installing: /usr/local/include/arrow/io/file.h
-- Installing: /usr/local/include/arrow/io/hdfs.h
-- Installing: /usr/local/include/arrow/io/interfaces.h
-- Installing: /usr/local/include/arrow/io/memory.h
-- Installing: /usr/local/include/arrow/io/mman.h
-- Installing: /usr/local/include/arrow/io/slow.h
-- Installing: /usr/local/include/arrow/io/test_common.h
-- Installing: /usr/local/include/arrow/io/transform.h
-- Installing: /usr/local/include/arrow/io/type_fwd.h
-- Installing: /usr/local/include/arrow/tensor/converter.h
-- Installing: /usr/local/include/arrow/util/align_util.h
-- Installing: /usr/local/include/arrow/util/atomic_shared_ptr.h
-- Installing: /usr/local/include/arrow/util/base64.h
-- Installing: /usr/local/include/arrow/util/basic_decimal.h
-- Installing: /usr/local/include/arrow/util/benchmark_util.h
-- Installing: /usr/local/include/arrow/util/bit_block_counter.h
-- Installing: /usr/local/include/arrow/util/bit_run_reader.h
-- Installing: /usr/local/include/arrow/util/bit_stream_utils.h
-- Installing: /usr/local/include/arrow/util/bit_util.h
-- Installing: /usr/local/include/arrow/util/bitmap.h
-- Installing: /usr/local/include/arrow/util/bitmap_builders.h
-- Installing: /usr/local/include/arrow/util/bitmap_generate.h
-- Installing: /usr/local/include/arrow/util/bitmap_ops.h
-- Installing: /usr/local/include/arrow/util/bitmap_reader.h
-- Installing: /usr/local/include/arrow/util/bitmap_visit.h
-- Installing: /usr/local/include/arrow/util/bitmap_writer.h
-- Installing: /usr/local/include/arrow/util/bitset_stack.h
-- Installing: /usr/local/include/arrow/util/bpacking.h
-- Installing: /usr/local/include/arrow/util/bpacking_avx512_generated.h
-- Installing: /usr/local/include/arrow/util/bpacking_default.h
-- Installing: /usr/local/include/arrow/util/byte_stream_split.h
-- Installing: /usr/local/include/arrow/util/checked_cast.h
-- Installing: /usr/local/include/arrow/util/compare.h
-- Installing: /usr/local/include/arrow/util/compiler_util.h
-- Installing: /usr/local/include/arrow/util/compression.h
-- Installing: /usr/local/include/arrow/util/cpu_info.h
-- Installing: /usr/local/include/arrow/util/decimal.h
-- Installing: /usr/local/include/arrow/util/delimiting.h
-- Installing: /usr/local/include/arrow/util/double_conversion.h
-- Installing: /usr/local/include/arrow/util/formatting.h
-- Installing: /usr/local/include/arrow/util/functional.h
-- Installing: /usr/local/include/arrow/util/future.h
-- Installing: /usr/local/include/arrow/util/future_iterator.h
-- Installing: /usr/local/include/arrow/util/hash_util.h
-- Installing: /usr/local/include/arrow/util/hashing.h
-- Installing: /usr/local/include/arrow/util/int_util.h
-- Installing: /usr/local/include/arrow/util/io_util.h
-- Installing: /usr/local/include/arrow/util/iterator.h
-- Installing: /usr/local/include/arrow/util/key_value_metadata.h
-- Installing: /usr/local/include/arrow/util/logging.h
-- Installing: /usr/local/include/arrow/util/macros.h
-- Installing: /usr/local/include/arrow/util/make_unique.h
-- Installing: /usr/local/include/arrow/util/memory.h
-- Installing: /usr/local/include/arrow/util/mutex.h
-- Installing: /usr/local/include/arrow/util/optional.h
-- Installing: /usr/local/include/arrow/util/parallel.h
-- Installing: /usr/local/include/arrow/util/print.h
-- Installing: /usr/local/include/arrow/util/range.h
-- Installing: /usr/local/include/arrow/util/rle_encoding.h
-- Installing: /usr/local/include/arrow/util/simd.h
-- Installing: /usr/local/include/arrow/util/sort.h
-- Installing: /usr/local/include/arrow/util/spaced.h
-- Installing: /usr/local/include/arrow/util/stopwatch.h
-- Installing: /usr/local/include/arrow/util/string.h
-- Installing: /usr/local/include/arrow/util/string_builder.h
-- Installing: /usr/local/include/arrow/util/string_view.h
-- Installing: /usr/local/include/arrow/util/task_group.h
-- Installing: /usr/local/include/arrow/util/thread_pool.h
-- Installing: /usr/local/include/arrow/util/time.h
-- Installing: /usr/local/include/arrow/util/trie.h
-- Installing: /usr/local/include/arrow/util/type_fwd.h
-- Installing: /usr/local/include/arrow/util/type_traits.h
-- Installing: /usr/local/include/arrow/util/ubsan.h
-- Installing: /usr/local/include/arrow/util/uri.h
-- Installing: /usr/local/include/arrow/util/utf8.h
-- Installing: /usr/local/include/arrow/util/value_parsing.h
-- Installing: /usr/local/include/arrow/util/variant.h
-- Installing: /usr/local/include/arrow/util/vector.h
-- Installing: /usr/local/include/arrow/util/visibility.h
-- Installing: /usr/local/include/arrow/util/windows_compatibility.h
-- Installing: /usr/local/include/arrow/util/windows_fixup.h
-- Installing: /usr/local/include/arrow/vendored/datetime.h
-- Installing: /usr/local/include/arrow/vendored/strptime.h
-- Installing: /usr/local/include/arrow/vendored/xxhash.h
-- Installing: /usr/local/include/arrow/vendored/optional.hpp
-- Installing: /usr/local/include/arrow/vendored/string_view.hpp
-- Installing: /usr/local/include/arrow/vendored/variant.hpp
-- Installing: /usr/local/include/arrow/vendored/datetime/date.h
-- Installing: /usr/local/include/arrow/vendored/datetime/ios.h
-- Installing: /usr/local/include/arrow/vendored/datetime/tz.h
-- Installing: /usr/local/include/arrow/vendored/datetime/tz_private.h
-- Installing: /usr/local/include/arrow/vendored/datetime/visibility.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/bignum-dtoa.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/bignum.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/cached-powers.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/diy-fp.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/double-conversion.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/fast-dtoa.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/fixed-dtoa.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/ieee.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/strtod.h
-- Installing: /usr/local/include/arrow/vendored/double-conversion/utils.h
-- Installing: /usr/local/include/arrow/ipc/api.h
-- Installing: /usr/local/include/arrow/ipc/dictionary.h
-- Installing: /usr/local/include/arrow/ipc/feather.h
-- Installing: /usr/local/include/arrow/ipc/json_simple.h
-- Installing: /usr/local/include/arrow/ipc/message.h
-- Installing: /usr/local/include/arrow/ipc/options.h
-- Installing: /usr/local/include/arrow/ipc/reader.h
-- Installing: /usr/local/include/arrow/ipc/test_common.h
-- Installing: /usr/local/include/arrow/ipc/type_fwd.h
-- Installing: /usr/local/include/arrow/ipc/util.h
-- Installing: /usr/local/include/arrow/ipc/writer.h
2. project示例
CMakeLists.txt
# Build script for the row-wise <-> columnar conversion example.
cmake_minimum_required(VERSION 3.15)
project(arrow_cpp)

# Compile as C++14 and fail at configure time if the compiler cannot provide it.
# Without CMAKE_CXX_STANDARD_REQUIRED, CMake silently falls back to an older
# standard when C++14 is unavailable.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the installed Arrow package (its CMake config files are installed
# under <prefix>/lib/cmake/arrow by `make install`).
find_package(Arrow REQUIRED)

add_executable(arrow_cpp row-wise-conversion-example.cc)
# Link against the shared Arrow library target exported by the Arrow package.
target_link_libraries(arrow_cpp PRIVATE arrow_shared)
row-wise-conversion-example.cc
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <vector>

#include <arrow/api.h>
using arrow::DoubleBuilder;
using arrow::Int64Builder;
using arrow::ListBuilder;
// While columnar data structures make for efficient operations, data is often
// received from other systems in a row-wise fashion. This example gives a brief
// introduction to the classes provided by Apache Arrow by showing how to
// transform row-wise data into a columnar table.
//
// The data in this example is stored in the following struct. The scalar
// members carry default initializers so that a default-constructed row never
// holds indeterminate values; brace initialization such as `{1, 1.0, {1.0}}`
// keeps working because the type is still an aggregate (C++14 and later).
struct data_row {
  int64_t id = 0;                       // row identifier
  double cost = 0.0;                    // cost value of the row
  std::vector<double> cost_components;  // variable-length list of cost components
};
// Transforms a vector of `data_row` structs into a columnar `arrow::Table`.
//
// The final representation is an `arrow::Table`, which in turn is made up of an
// `arrow::Schema` and a list of `arrow::ChunkedArray` instances. The columns
// are built up incrementally while iterating over the rows, using the
// `arrow::ArrayBuilder` classes that help construct the final `arrow::Array`
// instances. Arrow has a specially typed builder class for each type:
//   * the primitive values `id` and `cost` use `arrow::Int64Builder` and
//     `arrow::DoubleBuilder` respectively;
//   * the `cost_components` vector needs two builders: a top-level
//     `arrow::ListBuilder` that builds the array of offsets, and a nested
//     `arrow::DoubleBuilder` that builds the underlying values array those
//     offsets refer to.
//
// \param[in] rows the row-wise input data
// \param[out] table receives the resulting table on success
// \return the first non-OK status reported by a builder, otherwise OK
arrow::Status VectorToColumnarTable(const std::vector<struct data_row>& rows,
                                    std::shared_ptr<arrow::Table>* table) {
  // Note from the upstream example: the builders are more efficient with
  // arrow::jemalloc::MemoryPool::default_pool(), as it can grow the underlying
  // memory regions in place; arrow::jemalloc is only supported on Unix
  // systems, not Windows. The default pool is used here.
  arrow::MemoryPool* const memory_pool = arrow::default_memory_pool();

  arrow::Int64Builder ids(memory_pool);
  arrow::DoubleBuilder costs(memory_pool);
  arrow::ListBuilder component_lists(
      memory_pool, std::make_shared<arrow::DoubleBuilder>(memory_pool));
  // The nested values builder is owned by `component_lists`.
  auto* const component_values =
      static_cast<arrow::DoubleBuilder*>(component_lists.value_builder());

  // Feed every row into the builders. Each `Append` call may fail (e.g. when
  // additional memory cannot be allocated), so every returned status is
  // checked.
  for (const data_row& entry : rows) {
    ARROW_RETURN_NOT_OK(ids.Append(entry.id));
    ARROW_RETURN_NOT_OK(costs.Append(entry.cost));

    // Mark the start of a new list row; this records the current offset in
    // the values builder.
    ARROW_RETURN_NOT_OK(component_lists.Append());

    // Store the actual values. No validity information is passed, so all
    // added values are treated as valid, i.e. non-null.
    const std::vector<double>& parts = entry.cost_components;
    ARROW_RETURN_NOT_OK(component_values->AppendValues(parts.data(), parts.size()));
  }

  // Finalise the arrays. `component_values` needs no Finish call of its own
  // because it is implied by the parent builder's Finish invocation.
  std::shared_ptr<arrow::Array> id_column;
  ARROW_RETURN_NOT_OK(ids.Finish(&id_column));
  std::shared_ptr<arrow::Array> cost_column;
  ARROW_RETURN_NOT_OK(costs.Finish(&cost_column));
  std::shared_ptr<arrow::Array> components_column;
  ARROW_RETURN_NOT_OK(component_lists.Finish(&components_column));

  // Declare the (type) schema that matches the three columns.
  const std::vector<std::shared_ptr<arrow::Field>> fields = {
      arrow::field("id", arrow::int64()),
      arrow::field("cost", arrow::float64()),
      arrow::field("cost_components", arrow::list(arrow::float64()))};
  const auto table_schema = std::make_shared<arrow::Schema>(fields);

  // The resulting table can be passed on to other functions that consume
  // Apache Arrow memory structures. It has ownership of all referenced data,
  // so nothing dangles once the builders above go out of scope.
  const std::vector<std::shared_ptr<arrow::Array>> columns = {id_column, cost_column,
                                                              components_column};
  *table = arrow::Table::Make(table_schema, columns);
  return arrow::Status::OK();
}
// Converts an Arrow table back into the row-wise `data_row` representation.
//
// The table is first checked against the expected schema (`id`: int64,
// `cost`: float64, `cost_components`: list<float64>); only then are the rows
// built up incrementally and appended to `*rows`.
//
// Simplifications of this example:
//   * every column must consist of exactly one chunk (otherwise an Invalid
//     status is returned instead of reading past the first chunk);
//   * the table is assumed to contain no null entries, i.e. each row is
//     filled with valid values.
//
// \param[in] table the table to convert
// \param[out] rows receives one appended `data_row` per table row
// \return Invalid if the schema or chunk layout is not as expected, else OK
arrow::Status ColumnarTableToVector(const std::shared_ptr<arrow::Table>& table,
                                    std::vector<struct data_row>* rows) {
  // To check whether the table is as expected, its schema alone is sufficient.
  std::vector<std::shared_ptr<arrow::Field>> schema_vector = {
      arrow::field("id", arrow::int64()), arrow::field("cost", arrow::float64()),
      arrow::field("cost_components", arrow::list(arrow::float64()))};
  auto expected_schema = std::make_shared<arrow::Schema>(schema_vector);
  if (!expected_schema->Equals(*table->schema())) {
    // The table doesn't have the expected schema, thus it cannot be converted
    // directly to the target representation.
    return arrow::Status::Invalid("Schemas are not matching!");
  }

  const int64_t num_rows = table->num_rows();
  if (num_rows == 0) {
    // Nothing to convert; this also covers columns that hold no chunk at all.
    return arrow::Status::OK();
  }

  // The code below only looks at chunk 0 of every column while iterating up to
  // the table's row count, so a column split into several chunks would be read
  // out of bounds. Reject such tables explicitly.
  for (int c = 0; c < table->num_columns(); ++c) {
    if (table->column(c)->num_chunks() != 1) {
      return arrow::Status::Invalid("Expected exactly one chunk per column");
    }
  }

  // With the structure verified, the underlying arrays can be unpacked. For
  // the primitive columns `id` and `cost` the high level accessors are used,
  // whereas for the nested column `cost_components` the raw pointer to the
  // values is needed to copy them into the resulting `std::vector<double>`.
  auto ids = std::static_pointer_cast<arrow::Int64Array>(table->column(0)->chunk(0));
  auto costs = std::static_pointer_cast<arrow::DoubleArray>(table->column(1)->chunk(0));
  auto cost_components =
      std::static_pointer_cast<arrow::ListArray>(table->column(2)->chunk(0));
  auto cost_components_values =
      std::static_pointer_cast<arrow::DoubleArray>(cost_components->values());

  // To support zero-copy slices, the raw values pointer has to account for the
  // slicing offset of the values array. This is not needed for the higher
  // level functions like Value(...) that already account for the offset
  // internally.
  const double* ccv_ptr = cost_components_values->data()->GetValues<double>(1);

  for (int64_t i = 0; i < num_rows; ++i) {
    // [first, last) is the slice of the flattened values belonging to row i.
    const double* first = ccv_ptr + cost_components->value_offset(i);
    const double* last = ccv_ptr + cost_components->value_offset(i + 1);
    // The values are copied exactly once, straight into the new row's vector.
    rows->push_back({ids->Value(i), costs->Value(i), std::vector<double>(first, last)});
  }
  return arrow::Status::OK();
}
// Evaluates `expr` (an expression yielding an arrow::Status) once; if the
// status is not OK, prints its message to stderr and returns EXIT_FAILURE from
// the enclosing function. The do/while(0) wrapper deliberately has no trailing
// semicolon, so that `EXIT_ON_FAILURE(x);` expands to exactly one statement and
// remains usable inside unbraced if/else branches.
#define EXIT_ON_FAILURE(expr)                      \
  do {                                             \
    arrow::Status status_ = (expr);                \
    if (!status_.ok()) {                           \
      std::cerr << status_.message() << std::endl; \
      return EXIT_FAILURE;                         \
    }                                              \
  } while (0)
int main(int argc, char** argv) {
std::vector<data_row> rows = {
{1, 1.0, {1.0}}, {2, 2.0, {1.0, 2.0}}, {3, 3.0, {1.0, 2.0, 3.0}}};
std::shared_ptr<arrow::Table> table;
EXIT_ON_FAILURE(VectorToColumnarTable(rows, &table));
std::vector<data_row> expected_rows;
EXIT_ON_FAILURE(ColumnarTableToVector(table, &expected_rows));
assert(rows.size() == expected_rows.size());
return EXIT_SUCCESS;
}
参考资料
http://arrow.apache.org/docs/developers/cpp/building.html#building-arrow-cpp
http://arrow.apache.org/docs/cpp/cmake.html#using-arrow-c-in-your-own-project
http://arrow.apache.org/docs/cpp/examples/row_columnar_conversion.html