# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # cython: profile=False # distutils: language = c++ # cython: embedsignature = True from cython.operator cimport dereference as deref from libcpp.vector cimport vector as std_vector from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport * from pyarrow.lib cimport (check_status, _Weakrefable, MemoryPool, maybe_unbox_memory_pool, Schema, pyarrow_wrap_schema, pyarrow_wrap_batch, RecordBatch, pyarrow_wrap_table, get_reader) cdef class ORCReader(_Weakrefable): cdef: object source CMemoryPool* allocator unique_ptr[ORCFileReader] reader def __cinit__(self, MemoryPool memory_pool=None): self.allocator = maybe_unbox_memory_pool(memory_pool) def open(self, object source, c_bool use_memory_map=True): cdef: shared_ptr[CRandomAccessFile] rd_handle self.source = source get_reader(source, use_memory_map, &rd_handle) with nogil: check_status(ORCFileReader.Open(rd_handle, self.allocator, &self.reader)) def schema(self): """ The arrow schema for this file. Returns ------- schema : pyarrow.Schema """ cdef: shared_ptr[CSchema] sp_arrow_schema with nogil: check_status(deref(self.reader).ReadSchema(&sp_arrow_schema)) return pyarrow_wrap_schema(sp_arrow_schema) def nrows(self): return deref(self.reader).NumberOfRows() def nstripes(self): return deref(self.reader).NumberOfStripes() def read_stripe(self, n, include_indices=None): cdef: shared_ptr[CRecordBatch] sp_record_batch RecordBatch batch int64_t stripe std_vector[int] indices stripe = n if include_indices is None: with nogil: (check_status(deref(self.reader) .ReadStripe(stripe, &sp_record_batch))) else: indices = include_indices with nogil: (check_status(deref(self.reader) .ReadStripe(stripe, indices, &sp_record_batch))) return pyarrow_wrap_batch(sp_record_batch) def read(self, include_indices=None): cdef: shared_ptr[CTable] sp_table std_vector[int] indices if include_indices is None: with nogil: check_status(deref(self.reader).Read(&sp_table)) else: indices = include_indices with nogil: check_status(deref(self.reader).Read(indices, &sp_table)) return pyarrow_wrap_table(sp_table)