快捷搜索:  汽车  科技

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId。摘要:此处从context中取得TUniqueId对象之后,将其转化成16进制。从下方的Impala的Java源码中可以看到,SessionId存放在TUniqueId对象的两个字段中,取出时需将其转换成16进制字符串。相关命令:cp -r toolchain/gflags-2.2.0-p1/include/gflags/ /root/impala-udf/include;cp -r toolchain/glog-0.3.4-p3/include/glog/ /root/impala-udf/include;cp -r toolchain/rapidjson-0.11/include/rapidjson/ /root/impala-udf/include;cp -r toolchain/thrift-0.9.0-p11/include/thrift/ /root/impala-udf/include;……

请先确认toolchain目录下是否已下载thrift、glog、gflags、rapidjson这几个依赖。

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId(1)

7. 创建UDF文件

创建文件夹:

# Create the UDF working directory and its include/ subdirectory.
# Each command must be on its own line; fused together, the second "mkdir"
# would be treated as a directory name to create.
mkdir -p /root/impala-udf
mkdir -p /root/impala-udf/include

将源码包内相关包复制到include下

# Copy the third-party and Impala backend headers into the UDF include directory.
# The source paths are relative -- presumably this is run from the root of the
# Impala source tree; confirm before running.
cp -r toolchain/gflags-2.2.0-p1/include/gflags/ /root/impala-udf/include
cp -r toolchain/glog-0.3.4-p3/include/glog/ /root/impala-udf/include
cp -r toolchain/rapidjson-0.11/include/rapidjson/ /root/impala-udf/include
cp -r toolchain/thrift-0.9.0-p11/include/thrift/ /root/impala-udf/include
# Thrift-generated C++ sources and the backend sources themselves.
cp -r be/generated-sources/gen-cpp /root/impala-udf/include
cp -r be/src/* /root/impala-udf/include

编写获取SessionId的C++代码:

编写getSessionId.h

#ifndef SAMPLES_UDF_H #define SAMPLES_UDF_H #include <udf/udf.h> #include <udf/udf-internal.h> using namespace impala_udf; StringVal GetSessionId(FunctionContext* context); #endif

编写getSessionId.cc

从下方的Impala的Java源码中可以看到,SessionId存放在TUniqueId对象的两个字段中,取出时需将其转换成16进制字符串:

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId(2)

此处从context中取得TUniqueId对象之后,将其转化成16进制。

#define __STDC_FORMAT_MACROS #include <stdlib.h> #include "getSessionId.h" #include <udf/udf.h> #include <udf/udf-internal.h> #include <runtime/runtime-state.h> #include <string.h> #include <vector> #include <stdio.h> #include <inttypes.h> #include<iostream> using namespace std; namespace impala{ class TUniqueId; } using namespace impala_udf; string DecIntToHexStr(unsigned long long num) { string str; long long Temp = num / 16; int left = num % 16; if (Temp > 0) str = DecIntToHexStr(Temp); if (left < 10) str = (left '0'); else str = ('A' left - 10); return str; } StringVal getSessionId(FunctionContext* context) { impala::TUniqueId id = context->impl()->state()->session_id(); string idhi = DecIntToHexStr(id.hi); string idlo = DecIntToHexStr(id.lo); string sessionid = idhi ":" idlo; return *(new StringVal(sessionid.data())); }

编写CMakeLists.txt文件:

# Build script for the getSessionId Impala UDF.
#
# Produces:
#   * a shared library "getSessionId" in <binary dir>/build
#   * (when clang is found) an LLVM IR module build/getSessionId.ll via the
#     "getSessionId-ir" target

# The <min>...<max> form keeps 3.2.3 as the minimum while letting newer CMake
# releases use their current policies instead of rejecting the old version.
cmake_minimum_required(VERSION 3.2.3...3.27)
project(impala_udf_get_session_id LANGUAGES CXX)

# Where generated libraries, binaries and IR modules are placed.
set(UDF_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/build")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${UDF_OUTPUT_DIR}")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${UDF_OUTPUT_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${UDF_OUTPUT_DIR}")

# Headers copied from the Impala source tree live here.
set(UDF_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")

# clang is optional; it is only needed for the IR cross-compilation target.
find_program(CLANG_EXECUTABLE clang)

# Flags passed to clang when cross compiling a source file to an IR module.
set(IR_COMPILE_FLAGS "-emit-llvm" "-O3" "-c" "-I${UDF_INCLUDE_DIR}")

# compile_to_ir(<src-file>)
#
# Generates a rule that cross compiles <src-file> to an IR module and a
# "<basename>-ir" target (part of ALL) that builds it.
#   e.g. compile_to_ir(test.cc) generates the "test-ir" make target and
#        writes ${UDF_OUTPUT_DIR}/test.ll.
# A relative <src-file> is resolved against the current source directory.
function(compile_to_ir SRC_FILE)
  get_filename_component(base_name "${SRC_FILE}" NAME_WE)
  get_filename_component(src_path "${SRC_FILE}" ABSOLUTE)
  set(output_file "${UDF_OUTPUT_DIR}/${base_name}.ll")
  add_custom_command(
    OUTPUT "${output_file}"
    # The output directory may not exist yet when this rule runs first.
    COMMAND "${CMAKE_COMMAND}" -E make_directory "${UDF_OUTPUT_DIR}"
    COMMAND "${CLANG_EXECUTABLE}" ${IR_COMPILE_FLAGS} "${src_path}" -o "${output_file}"
    DEPENDS "${src_path}"
    COMMENT "Cross compiling ${SRC_FILE} to IR"
    VERBATIM
  )
  add_custom_target(${base_name}-ir ALL DEPENDS "${output_file}")
endfunction()

# Build the UDF into a shared library.
add_library(getSessionId SHARED getSessionId.cc)
target_include_directories(getSessionId PRIVATE "${UDF_INCLUDE_DIR}")
# Always emit debug information, as the original global CXX flags did.
target_compile_options(getSessionId PRIVATE -g -ggdb)
# ImpalaUdf is linked by name; it must be discoverable by the linker.
target_link_libraries(getSessionId PRIVATE ImpalaUdf)

# Custom target to cross compile the UDF to IR.
if(CLANG_EXECUTABLE)
  compile_to_ir(getSessionId.cc)
endif()

8. 编译UDF

# Configure the project, using the current directory as the source directory.
cmake .

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId(3)

# Build the default target of the Makefile in the current directory.
make

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId(4)

在该目录的build下可看见编译好的文件

impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId(5)

猜您喜欢: