impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId
impala分批读取数据:0518-如何在Impala中使用UDF获取SessionId。此处从context中取得TUniqueId对象之后,将其转化成16进制。从下方的Impala的JAVA源码中可以看到,SessionId存放在TUniqueId对象的两个字段中,取出时需将其转换成16进制字符串。复制依赖头文件的命令如下:cp -r toolchain/gflags-2.2.0-p1/include/gflags/ /root/impala-udf/include;cp -r toolchain/glog-0.3.4-p3/include/glog/ /root/impala-udf/include;cp -r toolchain/rapidjson-0.11/include/rapidjson/ /root/impala-udf/include;cp -r toolchain/thrift-0.9.0-p11/include/thrift/ /root/impala-udf/include;……
确认toolchain目录下是否已下载thrift、glog、gflags、rapidjson依赖。
7. 创建UDF文件
创建文件夹:
# Create the UDF working directory and the include directory beneath it.
# Each command must be on its own line; fused together they would run as a
# single mkdir and create stray directories.
mkdir -p /root/impala-udf
mkdir -p /root/impala-udf/include
将源码包内相关包复制到include下
# Copy the third-party and Impala headers that the UDF sources include into
# /root/impala-udf/include. The relative paths (toolchain/, be/) presumably
# assume the current directory is the root of the Impala source tree.
# One cp per line: fused together they would run as a single cp whose last
# argument is taken as the destination.
cp -r toolchain/gflags-2.2.0-p1/include/gflags/ /root/impala-udf/include
cp -r toolchain/glog-0.3.4-p3/include/glog/ /root/impala-udf/include
cp -r toolchain/rapidjson-0.11/include/rapidjson/ /root/impala-udf/include
cp -r toolchain/thrift-0.9.0-p11/include/thrift/ /root/impala-udf/include
cp -r be/generated-sources/gen-cpp /root/impala-udf/include
cp -r be/src/* /root/impala-udf/include
编写获取SessionId的C++ 代码:
编写getSessionId.h
#ifndef SAMPLES_UDF_H #define SAMPLES_UDF_H #include <udf/udf.h> #include <udf/udf-internal.h> using namespace impala_udf; StringVal GetSessionId(FunctionContext* context); #endif
编写getSessionId.cc
从下方的Impala的JAVA源码中可以看到,SessionId存放在TUniqueId对象的两个字段中,取出时需将其转换成16进制字符串:
此处从context中取得TUniqueId对象之后,将其hi和lo两个字段分别转化成16进制字符串,并以":"拼接。
#define __STDC_FORMAT_MACROS #include <stdlib.h> #include "getSessionId.h" #include <udf/udf.h> #include <udf/udf-internal.h> #include <runtime/runtime-state.h> #include <string.h> #include <vector> #include <stdio.h> #include <inttypes.h> #include<iostream> using namespace std; namespace impala{ class TUniqueId; } using namespace impala_udf; string DecIntToHexStr(unsigned long long num) { string str; long long Temp = num / 16; int left = num % 16; if (Temp > 0) str = DecIntToHexStr(Temp); if (left < 10) str = (left '0'); else str = ('A' left - 10); return str; } StringVal getSessionId(FunctionContext* context) { impala::TUniqueId id = context->impl()->state()->session_id(); string idhi = DecIntToHexStr(id.hi); string idlo = DecIntToHexStr(id.lo); string sessionid = idhi ":" idlo; return *(new StringVal(sessionid.data())); }
编写CMakeLists.txt文件:
cmake_minimum_required(VERSION 3.2.3)
project(impala_session_id_udf LANGUAGES CXX)

# Where to put generated libraries, binaries and IR modules: a "build"
# directory beneath the binary directory. With an in-source `cmake .` this is
# ./build next to the sources.
set(UDF_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/build")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${UDF_OUTPUT_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${UDF_OUTPUT_DIR}")

# Directory holding the headers copied for the UDF.
set(UDF_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")

# clang is optional; it is only needed to cross compile the UDF to LLVM IR.
find_program(CLANG_EXECUTABLE clang)

# Flags used when cross compiling a source file to an IR module.
set(IR_COMPILE_FLAGS "-emit-llvm" "-O3" "-c" "-I${UDF_INCLUDE_DIR}")

# Function to generate a rule to cross compile a source file to an IR module.
#   SRC_FILE - the .cc source file, relative to the current source directory.
# Defines a "<basename>-ir" target (part of ALL) that produces
# ${UDF_OUTPUT_DIR}/<basename>.ll.
# e.g. COMPILE_TO_IR(test.cc) generates the "test-ir" make target.
function(COMPILE_TO_IR SRC_FILE)
  get_filename_component(BASE_NAME "${SRC_FILE}" NAME_WE)
  # Absolute paths keep the rule valid regardless of the working directory.
  get_filename_component(SRC_PATH "${SRC_FILE}" ABSOLUTE)
  set(OUTPUT_FILE "${UDF_OUTPUT_DIR}/${BASE_NAME}.ll")
  add_custom_command(
    OUTPUT "${OUTPUT_FILE}"
    # The output directory may not exist yet when this rule runs first.
    COMMAND "${CMAKE_COMMAND}" -E make_directory "${UDF_OUTPUT_DIR}"
    COMMAND "${CLANG_EXECUTABLE}" ${IR_COMPILE_FLAGS} "${SRC_PATH}" -o "${OUTPUT_FILE}"
    DEPENDS "${SRC_PATH}"
    VERBATIM)
  add_custom_target(${BASE_NAME}-ir ALL DEPENDS "${OUTPUT_FILE}")
endfunction()

# Build the UDA/UDFs into a shared library.
add_library(getSessionId SHARED getSessionId.cc)
target_include_directories(getSessionId PRIVATE "${UDF_INCLUDE_DIR}")
target_compile_options(getSessionId PRIVATE -g -ggdb)
target_link_libraries(getSessionId PRIVATE ImpalaUdf)

# Custom targets to cross compile UDA/UDF to IR (only when clang was found).
if(CLANG_EXECUTABLE)
  COMPILE_TO_IR(getSessionId.cc)
endif()
8. 编译UDF
# Generate the build files in the current directory (in-source build).
cmake .
# Build the targets defined by the generated build files.
make
在该目录的build下可看见编译好的文件