# logstash 采集 oracle 表数据

# 背景

因为项目数据量比较大，而且需要作一些 metrics 给大家看，所以准备先将数据存储到 ES，然后通过 Grafana 作图。

为什么不从 Grafana 直连 oracle 呢？因为 Grafana 连接 oracle 需要安装插件，并且这个插件需要付费使用（参考 Grafana 官方的 Oracle 数据源插件说明）。所以为了节约不必要的花费，我们将数据先转存到 ES（正好我们之前有搭建过 ES 集群），然后在 Grafana 里配置 ES 作为数据源。

现配置 logstash 采集 oracle 数据。

# 安装 logstash

可以通过压缩包（tar.gz）或者 RPM 包等形式安装，推荐 RPM 包形式安装。压缩包的形式安装可以玩一玩，但是由于无法直接使用 systemctl 等功能，后续配置会比较麻烦。

# 前期准备

	[root@chumingcheng logstash]# wget https://artifacts.elastic.co/downloads/logstash/logstash-7.6.1.rpm
	[root@chumingcheng logstash]# rpm -ivh logstash-7.6.1.rpm
	[root@chumingcheng logstash]# cd /var/log/logstash/ # logstash 日志 log 目录
	[root@chumingcheng logstash]# cd /etc/logstash/conf.d/ # logstash 配置文件目录
	[root@chumingcheng conf.d]# touch table_user.sql # 创建采集数据的 sql 文件
	[root@chumingcheng conf.d]# touch table_user.conf # 创建 logstash 采集配置文件
	[root@chumingcheng conf.d]# vi table_user.sql # 编辑 sql

# 编辑 sql 脚本

select * from table_user where lastmodifiedtime > cast(:sql_last_value as date)

# 编辑 logstash 配置

[root@chumingcheng conf.d]# vi table_user.conf   # 编辑采集配置

先给一个简单的配置用来测试，很简单，接收控制台输入，然后将数据输出到 es，并且输出到控制台，所以这里有两个输出

input { stdin { } }
output {
  elasticsearch {
    index => "myindextest"
    hosts => ["localhost:9200"]
    user => "elastic"
    password => "********"
  }
  stdout { codec => rubydebug }
}

下面是一个正式的从 oracle 采集并输出到 es 的配置

input{
    stdin{
    }
    jdbc{
        jdbc_connection_string => "jdbc:oracle:thin:@xx.xx.xxx.xx:1701/xxxservicename"
        jdbc_user => "xxx"
        jdbc_password => "********"
        jdbc_driver_library => "/opt/jdbc/ojdbc8-full/ojdbc8.jar"
        jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "100000"
        jdbc_fetch_size => "100000"
        statement_filepath => "/etc/logstash/conf.d/table_user.sql"
        record_last_run => "true"
        use_column_value => "true"
        tracking_column => "lastmodifiedtime"
        tracking_column_type => "timestamp"
        last_run_metadata_path => "/var/log/logstash/last_run_metadata_for_table_user"
        clean_run => "false"
        schedule => "* * * * *"
        type => "table_user"
    }
}


output{
    if [type] == "table_user" {
        elasticsearch{
            hosts => ["xxx.xxx.xxx.xxx:9200"]
            user => "elastic"
            password => "**********"
            index => "table_user"
            document_id => "%{column1}_%{column2}_%{column3}"
        }
    }
}

测试配置并启动

[root@chumingcheng conf.d]# /usr/share/logstash/bin/logstash -f table_user.conf -t   # 测试配置

[root@chumingcheng conf.d]# /usr/share/logstash/bin/logstash -f table_user.conf   # 启动测试，看 log 调试

[root@chumingcheng conf.d]# systemctl start logstash   # 上一步没问题的话，执行这个命令后台启动

# 问题

启动 logstash 时可能遇到 /tmp 路径访问的问题，执行下 mount -o remount,exec /tmp 试试
也可能遇到 ojdbc driver 的问题，可以尝试更换下版本，比如从 ojdbc10 降低到 ojdbc8
必要时使用 chmod 777 ojdbc10 给个 777 权限试试（注意是 chmod 777，不带减号；chmod -777 的含义是去掉这些权限）

# 注意

因为 sql 是以 lastmodifiedtime 为准做增量采集的，所以 oracle 里已经被删除的老数据没有办法同步删除，可以尝试通过 api 的方式删除 es 里不想要的数据
第一次启动 logstash 后，logstash 会自动去配置的表里拿所有数据，将这些数据同步到 es 里你新建的 index 下面
注意给你的 index 创建一个 index pattern，这样你就能从 kibana 里通过 Lucene 或者 KQL 查询你的数据了
注意 logstash 的 output 里有设置一个 document_id，这样后续如果 logstash 再次抓到了一条相同 document_id 的数据时，就会覆盖前面一条老的数据，所以尽量用 oracle 表主键或联合主键作为 document_id
注意用 systemctl 启动前进行配置文件测试和启动测试，这样方便看 log 找问题
注意 table_user 在 input 和 output 是要关联起来的
重要：理论上我应该使用更高版本的 logstash 去规避 log4j 漏洞，但是配合相应的 es 和 kibana 都要做 upgrade，所以没有去做
我的 es 和 kibana 都是普通用户启动的，因为有用户级别的警告，不过 logstash 我直接用 root 用户启动了，暂时没发现什么问题
tracking_column 指定的列（这里是 lastmodifiedtime）必须是 timestamp 类型或者 numeric 类型（即 tracking_column_type 的两个可选值），timestamp 对应 oracle 里 Date 类型，numeric 可以对应自增主键
sql_last_value 是固定的命名，不要试图改变它
我这里是一个 conf 配置文件采集一个 oracle 表，从上面一个 conf 对应一个 sql 文件就可以看出。
实际中会有很多 logstash 的 conf 配置，直接放在 conf.d 目录下就行，通过 systemctl 启动会自动加载 conf.d 目录下所有 conf 文件（默认会合并成同一个 pipeline，所以 output 里要用 type 判断来区分不同的数据），不用一一指定配置文件启动。

logstash