1.下载

download-seatunnel | Apache SeaTunnel

前置条件:需要先安装 JDK 8 及以上版本,并配置好 JAVA_HOME 环境变量(参见:配置JDK环境)。

[root@bigdata01 apk]# cd /opt/apk
[root@bigdata01 apk]# export version="2.3.8"
[root@bigdata01 apk]# wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
--2024-10-24 09:46:53--  https://archive.apache.org/dist/seatunnel/2.3.8/apache-seatunnel-2.3.8-bin.tar.gz
Resolving archive.apache.org (archive.apache.org)... 65.108.204.189, 2a01:4f9:1a:a084::2
Connecting to archive.apache.org (archive.apache.org)|65.108.204.189|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 161240478 (154M) [application/x-gzip]
Saving to: ‘apache-seatunnel-2.3.8-bin.tar.gz’

100%[==========================================================================================================>] 161,240,478 13.8MB/s   in 12s    

2024-10-24 09:47:06 (12.5 MB/s) - ‘apache-seatunnel-2.3.8-bin.tar.gz’ saved [161240478/161240478]
[root@bigdata01 apk]# tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" -C /opt/

 

2.安装插件

从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用它时,您需要执行以下命令来安装连接器:(当然,您也可以从 Apache Maven Repository 手动下载连接器,然后将其移动至connectors/目录下;如果是2.3.5之前的版本,则需要放入connectors/seatunnel目录下)。

2.1自动下载插件

[root@bigdata01 seatunnel]# cd /opt/apache-seatunnel-2.3.8/
# 通过配置config/plugin_config来指定所需要的插件,默认全部下载
[root@bigdata01 apache-seatunnel-2.3.8]# mv ./config/plugin_config ./config/plugin_config_bak
[root@bigdata01 apache-seatunnel-2.3.8]# vi ./config/plugin_config
--seatunnel-connectors--
connector-jdbc
connector-console
--end--
[root@bigdata01 apache-seatunnel-2.3.8]# echo ${version}
2.3.8
[root@bigdata01 apache-seatunnel-2.3.8]# sh bin/install-plugin.sh ${version}

2.2手动下载插件

 

6b3c181c4b2e44b81b43eea95170affe.png

 

73e9dd99be2af32137b1d7a9c25d8c26.png

 

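3.添加数据库驱动

创建 plugins/jdbc/lib/ 目录,并将所需数据库的 JDBC 驱动 jar 包(例如 DmJdbcDriver18.jar、mysql-connector-java-8.0.28.jar)上传到该目录下: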

[root@bigdata01 apache-seatunnel-2.3.8]# mkdir -p plugins/jdbc/lib/
[root@bigdata01 apache-seatunnel-2.3.8]# ll plugins/jdbc/lib/
total 3276
-rw-r--r-- 1 root root  873219 Mar 28  2024 DmJdbcDriver18.jar                                                                                      
-rw-r--r-- 1 root root 2476480 Jun 18 15:24 mysql-connector-java-8.0.28.jar                                                                         

 

4.配置成员节点

[root@bigdata01 apache-seatunnel-2.3.8]# vi config/hazelcast.yaml
###如果是单机部署,则不需要改动########

 

7edd2cd998d4ec872ea07d4238ce25e2.png

5.集群配置优化

##方式一,启动 SeaTunnel 引擎时添加 JVM 选项
[root@bigdata01 apache-seatunnel-2.3.8]# ./bin/seatunnel-cluster.sh -DJvmOption="-Xms2G -Xmx2G"

##方式二(推荐)
[root@bigdata01 apache-seatunnel-2.3.8]# vi bin/seatunnel-cluster.sh

 

1085f2097379f22dc73b6d3f1cb4a86b.png

 

6.配置seatunnel.yaml

[root@bigdata01 apache-seatunnel-2.3.8]# vi config/seatunnel.yaml
seatunnel:
  engine:
    history-job-expire-minutes: 240    # 保存历史作业的时间(单位:分钟),过大会造成内存溢出
    backup-count: 1
    classloader-cache-mode: true
    queue-type: blockingqueue
    print-execution-info-interval: 60
    print-job-metrics-info-interval: 60
    slot-service:
      dynamic-slot: true
    checkpoint:
      interval: 10000
      timeout: 60000
      storage:
        type: hdfs
        max-retained: 3
        plugin-config:
          namespace: /tmp/seatunnel/checkpoint_snapshot
          storage.type: hdfs
          fs.defaultFS: file:///tmp/ # Ensure that the directory has written permission

backup-count: 建议值为 max(1, min(5, N/2)),其中 N 是集群节点的数量。(部分文档写作 min(1, max(5, N/2)),但该表达式恒等于 1,应为笔误。)

SeaTunnel Engine 基于 Hazelcast IMDG 实现集群管理。集群的状态数据(作业运行状态,资源状态)存储在 Hazelcast IMap 中。Hazelcast IMap 中保存的数据将分布存储在集群的所有节点中。Hazelcast 会对存储在 IMap 中的数据进行分区,每个分区可以指定备份的数量。因此,SeaTunnel Engine 可以在不使用其他服务(例如 zookeeper)的情况下实现集群 HA。backup-count 用于定义同步备份的数量。例如,如果设置为 1,分区的备份将放在另外一个成员上;如果设置为 2,则会放在另外两个成员上。

 

7.服务启动

# 集群启动命令
[root@bigdata01 apache-seatunnel-2.3.8]# nohup ./bin/seatunnel-cluster.sh > seatunnel_running.log 2>&1 &

# 任务启动目录
[root@bigdata01 apache-seatunnel-2.3.8]# mkdir -p config/tasks/
[root@bigdata01 apache-seatunnel-2.3.8]# vi config/tasks/mysql2mysql_test.conf
[root@bigdata01 apache-seatunnel-2.3.8]# ./bin/seatunnel.sh --config  ./config/tasks/mysql2mysql_test.conf
env {
  job.mode = "BATCH"
}
 
source {
    jdbc {
        url = "jdbc:mysql://127.0.0.1:3306/test?allowMultiQueries=true&characterEncoding=utf-8&useSSL=false"
        driver = "com.mysql.cj.jdbc.Driver"
        user = "user"
        password = "password"
        query = "select * from test"
    }
}
 
transform {
}
 
sink {
    jdbc {
        url = "jdbc:mysql://127.0.0.1:3306/dm?allowMultiQueries=true&characterEncoding=utf-8&useSSL=false"
        driver = "com.mysql.cj.jdbc.Driver"
        user = "user"
        password = "password"
        query = "insert into dm_test (id) values(?)"
    }
 
}

 

Logo

DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。

更多推荐