testElasticSearchRouting.ktr 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <transformation>
  3. <info>
  4. <name>testElasticSearchRouting</name>
  5. <description/>
  6. <extended_description/>
  7. <trans_version/>
  8. <trans_type>Normal</trans_type>
  9. <directory>/</directory>
  10. <parameters>
  11. </parameters>
  12. <log>
  13. <trans-log-table>
  14. <connection/>
  15. <schema/>
  16. <table/>
  17. <size_limit_lines/>
  18. <interval/>
  19. <timeout_days/>
  20. <field>
  21. <id>ID_BATCH</id>
  22. <enabled>Y</enabled>
  23. <name>ID_BATCH</name>
  24. </field>
  25. <field>
  26. <id>CHANNEL_ID</id>
  27. <enabled>Y</enabled>
  28. <name>CHANNEL_ID</name>
  29. </field>
  30. <field>
  31. <id>TRANSNAME</id>
  32. <enabled>Y</enabled>
  33. <name>TRANSNAME</name>
  34. </field>
  35. <field>
  36. <id>STATUS</id>
  37. <enabled>Y</enabled>
  38. <name>STATUS</name>
  39. </field>
  40. <field>
  41. <id>LINES_READ</id>
  42. <enabled>Y</enabled>
  43. <name>LINES_READ</name>
  44. <subject/>
  45. </field>
  46. <field>
  47. <id>LINES_WRITTEN</id>
  48. <enabled>Y</enabled>
  49. <name>LINES_WRITTEN</name>
  50. <subject/>
  51. </field>
  52. <field>
  53. <id>LINES_UPDATED</id>
  54. <enabled>Y</enabled>
  55. <name>LINES_UPDATED</name>
  56. <subject/>
  57. </field>
  58. <field>
  59. <id>LINES_INPUT</id>
  60. <enabled>Y</enabled>
  61. <name>LINES_INPUT</name>
  62. <subject/>
  63. </field>
  64. <field>
  65. <id>LINES_OUTPUT</id>
  66. <enabled>Y</enabled>
  67. <name>LINES_OUTPUT</name>
  68. <subject/>
  69. </field>
  70. <field>
  71. <id>LINES_REJECTED</id>
  72. <enabled>Y</enabled>
  73. <name>LINES_REJECTED</name>
  74. <subject/>
  75. </field>
  76. <field>
  77. <id>ERRORS</id>
  78. <enabled>Y</enabled>
  79. <name>ERRORS</name>
  80. </field>
  81. <field>
  82. <id>STARTDATE</id>
  83. <enabled>Y</enabled>
  84. <name>STARTDATE</name>
  85. </field>
  86. <field>
  87. <id>ENDDATE</id>
  88. <enabled>Y</enabled>
  89. <name>ENDDATE</name>
  90. </field>
  91. <field>
  92. <id>LOGDATE</id>
  93. <enabled>Y</enabled>
  94. <name>LOGDATE</name>
  95. </field>
  96. <field>
  97. <id>DEPDATE</id>
  98. <enabled>Y</enabled>
  99. <name>DEPDATE</name>
  100. </field>
  101. <field>
  102. <id>REPLAYDATE</id>
  103. <enabled>Y</enabled>
  104. <name>REPLAYDATE</name>
  105. </field>
  106. <field>
  107. <id>LOG_FIELD</id>
  108. <enabled>Y</enabled>
  109. <name>LOG_FIELD</name>
  110. </field>
  111. <field>
  112. <id>EXECUTING_SERVER</id>
  113. <enabled>N</enabled>
  114. <name>EXECUTING_SERVER</name>
  115. </field>
  116. <field>
  117. <id>EXECUTING_USER</id>
  118. <enabled>N</enabled>
  119. <name>EXECUTING_USER</name>
  120. </field>
  121. <field>
  122. <id>CLIENT</id>
  123. <enabled>N</enabled>
  124. <name>CLIENT</name>
  125. </field>
  126. </trans-log-table>
  127. <perf-log-table>
  128. <connection/>
  129. <schema/>
  130. <table/>
  131. <interval/>
  132. <timeout_days/>
  133. <field>
  134. <id>ID_BATCH</id>
  135. <enabled>Y</enabled>
  136. <name>ID_BATCH</name>
  137. </field>
  138. <field>
  139. <id>SEQ_NR</id>
  140. <enabled>Y</enabled>
  141. <name>SEQ_NR</name>
  142. </field>
  143. <field>
  144. <id>LOGDATE</id>
  145. <enabled>Y</enabled>
  146. <name>LOGDATE</name>
  147. </field>
  148. <field>
  149. <id>TRANSNAME</id>
  150. <enabled>Y</enabled>
  151. <name>TRANSNAME</name>
  152. </field>
  153. <field>
  154. <id>STEPNAME</id>
  155. <enabled>Y</enabled>
  156. <name>STEPNAME</name>
  157. </field>
  158. <field>
  159. <id>STEP_COPY</id>
  160. <enabled>Y</enabled>
  161. <name>STEP_COPY</name>
  162. </field>
  163. <field>
  164. <id>LINES_READ</id>
  165. <enabled>Y</enabled>
  166. <name>LINES_READ</name>
  167. </field>
  168. <field>
  169. <id>LINES_WRITTEN</id>
  170. <enabled>Y</enabled>
  171. <name>LINES_WRITTEN</name>
  172. </field>
  173. <field>
  174. <id>LINES_UPDATED</id>
  175. <enabled>Y</enabled>
  176. <name>LINES_UPDATED</name>
  177. </field>
  178. <field>
  179. <id>LINES_INPUT</id>
  180. <enabled>Y</enabled>
  181. <name>LINES_INPUT</name>
  182. </field>
  183. <field>
  184. <id>LINES_OUTPUT</id>
  185. <enabled>Y</enabled>
  186. <name>LINES_OUTPUT</name>
  187. </field>
  188. <field>
  189. <id>LINES_REJECTED</id>
  190. <enabled>Y</enabled>
  191. <name>LINES_REJECTED</name>
  192. </field>
  193. <field>
  194. <id>ERRORS</id>
  195. <enabled>Y</enabled>
  196. <name>ERRORS</name>
  197. </field>
  198. <field>
  199. <id>INPUT_BUFFER_ROWS</id>
  200. <enabled>Y</enabled>
  201. <name>INPUT_BUFFER_ROWS</name>
  202. </field>
  203. <field>
  204. <id>OUTPUT_BUFFER_ROWS</id>
  205. <enabled>Y</enabled>
  206. <name>OUTPUT_BUFFER_ROWS</name>
  207. </field>
  208. </perf-log-table>
  209. <channel-log-table>
  210. <connection/>
  211. <schema/>
  212. <table/>
  213. <timeout_days/>
  214. <field>
  215. <id>ID_BATCH</id>
  216. <enabled>Y</enabled>
  217. <name>ID_BATCH</name>
  218. </field>
  219. <field>
  220. <id>CHANNEL_ID</id>
  221. <enabled>Y</enabled>
  222. <name>CHANNEL_ID</name>
  223. </field>
  224. <field>
  225. <id>LOG_DATE</id>
  226. <enabled>Y</enabled>
  227. <name>LOG_DATE</name>
  228. </field>
  229. <field>
  230. <id>LOGGING_OBJECT_TYPE</id>
  231. <enabled>Y</enabled>
  232. <name>LOGGING_OBJECT_TYPE</name>
  233. </field>
  234. <field>
  235. <id>OBJECT_NAME</id>
  236. <enabled>Y</enabled>
  237. <name>OBJECT_NAME</name>
  238. </field>
  239. <field>
  240. <id>OBJECT_COPY</id>
  241. <enabled>Y</enabled>
  242. <name>OBJECT_COPY</name>
  243. </field>
  244. <field>
  245. <id>REPOSITORY_DIRECTORY</id>
  246. <enabled>Y</enabled>
  247. <name>REPOSITORY_DIRECTORY</name>
  248. </field>
  249. <field>
  250. <id>FILENAME</id>
  251. <enabled>Y</enabled>
  252. <name>FILENAME</name>
  253. </field>
  254. <field>
  255. <id>OBJECT_ID</id>
  256. <enabled>Y</enabled>
  257. <name>OBJECT_ID</name>
  258. </field>
  259. <field>
  260. <id>OBJECT_REVISION</id>
  261. <enabled>Y</enabled>
  262. <name>OBJECT_REVISION</name>
  263. </field>
  264. <field>
  265. <id>PARENT_CHANNEL_ID</id>
  266. <enabled>Y</enabled>
  267. <name>PARENT_CHANNEL_ID</name>
  268. </field>
  269. <field>
  270. <id>ROOT_CHANNEL_ID</id>
  271. <enabled>Y</enabled>
  272. <name>ROOT_CHANNEL_ID</name>
  273. </field>
  274. </channel-log-table>
  275. <step-log-table>
  276. <connection/>
  277. <schema/>
  278. <table/>
  279. <timeout_days/>
  280. <field>
  281. <id>ID_BATCH</id>
  282. <enabled>Y</enabled>
  283. <name>ID_BATCH</name>
  284. </field>
  285. <field>
  286. <id>CHANNEL_ID</id>
  287. <enabled>Y</enabled>
  288. <name>CHANNEL_ID</name>
  289. </field>
  290. <field>
  291. <id>LOG_DATE</id>
  292. <enabled>Y</enabled>
  293. <name>LOG_DATE</name>
  294. </field>
  295. <field>
  296. <id>TRANSNAME</id>
  297. <enabled>Y</enabled>
  298. <name>TRANSNAME</name>
  299. </field>
  300. <field>
  301. <id>STEPNAME</id>
  302. <enabled>Y</enabled>
  303. <name>STEPNAME</name>
  304. </field>
  305. <field>
  306. <id>STEP_COPY</id>
  307. <enabled>Y</enabled>
  308. <name>STEP_COPY</name>
  309. </field>
  310. <field>
  311. <id>LINES_READ</id>
  312. <enabled>Y</enabled>
  313. <name>LINES_READ</name>
  314. </field>
  315. <field>
  316. <id>LINES_WRITTEN</id>
  317. <enabled>Y</enabled>
  318. <name>LINES_WRITTEN</name>
  319. </field>
  320. <field>
  321. <id>LINES_UPDATED</id>
  322. <enabled>Y</enabled>
  323. <name>LINES_UPDATED</name>
  324. </field>
  325. <field>
  326. <id>LINES_INPUT</id>
  327. <enabled>Y</enabled>
  328. <name>LINES_INPUT</name>
  329. </field>
  330. <field>
  331. <id>LINES_OUTPUT</id>
  332. <enabled>Y</enabled>
  333. <name>LINES_OUTPUT</name>
  334. </field>
  335. <field>
  336. <id>LINES_REJECTED</id>
  337. <enabled>Y</enabled>
  338. <name>LINES_REJECTED</name>
  339. </field>
  340. <field>
  341. <id>ERRORS</id>
  342. <enabled>Y</enabled>
  343. <name>ERRORS</name>
  344. </field>
  345. <field>
  346. <id>LOG_FIELD</id>
  347. <enabled>N</enabled>
  348. <name>LOG_FIELD</name>
  349. </field>
  350. </step-log-table>
  351. <metrics-log-table>
  352. <connection/>
  353. <schema/>
  354. <table/>
  355. <timeout_days/>
  356. <field>
  357. <id>ID_BATCH</id>
  358. <enabled>Y</enabled>
  359. <name>ID_BATCH</name>
  360. </field>
  361. <field>
  362. <id>CHANNEL_ID</id>
  363. <enabled>Y</enabled>
  364. <name>CHANNEL_ID</name>
  365. </field>
  366. <field>
  367. <id>LOG_DATE</id>
  368. <enabled>Y</enabled>
  369. <name>LOG_DATE</name>
  370. </field>
  371. <field>
  372. <id>METRICS_DATE</id>
  373. <enabled>Y</enabled>
  374. <name>METRICS_DATE</name>
  375. </field>
  376. <field>
  377. <id>METRICS_CODE</id>
  378. <enabled>Y</enabled>
  379. <name>METRICS_CODE</name>
  380. </field>
  381. <field>
  382. <id>METRICS_DESCRIPTION</id>
  383. <enabled>Y</enabled>
  384. <name>METRICS_DESCRIPTION</name>
  385. </field>
  386. <field>
  387. <id>METRICS_SUBJECT</id>
  388. <enabled>Y</enabled>
  389. <name>METRICS_SUBJECT</name>
  390. </field>
  391. <field>
  392. <id>METRICS_TYPE</id>
  393. <enabled>Y</enabled>
  394. <name>METRICS_TYPE</name>
  395. </field>
  396. <field>
  397. <id>METRICS_VALUE</id>
  398. <enabled>Y</enabled>
  399. <name>METRICS_VALUE</name>
  400. </field>
  401. </metrics-log-table>
  402. </log>
  403. <maxdate>
  404. <connection/>
  405. <table/>
  406. <field/>
  407. <offset>0.0</offset>
  408. <maxdiff>0.0</maxdiff>
  409. </maxdate>
  410. <size_rowset>10000</size_rowset>
  411. <sleep_time_empty>50</sleep_time_empty>
  412. <sleep_time_full>50</sleep_time_full>
  413. <unique_connections>N</unique_connections>
  414. <feedback_shown>Y</feedback_shown>
  415. <feedback_size>50000</feedback_size>
  416. <using_thread_priorities>Y</using_thread_priorities>
  417. <shared_objects_file/>
  418. <capture_step_performance>N</capture_step_performance>
  419. <step_performance_capturing_delay>1000</step_performance_capturing_delay>
  420. <step_performance_capturing_size_limit>100</step_performance_capturing_size_limit>
  421. <dependencies>
  422. </dependencies>
  423. <partitionschemas>
  424. </partitionschemas>
  425. <slaveservers>
  426. </slaveservers>
  427. <clusterschemas>
  428. </clusterschemas>
  429. <created_user>-</created_user>
  430. <created_date>2019/05/17 17:06:44.305</created_date>
  431. <modified_user>-</modified_user>
  432. <modified_date>2019/05/17 17:06:44.305</modified_date>
  433. <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA=</key_for_session_key>
  434. <is_key_private>N</is_key_private>
  435. </info>
  436. <notepads>
  437. </notepads>
  438. <connection>
  439. <name>testSqlserver</name>
  440. <server>192.168.50.32</server>
  441. <type>MSSQLNATIVE</type>
  442. <access>Native</access>
  443. <database>MemberData</database>
  444. <port>1433</port>
  445. <username>ljhyPortal2016</username>
  446. <password>Encrypted 746573746ed7f9c1af00ce938a8b5aef398cc2fe8c</password>
  447. <servername/>
  448. <data_tablespace/>
  449. <index_tablespace/>
  450. <attributes>
  451. <attribute>
  452. <code>FORCE_IDENTIFIERS_TO_LOWERCASE</code>
  453. <attribute>N</attribute>
  454. </attribute>
  455. <attribute>
  456. <code>FORCE_IDENTIFIERS_TO_UPPERCASE</code>
  457. <attribute>N</attribute>
  458. </attribute>
  459. <attribute>
  460. <code>IS_CLUSTERED</code>
  461. <attribute>N</attribute>
  462. </attribute>
  463. <attribute>
  464. <code>MSSQLUseIntegratedSecurity</code>
  465. <attribute>false</attribute>
  466. </attribute>
  467. <attribute>
  468. <code>MSSQL_DOUBLE_DECIMAL_SEPARATOR</code>
  469. <attribute>N</attribute>
  470. </attribute>
  471. <attribute>
  472. <code>PORT_NUMBER</code>
  473. <attribute>1433</attribute>
  474. </attribute>
  475. <attribute>
  476. <code>PRESERVE_RESERVED_WORD_CASE</code>
  477. <attribute>Y</attribute>
  478. </attribute>
  479. <attribute>
  480. <code>QUOTE_ALL_FIELDS</code>
  481. <attribute>N</attribute>
  482. </attribute>
  483. <attribute>
  484. <code>SUPPORTS_BOOLEAN_DATA_TYPE</code>
  485. <attribute>Y</attribute>
  486. </attribute>
  487. <attribute>
  488. <code>SUPPORTS_TIMESTAMP_DATA_TYPE</code>
  489. <attribute>Y</attribute>
  490. </attribute>
  491. <attribute>
  492. <code>USE_POOLING</code>
  493. <attribute>N</attribute>
  494. </attribute>
  495. </attributes>
  496. </connection>
  497. <order>
  498. <hop>
  499. <from>表输入</from>
  500. <to>Java 代码</to>
  501. <enabled>Y</enabled>
  502. </hop>
  503. <hop>
  504. <from>Java 代码</from>
  505. <to>Elasticsearch bulk insert</to>
  506. <enabled>Y</enabled>
  507. </hop>
  508. </order>
  509. <step>
  510. <name>Elasticsearch bulk insert</name>
  511. <type>ElasticSearchBulk</type>
  512. <description/>
  513. <distribute>Y</distribute>
  514. <custom_distribution/>
  515. <copies>1</copies>
  516. <partitioning>
  517. <method>none</method>
  518. <schema_name/>
  519. </partitioning>
  520. <general>
  521. <index>twitter_%{date}</index>
  522. <type>_doc?routing=%{id}</type>
  523. <batchSize>50000</batchSize>
  524. <timeout>100</timeout>
  525. <timeoutUnit>SECONDS</timeoutUnit>
  526. <isJson>N</isJson>
  527. <idField>id</idField>
  528. <overwriteIfExists>Y</overwriteIfExists>
  529. <useOutput>N</useOutput>
  530. <stopOnError>Y</stopOnError>
  531. </general>
  532. <fields>
  533. <field>
  534. <columnName>date</columnName>
  535. <targetName>date</targetName>
  536. </field>
  537. <field>
  538. <columnName>date22</columnName>
  539. <targetName>date22</targetName>
  540. </field>
  541. <field>
  542. <columnName>id</columnName>
  543. <targetName>id</targetName>
  544. </field>
  545. <field>
  546. <columnName>name</columnName>
  547. <targetName>name</targetName>
  548. </field>
  549. <field>
  550. <columnName>twitter</columnName>
  551. <targetName>twitter</targetName>
  552. </field>
  553. </fields>
  554. <servers>
  555. <server>
  556. <address>192.168.50.32</address>
  557. <port>9300</port>
  558. </server>
  559. </servers>
  560. <settings>
  561. <setting>
  562. <name>cluster.name</name>
  563. <value>es</value>
  564. </setting>
  565. <setting>
  566. <name>custom.aliase.source</name>
  567. <value>crm_twitter</value>
  568. </setting>
  569. <setting>
  570. <name>custom.fields.date22</name>
  571. <value>{"type":"date","format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||strict_date_optional_time||epoch_millis"}</value>
  572. </setting>
  573. <setting>
  574. <name>custom.fields.twitter</name>
  575. <value>{"type":"text","analyzer": "ik_smart",
  576. "search_analyzer":"ik_smart","index":true}</value>
  577. </setting>
  578. <setting>
  579. <name>custom.index.number_of_replicas</name>
  580. <value>1</value>
  581. </setting>
  582. <setting>
  583. <name>custom.index.number_of_shards</name>
  584. <value>4</value>
  585. </setting>
  586. </settings>
  587. <attributes/>
  588. <cluster_schema/>
  589. <remotesteps>
  590. <input>
  591. </input>
  592. <output>
  593. </output>
  594. </remotesteps>
  595. <GUI>
  596. <xloc>416</xloc>
  597. <yloc>208</yloc>
  598. <draw>Y</draw>
  599. </GUI>
  600. </step>
  601. <step>
  602. <name>Java 代码</name>
  603. <type>UserDefinedJavaClass</type>
  604. <description/>
  605. <distribute>Y</distribute>
  606. <custom_distribution/>
  607. <copies>1</copies>
  608. <partitioning>
  609. <method>none</method>
  610. <schema_name/>
  611. </partitioning>
  612. <definitions>
  613. <definition>
  614. <class_type>TRANSFORM_CLASS</class_type>
  615. <class_name>Processor</class_name>
  616. <class_source>//package com.tools.ws;
  617. //import test.TestFun;
  618. public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
  619. {
  620. Object[] r = getRow();
  621. // If the row object is null, we are done processing.
  622. if (r == null) {
  623. setOutputDone();
  624. return false;
  625. }
  626. //if(first){
  627. // first = false;
  628. //}
  629. r = createOutputRow(r, data.outputRowMeta.size());
  630. String inputstr = get(Fields.In,"date").getString(r);
  631. String tstr = inputstr.replace("-","_");
  632. get(Fields.Out, "date").setValue(r, tstr);
  633. putRow(data.outputRowMeta, r);
  634. return true;
  635. }</class_source>
  636. </definition>
  637. </definitions>
  638. <fields>
  639. </fields>
  640. <clear_result_fields>N</clear_result_fields>
  641. <info_steps/>
  642. <target_steps/>
  643. <usage_parameters/>
  644. <attributes/>
  645. <cluster_schema/>
  646. <remotesteps>
  647. <input>
  648. </input>
  649. <output>
  650. </output>
  651. </remotesteps>
  652. <GUI>
  653. <xloc>432</xloc>
  654. <yloc>80</yloc>
  655. <draw>Y</draw>
  656. </GUI>
  657. </step>
  658. <step>
  659. <name>表输入</name>
  660. <type>TableInput</type>
  661. <description/>
  662. <distribute>Y</distribute>
  663. <custom_distribution/>
  664. <copies>1</copies>
  665. <partitioning>
  666. <method>none</method>
  667. <schema_name/>
  668. </partitioning>
  669. <connection>testSqlserver</connection>
  670. <sql>SELECT
  671. id
  672. , name
  673. , twitter
  674. , "date"
  675. ,DATEDIFF(SECOND,'1970-01-01 00:00:00',convert(varchar(16),[date22],120)) as [date22]
  676. FROM twitter
  677. </sql>
  678. <limit>0</limit>
  679. <lookup/>
  680. <execute_each_row>N</execute_each_row>
  681. <variables_active>N</variables_active>
  682. <lazy_conversion_active>N</lazy_conversion_active>
  683. <attributes/>
  684. <cluster_schema/>
  685. <remotesteps>
  686. <input>
  687. </input>
  688. <output>
  689. </output>
  690. </remotesteps>
  691. <GUI>
  692. <xloc>208</xloc>
  693. <yloc>80</yloc>
  694. <draw>Y</draw>
  695. </GUI>
  696. </step>
  697. <step_error_handling>
  698. </step_error_handling>
  699. <slave-step-copy-partition-distribution>
  700. </slave-step-copy-partition-distribution>
  701. <slave_transformation>N</slave_transformation>
  702. <attributes/>
  703. </transformation>