IoTDB integration for Apache Flink. This module includes the IoTDB sink that allows a Flink job to write events into timeseries, and the IoTDB source allowing reading data from IoTDB.

IoTDBSink

To use the IoTDBSink, you need to construct an instance of it by specifying IoTDBSinkOptions and IoTSerializationSchema instances. The IoTDBSink sends events one at a time by default, but you can switch to batched writes by invoking withBatchSize(int).

Example

This example shows a case that sends data to an IoTDB server from a Flink job:

  • A simulated source, SensorSource, generates one data point per second.
  • Flink uses IoTDBSink to consume the generated data points and write the data into IoTDB.

It is noteworthy that to use IoTDBSink, schema auto-creation in IoTDB should be enabled.

  1. import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
  2. import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
  3. import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
  4. import com.google.common.collect.Lists;
  5. import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
  6. import org.apache.flink.streaming.api.functions.source.SourceFunction;
  7. import java.security.SecureRandom;
  8. import java.util.HashMap;
  9. import java.util.Map;
  10. import java.util.Random;
  11. public class FlinkIoTDBSink {
  12. public static void main(String[] args) throws Exception {
  13. // run the flink job on local mini cluster
  14. StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  15. IoTDBSinkOptions options = new IoTDBSinkOptions();
  16. options.setHost("127.0.0.1");
  17. options.setPort(6667);
  18. options.setUser("root");
  19. options.setPassword("root");
  20. // If the server enables auto_create_schema, then we do not need to register all timeseries
  21. // here.
  22. options.setTimeseriesOptionList(
  23. Lists.newArrayList(
  24. new IoTDBSinkOptions.TimeseriesOption(
  25. "root.sg.d1.s1", TSDataType.DOUBLE, TSEncoding.GORILLA, CompressionType.SNAPPY)));
  26. IoTSerializationSchema serializationSchema = new DefaultIoTSerializationSchema();
  27. IoTDBSink ioTDBSink =
  28. new IoTDBSink(options, serializationSchema)
  29. // enable batching
  30. .withBatchSize(10)
  31. // how many connectons to the server will be created for each parallelism
  32. .withSessionPoolSize(3);
  33. env.addSource(new SensorSource())
  34. .name("sensor-source")
  35. .setParallelism(1)
  36. .addSink(ioTDBSink)
  37. .name("iotdb-sink");
  38. env.execute("iotdb-flink-example");
  39. }
  40. private static class SensorSource implements SourceFunction<Map<String, String>> {
  41. boolean running = true;
  42. Random random = new SecureRandom();
  43. @Override
  44. public void run(SourceContext context) throws Exception {
  45. while (running) {
  46. Map<String, String> tuple = new HashMap();
  47. tuple.put("device", "root.sg.d1");
  48. tuple.put("timestamp", String.valueOf(System.currentTimeMillis()));
  49. tuple.put("measurements", "s1");
  50. tuple.put("types", "DOUBLE");
  51. tuple.put("values", String.valueOf(random.nextDouble()));
  52. context.collect(tuple);
  53. Thread.sleep(1000);
  54. }
  55. }
  56. @Override
  57. public void cancel() {
  58. running = false;
  59. }
  60. }
  61. }

Usage

  • Launch the IoTDB server.
  • Run org.apache.iotdb.flink.FlinkIoTDBSink.java to run the flink job on local mini cluster.

IoTDBSource

To use the IoTDBSource, you need to construct an instance of IoTDBSource by specifying IoTDBSourceOptions and implementing the abstract method convert() in IoTDBSource. The convert method defines how you want the row data to be transformed.

Example

This example shows a case where data are read from IoTDB.

  1. import org.apache.iotdb.flink.options.IoTDBSourceOptions;
  2. import org.apache.iotdb.rpc.IoTDBConnectionException;
  3. import org.apache.iotdb.rpc.StatementExecutionException;
  4. import org.apache.iotdb.rpc.TSStatusCode;
  5. import org.apache.iotdb.session.Session;
  6. import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
  7. import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
  8. import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
  9. import org.apache.iotdb.tsfile.read.common.RowRecord;
  10. import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
  11. import java.util.ArrayList;
  12. import java.util.List;
  13. public class FlinkIoTDBSource {
  14. static final String LOCAL_HOST = "127.0.0.1";
  15. static final String ROOT_SG1_D1_S1 = "root.sg1.d1.s1";
  16. static final String ROOT_SG1_D1 = "root.sg1.d1";
  17. public static void main(String[] args) throws Exception {
  18. prepareData();
  19. // run the flink job on local mini cluster
  20. StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  21. IoTDBSourceOptions ioTDBSourceOptions =
  22. new IoTDBSourceOptions("127.0.0.1", 6667, "root", "root",
  23. "select s1 from " + ROOT_SG1_D1 + " align by device");
  24. env.addSource(
  25. new IoTDBSource<RowRecord>(ioTDBSourceOptions) {
  26. @Override
  27. public RowRecord convert(RowRecord rowRecord) {
  28. return rowRecord;
  29. }
  30. })
  31. .name("sensor-source")
  32. .print()
  33. .setParallelism(2);
  34. env.execute();
  35. }
  36. /**
  37. * Write some data to IoTDB
  38. */
  39. private static void prepareData() throws IoTDBConnectionException, StatementExecutionException {
  40. Session session = new Session(LOCAL_HOST, 6667, "root", "root");
  41. session.open(false);
  42. try {
  43. session.setStorageGroup("root.sg1");
  44. if (!session.checkTimeseriesExists(ROOT_SG1_D1_S1)) {
  45. session.createTimeseries(
  46. ROOT_SG1_D1_S1, TSDataType.INT64, TSEncoding.RLE, CompressionType.SNAPPY);
  47. List<String> measurements = new ArrayList<>();
  48. List<TSDataType> types = new ArrayList<>();
  49. measurements.add("s1");
  50. measurements.add("s2");
  51. measurements.add("s3");
  52. types.add(TSDataType.INT64);
  53. types.add(TSDataType.INT64);
  54. types.add(TSDataType.INT64);
  55. for (long time = 0; time < 100; time++) {
  56. List<Object> values = new ArrayList<>();
  57. values.add(1L);
  58. values.add(2L);
  59. values.add(3L);
  60. session.insertRecord(ROOT_SG1_D1, time, measurements, types, values);
  61. }
  62. }
  63. } catch (StatementExecutionException e) {
  64. if (e.getStatusCode() != TSStatusCode.PATH_ALREADY_EXIST_ERROR.getStatusCode()) {
  65. throw e;
  66. }
  67. }
  68. }
  69. }

Usage

Launch the IoTDB server. Run org.apache.iotdb.flink.FlinkIoTDBSource.java to run the flink job on local mini cluster.