[GitHub] [camel] jonchase opened a new pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

classic Classic list List threaded Threaded
19 messages Options
Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase opened a new pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox

jonchase opened a new pull request #3891:
URL: https://github.com/apache/camel/pull/3891


   Addresses https://issues.apache.org/jira/browse/CAMEL-14992


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox

jonchase commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r435863948



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2QueryHelper.java
##########
@@ -0,0 +1,452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.Exchange;
+import org.apache.camel.util.ObjectHelper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Clock;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import software.amazon.awssdk.services.athena.model.GetQueryExecutionResponse;
+import software.amazon.awssdk.services.athena.model.QueryExecutionState;
+
+
+/**
+ * Package-private class to encapsulate the logic of running queries, waiting for completion states, retrying, etc.
+ */
+class Athena2QueryHelper {
+  private static final Logger LOG = LoggerFactory.getLogger(Athena2QueryHelper.class);
+
+  // configuration ======================
+  private final Clock clock = Clock.systemUTC();
+  private final long waitTimeout;
+  private final long delay;
+  private final Set<String> retry;
+  private final int maxAttempts;
+  private final boolean resetWaitTimeoutOnAttempt;
+  private final long absoluteStartMs;
+
+  // state ==============================
+  private long currentDelay;
+  private int attempts;
+  private boolean isFailure;
+  private boolean isSuccess;
+  private boolean isRetry;
+  private long startMs;
+  private boolean interrupted;
+
+  Athena2QueryHelper(Exchange exchange, Athena2Configuration configuration) {
+    this.waitTimeout = determineWaitTimeout(exchange, configuration);
+    this.delay = determineDelay(exchange, configuration);
+    this.maxAttempts = determineMaxAttempts(exchange, configuration);
+    this.retry = determineRetry(exchange, configuration);
+    this.resetWaitTimeoutOnAttempt = determineResetWaitTimeoutOnRetry(exchange, configuration);
+    this.absoluteStartMs = now();
+
+    this.currentDelay = determineInitialDelay(exchange, configuration);
+  }
+
+  private long now() {
+    return clock.millis();
+  }
+
+  long getElapsedMillis() {
+    return now() - this.absoluteStartMs;
+  }
+
+  /**
+   * Record that a query attempt was made.  This is relevant b/c only so many attempts are permitted.
+   */
+  void markAttempt() {
+    if (attempts == 0) {
+      this.startMs = now();
+    } else {
+      if (resetWaitTimeoutOnAttempt) {
+        this.startMs = now();
+      }
+    }
+    ++attempts;
+
+    this.isFailure = false;
+    this.isSuccess = false;
+    this.isRetry = false;
+  }
+
+  int getAttempts() {
+    return this.attempts;
+  }
+
+  /**
+   * Should another query attempt be made?
+   */
+  boolean shouldAttempt() {
+    if (this.attempts >= this.maxAttempts) {
+      LOG.trace("AWS Athena start query execution used all {} attempts", this.maxAttempts);
+      return false;
+    }
+
+    if (this.interrupted) {
+      LOG.trace("AWS Athena start query execution thread was interrupted, will try no more");
+      return false;
+    }
+
+    if (this.isFailure) {
+      LOG.trace("AWS Athena start query execution detected permanent failure");
+      return false;
+    }
+
+    if (this.isSuccess) {
+      LOG.trace("AWS Athena start query execution detected success, will try no more");
+      return false;
+    }
+
+    // if this.isRetry, return true
+
+    return true;
+  }
+
+  /**
+   * Should there be a wait for the query to complete?
+   */
+  boolean shouldWait() {
+    long now = now();
+    long millisWaited = now - this.startMs;
+    if (millisWaited >= this.waitTimeout) {
+      LOG
+          .trace("AWS Athena start query execution waited for {}, which exceeded wait timeout of {}", millisWaited,
+              this.waitTimeout);
+      return false;
+    }
+
+    if (this.interrupted) {
+      LOG.trace("AWS Athena start query execution thread was interrupted, will wait no longer");
+      return false;
+    }
+
+    if (this.isFailure) {
+      LOG.trace("AWS Athena start query execution detected failure, will wait no longer");
+      return false;
+    }
+
+    if (this.isSuccess) {
+      LOG.trace("AWS Athena start query execution detected success, will wait no longer");
+      return false;
+    }
+
+    if (this.isRetry) {
+      LOG.trace("AWS Athena start query execution detected retry, will immediately attempt retry");
+      return false;
+    }
+
+    return true;
+  }
+
+  void doWait() {
+    try {
+      Thread.sleep(this.currentDelay);

Review comment:
       Is this the 'right' way of doing this in a Camel component?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r435864229



##########
File path: components/camel-aws2-athena/src/main/docs/aws2-athena-component.adoc
##########
@@ -0,0 +1,537 @@
+[[aws2-athena-component]]
+= AWS 2 Athena Component
+:docTitle: AWS 2 Athena
+:artifactId: camel-aws2-athena
+:description: Access AWS Athena service using AWS SDK version 2.x.
+:since: 3.4
+:supportLevel: Preview
+:component-header: Only producer is supported
+
+*Since Camel {since}*
+
+*{component-header}*
+
+The Athena component supports running queries with https://aws.amazon.com/athena/[AWS Athena] and working with results.
+
+Prerequisites
+
+You must have a valid Amazon Web Services developer account, and be
+signed up to use Amazon Athena.  More information is available at
+https://aws.amazon.com/athena/[AWS Athena].
+
+== URI Format
+
+[source,java]
+------------------------------
+aws2-athena://label[?options]
+------------------------------
+
+You can append query options to the URI in the following format,
+?option=value&option2=value&...
+
+For example, to run a simple query, wait up to 60 seconds for completion, and log the results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .split(body()).streaming()
+    .to("log:out")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+Similarly, running the query and returning a path to the results in S3:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=S3Pointer")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+== URI Options
+
+
+// component options: START
+The AWS 2 Athena component supports 29 options, which are listed below.
+
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *accessKey* (producer) | Amazon AWS Access Key. |  | String
+| *amazonAthenaClient* (producer) | The AmazonAthena instance to use as the client. |  | AthenaClient
+| *configuration* (producer) | The component configuration. |  | Athena2Configuration
+| *database* (producer) | The Athena database to use. |  | String
+| *delay* (producer) | Milliseconds before the next poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 2000 | long
+| *encryptionOption* (producer) | The encryption type to use when storing query results in S3. One of SSE_S3, SSE_KMS, or CSE_KMS. The value can be one of: SSE_S3, SSE_KMS, CSE_KMS, null |  | EncryptionOption
+| *includeTrace* (producer) | Include useful trace information at the beginning of queries as an SQL comment (prefixed with --). | false | boolean
+| *initialDelay* (producer) | Milliseconds before the first poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1000 | long
+| *kmsKey* (producer) | For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. |  | String
+| *lazyStartProducer* (producer) | Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail during starting and cause the route to fail being started. By deferring this startup to be lazy then the startup failure can be handled during routing messages via Camel's routing error handlers. Beware that when the first message is processed then creating and starting the producer may take a little time and prolong the total processing time of the processing. | false | boolean
+| *maxAttempts* (producer) | Maximum number of times to attempt a query. Set to 1 to disable retries. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1 | int
+| *maxResults* (producer) | Max number of results to return for the given operation (if supported by the Athena API endpoint). If not set, will use the Athena API default for the given operation. |  | Integer
+| *nextToken* (producer) | Pagination token to use in the case where the response from the previous request was truncated. |  | String
+| *operation* (producer) | The Athena API function to call. The value can be one of: getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution | startQueryExecution | Athena2Operations
+| *outputLocation* (producer) | The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/. Ensure this value ends with a forward slash ('/'). |  | String
+| *outputType* (producer) | How query results should be returned. One of StreamList (default - return a GetQueryResultsIterable that can page through all results), SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results), S3Pointer (return an S3 path pointing to the results). The value can be one of: StreamList, SelectList, S3Pointer | StreamList | Athena2OutputType
+| *proxyHost* (producer) | To define a proxy host when instantiating the Athena client. |  | String
+| *proxyPort* (producer) | To define a proxy port when instantiating the Athena client. |  | Integer
+| *proxyProtocol* (producer) | To define a proxy protocol when instantiating the Athena client. The value can be one of: HTTP, HTTPS | HTTPS | Protocol
+| *queryExecutionId* (producer) | The unique ID identifying the query execution. |  | String
+| *queryString* (producer) | The SQL query to run. Except for simple queries, prefer setting this as the body of the Exchange or as a header using Athena2Constants.QUERY_STRING to avoid having to deal with URL encoding issues. |  | String
+| *region* (producer) | The region in which Athena client needs to work. When using this parameter, the configuration will expect the lowercase name of the region (for example ap-east-1). You'll need to use the name Region.EU_WEST_1.id(). |  | String
+| *resetWaitTimeoutOnRetry* (producer) | Reset the waitTimeout countdown in the event of a query retry. If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | true | boolean
+| *retry* (producer) | Optional comma separated list of error types to retry the query for. Use 'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted), 'generic' to retry 'GENERIC_INTERNAL_ERROR' failures, 'exhausted' to retry queries that have exhausted resource limits, 'always' to always retry regardless of failure condition, or 'never' or null to never retry (default). See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. The value can be one of: never, always, retryable, exhausted, generic | never | String
+| *secretKey* (producer) | Amazon AWS Secret Key. |  | String
+| *waitTimeout* (producer) | Optional max wait time in millis to wait for a successful query completion. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 0 | long
+| *workGroup* (producer) | The workgroup to use for running the query. |  | String
+| *basicPropertyBinding* (advanced) | Whether the component should use basic property binding (Camel 2.x) or the newer property binding with additional capabilities | false | boolean
+| *clientRequestToken* (advanced) | A unique string to ensure issued queries are idempotent. It is unlikely you will need to set this. |  | String
+|===
+// component options: END
+
+
+
+
+
+
+
+
+
+
+
+// endpoint options: START
+The AWS 2 Athena endpoint is configured using URI syntax:
+
+----
+aws2-athena:label
+----
+
+with the following path and query parameters:
+
+=== Path Parameters (1 parameters):
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *label* | *Required* Logical name |  | String
+|===
+
+
+=== Query Parameters (29 parameters):
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *accessKey* (producer) | Amazon AWS Access Key. |  | String
+| *amazonAthenaClient* (producer) | The AmazonAthena instance to use as the client. |  | AthenaClient
+| *database* (producer) | The Athena database to use. |  | String
+| *delay* (producer) | Milliseconds before the next poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 2000 | long
+| *encryptionOption* (producer) | The encryption type to use when storing query results in S3. One of SSE_S3, SSE_KMS, or CSE_KMS. The value can be one of: SSE_S3, SSE_KMS, CSE_KMS, null |  | EncryptionOption
+| *includeTrace* (producer) | Include useful trace information at the beginning of queries as an SQL comment (prefixed with --). | false | boolean
+| *initialDelay* (producer) | Milliseconds before the first poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1000 | long
+| *kmsKey* (producer) | For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. |  | String
+| *lazyStartProducer* (producer) | Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail during starting and cause the route to fail being started. By deferring this startup to be lazy then the startup failure can be handled during routing messages via Camel's routing error handlers. Beware that when the first message is processed then creating and starting the producer may take a little time and prolong the total processing time of the processing. | false | boolean
+| *maxAttempts* (producer) | Maximum number of times to attempt a query. Set to 1 to disable retries. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1 | int
+| *maxResults* (producer) | Max number of results to return for the given operation (if supported by the Athena API endpoint). If not set, will use the Athena API default for the given operation. |  | Integer
+| *nextToken* (producer) | Pagination token to use in the case where the response from the previous request was truncated. |  | String
+| *operation* (producer) | The Athena API function to call. The value can be one of: getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution | startQueryExecution | Athena2Operations
+| *outputLocation* (producer) | The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/. Ensure this value ends with a forward slash ('/'). |  | String
+| *outputType* (producer) | How query results should be returned. One of StreamList (default - return a GetQueryResultsIterable that can page through all results), SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results), S3Pointer (return an S3 path pointing to the results). The value can be one of: StreamList, SelectList, S3Pointer | StreamList | Athena2OutputType
+| *proxyHost* (producer) | To define a proxy host when instantiating the Athena client. |  | String
+| *proxyPort* (producer) | To define a proxy port when instantiating the Athena client. |  | Integer
+| *proxyProtocol* (producer) | To define a proxy protocol when instantiating the Athena client. The value can be one of: HTTP, HTTPS | HTTPS | Protocol
+| *queryExecutionId* (producer) | The unique ID identifying the query execution. |  | String
+| *queryString* (producer) | The SQL query to run. Except for simple queries, prefer setting this as the body of the Exchange or as a header using Athena2Constants.QUERY_STRING to avoid having to deal with URL encoding issues. |  | String
+| *region* (producer) | The region in which Athena client needs to work. When using this parameter, the configuration will expect the lowercase name of the region (for example ap-east-1). You'll need to use the name Region.EU_WEST_1.id(). |  | String
+| *resetWaitTimeoutOnRetry* (producer) | Reset the waitTimeout countdown in the event of a query retry. If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | true | boolean
+| *retry* (producer) | Optional comma separated list of error types to retry the query for. Use 'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted), 'generic' to retry 'GENERIC_INTERNAL_ERROR' failures, 'exhausted' to retry queries that have exhausted resource limits, 'always' to always retry regardless of failure condition, or 'never' or null to never retry (default). See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. The value can be one of: never, always, retryable, exhausted, generic | never | String
+| *secretKey* (producer) | Amazon AWS Secret Key. |  | String
+| *waitTimeout* (producer) | Optional max wait time in millis to wait for a successful query completion. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 0 | long
+| *workGroup* (producer) | The workgroup to use for running the query. |  | String
+| *basicPropertyBinding* (advanced) | Whether the endpoint should use basic property binding (Camel 2.x) or the newer property binding with additional capabilities | false | boolean
+| *clientRequestToken* (advanced) | A unique string to ensure issued queries are idempotent. It is unlikely you will need to set this. |  | String
+| *synchronous* (advanced) | Sets whether synchronous processing should be strictly used, or Camel is allowed to use asynchronous processing (if supported). | false | boolean
+|===
+// endpoint options: END
+
+
+
+
+
+
+
+
+
+
+Required Athena component options
+
+You have to provide the amazonAthenaClient in the
+Registry or your accessKey and secretKey to access
+the https://aws.amazon.com/athena/[AWS Athena] service.
+
+== Usage
+
+=== Message headers evaluated by the Athena producer
+
+Message headers listed here override the corresponding
+query parameters listed in "Query Parameters".
+
+[width="100%",cols="5%,5%,10%,75%,5%",options="header",]
+|=======================================================================
+|Operation |Header |Type |Description |Required
+
+|All |`CamelAwsAthenaOperation` |`String` |The operation to perform. Permitted values are getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution. Default is startQueryExecution. | No
+
+|startQueryExecution |`CamelAwsAthenaDatabase` |`String` |The Athena database to use. | No
+
+|getQueryExecution, getQueryResults |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution. | No
+
+|listQueryExecutions, startQueryExecution |`CamelAwsAthenaWorkGroup` |`String` |The workgroup to use for running the query. | No
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaNextToken` |`String` |Pagination token to use in the case where the response from the previous request was truncated. | No
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaMaxResults` |`Integer` |Max number of results to return for the given operation (if supported by the Athena API endpoint).
+If not set, will use the Athena API default for the given operation. | No
+
+|startQueryExecution |`CamelAwsAthenaIncludeTrace` |`boolean` |Include useful trace information at the beginning of queries as an SQL comment (prefixed with "--"). | No
+
+|startQueryExecution |`CamelAwsAthenaOutputLocation` |`String` |The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/.
+Ensure this value ends with a forward slash ('/'). | No
+
+|getQueryResults |`CamelAwsAthenaOutputType` |`Athena2OutputType` |How query results should be returned.  One of
+StreamList (default - return a GetQueryResultsIterable that can page through all results),
+SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results),
+S3Pointer (return an S3 path pointing to the results). | No
+
+|startQueryExecution |`CamelAwsAthenaClientRequestToken` |`String` |A unique string to ensure issued queries are idempotent.  It is unlikely you will need to set this. | No
+
+|startQueryExecution |`CamelAwsAthenaQueryString` |`String` |The SQL query to run.  Except for simple queries, prefer setting this as the `body` of the
+Exchange or as this header to avoid having to deal with URL encoding issues. | No
+
+|startQueryExecution |`CamelAwsAthenaEncryptionOption` |`String` |The encryption type to use when storing query results in S3.  One of SSE_S3, SSE_KMS, or CSE_KMS. | No
+
+|startQueryExecution |`CamelAwsAthenaKmsKey` |`String` |For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. | No
+
+|startQueryExecution |`CamelAwsAthenaWaitTimeout` |`long` |Optional max wait time in millis to wait for a successful query completion.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaInitialDelay` |`long` |Milliseconds before the first poll for query execution status.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaDelay` |`long` |Milliseconds before the next poll for query execution status.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaMaxAttempts` |`int` |Maximum number of times to attempt a query.  Set to 1 to disable retries.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaRetry` |`String` |Optional comma separated list of error types to retry the query for.  Use
+'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted),
+'generic' to retry 'GENERIC_INTERNAL_ERROR' failures,
+'exhausted' to retry queries that have exhausted resource limits,
+'always' to always retry regardless of failure condition, or
+'never' or null to never retry (default).
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaResetWaitTimeoutOnRetry` |`boolean` |Reset the waitTimeout countdown in the event of a query retry.
+If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|=======================================================================
+
+=== Message headers set by the Athena producer
+
+[width="100%",cols="5%,5%,10%,80%",options="header",]
+|=======================================================================
+|Operation |Header |Type |Description
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionState` |`software.amazon.awssdk.services.athena.model.QueryExecutionState` |The state of the query execution.  One of `QUEUED`, `RUNNING`, `SUCCEEDED`, `FAILED`, `CANCELLED`.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaOutputLocation` |`String` |The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution.
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaNextToken` |`String` |Pagination token to use in the case where the response from the previous request was truncated.
+
+|startQueryExecution |`CamelAwsAthenaStartQueryExecutionAttempts` |`int` |Total number of attempts made to run the query.  Will be greater than 1 if the query is retried.
+
+|startQueryExecution |`CamelAwsAthenaStartQueryExecutionElapsedMillis` |`long` |Total time in millis taken in startQueryExecution (mostly relevant when waiting for query completion within startQueryExecution).
+
+|=======================================================================
+
+=== Athena Producer operations
+
+The Camel-AWS Athena component provides the following operations on the producer side:
+
+ - getQueryExecution
+ - getQueryResults
+ - listQueryExecutions
+ - startQueryExecution
+
+=== Advanced AmazonAthena configuration
+
+If your Camel Application is running behind a firewall or if you need to
+have more control over the `AthenaClient` instance configuration, you can
+create your own instance and refer to it in your Camel aws2-athena component configuration:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://MyQuery?amazonAthenaClient=#client&...")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+=== Overriding query parameters with message headers
+
+Message headers listed in "Message headers evaluated by the Athena producer" override the corresponding
+query parameters listed in "Query Parameters".
+
+For example:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+     .setHeader(Athena2Constants.OUTPUT_LOCATION, constant("s3://other/location/"))
+     .to("aws2-athena:label?outputLocation=s3://foo/bar/")
+     .to("mock:result");
+--------------------------------------------------------------------------------
+
+Will cause the output location to be `s3://other/location/`.
+
+=== Athena Producer Operation examples
+
+- getQueryExecution: this operation returns information about a query given its query execution ID
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?operation=getQueryExecution&queryExecutionId=11111111-1111-1111-1111-111111111111")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will yield an
+https://docs.aws.amazon.com/athena/latest/APIReference/API_QueryExecution.html[Athena QueryExecution] in the body.
+
+The getQueryExecution operation also supports retrieving the query execution ID from a header
+(`CamelAwsAthenaQueryExecutionId`), and since startQueryExecution sets the same header upon starting a query,
+these operations can be used together:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryExecution")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will yield an Athena QueryExecution in the body for the query that was just started.
+
+- getQueryResults: this operation returns the results of a query that has succeeded.  The results are returned in the
+body in one of three formats.
+
+`StreamList` - the default - returns a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/paginators/GetQueryResultsIterable.html[GetQueryResultsIterable]
+in the body that can page through all results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The output of StreamList can be processed in various ways:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant(
+        "SELECT * FROM ("
+            + "    VALUES"
+            + "        (1, 'a'),"
+            + "        (2, 'b')"
+            + ") AS t (id, name)"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .split(body()).streaming()
+    .process(new Processor() {
+
+      @Override
+      public void process(Exchange exchange) {
+        GetQueryResultsResponse page = exchange
+                                        .getMessage()
+                                        .getBody(GetQueryResultsResponse.class);
+        for (Row row : page.resultSet().rows()) {
+          String line = row.data()
+                          .stream()
+                          .map(Datum::varCharValue)
+                          .collect(Collectors.joining(","));
+          System.out.println(line);
+        }
+      }
+    })
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will print the results of the query as CSV to the console.
+
+`SelectList` - returns a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/model/GetQueryResultsResponse.html[GetQueryResultsResponse]
+in the body containing at most 1,000 rows, plus the NextToken value as a header (`CamelAwsAthenaNextToken`),
+which can be used for manual pagination of results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=SelectList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/model/GetQueryResultsResponse.html[GetQueryResultsResponse]
+in the body plus the NextToken value as a header (`CamelAwsAthenaNextToken`), which can be used to manually page
+through the results 1,000 rows at a time.
+
+`S3Pointer` - return an S3 path (e.g. `s3://bucket/path/`) pointing to the results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=S3Pointer")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return an
+S3 path (e.g. `s3://bucket/path/`) in the body pointing to the results.  The path will also be set in a header
+(`CamelAwsAthenaOutputLocation`).
+
+- listQueryExecutions: this operation returns a list of query execution IDs
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?operation=listQueryExecutions")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return a list of query executions in the body, plus the NextToken value as a
+header (`CamelAwsAthenaNextToken`) that can be used for manual pagination of results.
+
+- startQueryExecution: this operation starts the execution of a query.  It supports waiting for the query to
+complete before proceeding, and retrying the query based on a set of configurable failure conditions:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start the query `SELECT 1` and configure the
+results to be saved to `s3://bucket/path/`, but will not wait for the query
+to complete.
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start a query and wait up to 60 seconds for it to
+reach a status that indicates it is complete (one of SUCCEEDED, FAILED, CANCELLED,
+or UNKNOWN_TO_SDK_VERSION).  Upon failure, the query would not be retried.
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&initialDelay=10000&delay=1000&maxAttempts=3&retry=retryable&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start a query and wait up to 60 seconds for it to reach
+a status that indicates it is complete (one of SUCCEEDED, FAILED, CANCELLED,
+or UNKNOWN_TO_SDK_VERSION).  Upon failure, the query would be automatically
+retried up to 2 more times if the failure state indicates the query may
+succeed upon retry (Athena queries that fail with states such as `GENERIC_INTERNAL_ERROR`
+or "resource limit exhaustion" will sometimes succeed if retried).  While waiting
+for the query to complete, the query status would first be checked after an
+initial delay of 10 seconds, and subsequently every 1 second until the query completes.
+
+=== Putting it all together
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&maxAttempts=3&retry=retryable&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start the query and wait up to 60 seconds for it to
+complete.  Upon completion, getQueryResults puts the results of the query into
+the body of the message for further processing.
+
+For the sake of completeness, a similar outcome could be achieved with the following:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .loopDoWhile(simple("${header." + Athena2Constants.QUERY_EXECUTION_STATE + "} != 'SUCCEEDED'"))
+      .delay(1_000)
+      .to("aws2-athena://label?operation=getQueryExecution")
+    .end()
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+Caution: the preceding example would block indefinitely, however, if the query did not complete with a
+status of SUCCEEDED.
+
+== Automatic detection of AmazonAthena client in registry
+
+The component is capable of detecting the presence of an AmazonAthena bean in the registry.
+If it's the only instance of that type it will be used as the client and you won't have to define it as a URI parameter, like the example above.
+This may be really useful for smarter configuration of the endpoint.
+
+== Dependencies
+
+Maven users will need to add the following dependency to their pom.xml.
+
+*pom.xml*
+
+[source,xml]
+---------------------------------------
+<dependency>
+    <groupId>org.apache.camel</groupId>
+    <artifactId>camel-aws2-athena</artifactId>
+    <version>${camel-version}</version>
+</dependency>
+---------------------------------------
+
+where `$\{camel-version\}` must be replaced by the actual version of Camel.
+
+// TODO where does this come from???
+include::camel-spring-boot::page$aws2-athena-starter.adoc[]

Review comment:
       I couldn't figure out where this file lives.  The other aws2 components all seem to have one.  Not sure how the magic happens. ;)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r435878126



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2ComponentVerifierExtension.java
##########
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.component.extension.verifier.DefaultComponentVerifierExtension;
+import org.apache.camel.component.extension.verifier.ResultBuilder;
+import org.apache.camel.component.extension.verifier.ResultErrorBuilder;
+import org.apache.camel.component.extension.verifier.ResultErrorHelper;
+
+import java.util.Map;

Review comment:
       Can you please run the build with the sourcecheck profile enabled? It will report some warning at the beginning. Can you please fix them? In this case the import order should be different




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r435879007



##########
File path: components/camel-aws2-athena/src/main/docs/aws2-athena-component.adoc
##########
@@ -0,0 +1,537 @@
+[[aws2-athena-component]]
+= AWS 2 Athena Component
+:docTitle: AWS 2 Athena
+:artifactId: camel-aws2-athena
+:description: Access AWS Athena service using AWS SDK version 2.x.
+:since: 3.4
+:supportLevel: Preview
+:component-header: Only producer is supported
+
+*Since Camel {since}*
+
+*{component-header}*
+
+The Athena component supports running queries with https://aws.amazon.com/athena/[AWS Athena] and working with results.
+
+Prerequisites
+
+You must have a valid Amazon Web Services developer account, and be
+signed up to use Amazon Athena.  More information is available at
+https://aws.amazon.com/athena/[AWS Athena].
+
+== URI Format
+
+[source,java]
+------------------------------
+aws2-athena://label[?options]
+------------------------------
+
+You can append query options to the URI in the following format,
+?option=value&option2=value&...
+
+For example, to run a simple query, wait up to 60 seconds for completion, and log the results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .split(body()).streaming()
+    .to("log:out")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+Similarly, running the query and returning a path to the results in S3:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=S3Pointer")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+== URI Options
+
+
+// component options: START
+The AWS 2 Athena component supports 29 options, which are listed below.
+
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *accessKey* (producer) | Amazon AWS Access Key. |  | String
+| *amazonAthenaClient* (producer) | The AmazonAthena instance to use as the client. |  | AthenaClient
+| *configuration* (producer) | The component configuration. |  | Athena2Configuration
+| *database* (producer) | The Athena database to use. |  | String
+| *delay* (producer) | Milliseconds before the next poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 2000 | long
+| *encryptionOption* (producer) | The encryption type to use when storing query results in S3. One of SSE_S3, SSE_KMS, or CSE_KMS. The value can be one of: SSE_S3, SSE_KMS, CSE_KMS, null |  | EncryptionOption
+| *includeTrace* (producer) | Include useful trace information at the beginning of queries as an SQL comment (prefixed with --). | false | boolean
+| *initialDelay* (producer) | Milliseconds before the first poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1000 | long
+| *kmsKey* (producer) | For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. |  | String
+| *lazyStartProducer* (producer) | Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail during starting and cause the route to fail being started. By deferring this startup to be lazy then the startup failure can be handled during routing messages via Camel's routing error handlers. Beware that when the first message is processed then creating and starting the producer may take a little time and prolong the total processing time of the processing. | false | boolean
+| *maxAttempts* (producer) | Maximum number of times to attempt a query. Set to 1 to disable retries. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1 | int
+| *maxResults* (producer) | Max number of results to return for the given operation (if supported by the Athena API endpoint). If not set, will use the Athena API default for the given operation. |  | Integer
+| *nextToken* (producer) | Pagination token to use in the case where the response from the previous request was truncated. |  | String
+| *operation* (producer) | The Athena API function to call. The value can be one of: getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution | startQueryExecution | Athena2Operations
+| *outputLocation* (producer) | The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/. Ensure this value ends with a forward slash ('/'). |  | String
+| *outputType* (producer) | How query results should be returned. One of StreamList (default - return a GetQueryResultsIterable that can page through all results), SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results), S3Pointer (return an S3 path pointing to the results). The value can be one of: StreamList, SelectList, S3Pointer | StreamList | Athena2OutputType
+| *proxyHost* (producer) | To define a proxy host when instantiating the Athena client. |  | String
+| *proxyPort* (producer) | To define a proxy port when instantiating the Athena client. |  | Integer
+| *proxyProtocol* (producer) | To define a proxy protocol when instantiating the Athena client. The value can be one of: HTTP, HTTPS | HTTPS | Protocol
+| *queryExecutionId* (producer) | The unique ID identifying the query execution. |  | String
+| *queryString* (producer) | The SQL query to run. Except for simple queries, prefer setting this as the body of the Exchange or as a header using Athena2Constants.QUERY_STRING to avoid having to deal with URL encoding issues. |  | String
+| *region* (producer) | The region in which Athena client needs to work. When using this parameter, the configuration will expect the lowercase name of the region (for example ap-east-1). You'll need to use the name Region.EU_WEST_1.id(). |  | String
+| *resetWaitTimeoutOnRetry* (producer) | Reset the waitTimeout countdown in the event of a query retry. If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | true | boolean
+| *retry* (producer) | Optional comma separated list of error types to retry the query for. Use 'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted), 'generic' to retry 'GENERIC_INTERNAL_ERROR' failures, 'exhausted' to retry queries that have exhausted resource limits, 'always' to always retry regardless of failure condition, or 'never' or null to never retry (default). See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. The value can be one of: never, always, retryable, exhausted, generic | never | String
+| *secretKey* (producer) | Amazon AWS Secret Key. |  | String
+| *waitTimeout* (producer) | Optional max wait time in millis to wait for a successful query completion. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 0 | long
+| *workGroup* (producer) | The workgroup to use for running the query. |  | String
+| *basicPropertyBinding* (advanced) | Whether the component should use basic property binding (Camel 2.x) or the newer property binding with additional capabilities | false | boolean
+| *clientRequestToken* (advanced) | A unique string to ensure issued queries are idempotent. It is unlikely you will need to set this. |  | String
+|===
+// component options: END
+
+
+
+
+
+
+
+
+
+
+
+// endpoint options: START
+The AWS 2 Athena endpoint is configured using URI syntax:
+
+----
+aws2-athena:label
+----
+
+with the following path and query parameters:
+
+=== Path Parameters (1 parameters):
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *label* | *Required* Logical name |  | String
+|===
+
+
+=== Query Parameters (29 parameters):
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *accessKey* (producer) | Amazon AWS Access Key. |  | String
+| *amazonAthenaClient* (producer) | The AmazonAthena instance to use as the client. |  | AthenaClient
+| *database* (producer) | The Athena database to use. |  | String
+| *delay* (producer) | Milliseconds before the next poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 2000 | long
+| *encryptionOption* (producer) | The encryption type to use when storing query results in S3. One of SSE_S3, SSE_KMS, or CSE_KMS. The value can be one of: SSE_S3, SSE_KMS, CSE_KMS, null |  | EncryptionOption
+| *includeTrace* (producer) | Include useful trace information at the beginning of queries as an SQL comment (prefixed with --). | false | boolean
+| *initialDelay* (producer) | Milliseconds before the first poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1000 | long
+| *kmsKey* (producer) | For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. |  | String
+| *lazyStartProducer* (producer) | Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail during starting and cause the route to fail being started. By deferring this startup to be lazy then the startup failure can be handled during routing messages via Camel's routing error handlers. Beware that when the first message is processed then creating and starting the producer may take a little time and prolong the total processing time of the processing. | false | boolean
+| *maxAttempts* (producer) | Maximum number of times to attempt a query. Set to 1 to disable retries. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1 | int
+| *maxResults* (producer) | Max number of results to return for the given operation (if supported by the Athena API endpoint). If not set, will use the Athena API default for the given operation. |  | Integer
+| *nextToken* (producer) | Pagination token to use in the case where the response from the previous request was truncated. |  | String
+| *operation* (producer) | The Athena API function to call. The value can be one of: getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution | startQueryExecution | Athena2Operations
+| *outputLocation* (producer) | The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/. Ensure this value ends with a forward slash ('/'). |  | String
+| *outputType* (producer) | How query results should be returned. One of StreamList (default - return a GetQueryResultsIterable that can page through all results), SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results), S3Pointer (return an S3 path pointing to the results). The value can be one of: StreamList, SelectList, S3Pointer | StreamList | Athena2OutputType
+| *proxyHost* (producer) | To define a proxy host when instantiating the Athena client. |  | String
+| *proxyPort* (producer) | To define a proxy port when instantiating the Athena client. |  | Integer
+| *proxyProtocol* (producer) | To define a proxy protocol when instantiating the Athena client. The value can be one of: HTTP, HTTPS | HTTPS | Protocol
+| *queryExecutionId* (producer) | The unique ID identifying the query execution. |  | String
+| *queryString* (producer) | The SQL query to run. Except for simple queries, prefer setting this as the body of the Exchange or as a header using Athena2Constants.QUERY_STRING to avoid having to deal with URL encoding issues. |  | String
+| *region* (producer) | The region in which Athena client needs to work. When using this parameter, the configuration will expect the lowercase name of the region (for example ap-east-1). You'll need to use the name Region.EU_WEST_1.id(). |  | String
+| *resetWaitTimeoutOnRetry* (producer) | Reset the waitTimeout countdown in the event of a query retry. If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | true | boolean
+| *retry* (producer) | Optional comma separated list of error types to retry the query for. Use 'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted), 'generic' to retry 'GENERIC_INTERNAL_ERROR' failures, 'exhausted' to retry queries that have exhausted resource limits, 'always' to always retry regardless of failure condition, or 'never' or null to never retry (default). See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. The value can be one of: never, always, retryable, exhausted, generic | never | String
+| *secretKey* (producer) | Amazon AWS Secret Key. |  | String
+| *waitTimeout* (producer) | Optional max wait time in millis to wait for a successful query completion. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 0 | long
+| *workGroup* (producer) | The workgroup to use for running the query. |  | String
+| *basicPropertyBinding* (advanced) | Whether the endpoint should use basic property binding (Camel 2.x) or the newer property binding with additional capabilities | false | boolean
+| *clientRequestToken* (advanced) | A unique string to ensure issued queries are idempotent. It is unlikely you will need to set this. |  | String
+| *synchronous* (advanced) | Sets whether synchronous processing should be strictly used, or Camel is allowed to use asynchronous processing (if supported). | false | boolean
+|===
+// endpoint options: END
+
+
+
+
+
+
+
+
+
+
+Required Athena component options
+
+You have to provide the amazonAthenaClient in the
+Registry or your accessKey and secretKey to access
+the https://aws.amazon.com/athena/[AWS Athena] service.
+
+== Usage
+
+=== Message headers evaluated by the Athena producer
+
+Message headers listed here override the corresponding
+query parameters listed in "Query Parameters".
+
+[width="100%",cols="5%,5%,10%,75%,5%",options="header",]
+|=======================================================================
+|Operation |Header |Type |Description |Required
+
+|All |`CamelAwsAthenaOperation` |`String` |The operation to perform. Permitted values are getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution. Default is startQueryExecution. | No
+
+|startQueryExecution |`CamelAwsAthenaDatabase` |`String` |The Athena database to use. | No
+
+|getQueryExecution, getQueryResults |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution. | No
+
+|listQueryExecutions, startQueryExecution |`CamelAwsAthenaWorkGroup` |`String` |The workgroup to use for running the query. | No
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaNextToken` |`String` |Pagination token to use in the case where the response from the previous request was truncated. | No
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaMaxResults` |`Integer` |Max number of results to return for the given operation (if supported by the Athena API endpoint).
+If not set, will use the Athena API default for the given operation. | No
+
+|startQueryExecution |`CamelAwsAthenaIncludeTrace` |`boolean` |Include useful trace information at the beginning of queries as an SQL comment (prefixed with "--"). | No
+
+|startQueryExecution |`CamelAwsAthenaOutputLocation` |`String` |The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/.
+Ensure this value ends with a forward slash ('/'). | No
+
+|getQueryResults |`CamelAwsAthenaOutputType` |`Athena2OutputType` |How query results should be returned.  One of
+StreamList (default - return a GetQueryResultsIterable that can page through all results),
+SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results),
+S3Pointer (return an S3 path pointing to the results). | No
+
+|startQueryExecution |`CamelAwsAthenaClientRequestToken` |`String` |A unique string to ensure issued queries are idempotent.  It is unlikely you will need to set this. | No
+
+|startQueryExecution |`CamelAwsAthenaQueryString` |`String` |The SQL query to run.  Except for simple queries, prefer setting this as the `body` of the
+Exchange or as this header to avoid having to deal with URL encoding issues. | No
+
+|startQueryExecution |`CamelAwsAthenaEncryptionOption` |`String` |The encryption type to use when storing query results in S3.  One of SSE_S3, SSE_KMS, or CSE_KMS. | No
+
+|startQueryExecution |`CamelAwsAthenaKmsKey` |`String` |For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. | No
+
+|startQueryExecution |`CamelAwsAthenaWaitTimeout` |`long` |Optional max wait time in millis to wait for a successful query completion.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaInitialDelay` |`long` |Milliseconds before the first poll for query execution status.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaDelay` |`long` |Milliseconds before the next poll for query execution status.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaMaxAttempts` |`int` |Maximum number of times to attempt a query.  Set to 1 to disable retries.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaRetry` |`String` |Optional comma separated list of error types to retry the query for.  Use
+'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted),
+'generic' to retry 'GENERIC_INTERNAL_ERROR' failures,
+'exhausted' to retry queries that have exhausted resource limits,
+'always' to always retry regardless of failure condition, or
+'never' or null to never retry (default).
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaResetWaitTimeoutOnRetry` |`boolean` |Reset the waitTimeout countdown in the event of a query retry.
+If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|=======================================================================
+
+=== Message headers set by the Athena producer
+
+[width="100%",cols="5%,5%,10%,80%",options="header",]
+|=======================================================================
+|Operation |Header |Type |Description
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionState` |`software.amazon.awssdk.services.athena.model.QueryExecutionState` |The state of the query execution.  One of `QUEUED`, `RUNNING`, `SUCCEEDED`, `FAILED`, `CANCELLED`.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaOutputLocation` |`String` |The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution.
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaNextToken` |`String` |Pagination token to use in the case where the response from the previous request was truncated.
+
+|startQueryExecution |`CamelAwsAthenaStartQueryExecutionAttempts` |`int` |Total number of attempts made to run the query.  Will be greater than 1 if the query is retried.
+
+|startQueryExecution |`CamelAwsAthenaStartQueryExecutionElapsedMillis` |`long` |Total time in millis taken in startQueryExecution (mostly relevant when waiting for query completion within startQueryExecution).
+
+|=======================================================================
+
+=== Athena Producer operations
+
+The Camel-AWS Athena component provides the following operations on the producer side:
+
+ - getQueryExecution
+ - getQueryResults
+ - listQueryExecutions
+ - startQueryExecution
+
+=== Advanced AmazonAthena configuration
+
+If your Camel Application is running behind a firewall or if you need to
+have more control over the `AthenaClient` instance configuration, you can
+create your own instance and refer to it in your Camel aws2-athena component configuration:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?amazonAthenaClient=#client&...")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+=== Overriding query parameters with message headers
+
+Message headers listed in "Message headers evaluated by the Athena producer" override the corresponding
+query parameters listed in "Query Parameters".
+
+For example:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+     .setHeader(Athena2Constants.OUTPUT_LOCATION, constant("s3://other/location/"))
+     .to("aws2-athena:label?outputLocation=s3://foo/bar/")
+     .to("mock:result");
+--------------------------------------------------------------------------------
+
+Will cause the output location to be `s3://other/location/`.
+
+=== Athena Producer Operation examples
+
+- getQueryExecution: this operation returns information about a query given its query execution ID
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?operation=getQueryExecution&queryExecutionId=11111111-1111-1111-1111-111111111111")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will yield an
+https://docs.aws.amazon.com/athena/latest/APIReference/API_QueryExecution.html[Athena QueryExecution] in the body.
+
+The getQueryExecution operation also supports retrieving the query execution ID from a header
+(`CamelAwsAthenaQueryExecutionId`), and since startQueryExecution sets the same header upon starting a query,
+these operations can be used together:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryExecution")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will yield an Athena QueryExecution in the body for the query that was just started.
+
+- getQueryResults: this operation returns the results of a query that has succeeded.  The results are returned in the
+body in one of three formats.
+
+`StreamList` - the default - returns a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/paginators/GetQueryResultsIterable.html[GetQueryResultsIterable]
+in the body that can page through all results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The output of StreamList can be processed in various ways:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant(
+        "SELECT * FROM ("
+            + "    VALUES"
+            + "        (1, 'a'),"
+            + "        (2, 'b')"
+            + ") AS t (id, name)"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .split(body()).streaming()
+    .process(new Processor() {
+
+      @Override
+      public void process(Exchange exchange) {
+        GetQueryResultsResponse page = exchange
+                                        .getMessage()
+                                        .getBody(GetQueryResultsResponse.class);
+        for (Row row : page.resultSet().rows()) {
+          String line = row.data()
+                          .stream()
+                          .map(Datum::varCharValue)
+                          .collect(Collectors.joining(","));
+          System.out.println(line);
+        }
+      }
+    })
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will print the results of the query as CSV to the console.
+
+`SelectList` - returns a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/model/GetQueryResultsResponse.html[GetQueryResultsResponse]
+in the body containing at most 1,000 rows, plus the NextToken value as a header (`CamelAwsAthenaNextToken`),
+which can be used for manual pagination of results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=SelectList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/model/GetQueryResultsResponse.html[GetQueryResultsResponse]
+in the body plus the NextToken value as a header (`CamelAwsAthenaNextToken`), which can be used to manually page
+through the results 1,000 rows at a time.
+
+`S3Pointer` - returns an S3 path (e.g. `s3://bucket/path/`) pointing to the results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=S3Pointer")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return an
+S3 path (e.g. `s3://bucket/path/`) in the body pointing to the results.  The path will also be set in a header
+(`CamelAwsAthenaOutputLocation`).
+
+- listQueryExecutions: this operation returns a list of query execution IDs
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?operation=listQueryExecutions")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return a list of query executions in the body, plus the NextToken value as a
+header (`CamelAwsAthenaNextToken`) that can be used for manual pagination of results.
+
+- startQueryExecution: this operation starts the execution of a query.  It supports waiting for the query to
+complete before proceeding, and retrying the query based on a set of configurable failure conditions:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start the query `SELECT 1` and configure the
+results to be saved to `s3://bucket/path/`, but will not wait for the query
+to complete.
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start a query and wait up to 60 seconds for it to
+reach a status that indicates it is complete (one of SUCCEEDED, FAILED, CANCELLED,
+or UNKNOWN_TO_SDK_VERSION).  Upon failure, the query would not be retried.
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&initialDelay=10000&delay=1000&maxAttempts=3&retry=retryable&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start a query and wait up to 60 seconds for it to reach
+a status that indicates it is complete (one of SUCCEEDED, FAILED, CANCELLED,
+or UNKNOWN_TO_SDK_VERSION).  Upon failure, the query would be automatically
+retried up to 2 more times if the failure state indicates the query may
+succeed upon retry (Athena queries that fail with states such as `GENERIC_INTERNAL_ERROR`
+or "resource limit exhaustion" will sometimes succeed if retried).  While waiting
+for the query to complete, the query status would first be checked after an
+initial delay of 10 seconds, and subsequently every 1 second until the query completes.
+
+=== Putting it all together
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&maxAttempts=3&retry=retryable&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start the query and wait up to 60 seconds for it to
+complete.  Upon completion, getQueryResults puts the results of the query into
+the body of the message for further processing.
+
+For the sake of completeness, a similar outcome could be achieved with the following:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .loopDoWhile(simple("${header." + Athena2Constants.QUERY_EXECUTION_STATE + "} != 'SUCCEEDED'"))
+      .delay(1_000)
+      .to("aws2-athena://label?operation=getQueryExecution")
+    .end()
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+Caution: the preceding example would loop indefinitely if the query never reaches a
+status of SUCCEEDED (for example, if it ends as FAILED or CANCELLED).
+
+== Automatic detection of AmazonAthena client in registry
+
+The component is capable of detecting the presence of an AmazonAthena bean in the registry.
+If it's the only instance of that type it will be used as the client and you won't have to define it as a URI parameter, like the example above.
+This may be really useful for smarter configuration of the endpoint.
+
+== Dependencies
+
+Maven users will need to add the following dependency to their pom.xml.
+
+*pom.xml*
+
+[source,xml]
+---------------------------------------
+<dependency>
+    <groupId>org.apache.camel</groupId>
+    <artifactId>camel-aws2-athena</artifactId>
+    <version>${camel-version}</version>
+</dependency>
+---------------------------------------
+
+where `$\{camel-version\}` must be replaced by the actual version of Camel.
+
+// TODO where does this come from???
+include::camel-spring-boot::page$aws2-athena-starter.adoc[]

Review comment:
       Ah yeah :-) This comes from https://github.com/apache/camel-spring-boot, but it could be something coming in a subsequent PR.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r435880745



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2QueryHelper.java
##########
@@ -0,0 +1,452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.Exchange;
+import org.apache.camel.util.ObjectHelper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Clock;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import software.amazon.awssdk.services.athena.model.GetQueryExecutionResponse;
+import software.amazon.awssdk.services.athena.model.QueryExecutionState;
+
+
+/**
+ * Package-private class to encapsulate the logic of running queries, waiting for completion states, retrying, etc.
+ */
+class Athena2QueryHelper {
+  private static final Logger LOG = LoggerFactory.getLogger(Athena2QueryHelper.class);
+
+  // configuration ======================
+  private final Clock clock = Clock.systemUTC();
+  private final long waitTimeout;
+  private final long delay;
+  private final Set<String> retry;
+  private final int maxAttempts;
+  private final boolean resetWaitTimeoutOnAttempt;
+  private final long absoluteStartMs;
+
+  // state ==============================
+  private long currentDelay;
+  private int attempts;
+  private boolean isFailure;
+  private boolean isSuccess;
+  private boolean isRetry;
+  private long startMs;
+  private boolean interrupted;
+
+  Athena2QueryHelper(Exchange exchange, Athena2Configuration configuration) {
+    this.waitTimeout = determineWaitTimeout(exchange, configuration);
+    this.delay = determineDelay(exchange, configuration);
+    this.maxAttempts = determineMaxAttempts(exchange, configuration);
+    this.retry = determineRetry(exchange, configuration);
+    this.resetWaitTimeoutOnAttempt = determineResetWaitTimeoutOnRetry(exchange, configuration);
+    this.absoluteStartMs = now();
+
+    this.currentDelay = determineInitialDelay(exchange, configuration);
+  }
+
+  private long now() {
+    return clock.millis();
+  }
+
+  long getElapsedMillis() {
+    return now() - this.absoluteStartMs;
+  }
+
+  /**
+   * Record that a query attempt was made.  This is relevant b/c only so many attempts are permitted.
+   */
+  void markAttempt() {
+    if (attempts == 0) {
+      this.startMs = now();
+    } else {
+      if (resetWaitTimeoutOnAttempt) {
+        this.startMs = now();
+      }
+    }
+    ++attempts;
+
+    this.isFailure = false;
+    this.isSuccess = false;
+    this.isRetry = false;
+  }
+
+  int getAttempts() {
+    return this.attempts;
+  }
+
+  /**
+   * Should another query attempt be made?
+   */
+  boolean shouldAttempt() {
+    if (this.attempts >= this.maxAttempts) {
+      LOG.trace("AWS Athena start query execution used all {} attempts", this.maxAttempts);
+      return false;
+    }
+
+    if (this.interrupted) {
+      LOG.trace("AWS Athena start query execution thread was interrupted, will try no more");
+      return false;
+    }
+
+    if (this.isFailure) {
+      LOG.trace("AWS Athena start query execution detected permanent failure");
+      return false;
+    }
+
+    if (this.isSuccess) {
+      LOG.trace("AWS Athena start query execution detected success, will try no more");
+      return false;
+    }
+
+    // if this.isRetry, return true
+
+    return true;
+  }
+
+  /**
+   * Should there be a wait for the query to complete?
+   */
+  boolean shouldWait() {
+    long now = now();
+    long millisWaited = now - this.startMs;
+    if (millisWaited >= this.waitTimeout) {
+      LOG
+          .trace("AWS Athena start query execution waited for {}, which exceeded wait timeout of {}", millisWaited,
+              this.waitTimeout);
+      return false;
+    }
+
+    if (this.interrupted) {
+      LOG.trace("AWS Athena start query execution thread was interrupted, will wait no longer");
+      return false;
+    }
+
+    if (this.isFailure) {
+      LOG.trace("AWS Athena start query execution detected failure, will wait no longer");
+      return false;
+    }
+
+    if (this.isSuccess) {
+      LOG.trace("AWS Athena start query execution detected success, will wait no longer");
+      return false;
+    }
+
+    if (this.isRetry) {
+      LOG.trace("AWS Athena start query execution detected retry, will immediately attempt retry");
+      return false;
+    }
+
+    return true;
+  }
+
+  void doWait() {
+    try {
+      Thread.sleep(this.currentDelay);

Review comment:
       Personally I think it's ok, because with Athena we need to wait for results, but lets wait for more feedback @davsclaus @gnodet




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] omarsmak commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

omarsmak commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r435936429



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2Configuration.java
##########
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.RuntimeCamelException;
+import org.apache.camel.spi.Metadata;
+import org.apache.camel.spi.UriParam;
+import org.apache.camel.spi.UriParams;
+import org.apache.camel.spi.UriPath;
+
+import software.amazon.awssdk.core.Protocol;
+import software.amazon.awssdk.services.athena.AthenaClient;
+import software.amazon.awssdk.services.athena.model.EncryptionOption;
+import software.amazon.awssdk.services.athena.paginators.GetQueryResultsIterable;
+
+@UriParams
+public class Athena2Configuration implements Cloneable {
+
+  @UriPath(description = "Logical name")
+  @Metadata(required = true)
+  private String label;

Review comment:
       This options does not have getters/setters, would be good to add

##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2Configuration.java
##########
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.RuntimeCamelException;
+import org.apache.camel.spi.Metadata;
+import org.apache.camel.spi.UriParam;
+import org.apache.camel.spi.UriParams;
+import org.apache.camel.spi.UriPath;
+
+import software.amazon.awssdk.core.Protocol;
+import software.amazon.awssdk.services.athena.AthenaClient;
+import software.amazon.awssdk.services.athena.model.EncryptionOption;
+import software.amazon.awssdk.services.athena.paginators.GetQueryResultsIterable;
+
+@UriParams
+public class Athena2Configuration implements Cloneable {
+
+  @UriPath(description = "Logical name")
+  @Metadata(required = true)
+  private String label;
+
+  // common
+  @UriParam(defaultValue = "startQueryExecution",

Review comment:
       for these options, would be good to add the `label` to the annotation in order to have nice generated docs for these options




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-639537126


   For the label we could also add them with subsequent PRs, I guess it would be better to have the component merged and @jonchase could work on new PRs about this. What do you all think?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] omarsmak commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

omarsmak commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-639540242


   Fine with me @oscerd


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r436255573



##########
File path: components/camel-aws2-athena/src/main/docs/aws2-athena-component.adoc
##########
@@ -0,0 +1,537 @@
+[[aws2-athena-component]]
+= AWS 2 Athena Component
+:docTitle: AWS 2 Athena
+:artifactId: camel-aws2-athena
+:description: Access AWS Athena service using AWS SDK version 2.x.
+:since: 3.4
+:supportLevel: Preview
+:component-header: Only producer is supported
+
+*Since Camel {since}*
+
+*{component-header}*
+
+The Athena component supports running queries with https://aws.amazon.com/athena/[AWS Athena] and working with results.
+
+Prerequisites
+
+You must have a valid Amazon Web Services developer account, and be
+signed up to use Amazon Athena.  More information is available at
+https://aws.amazon.com/athena/[AWS Athena].
+
+== URI Format
+
+[source,java]
+------------------------------
+aws2-athena://label[?options]
+------------------------------
+
+You can append query options to the URI in the following format,
+?options=value&option2=value&...
+
+For example, to run a simple query, wait up to 60 seconds for completion, and log the results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .split(body()).streaming()
+    .to("log:out")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+Similarly, running the query and returning a path to the results in S3:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=S3Pointer")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+== URI Options
+
+
+// component options: START
+The AWS 2 Athena component supports 29 options, which are listed below.
+
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *accessKey* (producer) | Amazon AWS Access Key. |  | String
+| *amazonAthenaClient* (producer) | The AmazonAthena instance to use as the client. |  | AthenaClient
+| *configuration* (producer) | The component configuration. |  | Athena2Configuration
+| *database* (producer) | The Athena database to use. |  | String
+| *delay* (producer) | Milliseconds before the next poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 2000 | long
+| *encryptionOption* (producer) | The encryption type to use when storing query results in S3. One of SSE_S3, SSE_KMS, or CSE_KMS. The value can be one of: SSE_S3, SSE_KMS, CSE_KMS, null |  | EncryptionOption
+| *includeTrace* (producer) | Include useful trace information at the beginning of queries as an SQL comment (prefixed with --). | false | boolean
+| *initialDelay* (producer) | Milliseconds before the first poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1000 | long
+| *kmsKey* (producer) | For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. |  | String
+| *lazyStartProducer* (producer) | Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail during starting and cause the route to fail being started. By deferring this startup to be lazy then the startup failure can be handled during routing messages via Camel's routing error handlers. Beware that when the first message is processed then creating and starting the producer may take a little time and prolong the total processing time of the processing. | false | boolean
+| *maxAttempts* (producer) | Maximum number of times to attempt a query. Set to 1 to disable retries. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1 | int
+| *maxResults* (producer) | Max number of results to return for the given operation (if supported by the Athena API endpoint). If not set, will use the Athena API default for the given operation. |  | Integer
+| *nextToken* (producer) | Pagination token to use in the case where the response from the previous request was truncated. |  | String
+| *operation* (producer) | The Athena API function to call. The value can be one of: getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution | startQueryExecution | Athena2Operations
+| *outputLocation* (producer) | The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/. Ensure this value ends with a forward slash ('/'). |  | String
+| *outputType* (producer) | How query results should be returned. One of StreamList (default - return a GetQueryResultsIterable that can page through all results), SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results), S3Pointer (return an S3 path pointing to the results). The value can be one of: StreamList, SelectList, S3Pointer | StreamList | Athena2OutputType
+| *proxyHost* (producer) | To define a proxy host when instantiating the Athena client. |  | String
+| *proxyPort* (producer) | To define a proxy port when instantiating the Athena client. |  | Integer
+| *proxyProtocol* (producer) | To define a proxy protocol when instantiating the Athena client. The value can be one of: HTTP, HTTPS | HTTPS | Protocol
+| *queryExecutionId* (producer) | The unique ID identifying the query execution. |  | String
+| *queryString* (producer) | The SQL query to run. Except for simple queries, prefer setting this as the body of the Exchange or as a header using Athena2Constants.QUERY_STRING to avoid having to deal with URL encoding issues. |  | String
+| *region* (producer) | The region in which Athena client needs to work. When using this parameter, the configuration will expect the lowercase name of the region (for example ap-east-1). You'll need to use the name Region.EU_WEST_1.id(). |  | String
+| *resetWaitTimeoutOnRetry* (producer) | Reset the waitTimeout countdown in the event of a query retry. If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | true | boolean
+| *retry* (producer) | Optional comma separated list of error types to retry the query for. Use 'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted), 'generic' to retry 'GENERIC_INTERNAL_ERROR' failures, 'exhausted' to retry queries that have exhausted resource limits, 'always' to always retry regardless of failure condition, or 'never' or null to never retry (default). See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. The value can be one of: never, always, retryable, exhausted, generic | never | String
+| *secretKey* (producer) | Amazon AWS Secret Key. |  | String
+| *waitTimeout* (producer) | Optional max wait time in millis to wait for a successful query completion. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 0 | long
+| *workGroup* (producer) | The workgroup to use for running the query. |  | String
+| *basicPropertyBinding* (advanced) | Whether the component should use basic property binding (Camel 2.x) or the newer property binding with additional capabilities | false | boolean
+| *clientRequestToken* (advanced) | A unique string to ensure issued queries are idempotent. It is unlikely you will need to set this. |  | String
+|===
+// component options: END
+
+
+
+
+
+
+
+
+
+
+
+// endpoint options: START
+The AWS 2 Athena endpoint is configured using URI syntax:
+
+----
+aws2-athena:label
+----
+
+with the following path and query parameters:
+
+=== Path Parameters (1 parameters):
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *label* | *Required* Logical name |  | String
+|===
+
+
+=== Query Parameters (29 parameters):
+
+
+[width="100%",cols="2,5,^1,2",options="header"]
+|===
+| Name | Description | Default | Type
+| *accessKey* (producer) | Amazon AWS Access Key. |  | String
+| *amazonAthenaClient* (producer) | The AmazonAthena instance to use as the client. |  | AthenaClient
+| *database* (producer) | The Athena database to use. |  | String
+| *delay* (producer) | Milliseconds before the next poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 2000 | long
+| *encryptionOption* (producer) | The encryption type to use when storing query results in S3. One of SSE_S3, SSE_KMS, or CSE_KMS. The value can be one of: SSE_S3, SSE_KMS, CSE_KMS, null |  | EncryptionOption
+| *includeTrace* (producer) | Include useful trace information at the beginning of queries as an SQL comment (prefixed with --). | false | boolean
+| *initialDelay* (producer) | Milliseconds before the first poll for query execution status. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1000 | long
+| *kmsKey* (producer) | For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. |  | String
+| *lazyStartProducer* (producer) | Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail during starting and cause the route to fail being started. By deferring this startup to be lazy then the startup failure can be handled during routing messages via Camel's routing error handlers. Beware that when the first message is processed then creating and starting the producer may take a little time and prolong the total processing time of the processing. | false | boolean
+| *maxAttempts* (producer) | Maximum number of times to attempt a query. Set to 1 to disable retries. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 1 | int
+| *maxResults* (producer) | Max number of results to return for the given operation (if supported by the Athena API endpoint). If not set, will use the Athena API default for the given operation. |  | Integer
+| *nextToken* (producer) | Pagination token to use in the case where the response from the previous request was truncated. |  | String
+| *operation* (producer) | The Athena API function to call. The value can be one of: getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution | startQueryExecution | Athena2Operations
+| *outputLocation* (producer) | The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/. Ensure this value ends with a forward slash ('/'). |  | String
+| *outputType* (producer) | How query results should be returned. One of StreamList (default - return a GetQueryResultsIterable that can page through all results), SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results), S3Pointer (return an S3 path pointing to the results). The value can be one of: StreamList, SelectList, S3Pointer | StreamList | Athena2OutputType
+| *proxyHost* (producer) | To define a proxy host when instantiating the Athena client. |  | String
+| *proxyPort* (producer) | To define a proxy port when instantiating the Athena client. |  | Integer
+| *proxyProtocol* (producer) | To define a proxy protocol when instantiating the Athena client. The value can be one of: HTTP, HTTPS | HTTPS | Protocol
+| *queryExecutionId* (producer) | The unique ID identifying the query execution. |  | String
+| *queryString* (producer) | The SQL query to run. Except for simple queries, prefer setting this as the body of the Exchange or as a header using Athena2Constants.QUERY_STRING to avoid having to deal with URL encoding issues. |  | String
+| *region* (producer) | The region in which Athena client needs to work. When using this parameter, the configuration will expect the lowercase name of the region (for example ap-east-1). You'll need to use the name Region.EU_WEST_1.id(). |  | String
+| *resetWaitTimeoutOnRetry* (producer) | Reset the waitTimeout countdown in the event of a query retry. If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | true | boolean
+| *retry* (producer) | Optional comma separated list of error types to retry the query for. Use 'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted), 'generic' to retry 'GENERIC_INTERNAL_ERROR' failures, 'exhausted' to retry queries that have exhausted resource limits, 'always' to always retry regardless of failure condition, or 'never' or null to never retry (default). See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. The value can be one of: never, always, retryable, exhausted, generic | never | String
+| *secretKey* (producer) | Amazon AWS Secret Key. |  | String
+| *waitTimeout* (producer) | Optional max wait time in millis to wait for a successful query completion. See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | 0 | long
+| *workGroup* (producer) | The workgroup to use for running the query. |  | String
+| *basicPropertyBinding* (advanced) | Whether the endpoint should use basic property binding (Camel 2.x) or the newer property binding with additional capabilities | false | boolean
+| *clientRequestToken* (advanced) | A unique string to ensure issued queries are idempotent. It is unlikely you will need to set this. |  | String
+| *synchronous* (advanced) | Sets whether synchronous processing should be strictly used, or Camel is allowed to use asynchronous processing (if supported). | false | boolean
+|===
+// endpoint options: END
+
+
+
+
+
+
+
+
+
+
+Required Athena component options
+
+You have to provide the amazonAthenaClient in the
+Registry or your accessKey and secretKey to access
+the https://aws.amazon.com/athena/[AWS Athena] service.
+
+== Usage
+
+=== Message headers evaluated by the Athena producer
+
+Message headers listed here override the corresponding
+query parameters listed in "Query Parameters".
+
+[width="100%",cols="5%,5%,10%,75%,5%",options="header",]
+|=======================================================================
+|Operation |Header |Type |Description |Required
+
+|All |`CamelAwsAthenaOperation` |`String` |The operation to perform. Permitted values are getQueryExecution, getQueryResults, listQueryExecutions, startQueryExecution. Default is startQueryExecution. | No
+
+|startQueryExecution |`CamelAwsAthenaDatabase` |`String` |The Athena database to use. | No
+
+|getQueryExecution, getQueryResults |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution. | No
+
+|listQueryExecutions, startQueryExecution |`CamelAwsAthenaWorkGroup` |`String` |The workgroup to use for running the query. | No
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaNextToken` |`String` |Pagination token to use in the case where the response from the previous request was truncated. | No
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaMaxResults` |`Integer` |Max number of results to return for the given operation (if supported by the Athena API endpoint).
+If not set, will use the Athena API default for the given operation. | No
+
+|startQueryExecution |`CamelAwsAthenaIncludeTrace` |`boolean` |Include useful trace information at the beginning of queries as an SQL comment (prefixed with "--"). | No
+
+|startQueryExecution |`CamelAwsAthenaOutputLocation` |`String` |The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/.
+Ensure this value ends with a forward slash ('/'). | No
+
+|getQueryResults |`CamelAwsAthenaOutputType` |`Athena2OutputType` |How query results should be returned.  One of
+StreamList (default - return a GetQueryResultsIterable that can page through all results),
+SelectList (returns at most 1,000 rows at a time, plus a NextToken value as a header that can be used for manual pagination of results),
+S3Pointer (return an S3 path pointing to the results). | No
+
+|startQueryExecution |`CamelAwsAthenaClientRequestToken` |`String` |A unique string to ensure issued queries are idempotent.  It is unlikely you will need to set this. | No
+
+|startQueryExecution |`CamelAwsAthenaQueryString` |`String` |The SQL query to run.  Except for simple queries, prefer setting this as the `body` of the
+Exchange or as this header to avoid having to deal with URL encoding issues. | No
+
+|startQueryExecution |`CamelAwsAthenaEncryptionOption` |`String` |The encryption type to use when storing query results in S3.  One of SSE_S3, SSE_KMS, or CSE_KMS. | No
+
+|startQueryExecution |`CamelAwsAthenaKmsKey` |`String` |For SSE-KMS and CSE-KMS, this is the KMS key ARN or ID. | No
+
+|startQueryExecution |`CamelAwsAthenaWaitTimeout` |`long` |Optional max wait time in millis to wait for a successful query completion.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaInitialDelay` |`long` |Milliseconds before the first poll for query execution status.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaDelay` |`long` |Milliseconds before the next poll for query execution status.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaMaxAttempts` |`int` |Maximum number of times to attempt a query.  Set to 1 to disable retries.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaRetry` |`String` |Optional comma separated list of error types to retry the query for.  Use
+'retryable' to retry all retryable failure conditions (e.g. generic errors and resources exhausted),
+'generic' to retry 'GENERIC_INTERNAL_ERROR' failures,
+'exhausted' to retry queries that have exhausted resource limits,
+'always' to always retry regardless of failure condition, or
+'never' or null to never retry (default).
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|startQueryExecution |`CamelAwsAthenaResetWaitTimeoutOnRetry` |`boolean` |Reset the waitTimeout countdown in the event of a query retry.
+If set to true, potential max time spent waiting for queries is equal to waitTimeout x maxAttempts.
+See the section 'Waiting for Query Completion and Retrying Failed Queries' to learn more. | No
+
+|=======================================================================
+
+=== Message headers set by the Athena producer
+
+[width="100%",cols="5%,5%,10%,80%",options="header",]
+|=======================================================================
+|Operation |Header |Type |Description
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionState` |`software.amazon.awssdk.services.athena.model.QueryExecutionState` |The state of the query execution.  One of `QUEUED`, `RUNNING`, `SUCCEEDED`, `FAILED`, `CANCELLED`.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaOutputLocation` |`String` |The location in Amazon S3 where query results are stored, such as s3://path/to/query/bucket/.
+
+|getQueryExecution, getQueryResults, startQueryExecution |`CamelAwsAthenaQueryExecutionId` |`String` |The unique ID identifying the query execution.
+
+|getQueryResults, listQueryExecutions |`CamelAwsAthenaNextToken` |`String` |Pagination token to use in the case where the response from the previous request was truncated.
+
+|startQueryExecution |`CamelAwsAthenaStartQueryExecutionAttempts` |`int` |Total number of attempts made to run the query.  Will be greater than 1 if the query is retried.
+
+|startQueryExecution |`CamelAwsAthenaStartQueryExecutionElapsedMillis` |`long` |Total time in millis taken in startQueryExecution (mostly relevant when waiting for query completion within startQueryExecution).
+
+|=======================================================================
+
+=== Athena Producer operations
+
+The Camel-AWS Athena component provides the following operations on the producer side:
+
+ - getQueryExecution
+ - getQueryResults
+ - listQueryExecutions
+ - startQueryExecution
+
+=== Advanced AmazonAthena configuration
+
+If your Camel Application is running behind a firewall or if you need to
+have more control over the `AthenaClient` instance configuration, you can
+create your own instance and refer to it in your Camel aws2-athena component configuration:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?amazonAthenaClient=#client&...")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+=== Overriding query parameters with message headers
+
+Message headers listed in "Message headers evaluated by the Athena producer" override the corresponding
+query parameters listed in "Query Parameters".
+
+For example:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+     .setHeader(Athena2Constants.OUTPUT_LOCATION, constant("s3://other/location/"))
+     .to("aws2-athena:label?outputLocation=s3://foo/bar/")
+     .to("mock:result");
+--------------------------------------------------------------------------------
+
+Will cause the output location to be `s3://other/location/`.
+
+=== Athena Producer Operation examples
+
+- getQueryExecution: this operation returns information about a query given its query execution ID
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?operation=getQueryExecution&queryExecutionId=11111111-1111-1111-1111-111111111111")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will yield an
+https://docs.aws.amazon.com/athena/latest/APIReference/API_QueryExecution.html[Athena QueryExecution] in the body.
+
+The getQueryExecution operation also supports retrieving the query execution ID from a header
+(`CamelAwsAthenaQueryExecutionId`), and since startQueryExecution sets the same header upon starting a query,
+these operations can be used together:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryExecution")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will yield an Athena QueryExecution in the body for the query that was just started.
+
+- getQueryResults: this operation returns the results of a query that has succeeded.  The results are returned in the
+body in one of three formats.
+
+`StreamList` - the default - returns a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/paginators/GetQueryResultsIterable.html[GetQueryResultsIterable]
+in the body that can page through all results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The output of StreamList can be processed in various ways:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant(
+        "SELECT * FROM ("
+            + "    VALUES"
+            + "        (1, 'a'),"
+            + "        (2, 'b')"
+            + ") AS t (id, name)"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .split(body()).streaming()
+    .process(new Processor() {
+
+      @Override
+      public void process(Exchange exchange) {
+        GetQueryResultsResponse page = exchange
+                                        .getMessage()
+                                        .getBody(GetQueryResultsResponse.class);
+        for (Row row : page.resultSet().rows()) {
+          String line = row.data()
+                          .stream()
+                          .map(Datum::varCharValue)
+                          .collect(Collectors.joining(","));
+          System.out.println(line);
+        }
+      }
+    })
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will print the results of the query as CSV to the console.
+
+`SelectList` - returns a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/model/GetQueryResultsResponse.html[GetQueryResultsResponse]
+in the body containing at most 1,000 rows, plus the NextToken value as a header (`CamelAwsAthenaNextToken`),
+which can be used for manual pagination of results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=SelectList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return a
+https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/athena/model/GetQueryResultsResponse.html[GetQueryResultsResponse]
+in the body plus the NextToken value as a header (`CamelAwsAthenaNextToken`), which can be used to manually page
+through the results 1,000 rows at a time.
+
+`S3Pointer` - return an S3 path (e.g. `s3://bucket/path/`) pointing to the results:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=S3Pointer")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return an
+S3 path (e.g. `s3://bucket/path/`) in the body pointing to the results.  The path will also be set in a header
+(`CamelAwsAthenaOutputLocation`).
+
+- listQueryExecutions: this operation returns a list of query execution IDs
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .to("aws2-athena://label?operation=listQueryExecutions")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will return a list of query executions in the body, plus the NextToken value as a
+header (`CamelAwsAthenaNextToken`) that can be used for manual pagination of results.
+
+- startQueryExecution: this operation starts the execution of a query.  It supports waiting for the query to
+complete before proceeding, and retrying the query based on a set of configurable failure conditions:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start the query `SELECT 1` and configure the
+results to be saved to `s3://bucket/path/`, but will not wait for the query
+to complete.
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start a query and wait up to 60 seconds for it to
+reach a status that indicates it is complete (one of SUCCEEDED, FAILED, CANCELLED,
+or UNKNOWN_TO_SDK_VERSION).  Upon failure, the query would not be retried.
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&waitTimeout=60000&initialDelay=10000&delay=1000&maxAttempts=3&retry=retryable&outputLocation=s3://bucket/path/")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start a query and wait up to 60 seconds for it to reach
+a status that indicates it is complete (one of SUCCEEDED, FAILED, CANCELLED,
+or UNKNOWN_TO_SDK_VERSION).  Upon failure, the query would be automatically
+retried up to 2 more times if the failure state indicates the query may
+succeed upon retry (Athena queries that fail with states such as `GENERIC_INTERNAL_ERROR`
+or "resource limit exhaustion" will sometimes succeed if retried).  While waiting
+for the query to complete, the query status would first be checked after an
+initial delay of 10 seconds, and subsequently every 1 second until the query completes.
+
+=== Putting it all together
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?waitTimeout=60000&maxAttempts=3&retry=retryable&outputLocation=s3://bucket/path/")
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+The preceding example will start the query and wait up to 60 seconds for it to
+complete.  Upon completion, getQueryResults puts the results of the query into
+the body of the message for further processing.
+
+For the sake of completeness, a similar outcome could be achieved with the following:
+
+[source,java]
+--------------------------------------------------------------------------------
+from("direct:start")
+    .setBody(constant("SELECT 1"))
+    .to("aws2-athena://label?operation=startQueryExecution&outputLocation=s3://bucket/path/")
+    .loopDoWhile(simple("${header." + Athena2Constants.QUERY_EXECUTION_STATE + "} != 'SUCCEEDED'"))
+      .delay(1_000)
+      .to("aws2-athena://label?operation=getQueryExecution")
+    .end()
+    .to("aws2-athena://label?operation=getQueryResults&outputType=StreamList")
+    .to("mock:result");
+--------------------------------------------------------------------------------
+
+Caution: the preceding example would block indefinitely, however, if the query did not complete with a
+status of SUCCEEDED.
+
+== Automatic detection of AmazonAthena client in registry
+
+The component is capable of detecting the presence of an AmazonAthena bean in the registry.
+If it's the only instance of that type it will be used as the client and you won't have to define it as a URI parameter, like the example above.
+This may be really useful for smarter configuration of the endpoint.
+
+== Dependencies
+
+Maven users will need to add the following dependency to their pom.xml.
+
+*pom.xml*
+
+[source,xml]
+---------------------------------------
+<dependency>
+    <groupId>org.apache.camel</groupId>
+    <artifactId>camel-aws2-athena</artifactId>
+    <version>${camel-version}</version>
+</dependency>
+---------------------------------------
+
+where `$\{camel-version\}` must be replaced by the actual version of Camel.
+
+// TODO where does this come from???
+include::camel-spring-boot::page$aws2-athena-starter.adoc[]

Review comment:
       I created a ticket to track this and can finish it off soon.
   
   https://issues.apache.org/jira/browse/CAMEL-15152
   
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r436255765



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2ComponentVerifierExtension.java
##########
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.component.extension.verifier.DefaultComponentVerifierExtension;
+import org.apache.camel.component.extension.verifier.ResultBuilder;
+import org.apache.camel.component.extension.verifier.ResultErrorBuilder;
+import org.apache.camel.component.extension.verifier.ResultErrorHelper;
+
+import java.util.Map;

Review comment:
       Done.  




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r436255966



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2Configuration.java
##########
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.RuntimeCamelException;
+import org.apache.camel.spi.Metadata;
+import org.apache.camel.spi.UriParam;
+import org.apache.camel.spi.UriParams;
+import org.apache.camel.spi.UriPath;
+
+import software.amazon.awssdk.core.Protocol;
+import software.amazon.awssdk.services.athena.AthenaClient;
+import software.amazon.awssdk.services.athena.model.EncryptionOption;
+import software.amazon.awssdk.services.athena.paginators.GetQueryResultsIterable;
+
+@UriParams
+public class Athena2Configuration implements Cloneable {
+
+  @UriPath(description = "Logical name")
+  @Metadata(required = true)
+  private String label;

Review comment:
       Happy to add the getter/setter here if you feel it belongs.  For context, I copy/pasted the code from the aws2-ec2 component, which uses a similar pattern, and it did not have the getters/setters.  If I should add them here, should I open a small PR to add them to the EC2 component as well?  
   
   For reference, the URI format is `aws2-athena://label[?options]`.  Would it be a normal/desirable thing for the label part of the URI to get changed?  




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on a change in pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on a change in pull request #3891:
URL: https://github.com/apache/camel/pull/3891#discussion_r436256730



##########
File path: components/camel-aws2-athena/src/main/java/org/apache/camel/component/aws2/athena/Athena2Configuration.java
##########
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.aws2.athena;
+
+import org.apache.camel.RuntimeCamelException;
+import org.apache.camel.spi.Metadata;
+import org.apache.camel.spi.UriParam;
+import org.apache.camel.spi.UriParams;
+import org.apache.camel.spi.UriPath;
+
+import software.amazon.awssdk.core.Protocol;
+import software.amazon.awssdk.services.athena.AthenaClient;
+import software.amazon.awssdk.services.athena.model.EncryptionOption;
+import software.amazon.awssdk.services.athena.paginators.GetQueryResultsIterable;
+
+@UriParams
+public class Athena2Configuration implements Cloneable {
+
+  @UriPath(description = "Logical name")
+  @Metadata(required = true)
+  private String label;
+
+  // common
+  @UriParam(defaultValue = "startQueryExecution",

Review comment:
       I've added the "producer" label to them.  I tried adding more than one label as comma-separated values, which the docs for `UriParam#label` say is supported, but the extra labels were not showing up in the generated docs.  Let me know if this looks ok now.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-640033317


   Just pushed my last change set for some formatting cleanup.  Let me know if there is anything else needed before merge. 👍


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-640203521


   @oscerd I included the link to the Spring Boot starter docs
   
   related PR for that - https://github.com/apache/camel-spring-boot/pull/84


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-640390220


   I'm merging the PR.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-640399754


   Merged, thanks a lot!


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] oscerd closed pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

oscerd closed pull request #3891:
URL: https://github.com/apache/camel/pull/3891


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]


Reply | Threaded
Open this post in threaded view
|

[GitHub] [camel] jonchase commented on pull request #3891: CAMEL-14992: initial support for AWS2 Athena component

GitBox
In reply to this post by GitBox

jonchase commented on pull request #3891:
URL: https://github.com/apache/camel/pull/3891#issuecomment-640484031


   👍


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[hidden email]