Использую AWS Lake Formation для импорта базы данных MySQL в S3. Для импорта данных использовал blueprint (шаблон). Но задание завершилось с ошибкой, приведённой ниже вместе с трассировкой стека.
Expression #4 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'DBName.TTT.Num' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
Я знаю, что могу обойти эту проблему, отключив режим only_full_group_by в моей базе данных MySQL. Но разве разработчики AWS не должны знать, что нельзя включать в предложение SELECT столбцы, не входящие в GROUP BY, — или же добавлять в GROUP BY все столбцы из предложения SELECT?
Кроме того, почему это происходит только с несколькими таблицами, но не со всеми?
Есть ли здесь внутренние разработчики AWS Lake Formation?
Traceback (most recent call last):
File "script_2020-01-13-17-14-42.py", line 320, in <module>
main()
File "script_2020-01-13-17-14-42.py", line 316, in main
driver.run_transform()
File "script_2020-01-13-17-14-42.py", line 297, in run_transform
transform.transform()
File "script_2020-01-13-17-14-42.py", line 90, in transform
self._snapshot_transform()
File "script_2020-01-13-17-14-42.py", line 78, in _snapshot_transform
table_name=self.source.table_name)
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/PyGlue.zip/awsglue/dynamicframe.py", line 611, in from_catalog
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/PyGlue.zip/awsglue/context.py", line 142, in create_dynamic_frame_from_catalog
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/PyGlue.zip/awsglue/data_source.py", line 36, in getFrame
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
print("[INFO] GRANTed Select permission to {} on table {} :: {}".format(creator_arn, db_name, table_name))
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o1287.getDynamicFrame.
: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: Expression #4 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'DBName.TTT.Num' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:377)
at com.mysql.jdbc.Util.getInstance(Util.java:360)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:978)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3887)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3823)
at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2435)
at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2582)
at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2530)
at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:1907)
at com.mysql.jdbc.PreparedStatement.executeQuery(PreparedStatement.java:2030)
at org.apache.spark.sql.jdbc.glue.GlueJDBCSource$.resolveTable(GlueJDBCSource.scala:47)
at org.apache.spark.sql.jdbc.glue.GlueJDBCSource$.createRelation(GlueJDBCSource.scala:30)
at com.amazonaws.services.glue.util.JDBCWrapper.tableDF(JDBCUtils.scala:805)
at com.amazonaws.services.glue.util.NoCondition$.tableDF(JDBCUtils.scala:84)
at com.amazonaws.services.glue.util.NoJDBCPartitioner$.tableDF(JDBCUtils.scala:123)
at com.amazonaws.services.glue.JDBCDataSource.getDynamicFrame(DataSource.scala:745)
at com.amazonaws.services.glue.DataSource$class.getDynamicFrame(DataSource.scala:77)
at com.amazonaws.services.glue.SparkSQLDataSource.getDynamicFrame(DataSource.scala:586)
at sun.reflect.GeneratedMethodAccessor107.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)