<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://192.168.0.121:3306/hive?autoReconnect=true&amp;useSSL=true&amp;createDatabaseIfNotExist=true&amp;characterEncoding=utf8</value>
  <description>JDBC connection string used by Hive Metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>JDBC Driver class</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
  <description>Metastore database user name</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>Qscf123%^</value>
  <description>Metastore database password</description>
</property>
<property>
  <name>hive.exec.local.scratchdir</name>
  <value>/tmp/tester</value>
  <description>Local scratch space for Hive jobs</description>
</property>
<property>
  <name>hive.downloaded.resources.dir</name>
  <value>/tmp/tester/hive_resource</value>
  <description>Temporary local directory for added resources in the remote file system.</description>
</property>
<property>
  <name>hive.compactor.initiator.on</name>
  <value>true</value>
  <description>
    Whether to run the initiator and cleaner threads on this metastore instance or not.
    Set this to true on one instance of the Thrift metastore service as part of turning on Hive transactions.
    For a complete list of parameters required for turning on transactions, see hive.txn.manager.
  </description>
</property>
<property>
  <name>hive.compactor.worker.threads</name>
  <value>2</value>
  <description>
    How many compactor worker threads to run on this metastore instance.
    Set this to a positive number on one or more instances of the Thrift metastore service as part of turning on Hive transactions.
    For a complete list of parameters required for turning on transactions, see hive.txn.manager.
    Worker threads spawn MapReduce jobs to do compactions. They do not do the compactions themselves.
    Increasing the number of worker threads will decrease the time it takes tables or partitions to be compacted
    once they are determined to need compaction. It will also increase the background load on the Hadoop cluster
    as more MapReduce jobs will be running in the background.
  </description>
</property>
<property>
  <name>hive.support.concurrency</name>
  <value>true</value>
  <description>
    Whether Hive supports concurrency control or not.
    A ZooKeeper instance must be up and running when using the zookeeper Hive lock manager.
  </description>
</property>
<property>
  <name>hive.enforce.bucketing</name>
  <value>true</value>
  <description>Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced.</description>
</property>
<property>
  <name>hive.exec.dynamic.partition</name>
  <value>true</value>
  <description>Whether or not to allow dynamic partitions in DML/DDL.</description>
</property>
<property>
  <name>hive.exec.dynamic.partition.mode</name>
  <value>nonstrict</value>
  <description>
    In strict mode, the user must specify at least one static partition in case the user accidentally overwrites all partitions.
    In nonstrict mode all partitions are allowed to be dynamic.
  </description>
</property>
<property>
  <name>hive.txn.manager</name>
  <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
  <description>
    Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive transactions, which also requires
    appropriate settings for hive.compactor.initiator.on, hive.compactor.worker.threads, hive.support.concurrency (true),
    hive.enforce.bucketing (true), and hive.exec.dynamic.partition.mode (nonstrict).
    The default DummyTxnManager replicates pre-Hive-0.13 behavior and provides no transactions.
  </description>
</property>
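As a quick sanity check (not something Hive itself requires), the metastore database settings above can be verified with a plain JDBC connection from Scala. The sketch below reuses the exact ConnectionURL, user name, and password from the config; the object name MetastoreConnectionCheck is just for illustration, and it assumes the MySQL Connector/J jar is on the classpath.

// Sanity check: connect to the Hive metastore's MySQL database with the same
// JDBC URL, user, and password configured in hive-site.xml above.
import java.sql.DriverManager

object MetastoreConnectionCheck {
  def main(args: Array[String]): Unit = {
    val url  = "jdbc:mysql://192.168.0.121:3306/hive?autoReconnect=true&useSSL=true&createDatabaseIfNotExist=true&characterEncoding=utf8"
    val conn = DriverManager.getConnection(url, "hive", "Qscf123%^")
    try {
      val rs = conn.createStatement().executeQuery("SELECT VERSION()")
      while (rs.next()) println(s"Connected, MySQL version: ${rs.getString(1)}")
    } finally {
      conn.close()
    }
  }
}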
-- Create a plain table and load some data into it first
CREATE TABLE test_df2 (v1 STRING, v2 STRING, v3 DOUBLE)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- Load the data
LOAD DATA LOCAL INPATH '/home/tester/test_df2.csv' OVERWRITE INTO TABLE test_df2;

-- Create a transactional table
CREATE TABLE test_df2_transaction (v1 STRING, v2 STRING, v3 DOUBLE)
CLUSTERED BY (v1) INTO 3 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');

-- Copy the data into the transactional table
INSERT INTO test_df2_transaction SELECT * FROM test_df2;

-- Check the data
SELECT * FROM test_df2_transaction;
/*
id8 d -0.73893442
id5 a -0.79378789
id2 b 0.73282957
id6 b -0.36969293
id3 c 0.00552144
id7 c -1.66246829
id4 d 0.83103357
id1 a 0.01991423
*/

-- Try an UPDATE
UPDATE test_df2_transaction SET v2='c' WHERE v1='id1';
SELECT * FROM test_df2_transaction;
/*
id8 d -0.73893442
id5 a -0.79378789
id2 b 0.73282957
id11 a 2.001
id6 b -0.36969293
id3 c 0.00552144
id9 b 2.001
id7 c -1.66246829
id4 d 0.83103357
id1 c 2.001
*/

-- Try a DELETE
DELETE FROM test_df2_transaction WHERE v1='id1';
SELECT * FROM test_df2_transaction;
/*
id8 d -0.73893442
id5 a -0.79378789
id2 b 0.73282957
id6 b -0.36969293
id3 c 0.00552144
id7 c -1.66246829
id4 d 0.83103357
*/

-- Try an INSERT
INSERT INTO TABLE test_df2_transaction VALUES ('id9', 'b', 2.001), ('id1', 'b', 2.001), ('id11', 'a', 2.001);
SELECT * FROM test_df2_transaction;
/*
id8 d -0.73893442
id5 a -0.79378789
id2 b 0.73282957
id11 a 2.001
id6 b -0.36969293
id3 c 0.00552144
id9 b 2.001
id7 c -1.66246829
id4 d 0.83103357
id1 b 2.001
*/
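For anyone reproducing the walkthrough, the /home/tester/test_df2.csv file referenced by LOAD DATA LOCAL INPATH can be regenerated from the eight rows shown in the first SELECT output above. A minimal Scala sketch (the object name WriteTestCsv is just for illustration):

// Recreate the sample CSV used by the LOAD DATA LOCAL INPATH statement,
// using the rows visible in the first SELECT output.
import java.io.PrintWriter

object WriteTestCsv {
  def main(args: Array[String]): Unit = {
    val rows = Seq(
      ("id1", "a", 0.01991423), ("id2", "b", 0.73282957),
      ("id3", "c", 0.00552144), ("id4", "d", 0.83103357),
      ("id5", "a", -0.79378789), ("id6", "b", -0.36969293),
      ("id7", "c", -1.66246829), ("id8", "d", -0.73893442)
    )
    val out = new PrintWriter("/home/tester/test_df2.csv")
    try rows.foreach { case (v1, v2, v3) => out.println(s"$v1,$v2,$v3") }
    finally out.close()
  }
}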
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("spark on hive")
  .config("spark.sql.warehouse.dir", "hdfs://hc1/spark")
  .enableHiveSupport()
  .getOrCreate()
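Assuming hive-site.xml is visible to Spark so the session above talks to the same metastore, a quick read of the plain table confirms Spark-on-Hive is wired up before attempting any DML:

// Plain reads through the Hive metastore work as usual
spark.sql("show tables").show()
spark.sql("select * from test_df2").show()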
spark.sql("update test_df2_transaction set v2='c' where v1='id1'").show() # org.apache.spark.sql.catalyst.parser.ParseException spark.sql("delete from test_df2_transaction where v1='id1'").show() # org.apache.spark.sql.catalyst.parser.ParseException