我正在尝试将数据从 Hive 导出到 Teradata。下面是我的代码:
/* Code Start */
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Command-line program that reads rows from a Hive table over JDBC and
 * loads them into a Teradata staging table.
 */
public class HiveToTd {
// Credentials for the Teradata and Hive JDBC connections (masked in this listing).
private final static String tdUser = "**********";
private final static String tdPass = "**********";
private final static String hiveUser = "**********";
private final static String hivePass = "**********";
// Fully qualified class name of the Hive JDBC driver, loaded via Class.forName in main.
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
/**
* Entry point: connects to Teradata and Hive, runs a SELECT against Hive,
* prints a row count, and then loads the remaining rows into Teradata.
*
* @param args command-line arguments; not referenced by this method
* @throws ClassNotFoundException if either JDBC driver class cannot be loaded
* @throws SQLException if connecting, querying, or loading fails
*/
public static void main(String[] args) throws ClassNotFoundException, SQLException {
// Get the Teradata connection
Class.forName("com.teradata.jdbc.TeraDriver");
// The URL parameters select TMODE=ANSI and TYPE=FASTLOAD for this connection.
Connection tdcon = DriverManager.getConnection("jdbc:teradata://database.XXXXX.com/TMODE=ANSI,TYPE=FASTLOAD", tdUser, tdPass);
System.out.println("Connected to Teradata.");
// Get our hive connection
Class.forName(driverName);
System.out.println("Connecting to Hive.");
Connection hivecon = DriverManager.getConnection("jdbc:hive2://bigdatabase.xxxxxx.com:10000/default", hiveUser, hivePass);
System.out.println("Connected to Hive.");
// Select our table from Hive
Statement hst = hivecon.createStatement();
System.out.println("Executing Statement");
ResultSet hrs = hst.executeQuery("SELECT COL1, COL2, COL3 FROM db.table limit 100");
System.out.println("Get DATA");
int count= 0;
// NOTE(review): `if` calls next() exactly once, so count can never exceed 1
// no matter how many rows the query returns; counting every row needs a loop.
// This call also moves the cursor past the first row of hrs.
if(hrs.next())
{
count++;
}
System.out.println(count);
count 的输出是“1”而不是 100。我已确认 Hive 的这张表中有超过 100 万条记录。我究竟哪里做错了?它似乎只返回了标题行,仅此而已。我原本怀疑是连接的问题,但它返回的标题行是正确的,所以一定是别的原因。
所以看起来代码确实可以正常工作了。感谢 Thusitha 帮我解决了这个问题。
接下来这一段比较麻烦,它的作用是通过 FastLoad 把数据快速加载到 Teradata(TD)。
// Empty the staging table
// NOTE(review): the Statement created here is never closed.
tdcon.createStatement().executeUpdate("delete from dbname.staging_table");
// Create prepared statement for Teradata
System.out.println("Begin load to Teradata");
// Auto-commit is switched off so the batched inserts are committed by the explicit commit() below.
tdcon.setAutoCommit(false);
PreparedStatement ps = tdcon.prepareStatement("insert into dbname.staging_table values (?,?,?)");
System.out.println("Start Fastload");
// i is the 1-based number of the row currently being copied.
// The loop resumes from wherever the cursor of hrs already is, so rows consumed
// by earlier next() calls are not loaded.
int i;
for (i = 1; hrs.next(); i++){
// Copy the three selected columns as strings into the insert parameters.
ps.setString(1, hrs.getString(1));
ps.setString(2, hrs.getString(2));
ps.setString(3, hrs.getString(3));
ps.addBatch();
System.out.println(i);
// Send the accumulated batch every 10000 rows.
if (i % 10000 == 0){
ps.executeBatch();
}
}
// NOTE(review): after the loop i equals (rows read + 1), so this guard is off by one.
// With zero rows, or an exact multiple of 10000, it calls executeBatch() on an empty
// batch; when the row count is one short of a multiple of 10000 it skips the final
// executeBatch() although rows are still pending.
if (i % 10000 != 0){
ps.executeBatch();
}
tdcon.commit();
tdcon.setAutoCommit(true);
ps.close();
hrs.close();
// Copy the staged rows into the final table.
sideLoad("dbname.staging_table", "dbname.final_table", tdcon);
// NOTE(review): these close() calls are skipped if any statement above throws,
// and the Hive Statement hst is never closed.
tdcon.close();
hivecon.close();
}
/**
 * Copies every row of one table into another by executing
 * {@code INSERT INTO <toTable> SELECT * FROM <fromTable>} on the given connection.
 *
 * <p>The table names are concatenated into the SQL text unchanged, so callers
 * must pass trusted identifiers only.
 *
 * @param fromTable name of the table to read from
 * @param toTable name of the table to insert into
 * @param conn open connection to run the statement on; it is not closed here
 * @return the update count returned by {@code executeUpdate}
 * @throws SQLException if the statement cannot be created, executed, or closed
 */
public static int sideLoad(String fromTable, String toTable, Connection conn) throws SQLException{
    String sql = "INSERT INTO " + toTable + " SELECT * FROM " + fromTable;
    // try-with-resources releases the Statement even when executeUpdate throws;
    // the original left it open.
    try (Statement stmt = conn.createStatement()) {
        return stmt.executeUpdate(sql);
    }
}
}
在输出“Start Fastload”消息之后,我得到的错误是:
Exception in thread "main" java.sql.SQLException: [Teradata JDBC Driver] [TeraJDBC 15.00.00.20] [Error 1103] [SQLState HY000] Cannot add an empty batch of rows to a database table
at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:94)
at com.teradata.jdbc.jdbc_4.util.ErrorFactory.makeDriverJDBCException(ErrorFactory.java:64)
at com.teradata.jdbc.jdbc.fastload.FastLoadManagerPreparedStatement.executeBatch(FastLoadManagerPreparedStatement.java:2049)
at com.optus.insights.HiveToTd.main(HiveToTd.java:84)
你的代码里用的是 if 条件,所以它只会移动到 ResultSet 的第一行,count 也只会加 1。
如果要遍历所有行,请使用 while 循环而不是 if。
请将代码改成下面这样:
// Each next() call advances the cursor by one row and returns false once the
// rows are exhausted, so count ends up equal to the number of rows visited.
while(hrs.next()){
count++;
}