全部产品
Search
文档中心

云原生大数据计算服务 MaxCompute:多线程上传示例

更新时间:Aug 04, 2023

本文通过代码示例向您介绍如何使用TableTunnel接口实现多线程上传。

import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import com.aliyun.odps.Column;
 import com.aliyun.odps.Odps;
 import com.aliyun.odps.PartitionSpec;
 import com.aliyun.odps.TableSchema;
 import com.aliyun.odps.account.Account;
 import com.aliyun.odps.account.AliyunAccount;
 import com.aliyun.odps.data.Record;
 import com.aliyun.odps.data.RecordWriter;
 import com.aliyun.odps.tunnel.TableTunnel;
 import com.aliyun.odps.tunnel.TunnelException;
 import com.aliyun.odps.tunnel.TableTunnel.UploadSession;
 class UploadThread implements Callable<Boolean> {
         private long id;
         private RecordWriter recordWriter;
         private Record record;
         private TableSchema tableSchema;
         public UploadThread(long id, RecordWriter recordWriter, Record record,
                         TableSchema tableSchema) {
                 this.id = id;
                 this.recordWriter = recordWriter;
                 this.record = record;
                 this.tableSchema = tableSchema;
         }
         @Override
         public Boolean call() {
                 for (int i = 0; i < tableSchema.getColumns().size(); i++) {
                         Column column = tableSchema.getColumn(i);
                         switch (column.getType()) {
                         case BIGINT:
                                 record.setBigint(i, 1L);
                                 break;
                         case BOOLEAN:
                                 record.setBoolean(i, true);
                                 break;
                         case DATETIME:
                                 record.setDatetime(i, new Date());
                                 break;
                         case DOUBLE:
                                 record.setDouble(i, 0.0);
                                 break;
                         case STRING:
                                 record.setString(i, "sample");
                                 break;
                         default:
                                 throw new RuntimeException("Unknown column type: "
                                                 + column.getType());
                         }
                 }
                boolean success = true;
                try {
	            for (int i = 0; i < 10; i++) {
                         recordWriter.write(record);
	            }
                } catch (IOException e) {
 	            success = false;
                    e.printStackTrace();
                } finally {
 	            recordWriter.close();
                }
                    return success;
               
 }
 public class UploadThreadSample {
         // 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户
         // 此处以把AccessKey 和 AccessKeySecret 保存在环境变量为例说明。您也可以根据业务需要,保存到配置文件里
         // 强烈建议不要把 AccessKey 和 AccessKeySecret 保存到代码里,会存在密钥泄漏风险
			 	 private static String accessId = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID");
				 private static String accessKey = System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
         private static String odpsUrl = "<http://service.odps.aliyun.com/api>";
         private static String tunnelUrl = "<http://dt.cn-shanghai.maxcompute.aliyun-inc.com>";
         //设置tunnelUrl,若需要使用内网时必须设置,否则默认公网。此处给的是华东2经典网络Tunnel Endpoint,其他Region请参见Endpoint。
         private static String project = "<your project>";
         private static String table = "<your table name>";
         private static String partition = "<your partition spec>";
         private static int threadNum = 10;
         public static void main(String args[]) {
                 Account account = new AliyunAccount(accessId, accessKey);
                 Odps odps = new Odps(account);
                 odps.setEndpoint(odpsUrl);
                 odps.setDefaultProject(project);
                 try {
                         TableTunnel tunnel = new TableTunnel(odps);
                         tunnel.setEndpoint(tunnelUrl);//tunnelUrl设置
                         PartitionSpec partitionSpec = new PartitionSpec(partition);
                         UploadSession uploadSession = tunnel.createUploadSession(project,
                                         table, partitionSpec);
                         System.out.println("Session Status is : "
                                         + uploadSession.getStatus().toString());
                         ExecutorService pool = Executors.newFixedThreadPool(threadNum);
                         ArrayList<Callable<Boolean>> callers = new ArrayList<Callable<Boolean>>();
                         for (int i = 0; i < threadNum; i++) {
                                 RecordWriter recordWriter = uploadSession.openRecordWriter(i);
                                 Record record = uploadSession.newRecord();
                                 callers.add(new UploadThread(i, recordWriter, record,
                                                 uploadSession.getSchema()));
                         }
                         pool.invokeAll(callers);
                         pool.shutdown();
                         Long[] blockList = new Long[threadNum];
                         for (int i = 0; i < threadNum; i++)
                                 blockList[i] = Long.valueOf(i);
                         uploadSession.commit(blockList);
                         System.out.println("upload success!");
                 } catch (TunnelException e) {
                         e.printStackTrace();
                 } catch (IOException e) {
                         e.printStackTrace();
                 } catch (InterruptedException e) {
                         e.printStackTrace();
                 }
         }
 }
说明

对于Tunnel Endpoint,支持指定或者不指定。

  • 如果指定,按照指定的Endpoint下载。

  • 如果不指定,默认为公网Endpoint。

  • 文中使用的是华东2经典网络Tunnel Endpoint,其他region的Tunnel Endpoint设置可以参考文档配置Endpoint