public class DagUtils extends Object
| Modifier and Type | Field and Description |
|---|---|
static String |
TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX |
static String |
TEZ_MERGE_WORK_FILE_PREFIXES |
static String |
TEZ_TMP_DIR_KEY |
| Modifier and Type | Method and Description |
|---|---|
void |
addCredentials(BaseWork work,
org.apache.tez.dag.api.DAG dag)
Set up credentials for the base work on secure clusters
|
org.apache.hadoop.mapred.JobConf |
createConfiguration(HiveConf hiveConf)
Creates and initializes a JobConf object that can be used to execute
the DAG.
|
org.apache.tez.dag.api.Edge |
createEdge(org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex v,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
Given two vertices and the configuration for the source vertex, createEdge
will create an Edge object that connects the two.
|
org.apache.tez.dag.api.GroupInputEdge |
createEdge(org.apache.tez.dag.api.VertexGroup group,
org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
Given a Vertex group and a vertex createEdge will create an
Edge between them.
|
org.apache.tez.dag.api.PreWarmVertex |
createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf,
int numContainers,
Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) |
org.apache.hadoop.fs.Path |
createTezDir(org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.conf.Configuration conf)
createTezDir creates a temporary directory in the scratchDir folder to
be used with Tez.
|
org.apache.tez.dag.api.Vertex |
createVertex(org.apache.hadoop.mapred.JobConf conf,
BaseWork work,
org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.yarn.api.records.LocalResource appJarLr,
List<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr,
org.apache.hadoop.fs.FileSystem fileSystem,
Context ctx,
boolean hasChildren,
TezWork tezWork,
TezWork.VertexType vertexType)
Create a vertex from a given work object.
|
String |
getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr) |
static org.apache.hadoop.yarn.api.records.Resource |
getContainerResource(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.fs.Path |
getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) |
String |
getExecJarPathLocal() |
org.apache.hadoop.fs.FileStatus |
getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) |
static DagUtils |
getInstance()
Singleton
|
String |
getResourceBaseName(org.apache.hadoop.fs.Path path) |
org.apache.hadoop.fs.Path |
getTezDir(org.apache.hadoop.fs.Path scratchDir)
Gets the tez dir that belongs to the hive scratch dir
|
org.apache.hadoop.mapred.JobConf |
initializeVertexConf(org.apache.hadoop.mapred.JobConf conf,
Context context,
BaseWork work)
Creates and initializes the JobConf object for a given BaseWork object.
|
org.apache.hadoop.yarn.api.records.LocalResource |
localizeResource(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.yarn.api.records.LocalResourceType type,
org.apache.hadoop.conf.Configuration conf) |
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFiles(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf,
String[] inputOutputJars)
Localizes files, archives and jars from a provided array of names.
|
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFilesFromConf(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf)
Localizes files, archives and jars the user has instructed us
to provide on the cluster as resources for execution.
|
static org.apache.hadoop.fs.FileStatus |
validateTargetDir(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf) |
public static final String TEZ_TMP_DIR_KEY
public static final String TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX
public static final String TEZ_MERGE_WORK_FILE_PREFIXES
public org.apache.tez.dag.api.GroupInputEdge createEdge(org.apache.tez.dag.api.VertexGroup group,
org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
throws IOException
group - The parent VertexGroup
vConf - The job conf of one of the parent (grouped) vertices
w - The child vertex
edgeProp - the edge property of connection between the two endpoints.
IOException
public org.apache.tez.dag.api.Edge createEdge(org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex v,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
throws IOException
vConf - JobConf of the first (source) vertex
v - The first vertex (source)
w - The second vertex (sink)
IOException
public static org.apache.hadoop.yarn.api.records.Resource getContainerResource(org.apache.hadoop.conf.Configuration conf)
public org.apache.tez.dag.api.PreWarmVertex createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf,
int numContainers,
Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources)
throws IOException,
org.apache.tez.dag.api.TezException
numContainers - number of containers to pre-warm
localResources - additional resources to pre-warm with
IOException
org.apache.tez.dag.api.TezException
public org.apache.hadoop.fs.Path getDefaultDestDir(org.apache.hadoop.conf.Configuration conf)
throws LoginException,
IOException
conf -
LoginException - if we are unable to figure out user information
IOException - when any dfs operation fails.
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
conf -
IOException - when hdfs operation fails
LoginException - when getDefaultDestDir fails with the same exception
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFiles(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf, String[] inputOutputJars) throws IOException, LoginException
hdfsDirPathStr - Destination directory in HDFS.
conf - Configuration.
inputOutputJars - The file names to localize.
IOException - when hdfs operation fails.
LoginException - when getDefaultDestDir fails with the same exception
public org.apache.hadoop.fs.FileStatus getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf)
throws IOException,
LoginException
IOException
LoginException
public static org.apache.hadoop.fs.FileStatus validateTargetDir(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf)
throws IOException
IOException
public String getExecJarPathLocal() throws URISyntaxException
URISyntaxException
public String getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr)
public String getResourceBaseName(org.apache.hadoop.fs.Path path)
path - the string from which we try to determine the resource base name
public org.apache.hadoop.yarn.api.records.LocalResource localizeResource(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.yarn.api.records.LocalResourceType type,
org.apache.hadoop.conf.Configuration conf)
throws IOException
src - path to the source for the resource
dest - path in hdfs for the resource
type - local resource type (File/Archive)
conf -
IOException - when any file system related call fails.
public org.apache.hadoop.mapred.JobConf createConfiguration(HiveConf hiveConf) throws IOException
hiveConf - Current hiveConf for the execution
IOException
public org.apache.hadoop.mapred.JobConf initializeVertexConf(org.apache.hadoop.mapred.JobConf conf,
Context context,
BaseWork work)
conf - Any configurations in conf will be copied to the resulting new JobConf object.
work - BaseWork will be used to populate the configuration object.
public org.apache.tez.dag.api.Vertex createVertex(org.apache.hadoop.mapred.JobConf conf,
BaseWork work,
org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.yarn.api.records.LocalResource appJarLr,
List<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr,
org.apache.hadoop.fs.FileSystem fileSystem,
Context ctx,
boolean hasChildren,
TezWork tezWork,
TezWork.VertexType vertexType)
throws Exception
conf - JobConf to be used for this execution unit
work - The instance of BaseWork representing the actual work to be performed by this vertex.
scratchDir - HDFS scratch dir for this execution unit.
appJarLr - Local resource for hive-exec.
additionalLr -
fileSystem - FS corresponding to scratchDir and LocalResources
ctx - This query's context
Exception
public void addCredentials(BaseWork work, org.apache.tez.dag.api.DAG dag)
public org.apache.hadoop.fs.Path createTezDir(org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.conf.Configuration conf)
throws IOException
IOException
public org.apache.hadoop.fs.Path getTezDir(org.apache.hadoop.fs.Path scratchDir)
public static DagUtils getInstance()
Copyright © 2017 The Apache Software Foundation. All rights reserved.