public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements Serializable
| Modifier and Type | Class and Description |
|---|---|
| static class | FileSinkOperator.Counter — Counters. |
| class | FileSinkOperator.FSPaths |
| static interface | FileSinkOperator.RecordWriter — RecordWriter. |
Operator.OperatorFunc, Operator.State

| Modifier and Type | Field and Description |
|---|---|
| protected boolean | autoDelete |
| protected boolean | bDynParts |
| protected Map<Integer,Integer> | bucketMap |
| protected String | childSpecPathDynLinkedPartitions |
| protected long | cntr |
| protected List<String> | dpColNames |
| protected DynamicPartitionCtx | dpCtx |
| protected int | dpStartCol |
| protected List<String> | dpVals |
| protected List<Object> | dpWritables |
| protected boolean | filesCreated |
| protected org.apache.hadoop.fs.FileSystem | fs |
| protected FileSinkOperator.FSPaths | fsp |
| protected HiveOutputFormat<?,?> | hiveOutputFormat |
| protected boolean | isCollectRWStats |
| protected boolean | isCompressed |
| protected boolean | isSkewedStoredAsSubDirectories |
| protected boolean | isTemporary |
| protected org.apache.hadoop.mapred.JobConf | jc |
| protected HiveKey | key |
| protected ListBucketingCtx | lbCtx |
| static org.apache.commons.logging.Log | LOG |
| protected long | logEveryNRows |
| protected int | maxPartitions |
| protected boolean | multiFileSpray |
| protected int | numDynParts |
| protected long | numRows |
| protected org.apache.hadoop.fs.Path | parent |
| protected HivePartitioner<HiveKey,Object> | prtner |
| protected org.apache.hadoop.io.Writable | recordValue |
| protected org.apache.hadoop.io.LongWritable | row_count |
| protected FileSinkOperator.RecordWriter[] | rowOutWriters |
| protected Serializer | serializer |
| protected org.apache.hadoop.fs.Path | specPath |
| protected boolean | statsCollectRawDataSize |
| protected boolean[] | statsFromRecordWriter |
| protected int | totalFiles |
| protected HashMap<String,FileSinkOperator.FSPaths> | valToPaths |
alias, childOperators, childOperatorsArray, childOperatorsTag, colExprMap, conf, CONTEXT_NAME_KEY, done, groupKeyObject, HIVECOUNTERCREATEDFILES, HIVECOUNTERFATAL, id, inputObjInspectors, isLogDebugEnabled, isLogInfoEnabled, isLogTraceEnabled, operatorId, out, outputObjInspector, parentOperators, PLOG, reporter, state, statsMap

| Constructor and Description |
|---|
| FileSinkOperator() |
| Modifier and Type | Method and Description |
|---|---|
| protected boolean | areAllTrue(boolean[] statsFromRW) |
| void | augmentPlan() — Called during semantic analysis as operators are being added in order to give them a chance to compute any additional plan information needed. |
| void | checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored, org.apache.hadoop.mapred.JobConf job) |
| void | closeOp(boolean abort) — Operator specific close routine. |
| protected void | createBucketFiles(FileSinkOperator.FSPaths fsp) |
| protected void | createBucketForFileIdx(FileSinkOperator.FSPaths fsp, int filesIdx) |
| protected String | generateListBucketingDirName(Object row) — Generate list bucketing directory name from a row. |
| protected FileSinkOperator.FSPaths | getDynOutPaths(List<String> row, String lbDirName) |
| String | getName() — Implements the getName function for the Node Interface. |
| static String | getOperatorName() |
| OperatorType | getType() — Return the type of the specific operator among the types in OperatorType. |
| protected Collection<Future<?>> | initializeOp(org.apache.hadoop.conf.Configuration hconf) — Operator specific initialization. |
| void | jobCloseOp(org.apache.hadoop.conf.Configuration hconf, boolean success) |
| protected FileSinkOperator.FSPaths | lookupListBucketingPaths(String lbDirName) — Lookup list bucketing path. |
| void | process(Object row, int tag) — Process the row. |
| protected boolean | updateProgress() — Report status to JT so that JT won't kill this task if closing takes too long due to too many files to close and the NN is overloaded. |
acceptLimitPushdown, allInitializedParentsAreClosed, areAllParentsInitialized, cleanUpInputFileChanged, cleanUpInputFileChangedOp, clone, cloneOp, cloneRecursiveChildren, close, columnNamesRowResolvedCanBeObtained, completeInitializationOp, createDummy, defaultEndGroup, defaultStartGroup, dump, dump, endGroup, flush, forward, getAdditionalCounters, getChildOperators, getChildren, getColumnExprMap, getConf, getConfiguration, getDone, getExecContext, getGroupKeyObject, getIdentifier, getInputObjInspectors, getNextCntr, getNumChild, getNumParent, getOperatorId, getOpTraits, getOutputObjInspector, getParentOperators, getSchema, getStatistics, getStats, initEvaluators, initEvaluators, initEvaluatorsAndReturnStruct, initialize, initialize, initializeChildren, initializeLocalWork, initOperatorId, isUseBucketizedHiveInputFormat, jobClose, logStats, opAllowedAfterMapJoin, opAllowedBeforeMapJoin, opAllowedBeforeSortMergeJoin, opAllowedConvertMapJoin, passExecContext, preorderMap, processGroup, removeChild, removeChildAndAdoptItsChildren, removeChildren, removeParent, replaceChild, replaceParent, reset, resetId, resetStats, setAlias, setChildOperators, setColumnExprMap, setConf, setDone, setExecContext, setGroupKeyObject, setId, setInputContext, setInputObjInspectors, setOperatorId, setOpTraits, setOutputCollector, setParentOperators, setReporter, setSchema, setStatistics, setUseBucketizedHiveInputFormat, startGroup, supportAutomaticSortMergeJoin, supportSkewJoinOptimization, supportUnionRemoveOptimization, toString, toString

public static final org.apache.commons.logging.Log LOG
protected transient HashMap<String,FileSinkOperator.FSPaths> valToPaths
protected transient int numDynParts
protected transient DynamicPartitionCtx dpCtx
protected transient boolean isCompressed
protected transient boolean isTemporary
protected transient org.apache.hadoop.fs.Path parent
protected transient HiveOutputFormat<?,?> hiveOutputFormat
protected transient org.apache.hadoop.fs.Path specPath
protected transient String childSpecPathDynLinkedPartitions
protected transient int dpStartCol
protected transient FileSinkOperator.RecordWriter[] rowOutWriters
protected transient int maxPartitions
protected transient ListBucketingCtx lbCtx
protected transient boolean isSkewedStoredAsSubDirectories
protected transient boolean statsCollectRawDataSize
protected transient boolean[] statsFromRecordWriter
protected transient boolean isCollectRWStats
protected transient long numRows
protected transient long cntr
protected transient long logEveryNRows
protected transient org.apache.hadoop.fs.FileSystem fs
protected transient Serializer serializer
protected final transient org.apache.hadoop.io.LongWritable row_count
protected transient int totalFiles
protected transient boolean multiFileSpray
protected transient HivePartitioner<HiveKey,Object> prtner
protected final transient HiveKey key
protected transient FileSinkOperator.FSPaths fsp
protected transient boolean bDynParts
protected transient boolean autoDelete
protected transient org.apache.hadoop.mapred.JobConf jc
protected boolean filesCreated
protected org.apache.hadoop.io.Writable recordValue
protected Collection<Future<?>> initializeOp(org.apache.hadoop.conf.Configuration hconf) throws HiveException
Description copied from class: Operator
initializeOp in class Operator<FileSinkDesc>
Throws: HiveException

protected void createBucketFiles(FileSinkOperator.FSPaths fsp) throws HiveException
Throws: HiveException

protected void createBucketForFileIdx(FileSinkOperator.FSPaths fsp, int filesIdx) throws HiveException
Throws: HiveException

protected boolean updateProgress()
public void process(Object row, int tag) throws HiveException
Description copied from class: Operator
process in class Operator<FileSinkDesc>
Parameters:
row - The object representing the row.
tag - The tag of the row usually means which parent this row comes from. Rows with the same tag should have exactly the same rowInspector all the time.
Throws: HiveException

protected boolean areAllTrue(boolean[] statsFromRW)
protected FileSinkOperator.FSPaths lookupListBucketingPaths(String lbDirName) throws HiveException
Parameters:
lbDirName -
Throws: HiveException

protected String generateListBucketingDirName(Object row)
Parameters:
row - row to process.

protected FileSinkOperator.FSPaths getDynOutPaths(List<String> row, String lbDirName) throws HiveException
Throws: HiveException

public void closeOp(boolean abort)
throws HiveException
Description copied from class: Operator
closeOp in class Operator<FileSinkDesc>
Throws: HiveException

public String getName()
Description copied from class: Operator
getName in interface Node
getName in class Operator<FileSinkDesc>

public static String getOperatorName()
public void jobCloseOp(org.apache.hadoop.conf.Configuration hconf,
boolean success)
throws HiveException
jobCloseOp in class Operator<FileSinkDesc>
Throws: HiveException

public OperatorType getType()
Description copied from class: Operator
getType in class Operator<FileSinkDesc>

public void augmentPlan()
Description copied from class: Operator
augmentPlan in class Operator<FileSinkDesc>

public void checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf job)
throws IOException
Throws: IOException

Copyright © 2017 The Apache Software Foundation. All rights reserved.