public class TrainValidationSplit extends Estimator<TrainValidationSplitModel> implements HasCollectSubModels, MLWritable, Logging
Similar to CrossValidator, but only splits the set once.
Constructor and Description |
---|
TrainValidationSplit() |
TrainValidationSplit(String uid) |
Modifier and Type | Method and Description |
---|---|
static Params |
clear(Param<?> param) |
static BooleanParam |
collectSubModels() |
TrainValidationSplit |
copy(ParamMap extra)
Creates a copy of this instance with the same UID and some extra params.
|
static Param<Estimator<?>> |
estimator() |
Param<Estimator<?>> |
estimator()
param for the estimator to be validated
|
static Param<ParamMap[]> |
estimatorParamMaps() |
Param<ParamMap[]> |
estimatorParamMaps()
param for estimator param maps
|
static Param<Evaluator> |
evaluator() |
Param<Evaluator> |
evaluator()
param for the evaluator used to select hyper-parameters that maximize the validated metric
|
static String |
explainParam(Param<?> param) |
static String |
explainParams() |
static ParamMap |
extractParamMap() |
static ParamMap |
extractParamMap(ParamMap extra) |
TrainValidationSplitModel |
fit(Dataset<?> dataset)
Fits a model to the input data.
|
static <T> scala.Option<T> |
get(Param<T> param) |
static boolean |
getCollectSubModels() |
static <T> scala.Option<T> |
getDefault(Param<T> param) |
static Estimator<?> |
getEstimator() |
Estimator<?> |
getEstimator() |
static ParamMap[] |
getEstimatorParamMaps() |
ParamMap[] |
getEstimatorParamMaps() |
static Evaluator |
getEvaluator() |
Evaluator |
getEvaluator() |
static <T> T |
getOrDefault(Param<T> param) |
static int |
getParallelism() |
static Param<Object> |
getParam(String paramName) |
static long |
getSeed() |
static double |
getTrainRatio() |
double |
getTrainRatio() |
static <T> boolean |
hasDefault(Param<T> param) |
static boolean |
hasParam(String paramName) |
static boolean |
isDefined(Param<?> param) |
static boolean |
isSet(Param<?> param) |
static TrainValidationSplit |
load(String path) |
void |
logTuningParams(org.apache.spark.ml.util.Instrumentation instrumentation)
Instrumentation logging for tuning params including the inner estimator and evaluator info.
|
static IntParam |
parallelism() |
static Param<?>[] |
params() |
static MLReader<TrainValidationSplit> |
read() |
static void |
save(String path) |
static LongParam |
seed() |
static <T> Params |
set(Param<T> param,
T value) |
TrainValidationSplit |
setCollectSubModels(boolean value)
Whether to collect submodels when fitting.
|
TrainValidationSplit |
setEstimator(Estimator<?> value) |
TrainValidationSplit |
setEstimatorParamMaps(ParamMap[] value) |
TrainValidationSplit |
setEvaluator(Evaluator value) |
TrainValidationSplit |
setParallelism(int value)
Set the maximum level of parallelism to evaluate models in parallel.
|
TrainValidationSplit |
setSeed(long value) |
TrainValidationSplit |
setTrainRatio(double value) |
static String |
toString() |
static DoubleParam |
trainRatio() |
DoubleParam |
trainRatio()
Param for ratio between train and validation data.
|
StructType |
transformSchema(StructType schema)
:: DeveloperApi ::
|
StructType |
transformSchemaImpl(StructType schema) |
String |
uid()
An immutable unique ID for the object and its derivatives.
|
MLWriter |
write()
Returns an MLWriter instance for this ML instance. |
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
clear, copyValues, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, paramMap, params, set, set, set, setDefault, setDefault, shouldOwn
toString
collectSubModels, getCollectSubModels
save
initializeLogging, initializeLogIfNecessary, initializeLogIfNecessary, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
public TrainValidationSplit(String uid)
public TrainValidationSplit()
public static MLReader<TrainValidationSplit> read()
public static TrainValidationSplit load(String path)
public static String toString()
public static Param<?>[] params()
public static String explainParam(Param<?> param)
public static String explainParams()
public static final boolean isSet(Param<?> param)
public static final boolean isDefined(Param<?> param)
public static boolean hasParam(String paramName)
public static Param<Object> getParam(String paramName)
public static final <T> scala.Option<T> get(Param<T> param)
public static final <T> T getOrDefault(Param<T> param)
public static final <T> scala.Option<T> getDefault(Param<T> param)
public static final <T> boolean hasDefault(Param<T> param)
public static final ParamMap extractParamMap()
public static final LongParam seed()
public static final long getSeed()
public static Estimator<?> getEstimator()
public static ParamMap[] getEstimatorParamMaps()
public static Evaluator getEvaluator()
public static DoubleParam trainRatio()
public static double getTrainRatio()
public static IntParam parallelism()
public static int getParallelism()
public static final BooleanParam collectSubModels()
public static final boolean getCollectSubModels()
public static void save(String path) throws java.io.IOException
java.io.IOException
public String uid()
Identifiable
uid
in interface Identifiable
public TrainValidationSplit setEstimator(Estimator<?> value)
public TrainValidationSplit setEstimatorParamMaps(ParamMap[] value)
public TrainValidationSplit setEvaluator(Evaluator value)
public TrainValidationSplit setTrainRatio(double value)
public TrainValidationSplit setSeed(long value)
public TrainValidationSplit setParallelism(int value)
value - (undocumented)
public TrainValidationSplit setCollectSubModels(boolean value)
Note: If this param is set, then when you save the returned model you can set the option "persistSubModels" to "true" before saving, in order to save these submodels.
See the documentation of TrainValidationSplitModel.TrainValidationSplitModelWriter for more information.
value - (undocumented)
public TrainValidationSplitModel fit(Dataset<?> dataset)
Estimator
fit
in class Estimator<TrainValidationSplitModel>
dataset - (undocumented)
public StructType transformSchema(StructType schema)
PipelineStage
Check transform validity and derive the output schema from the input schema.
We check validity for interactions between parameters during transformSchema and raise an exception if any parameter value is invalid. Parameter value checks which do not depend on other parameters are handled by Param.validate().
A typical implementation should first conduct verification on schema change and parameter validity, including complex parameter interaction checks.
transformSchema
in class PipelineStage
schema - (undocumented)
public TrainValidationSplit copy(ParamMap extra)
Params
See defaultCopy().
copy
in interface Params
copy
in class Estimator<TrainValidationSplitModel>
extra - (undocumented)
public MLWriter write()
MLWritable
Returns an MLWriter instance for this ML instance.
write
in interface MLWritable
public double getTrainRatio()
public DoubleParam trainRatio()
public Param<Estimator<?>> estimator()
public Estimator<?> getEstimator()
public Param<ParamMap[]> estimatorParamMaps()
public ParamMap[] getEstimatorParamMaps()
public Param<Evaluator> evaluator()
public Evaluator getEvaluator()
public StructType transformSchemaImpl(StructType schema)
public void logTuningParams(org.apache.spark.ml.util.Instrumentation instrumentation)
instrumentation - (undocumented)