deployModelName
Model Deployment Name
required
|
Name of the model to be used for deployment (must be a valid DNS subdomain with no underscores).
type: string
maxLength: 30
pattern: [a-zA-Z][\-a-zA-Z0-9]*[a-zA-Z0-9]?
|
dimReduction
Perform Dimensionality Reduction
|
Whether to perform dimensionality reduction or not. Truncated SVD is used to reduce dimensionality. Reduces overfitting and training time. Note that sparse vectors will become dense.
type: boolean
default value: 'false '
|
dimReductionSize
Reduced Dimension Size
|
The target dimension size of the features after dimensionality reduction.
type: integer
default value: '256 '
exclusiveMinimum: false
minimum: 1
|
dropout
Dropout
|
Probability for applying dropout regularization.
type: number
default value: '0.2 '
|
embeddingReg
Embedding regularization
|
The scale of how important it is for the algorithm to minimize the maximum similarity between embeddings of different classes.
type: number
default value: '0.8 '
|
embeddingsSize
Embedding size
|
Dimension size of final embedding vectors for text and class.
type: integer
default value: '100 '
exclusiveMinimum: false
minimum: 1
|
featurizerType
Featurizer
|
The type of featurizer to use. TFIDF will compute both term-frequency and inverse document-frequency, whereas Count will use only term-frequency
type: string
default value: 'tfidf '
enum: {
tfidf
count
}
|
id
Job ID
required
|
The ID for this job. Used in the API to reference this job. Allowed characters: a-z, A-Z, dash (-) and underscore (_)
type: string
maxLength: 63
pattern: [a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?
|
l1Ratio
L1 penalty ratio
|
Only used with the `elasticnet` penalty. If its value = 0, l2 penalty will be used. If its value = 1, l1 penalty will be used. A value in between will use the appropriate ratio of l1 and l2 penalties.
type: number
default value: '0.5 '
exclusiveMaximum: false
maximum: 1.0
|
labelField
Training collection class field
required
|
Solr field name containing the classes/labels for the text
type: string
minLength: 1
|
labelLayersSizes
Hidden sizes before class embedding
|
Sizes of hidden layers before the embedding layer for classes. Specify as a list of numbers for multiple layers or a single number for 1 layer. Leave blank if no hidden layers are required.
type: string
default value: '[] '
pattern: ^(\[(((\d)*,\s*)*(\d+)+)?\])?$
|
lowercaseTexts
Lowercase Text
|
Select if you want the text to be lowercased
type: boolean
default value: 'true '
|
maxBatchSize
Maximum Batch Size
|
The largest batch size to use during training. Batch size will be increased linearly every epoch, up to the maximum batch size specified.
type: integer
default value: '128 '
exclusiveMinimum: false
minimum: 1
|
maxCharLen
Maximum No. of Characters
|
Maximum length, in characters, of the training text. Texts longer than this value will be truncated.
type: integer
default value: '100000 '
exclusiveMinimum: false
minimum: 1
|
maxDf
Max Document Frequency
|
Maximum document frequency (DF) for a token to be considered. Provide a float (0,1) if you want to specify as a fraction, otherwise integer >= 1 to specify the exact number of documents in which a token should occur.
type: number
default value: '0.8 '
|
maxFeatures
Maximum Vocab Size
|
Maximum number of tokens (including word or character ngrams) to consider for the vocabulary. Less frequent tokens will be omitted.
type: integer
default value: '250000 '
exclusiveMinimum: false
minimum: 1
|
maxIter
Maximum iterations for algorithm
|
Maximum number of iterations taken for the optimization algorithm to converge.
type: integer
default value: '200 '
exclusiveMinimum: false
minimum: 1
|
maxNgram
Max Ngram size
|
Maximum word or character ngram size to be used.
type: integer
exclusiveMinimum: false
minimum: 1
|
minBatchSize
Minimum Batch Size
|
The smallest batch size with which to start training. Batch size will be increased linearly every epoch, up to the maximum batch size specified.
type: integer
default value: '64 '
exclusiveMinimum: false
minimum: 1
|
minCharLen
Minimum No. of Characters
|
Minimum length, in characters, for the text to be included in training.
type: integer
default value: '2 '
exclusiveMinimum: false
minimum: 1
|
minClassSize
Minimum no. of examples per class
|
Minimum number of samples that a class should have to be included in training. Otherwise the class and all its samples are dropped.
type: integer
default value: '5 '
exclusiveMinimum: false
minimum: 2
|
minDf
Min Document Frequency
|
Minimum document frequency (DF) for a token to be considered. Provide a float (0,1) if you want to specify as a fraction, otherwise integer >= 1 to specify the exact number of documents in which a token should occur.
type: number
default value: '1.0 '
|
minNgram
Min Ngram size
|
Minimum word or character ngram size to be used.
type: integer
exclusiveMinimum: false
minimum: 1
|
modelReplicas
Model replicas
|
How many replicas of the model should be deployed by Seldon Core
type: integer
default value: '1 '
exclusiveMinimum: false
minimum: 1
|
muNeg
Maximum negative class similarity
|
How similar the algorithm should try to make embedding vectors for negative classes. The algorithm will try to minimize similarities so that they are lower than the value specified here.
type: number
default value: '-0.4 '
exclusiveMaximum: false
maximum: 1.0
|
muPos
Maximum correct class similarity
|
How similar the algorithm should try to make embedding vectors for correct classes. The algorithm will try to maximize similarities so that they are higher than the value specified here.
type: number
default value: '0.8 '
exclusiveMaximum: false
maximum: 1.0
|
multiClass
Loss Method
|
Whether to train a binary classifier for each class or use a multinomial loss. ‘auto’ selects ‘ovr’ if the data is binary, or if solver=’liblinear’, and otherwise selects ‘multinomial’.
type: string
default value: 'auto '
enum: {
auto
ovr
multinomial
}
|
norm
Use Norm
|
Select the norm method to use.
type: string
default value: 'None '
enum: {
None
L1
L2
}
|
numEpochs
Number of training epochs
|
Number of epochs for which to train the model.
type: integer
default value: '40 '
exclusiveMinimum: false
minimum: 1
|
numNeg
Number of negative classes for training
|
Number of negative classes to use during training to minimize their similarity to the input text. Should be less than the total number of classes.
type: integer
exclusiveMinimum: false
minimum: 1
|
penalty
Penalty
|
Specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only the l2 penalty or no penalty. ‘elasticnet’ is only supported by the ‘saga’ solver. Select none if you don't want to regularize (this is not supported by the `liblinear` solver).
type: string
default value: 'l2 '
enum: {
l1
l2
elsaticnet
none
}
|
randomSeed
Random Seed
|
Seed for the pseudorandom number generator. Keeping this seed constant makes results deterministic across runs.
type: integer
default value: '12345 '
|
readOptions
Read Options
|
Options used when reading input from Solr or other sources.
type: array of object
object attributes: {
key
(required)
: {
display name: Parameter Name
type: string
}
value
: {
display name: Parameter Value
type: string
}
}
|
reg
Regularization term
|
This is the inverse of regularization strength. Smaller values result in stronger regularization.
type: number
default value: '1.0 '
|
regTerm
Regularization Term
|
Scale of L2 regularization
type: number
default value: '0.002 '
|
scaling
Scale Features
|
Whether to apply Standard Scaling (X - mean(X)) / std(X) for the features. If the feature vector is sparse (no dimensionality reduction is used), then only division by the standard deviation will be applied.
type: boolean
default value: 'true '
|
secretName
Cloud storage secret name
|
Name of the secret used to access cloud storage as defined in the K8s namespace
type: string
minLength: 1
|
similarityType
Similarity type
|
Type of similarity to use to compare the embedded vectors.
type: string
default value: 'cosine '
enum: {
cosine
inner
}
|
smoothIdf
Smooth IDF
|
Smooth IDF weights by adding one to document frequencies. Prevents zero divisions.
type: boolean
default value: 'true '
|
solver
Optimization Algorithm
|
The optimization algorithm to use to fit to the data. LBFGS and SAGA are good initial choices.
type: string
default value: 'lbfgs '
enum: {
lbfgs
newton-cg
liblinear
sag
saga
}
|
sparkConfig
Additional parameters
|
Provide additional key/value pairs to be injected into the training JSON map at runtime. Values will be inserted as-is, so use " to surround string values
type: array of object
object attributes: {
key
(required)
: {
display name: Parameter Name
type: string
}
value
: {
display name: Parameter Value
type: string
}
}
|
stopwordsBlobName
Stopwords Blob Store
|
Name of the stopwords blob resource. This is a .txt file with one stopword per line. By default the file is called stopwords/stopwords_en.txt; however, a custom file can also be used. Check the documentation for more details on the format and on uploading to the blob store.
type: string
default value: 'stopwords/stopwords_en.txt '
blobType: file:spark
reference: blob
|
sublinearTf
Sublinear TF
|
Whether to apply sublinear scaling to TF, i.e. replace tf with 1 + log(tf). It usually helps when characters are used.
type: boolean
default value: 'true '
|
textField
Training collection content field
required
|
Solr field name containing the text to be classified
type: string
minLength: 1
|
textLayersSizes
Hidden sizes before text embedding
|
Sizes of hidden layers before the embedding layer for text. Specify as a list of numbers for multiple layers or a single number for 1 layer. Leave blank if no hidden layers are required.
type: string
default value: '[256, 128] '
pattern: ^(\[(((\d)*,\s*)*(\d+)+)?\])?$
|
tokenPattern
Token filtering pattern
|
Regex pattern for filtering tokens.
type: string
default value: '(?u)\b\w\w+\b '
|
tol
Stopping tolerance
|
Tolerance for stopping criteria.
type: number
default value: '0.0001 '
|
topK
Number of Output classes
|
Number of most probable output classes to assign to each sample along with their scores.
type: integer
default value: '1 '
exclusiveMinimum: false
minimum: 1
|
trainingCollection
Training data path
required
|
Solr collection or cloud storage path where training data is present.
type: string
minLength: 1
|
trainingDataFilterQuery
Training Data Filter Query
|
Solr or SQL query to filter training data. Use a Solr query when a Solr collection is specified in Training data path. Use a SQL query when a cloud storage location is specified. The table name for SQL is `spark_input`.
type: string
|
trainingFormat
Training data format
required
|
The format of the training data - solr, parquet etc.
type: string
default value: 'solr '
minLength: 1
|
trainingSampleFraction
Training Data Sampling Fraction
|
Choose a fraction of the data for training.
type: number
default value: '1.0 '
exclusiveMaximum: false
maximum: 1.0
|
type
Spark Job Type
required
|
type: string
default value: 'argo-classification '
enum: {
argo-classification
}
|
unidecodeTexts
Unidecode Text
|
Select if you want the text to be unidecoded
type: boolean
default value: 'true '
|
useCharacters
Use Characters
|
Whether to use the characters or word analyzer. Use words if the text is long. Using characters on long text can significantly increase vectorization time and memory requirements.
type: boolean
default value: 'true '
|
useClassWeights
Use class weights
|
If true, a weight is applied to each class inversely proportional to its frequency.
type: boolean
default value: 'false '
|
useMaxNegSim
Only minimize max. negative similarity
|
If true, only the maximum similarity for negative classes will be minimized. If unchecked, all negative similarities will be used.
type: boolean
default value: 'true '
|
valSize
Validation set size
|
Size of the validation dataset. Provide a float (0, 1) if you want to sample as a fraction, or an integer >= 1 if you want to sample exact number of records.
type: number
default value: '0.1 '
|
workflowType
Method
required
|
Method to be used for classification.
type: string
default value: 'Logistic Regression '
enum: {
Logistic Regression
Starspace
}
|
writeOptions
Write Options
|
Options used when writing output to Solr or other sources
type: array of object
object attributes: {
key
(required)
: {
display name: Parameter Name
type: string
}
value
: {
display name: Parameter Value
type: string
}
}
|