Method | Description | |
---|---|---|
__construct ( Phpml\Tokenization\Tokenizer $tokenizer, $stopWords, float $minDF ) | ||
fit ( array $samples ) | ||
getVocabulary ( ) : array | ||
transform ( array &$samples ) | ||

Method | Description | |
---|---|---|
addTokenToVocabulary ( string $token ) | ||
buildVocabulary ( array &$samples ) | ||
checkDocumentFrequency ( array &$samples ) : array | ||
getBeyondMinimumIndexes ( integer $samplesCount ) : array | ||
getTokenIndex ( string $token ) : integer | boolean | ||
isStopWord ( string $token ) : boolean | ||
resetBeyondMinimum ( array &$sample, array $beyondMinimum ) | ||
transformSample ( string &$sample ) | ||
updateFrequency ( string $token ) | ||

public __construct ( Phpml\Tokenization\Tokenizer $tokenizer, $stopWords, float $minDF )

$tokenizer | Phpml\Tokenization\Tokenizer | |
$stopWords | ||
$minDF | float | |