public class VocabularyHolder extends Object implements Serializable
| Modifier and Type | Class and Description |
|---|---|
static class |
VocabularyHolder.Builder |
| Modifier | Constructor and Description |
|---|---|
protected |
VocabularyHolder()
Default constructor
|
protected |
VocabularyHolder(VocabCache<? extends SequenceElement> cache,
boolean markAsSpecial)
Builds VocabularyHolder from VocabCache.
|
| Modifier and Type | Method and Description |
|---|---|
protected void |
activateScavenger()
This method removes low-frequency words based on their frequency change between activations.
|
void |
addWord(String word)
Adds new word to vocabulary
|
void |
addWord(VocabularyWord word) |
static List<Byte> |
arrayToList(byte[] array,
int codeLen)
This method is used only for VocabCache compatibility purposes
|
static List<Integer> |
arrayToList(int[] array,
int codeLen)
This method is used only for VocabCache compatibility purposes
|
static HuffmanNode |
buildNode(List<Byte> codes,
List<Integer> points,
int codeLen,
int index) |
void |
consumeVocabulary(VocabularyHolder holder) |
boolean |
containsWord(String word)
Checks whether the word exists in the vocabulary
|
Collection<VocabularyWord> |
getVocabulary() |
VocabularyWord |
getVocabularyWordByIdx(Integer id) |
VocabularyWord |
getVocabularyWordByString(String word) |
void |
incrementWordCounter(String word)
Increments by one the number of occurrences of the word in the corpus
|
int |
indexOf(String word)
This method returns index of word in sorted list.
|
static byte[] |
listToArray(List<Byte> code) |
static int[] |
listToArray(List<Integer> points,
int codeLen) |
int |
numWords() |
void |
resetWordCounters()
This method resets counters for all words in the vocabulary
|
protected void |
setScavengerActivationThreshold(int threshold)
This method is needed ONLY for unit tests and should NOT be available in public scope.
|
long |
totalWordsBeyondLimit() |
void |
transferBackToVocabCache() |
void |
transferBackToVocabCache(VocabCache cache) |
void |
transferBackToVocabCache(VocabCache cache,
boolean emptyHolder)
This method is required for compatibility purposes.
|
void |
truncateVocabulary()
The same as truncateVocabulary(this.minWordFrequency)
|
void |
truncateVocabulary(int threshold)
All words with frequency below threshold will be removed
|
List<VocabularyWord> |
updateHuffmanCodes()
Builds a binary tree ordered by counter.
|
List<VocabularyWord> |
words()
Returns sorted list of words in vocabulary.
|
protected VocabularyHolder()
protected VocabularyHolder(@NonNull
VocabCache<? extends SequenceElement> cache,
boolean markAsSpecial)
cache -
public static HuffmanNode buildNode(List<Byte> codes, List<Integer> points, int codeLen, int index)
public void transferBackToVocabCache()
public void transferBackToVocabCache(VocabCache cache)
public void transferBackToVocabCache(VocabCache cache, boolean emptyHolder)
cache -
protected void setScavengerActivationThreshold(int threshold)
threshold -
public static List<Byte> arrayToList(byte[] array, int codeLen)
array -
codeLen -
public static List<Integer> arrayToList(int[] array, int codeLen)
array -
codeLen -
public Collection<VocabularyWord> getVocabulary()
public VocabularyWord getVocabularyWordByString(String word)
public VocabularyWord getVocabularyWordByIdx(Integer id)
public boolean containsWord(String word)
word - to be looked for
public void incrementWordCounter(String word)
word - whose counter is to be incremented
public void addWord(String word)
word - to be added
public void addWord(VocabularyWord word)
public void consumeVocabulary(VocabularyHolder holder)
protected void activateScavenger()
public void resetWordCounters()
public int numWords()
public void truncateVocabulary()
public void truncateVocabulary(int threshold)
threshold - exclusive threshold for removal
public List<VocabularyWord> updateHuffmanCodes()
public int indexOf(String word)
word -
public List<VocabularyWord> words()
public long totalWordsBeyondLimit()
Copyright © 2017. All rights reserved.