public interface VocabCache<T extends SequenceElement> extends Serializable
| Modifier and Type | Method and Description |
|---|---|
void |
addToken(T element)
Adds a token
to the cache
|
void |
addWordToIndex(int index,
long elementId) |
void |
addWordToIndex(int index,
String word) |
boolean |
containsWord(String word)
Returns true if the cache contains the given word
|
int |
docAppearedIn(String word)
Count of documents a word appeared in
|
T |
elementAtIndex(int index)
Returns SequenceElement at the given index or null
|
boolean |
hasToken(String token)
Returns whether the cache
contains this token or not
|
void |
importVocabulary(VocabCache<T> vocabCache)
imports vocabulary
|
void |
incrementDocCount(String word,
long howMuch)
Increment the document count
|
void |
incrementTotalDocCount()
Increment the total doc count
|
void |
incrementTotalDocCount(long by)
Increment the total doc count by the given amount
|
void |
incrementWordCount(String word)
Increment the count for the given word
|
void |
incrementWordCount(String word,
int increment)
Increment the count for the given word by
the amount increment
|
int |
indexOf(String word)
Returns the index of a given word
|
void |
loadVocab()
Load vocab
|
int |
numWords()
Returns the number of words in the cache
|
void |
putVocabWord(String word)
Deprecated.
|
void |
removeElement(String label)
Removes element with specified label from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
removeElement(T element)
Removes specified element from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
saveVocab()
Saves the vocab: this allows for reuse of word frequencies
|
void |
setCountForDoc(String word,
long count)
Set the count for the number of documents the word appears in
|
T |
tokenFor(long id) |
T |
tokenFor(String word)
Returns the token (again not necessarily in the vocab)
for this word
|
Collection<T> |
tokens()
All of the tokens in the cache (not necessarily a part of the vocab)
|
long |
totalNumberOfDocs()
Returns the total of number of documents encountered in the corpus
|
long |
totalWordOccurrences()
The total number of word occurrences
|
void |
updateWordsOccurencies()
Updates counters
|
boolean |
vocabExists()
Vocab exists already
|
Collection<T> |
vocabWords()
Returns all of the vocab word nodes
|
String |
wordAtIndex(int index)
Returns the word contained at the given index or null
|
T |
wordFor(long id) |
T |
wordFor(String word) |
int |
wordFrequency(String word)
Returns the number of times the word has occurred
|
Collection<String> |
words()
Returns all of the words in the vocab
|
void loadVocab()
boolean vocabExists()
void saveVocab()
Collection<String> words()
void incrementWordCount(String word)
word - the word to increment the count for
void incrementWordCount(String word, int increment)
word - the word to increment the count for
increment - the amount to increment by
int wordFrequency(String word)
word - the word to retrieve the occurrence frequency for
boolean containsWord(String word)
word - the word to check for
String wordAtIndex(int index)
index - the index of the word to get
T elementAtIndex(int index)
index -
int indexOf(String word)
word - the word to get the index of
Collection<T> vocabWords()
long totalWordOccurrences()
T wordFor(long id)
void addWordToIndex(int index,
String word)
index -
word -
void addWordToIndex(int index,
long elementId)
@Deprecated void putVocabWord(String word)
word - the word to add to the vocab
int numWords()
int docAppearedIn(String word)
word - the word to get the number of documents it appeared in
void incrementDocCount(String word, long howMuch)
word - the word to increment the document count for
howMuch -
void setCountForDoc(String word, long count)
word - the word to set the count for
count - the count of the word
long totalNumberOfDocs()
void incrementTotalDocCount()
void incrementTotalDocCount(long by)
by - the number to increment by
Collection<T> tokens()
void addToken(T element)
element - the word to add
T tokenFor(String word)
word - the word to get the token for
T tokenFor(long id)
boolean hasToken(String token)
token - the token to test
void importVocabulary(VocabCache<T> vocabCache)
vocabCache -
void updateWordsOccurencies()
void removeElement(String label)
label - label of the element to be removed
void removeElement(T element)
element - SequenceElement to be removed
Copyright © 2017. All rights reserved.