edu.northwestern.at.wordhoard.swing.calculator.modelutils
Class WordUtils

java.lang.Object
  extended by edu.northwestern.at.wordhoard.swing.calculator.modelutils.WordUtils

public class WordUtils
extends java.lang.Object

Word occurrence utilities.


Field Summary
protected static java.util.TreeMap punctuationMap
          Punctuation map.
protected static java.util.HashMap wordClassToMajorWordClassMap
          Word class to major word class map.
 
Constructor Summary
protected WordUtils()
          Don't allow instantiation but do allow overrides.
 
Method Summary
static java.lang.String createCompoundWordClassQueryString(java.lang.String compoundWordClass)
          Create query string portion for looking up a compound word class.
protected static void createPunctuationMap()
          Create punctuation map.
static java.lang.String[] extractLemmata(java.lang.String compoundLemma)
          Extract lemmata from a compound lemma string.
static java.lang.String[] extractSpellingAndCompoundWordClass(java.lang.String spellingAndCompoundWordClass)
          Parse spelling and compound word class string.
static java.lang.String[] extractWordClassTags(java.lang.String spellingAndCompoundWordClass)
          Extract word class tags from a spelling/compound word class.
static java.lang.String getCompoundLemma(Word word)
          Gets the compound lemma.
static java.lang.String getCompoundWordClass(Word word)
          Gets the compound word class.
static java.lang.String getDisplayableText(Word[] wordOccurrences, boolean displayLemmaForms, java.lang.String eolChars)
          Get displayable text for an array of adjacent WordOccurrences.
static java.lang.String getDisplayableText(Word[] words, boolean displayLemmaForms, java.lang.String eolChars, java.lang.String[] highlightWords, int[] highlightBracket, boolean highlightsAreLemmata, WordSet wordSet)
          Get displayable html text for an array of adjacent Words.
static java.lang.String getDisplayableText(Word[] wordOccurrences, java.lang.String eolChars)
          Get displayable text for an array of adjacent WordOccurrences.
static java.lang.String getDisplayableWordText(Spelling word, int wordForm)
          Get displayable word text.
static java.lang.String getIsVerse(Word word)
          Gets the "is verse" flag.
static java.lang.String[] getIsVerseValues()
          Get list of distinct isVerse values.
static java.util.List getLeftSpan(Word word, int leftSpan)
          Get span of words to left of a specified word.
static Word[] getLemmaOccurrences(Spelling lemma, WorkPart[] workParts)
          Perform a lemma query.
static java.lang.String[] getLemmaTags(Word word)
          Gets the lemma tags for a word.
static java.lang.String getMajorWordClassForWordClass(java.lang.String wordClass)
          Get major word class for a word class.
static java.lang.String getMetricalShape(Word word)
          Gets the metrical shape.
static java.lang.String[] getMetricalShapeValues()
          Get list of distinct metrical shape values.
static java.lang.String getPrintablePunctuation(java.lang.String punctuation)
          Convert punctuation string into printable string.
static java.util.List getRightSpan(Word word, int rightSpan)
          Get span of words to right of a specified word.
static Word[] getSpan(Word word, int leftSpan, int rightSpan)
          Get surrounding words of a specified word.
static Word[] getSpanFromCache(Word word, int leftSpan, int rightSpan)
          Get surrounding words of a specified word.
static java.lang.String getSpeakerGender(Word word)
          Gets the speaker gender.
static java.lang.String[] getSpeakerGenders()
          Get list of speaker gender values.
static java.lang.String[] getSpeakerMortalities()
          Get list of speaker mortality values.
static java.lang.String getSpeakerMortality(Word word)
          Gets the speaker mortality.
static java.lang.String getSpellingAndCompoundWordClass(Word word)
          Gets the lower case word with the trailing compound word class.
static Spelling getSpellingForString(java.lang.String wordText)
          Get Spelling from string.
static Word[] getSpellingOccurrences(Spelling spelling, WorkPart[] workParts)
          Perform a spelling query.
static Spelling[] getSpellingsByInitialString(java.lang.String initialString)
          Finds spellings by matching an initial string of characters.
static Word[] getWordOccurrences(Corpus corpus, int wordForm, Spelling word)
          Get word occurrences for a word in a specified corpus.
static Word[] getWordOccurrences(WorkPart[] workParts, int wordForm, Spelling word)
          Get word occurrences for a word in specified work parts.
static Word[] getWordOccurrences(WorkPart workPart, int wordForm, Spelling word)
          Get word occurrences for a word in a specified work part.
static Word[] getWordOccurrences(WorkSet workSet, int wordForm, Spelling word)
          Get word occurrences for a word in a specified work set.
static int[] getWordPartCounts(Word[] words)
          Get word part counts for a batch of words.
static WordPartData[] getWordPartData(Word[] words)
          Get word parts for a batch of words.
static Word[] getWordsByTags(java.util.Collection tags)
          Gets word occurrences by tag.
static boolean isVerseExists(java.lang.String isVerseText)
          See if specified "isVerse" value exists.
protected static void makeWordClassToMajorWordClassMap()
          Construct word class to major word class map.
static boolean metricalShapeExists(java.lang.String metricalShape)
          See if specified metrical shape value exists.
static Word[] performWordQuery(java.lang.String queryString, java.lang.String[] paramNames, java.lang.Object[] paramValues)
          Perform word query.
static boolean speakerGenderExists(java.lang.String speakerGenderText)
          See if specified speaker gender exists.
static boolean speakerMortalityExists(java.lang.String speakerMortalityText)
          See if specified speaker mortality exists.
static boolean spellingExists(java.lang.String spellingText)
          See if specified spelling exists.
static java.lang.String stripSpelling(java.lang.String s)
          Removes spelling from a tagged word.
static java.lang.String stripWordClass(java.lang.String s)
          Removes word class tagging from a word or phrase.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

punctuationMap

protected static java.util.TreeMap punctuationMap
Punctuation map.


wordClassToMajorWordClassMap

protected static java.util.HashMap wordClassToMajorWordClassMap
Word class to major word class map.

Constructor Detail

WordUtils

protected WordUtils()
Don't allow instantiation but do allow overrides.

Method Detail

performWordQuery

public static Word[] performWordQuery(java.lang.String queryString,
                                      java.lang.String[] paramNames,
                                      java.lang.Object[] paramValues)
Perform word query.

Parameters:
queryString - The query string.
paramNames - Parameter names used in the query.
paramValues - Parameter values for each paramName.
Returns:
Array of Word entries.

getWordsByTags

public static Word[] getWordsByTags(java.util.Collection tags)
Gets word occurrences by tag.

Parameters:
tags - Collection of word tags.
Returns:
The word occurrences with the specified tags, or null if none found.
Throws:
PersistenceException

getSpan

public static Word[] getSpan(Word word,
                             int leftSpan,
                             int rightSpan)
Get surrounding words of a specified word.

Parameters:
word - Word for which to get span.
leftSpan - # of words to left of specified word to retrieve.
rightSpan - # of words to right of specified word to retrieve.
Returns:
Span of words around specified word.

getSpanFromCache

public static Word[] getSpanFromCache(Word word,
                                      int leftSpan,
                                      int rightSpan)
Get surrounding words of a specified word.

Parameters:
word - Word for which to get span, i.e., the anchor word.
leftSpan - # of words to left of specified word to retrieve.
rightSpan - # of words to right of specified word to retrieve.
Returns:
Span of words around specified word.

This operates like getSpan above, but uses object model traversal instead of a database lookup. The assumption is that the relevant word objects have already been loaded into the cache.


getLeftSpan

public static java.util.List getLeftSpan(Word word,
                                         int leftSpan)
Get span of words to left of a specified word.

Parameters:
word - Word for which to get left span, i.e., the anchor word.
leftSpan - # of words to left of specified word to retrieve.
Returns:
List of words to left of specified word.

getRightSpan

public static java.util.List getRightSpan(Word word,
                                          int rightSpan)
Get span of words to right of a specified word.

Parameters:
word - Word for which to get right span, i.e., the anchor word.
rightSpan - # of words to right of specified word to retrieve.
Returns:
List of words to right of specified word.

extractSpellingAndCompoundWordClass

public static java.lang.String[] extractSpellingAndCompoundWordClass(java.lang.String spellingAndCompoundWordClass)
Parse spelling and compound word class string.

Parameters:
spellingAndCompoundWordClass - The spelling and compound word class to look up. Must be in the form "spelling (wordclass)".
Returns:
A two element String array. The first element is the spelling and the second is the compound word class string.

createCompoundWordClassQueryString

public static java.lang.String createCompoundWordClassQueryString(java.lang.String compoundWordClass)
Create query string portion for looking up a compound word class.

Parameters:
compoundWordClass - The compound word class.
Returns:
A query string portion for looking up a matching set of word classes.

getSpellingOccurrences

public static Word[] getSpellingOccurrences(Spelling spelling,
                                            WorkPart[] workParts)
Perform a spelling query.

Parameters:
spelling - Spelling to look up.
workParts - The work/work parts to search.

getLemmaOccurrences

public static Word[] getLemmaOccurrences(Spelling lemma,
                                         WorkPart[] workParts)
Perform a lemma query.

Parameters:
lemma - Lemma to look up.
workParts - The work/work parts to search.

getWordOccurrences

public static Word[] getWordOccurrences(WorkPart[] workParts,
                                        int wordForm,
                                        Spelling word)
Get word occurrences for a word in specified work parts.

Parameters:
workParts - The work parts.
wordForm - The word form.
word - The word to look up.
Returns:
Array of Word entries for word in the specified work parts.

getWordOccurrences

public static Word[] getWordOccurrences(WorkPart workPart,
                                        int wordForm,
                                        Spelling word)
Get word occurrences for a word in a specified work part.

Parameters:
workPart - The work part.
wordForm - The word form.
word - The word to look up.
Returns:
Array of Word entries for word in work part.

getWordOccurrences

public static Word[] getWordOccurrences(Corpus corpus,
                                        int wordForm,
                                        Spelling word)
Get word occurrences for a word in a specified corpus.

Parameters:
corpus - The corpus.
wordForm - The word form.
word - The word to look up.
Returns:
Array of Word entries for word in corpus.

getWordOccurrences

public static Word[] getWordOccurrences(WorkSet workSet,
                                        int wordForm,
                                        Spelling word)
Get word occurrences for a word in a specified work set.

Parameters:
workSet - The work set.
wordForm - The word form.
word - The word to look up.
Returns:
Array of Word entries for word in work set.

getDisplayableText

public static java.lang.String getDisplayableText(Word[] words,
                                                  boolean displayLemmaForms,
                                                  java.lang.String eolChars,
                                                  java.lang.String[] highlightWords,
                                                  int[] highlightBracket,
                                                  boolean highlightsAreLemmata,
                                                  WordSet wordSet)
Get displayable html text for an array of adjacent Words.

Parameters:
words - Array of Words to display.
displayLemmaForms - Display lemma forms instead of spellings.
eolChars - String for end of line.
highlightWords - Highlight words in this list.
highlightBracket - Interval in which to highlight words.
highlightsAreLemmata - True if highlight words are lemma forms.
wordSet - Word set containing collection of words which are actually part of the context. Context words not in the word set will display with strike-through marking. May be null.
Returns:
Word occurrence text as displayable xhtml.

getDisplayableText

public static java.lang.String getDisplayableText(Word[] wordOccurrences,
                                                  java.lang.String eolChars)
Get displayable text for an array of adjacent WordOccurrences.

Parameters:
wordOccurrences - Array of word occurrences.
eolChars - String for end of line.
Returns:
Word occurrence text as displayable text.

getDisplayableText

public static java.lang.String getDisplayableText(Word[] wordOccurrences,
                                                  boolean displayLemmaForms,
                                                  java.lang.String eolChars)
Get displayable text for an array of adjacent WordOccurrences.

Parameters:
wordOccurrences - Array of word occurrences.
displayLemmaForms - True to display lemma form of text.
eolChars - String for end of line.
Returns:
Word occurrence text as displayable text.

getPrintablePunctuation

public static java.lang.String getPrintablePunctuation(java.lang.String punctuation)
Convert punctuation string into printable string.

Parameters:
punctuation - The original punctuation string.
Returns:
Printable string.

speakerGenderExists

public static boolean speakerGenderExists(java.lang.String speakerGenderText)
See if specified speaker gender exists.

Parameters:
speakerGenderText - The speaker gender text.
Returns:
true if specified speaker gender exists, false otherwise.

speakerMortalityExists

public static boolean speakerMortalityExists(java.lang.String speakerMortalityText)
See if specified speaker mortality exists.

Parameters:
speakerMortalityText - The speaker mortality text.
Returns:
true if specified speaker mortality exists, false otherwise.

getSpeakerGenders

public static java.lang.String[] getSpeakerGenders()
Get list of speaker gender values.

Returns:
String array of speaker gender values.

getSpeakerMortalities

public static java.lang.String[] getSpeakerMortalities()
Get list of speaker mortality values.

Returns:
String array of speaker mortality values.

isVerseExists

public static boolean isVerseExists(java.lang.String isVerseText)
See if specified "isVerse" value exists.

Parameters:
isVerseText - The "isVerse" text.
Returns:
true if specified "isVerse" exists, false otherwise.

getIsVerseValues

public static java.lang.String[] getIsVerseValues()
Get list of distinct isVerse values.

Returns:
String array of distinct "isVerse" values.

metricalShapeExists

public static boolean metricalShapeExists(java.lang.String metricalShape)
See if specified metrical shape value exists.

Parameters:
metricalShape - The metrical shape to check.
Returns:
true if specified metrical shape exists, false otherwise.

getMetricalShapeValues

public static java.lang.String[] getMetricalShapeValues()
Get list of distinct metrical shape values.

Returns:
String array of distinct metrical shape strings.

extractWordClassTags

public static java.lang.String[] extractWordClassTags(java.lang.String spellingAndCompoundWordClass)
Extract word class tags from a spelling/compound word class.

Parameters:
spellingAndCompoundWordClass - The combined spelling and word class(es).
Returns:
String array of word class tags.

extractLemmata

public static java.lang.String[] extractLemmata(java.lang.String compoundLemma)
Extract lemmata from a compound lemma string.

Parameters:
compoundLemma - The compound lemma string.
Returns:
String array of individual lemmata.

spellingExists

public static boolean spellingExists(java.lang.String spellingText)
See if specified spelling exists.

Parameters:
spellingText - The spelling text.
Returns:
true if spelling with spelling text exists, false otherwise.

getSpellingsByInitialString

public static Spelling[] getSpellingsByInitialString(java.lang.String initialString)
Finds spellings by matching an initial string of characters.

Parameters:
initialString - The initial spelling text string.
Returns:
An array of matching spelling objects whose tags begin with the specified text. Null if none.

getSpellingForString

public static Spelling getSpellingForString(java.lang.String wordText)
Get Spelling from string.

Parameters:
wordText - The word text.
Returns:
A Spelling object for the word text.

stripWordClass

public static java.lang.String stripWordClass(java.lang.String s)
Removes word class tagging from a word or phrase.

Parameters:
s - The string from which to remove any word class tags.
Returns:
The string with the word class tags removed.

Example:

String wc = stripWordClass( "think (v)" );

returns think in wc.


stripSpelling

public static java.lang.String stripSpelling(java.lang.String s)
Removes spelling from a tagged word.

Parameters:
s - The string from which to remove the spelling. Form is "spelling (wordclass)".
Returns:
The string with the spelling removed.

Example:

String wc = stripSpelling( "think (v)" );

returns v in wc.


getCompoundWordClass

public static java.lang.String getCompoundWordClass(Word word)
Gets the compound word class.

Parameters:
word - The word for which to return the compound word class.
Returns:
The compound word class. Note: each compound word class tag may have a trailing homonym index.

getSpellingAndCompoundWordClass

public static java.lang.String getSpellingAndCompoundWordClass(Word word)
Gets the lower case word with the trailing compound word class.

Parameters:
word - The word for which to return the spelling and compound word class.
Returns:
The case-insensitive (lower case) word spelling with the compound word class appended in parentheses.

getCompoundLemma

public static java.lang.String getCompoundLemma(Word word)
Gets the compound lemma.

Parameters:
word - The word for which to return the compound lemma.
Returns:
The compound lemma.

getLemmaTags

public static java.lang.String[] getLemmaTags(Word word)
Gets the lemma tags for a word.

Parameters:
word - The word for which to return the lemma tags.
Returns:
String array of lemma tags.

getSpeakerGender

public static java.lang.String getSpeakerGender(Word word)
Gets the speaker gender.

Parameters:
word - The word for which to return the speaker gender.
Returns:
The speaker gender (M=male, F=female, U=mixed/unknown).

If a word has mixed gender speakers, "U" is returned.


getSpeakerMortality

public static java.lang.String getSpeakerMortality(Word word)
Gets the speaker mortality.

Parameters:
word - The word for which to return the speaker mortality.
Returns:
The speaker mortality (M=mortal, I=immortal, U=mixed/unknown).

If a word has mixed speaker mortalities, "U" is returned.


getIsVerse

public static java.lang.String getIsVerse(Word word)
Gets the "is verse" flag.

Parameters:
word - The word for which to return the is verse flag.
Returns:
'y' if word is verse, 'n' if prose, ' ' if not specified.

getMetricalShape

public static java.lang.String getMetricalShape(Word word)
Gets the metrical shape.

Parameters:
word - The word for which to return the metrical shape.
Returns:
Metrical shape, or "not specified" if null.

getDisplayableWordText

public static java.lang.String getDisplayableWordText(Spelling word,
                                                      int wordForm)
Get displayable word text.

Parameters:
word - The word text.
wordForm - The word form type.
Returns:
The displayable word text.

makeWordClassToMajorWordClassMap

protected static void makeWordClassToMajorWordClassMap()
Construct word class to major word class map.


getMajorWordClassForWordClass

public static java.lang.String getMajorWordClassForWordClass(java.lang.String wordClass)
Get major word class for a word class.

Parameters:
wordClass - The word class.
Returns:
The major word class for "wordClass", or empty string if none.

getWordPartCounts

public static int[] getWordPartCounts(Word[] words)
Get word part counts for a batch of words.

Parameters:
words - Array of Word.
Returns:
Array of int containing word part counts for each word in "words."

The word part counts are returned in the same order as the entries in the words array.


getWordPartData

public static WordPartData[] getWordPartData(Word[] words)
Get word parts for a batch of words.

Parameters:
words - Array of Word.
Returns:
Array of WordPartData containing word part data for each word in "words."

The word part data entries are returned in the same order as the entries in the words array.


createPunctuationMap

protected static void createPunctuationMap()
Create punctuation map.