Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 7 additions & 53 deletions src/org/opensolaris/opengrok/index/IndexDatabase.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NoLockFactory;
Expand Down Expand Up @@ -82,8 +80,7 @@
public class IndexDatabase {

private Project project;
private FSDirectory indexDirectory;
private FSDirectory spellDirectory;
private FSDirectory indexDirectory;
private IndexWriter writer;
private TermsEnum uidIter;
private IgnoredNames ignoredNames;
Expand Down Expand Up @@ -241,31 +238,22 @@ public void run() {
private void initialize() throws IOException {
synchronized (this) {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
File spellDir = new File(env.getDataRootFile(), "spellIndex");
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
if (project != null) {
indexDir = new File(indexDir, project.getPath());
spellDir = new File(spellDir, project.getPath());
indexDir = new File(indexDir, project.getPath());
}

if (!indexDir.exists() && !indexDir.mkdirs()) {
// to avoid race conditions, just recheck..
if (!indexDir.exists()) {
throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
}
}

if (!spellDir.exists() && !spellDir.mkdirs()) {
if (!spellDir.exists()) {
throw new FileNotFoundException("Failed to create root directory [" + spellDir.getAbsolutePath() + "]");
}
}
}

if (!env.isUsingLuceneLocking()) {
lockfact = NoLockFactory.getNoLockFactory();
}
indexDirectory = FSDirectory.open(indexDir, lockfact);
spellDirectory = FSDirectory.open(spellDir, lockfact);
indexDirectory = FSDirectory.open(indexDir, lockfact);
ignoredNames = env.getIgnoredNames();
includedNames = env.getIncludedNames();
analyzerGuru = new AnalyzerGuru();
Expand Down Expand Up @@ -426,8 +414,7 @@ public void update() throws IOException, HistoryException {
if (!isInterrupted() && isDirty()) {
if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
optimize();
}
createSpellingSuggestions();
}
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
File timestamp = new File(env.getDataRootFile(), "timestamp");
if (timestamp.exists()) {
Expand Down Expand Up @@ -518,40 +505,7 @@ public void optimize() {
}
}
}

/**
 * Generate the spelling suggestion index for the definitions stored in
 * the main index.
 *
 * <p>Terms are read from the {@code QueryBuilder.DEFS} field of
 * {@code indexDirectory} and written to {@code spellDirectory}, with the
 * index writer opened in {@code CREATE_OR_APPEND} mode.
 *
 * <p>An {@link IOException} raised while building the index is logged at
 * {@code SEVERE} level and not propagated. The spell checker, the index
 * reader and {@code spellDirectory} are all released before returning,
 * whether or not indexing succeeded.
 */
public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
        log.info("Generating spelling suggestion index ... ");
        indexReader = DirectoryReader.open(indexDirectory);
        checker = new SpellChecker(spellDirectory);
        //TODO below seems only to index "defs" , possible bug ?
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false);
        log.info("done");
    } catch (IOException e) {
        // The Throwable overload of log() never substitutes "{0}", so the
        // placeholder is dropped and the exception is attached as the cause.
        log.log(Level.SEVERE, "ERROR: Generating spelling: ", e);
    } finally {
        // Close the checker first: it works on top of spellDirectory,
        // which is closed last.
        if (checker != null) {
            try {
                checker.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing spell checker", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing reader", e);
            }
        }
        if (spellDirectory != null) {
            spellDirectory.close();
        }
    }
}


private boolean isDirty() {
synchronized (lock) {
return dirty;
Expand Down
4 changes: 2 additions & 2 deletions src/org/opensolaris/opengrok/search/Summarizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ private Token[] getTokens(String text) throws IOException {
//FIXME somehow integrate below cycle to getSummary to save the cloning and memory,
//also creating Tokens is suboptimal with 3.0.0 , this whole class could be replaced by highlighter
ArrayList<Token> result = new ArrayList<Token>();
TokenStream ts = analyzer.tokenStream("full", new StringReader(text));
TokenStream ts = analyzer.tokenStream("full", text);
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
while(ts.incrementToken()) {
Expand All @@ -297,7 +297,7 @@ private Token[] getTokens(String text) throws IOException {


/**
* Get the terms from a query and adds them to hightlite
* Get the terms from a query and adds them to highlight
* a stream of tokens
*
* @param query
Expand Down
129 changes: 74 additions & 55 deletions src/org/opensolaris/opengrok/web/SearchHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -42,9 +41,12 @@
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.store.FSDirectory;
import org.opensolaris.opengrok.OpenGrokLogger;
import org.opensolaris.opengrok.analysis.CompatibleAnalyser;
Expand All @@ -64,6 +66,10 @@
*/
public class SearchHelper {

/**
* max number of words to suggest for spellcheck
*/
public int SPELLCHECK_SUGGEST_WORD_COUNT=5;
/**
* opengrok's data root: used to find the search index file
*/
Expand Down Expand Up @@ -128,7 +134,7 @@ public class SearchHelper {
* the searcher used to open/search the index. Automatically set via
* {@link #prepareExec(SortedSet)}.
*/
public IndexSearcher searcher;
public IndexSearcher searcher;
/**
* list of docs which result from executing the query
*/
Expand All @@ -147,6 +153,10 @@ public class SearchHelper {
* {@link #prepareExec(SortedSet)}.
*/
protected Sort sort;
/**
* the spellchecker object
*/
protected DirectSpellChecker checker;
/**
* projects to use to setup indexer searchers. Usually setup via
* {@link #prepareExec(SortedSet)}.
Expand Down Expand Up @@ -211,7 +221,8 @@ public class SearchHelper {
public static Set<Map.Entry<String, String>> getFileTypeDescirptions() {
return fileTypeDescription.entrySet();
}


File indexDir;
/**
* Create the searcher to use with respect to the currently set parameters and the given
* projects. Does not produce any {@link #redirect} link. It also does
Expand All @@ -235,13 +246,13 @@ public SearchHelper prepareExec(SortedSet<String> projects) {
}
// the Query created by the QueryBuilder
try {
indexDir=new File(dataRoot, "index");
query = builder.build();
if (projects == null) {
errorMsg = "No project selected!";
return this;
}
this.projects = projects;
File indexDir = new File(dataRoot, "index");
this.projects = projects;
if (projects.isEmpty()) {
//no project setup
FSDirectory dir = FSDirectory.open(indexDir);
Expand Down Expand Up @@ -285,6 +296,7 @@ public SearchHelper prepareExec(SortedSet<String> projects) {
sort = Sort.RELEVANCE;
break;
}
checker=new DirectSpellChecker();
} catch (ParseException e) {
errorMsg = PARSE_ERROR_MSG + e.getMessage();
} catch (FileNotFoundException e) {
Expand Down Expand Up @@ -357,17 +369,20 @@ public SearchHelper executeQuery() {
}
private static final Pattern TABSPACE = Pattern.compile("[\t ]+");

private static void getSuggestion(String term, SpellChecker checker,
private void getSuggestion(Term term, IndexReader ir,
List<String> result) throws IOException {
if (term == null) {
return;
}
String[] toks = TABSPACE.split(term, 0);
String[] toks = TABSPACE.split(term.text(), 0);
for (int j = 0; j < toks.length; j++) {
if (toks[j].length() <= 3) {
continue;
}
result.addAll(Arrays.asList(checker.suggestSimilar(toks[j].toLowerCase(), 5)));
//TODO below seems to be case insensitive ... for refs/defs this is bad
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, it worked like this with the old code as well, so the problem must be somewhere in the index ... (or in the spellchecker?)

SuggestWord[] words=checker.suggestSimilar(
new Term(term.field(),toks[j]), SPELLCHECK_SUGGEST_WORD_COUNT, ir,
SuggestMode.SUGGEST_ALWAYS);
for (SuggestWord w: words) {
result.add(w.string);
}
}
}

Expand All @@ -379,74 +394,78 @@ private static void getSuggestion(String term, SpellChecker checker,
* <li>{@link #projects}</li> <li>{@link #dataRoot}</li>
* <li>{@link #builder}</li> </ul>
*
* @return a possible empty list of sugeestions.
* @return a possible empty list of suggestions.
*/
public List<Suggestion> getSuggestions() {
if (projects == null) {
return new ArrayList<Suggestion>(0);
return new ArrayList<>(0);
}
File[] spellIndex = null;
String name[];
if (projects.isEmpty()) {
spellIndex = new File[]{new File(dataRoot, "spellIndex")};
name=new String[]{"/"};
} else if (projects.size() == 1) {
spellIndex = new File[]{
new File(dataRoot, "spellIndex/" + projects.first())
};
name=new String[]{projects.first()};
} else {
spellIndex = new File[projects.size()];
int ii = 0;
File indexDir = new File(dataRoot, "spellIndex");
name = new String[projects.size()];
int ii = 0;
for (String proj : projects) {
spellIndex[ii++] = new File(indexDir, proj);
name[ii++] = proj;
}
}
List<Suggestion> res = new ArrayList<Suggestion>();
List<String> dummy = new ArrayList<String>();
for (int idx = 0; idx < spellIndex.length; idx++) {
if (!spellIndex[idx].exists()) {
continue;
}
FSDirectory spellDirectory = null;
SpellChecker checker = null;
Suggestion s = new Suggestion(spellIndex[idx].getName());
List<Suggestion> res = new ArrayList<>();
List<String> dummy = new ArrayList<>();
FSDirectory dir;
IndexReader ir=null;
Term t;
for (int idx = 0; idx < name.length; idx++) {
Suggestion s = new Suggestion(name[idx]);
try {
spellDirectory = FSDirectory.open(spellIndex[idx]);
checker = new SpellChecker(spellDirectory);
getSuggestion(builder.getFreetext(), checker, dummy);
dir = FSDirectory.open(new File(indexDir, name[idx]));
ir = DirectoryReader.open(dir);
if (builder.getFreetext()!=null &&
!builder.getFreetext().isEmpty()) {
t=new Term(QueryBuilder.FULL,builder.getFreetext());
getSuggestion(t, ir, dummy);
s.freetext = dummy.toArray(new String[dummy.size()]);
dummy.clear();
getSuggestion(builder.getRefs(), checker, dummy);
}
if (builder.getRefs()!=null && !builder.getRefs().isEmpty()) {
t=new Term(QueryBuilder.REFS,builder.getRefs());
getSuggestion(t, ir, dummy);
s.refs = dummy.toArray(new String[dummy.size()]);
dummy.clear();
// TODO it seems the only true spellchecker is for
// below field, see IndexDatabase
// createspellingsuggestions ...
getSuggestion(builder.getDefs(), checker, dummy);
}
if (builder.getDefs()!=null && !builder.getDefs().isEmpty()) {
t=new Term(QueryBuilder.DEFS,builder.getDefs());
getSuggestion(t, ir, dummy);
s.defs = dummy.toArray(new String[dummy.size()]);
dummy.clear();
if (s.freetext.length > 0 || s.defs.length > 0 || s.refs.length > 0) {
}
//TODO suggest also for path and history?
if ((s.freetext!=null && s.freetext.length > 0) ||
(s.defs!=null && s.defs.length > 0) ||
(s.refs!=null && s.refs.length > 0) ) {
res.add(s);
}
} catch (IOException e) {
log.log(Level.WARNING, "Got excption while getting spelling suggestions: ", e);
log.log(Level.WARNING, "Got exception while getting "
+ "spelling suggestions: ", e);
} finally {
if (spellDirectory != null) {
spellDirectory.close();
}
if (checker != null) {
try {
checker.close();
} catch (Exception x) {
log.log(Level.WARNING, "Got excption while closing spelling suggestions: ", x);
}
}
}
}
if (ir != null) {
try {
ir.close();
} catch (IOException ex) {
log.log(Level.WARNING, "Got exception while "
+ "getting spelling suggestions: ", ex);
}
}
}
}
return res;
}

/**
* Prepare the fields to support printing a fullblown summary. Does nothing
* Prepare the fields to support printing a full blown summary. Does nothing
* if {@link #redirect} or {@link #errorMsg} have a non-{@code null} value.
*
* <p> Parameters which should be populated/set at this time: <ul>
Expand Down
18 changes: 12 additions & 6 deletions web/search.jsp
Original file line number Diff line number Diff line change
Expand Up @@ -133,18 +133,24 @@ include file="menu.jspf"
List<Suggestion> hints = searchHelper.getSuggestions();
for (Suggestion hint : hints) {
%><p><font color="#cc0000">Did you mean (for <%= hint.name %>)</font>:<%
for (String word : hint.freetext) {
if (hint.freetext!=null) {
for (String word : hint.freetext) {
%> <a href="search?q=<%= Util.URIEncode(word) %>"><%=
Util.htmlize(word) %></a> &nbsp; <%
}
for (String word : hint.refs) {
}
}
if (hint.refs!=null) {
for (String word : hint.refs) {
%> <a href="search?refs=<%= Util.URIEncode(word) %>"><%=
Util.htmlize(word) %></a> &nbsp; <%
}
for (String word : hint.defs) {
}
}
if (hint.defs!=null) {
for (String word : hint.defs) {
%> <a href="search?defs=<%= Util.URIEncode(word) %>"><%=
Util.htmlize(word) %></a> &nbsp; <%
}
}
%></p><%
}
%>
Expand Down Expand Up @@ -233,4 +239,4 @@ include file="menu.jspf"

include file="foot.jspf"

%>
%>