Tuesday, 13 July 2021

Lucene: Boosting Queries

Sometimes we may want to boost queries on specific fields so that matching documents rank higher. For example, I want the documents written by the author ‘Ram’ to score higher than the others.

 

Example

// Base query: matches documents whose "author" field is exactly "Ram".
Query authorQuery = new TermQuery(new Term("author", "Ram"));
// Wrap the base query so that its score contribution is multiplied by 3.
Query authorBoostQuery = new BoostQuery(authorQuery, 3f);

 

Now, you can combine multiple queries using BooleanQuery like below.

// Both clauses are optional (SHOULD); a document matching either clause is returned,
// and the boosted author clause contributes more to the final score.
BooleanQuery boolQuery = new BooleanQuery.Builder()
.add(authorBoostQuery, BooleanClause.Occur.SHOULD).add(titleQuery, BooleanClause.Occur.SHOULD)
.build();

 

Find the complete working application below.

 

DocumentUtil.java

 

package com.sample.app.util;

import java.util.Arrays;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

/**
 * Helper that builds the sample Lucene documents used by the demo application.
 */
public class DocumentUtil {

	/**
	 * Creates a single document describing a blog.
	 *
	 * @param id          value of the "id" field (StringField, stored)
	 * @param title       value of the "title" field (TextField, stored)
	 * @param description value of the "description" field (TextField, not stored)
	 * @param blog        value of the "blog" field (TextField, stored)
	 * @param authors     one "author" field (StringField, stored) is added per entry
	 * @return the populated document
	 */
	public static Document getDocument(String id, String title, String description, String blog, List<String> authors) {
		final Document result = new Document();

		result.add(new StringField("id", id, Field.Store.YES));
		result.add(new TextField("title", title, Field.Store.YES));
		result.add(new TextField("description", description, Field.Store.NO));
		result.add(new TextField("blog", blog, Field.Store.YES));

		// The "author" field is multi-valued: one field instance per author name.
		authors.forEach(name -> result.add(new StringField("author", name, Field.Store.YES)));

		return result;
	}

	/**
	 * Builds the four sample blog documents, in id order 1..4.
	 *
	 * @return a fixed-size list holding the sample documents
	 */
	public static List<Document> getDocuments() {
		return Arrays.asList(
				getDocument("1", "Java World",
						"The original independent resource for Java developers, architects, and managers.",
						" javaworld.com", Arrays.asList("Ram")),
				getDocument("2", "Oracle Blogs | The Java Source",
						" Java powers more than 4.5 billion devices including 800 million computers and 1.5 billion cell phones. If you love Java, this is the blog you must follow.",
						"blogs.oracle.com/java", Arrays.asList("Ram")),
				getDocument("3", "A Java geek Professionals and experts",
						"Nicolas Fränkel's blog. IT architect focusing on Java, Java EE, and their surrounding ecosystems. He is a trainer, book writer, speaker & blogger.",
						"blog.frankel.ch", Arrays.asList("John")),
				getDocument("4", "Self Learning for Java Developers By Krishna (Java)",
						"Learn Java fundamentals and other java libraries",
						"self-learning-java-tutorial.blogspot.com", Arrays.asList("Krishna")));
	}
}

 

App.java

package com.sample.app;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.util.QueryBuilder;

import com.sample.app.util.DocumentUtil;

/**
 * Demo application: indexes the sample documents and runs two boolean queries,
 * once boosting the author clause and once boosting the title clause, printing
 * the scored hits for each.
 */
public class App {

	/**
	 * Runs {@code query} against the index stored in {@code directory} and prints
	 * the total hit count followed by title, score and authors of up to 10 hits.
	 *
	 * @param query     the query to execute
	 * @param directory the directory holding the index to search
	 * @throws IOException if the index cannot be opened or read
	 */
	private static void printQueryResults(Query query, Directory directory) throws IOException {
		try (IndexReader indexReader = DirectoryReader.open(directory)) {
			IndexSearcher indexSearcher = new IndexSearcher(indexReader);
			TopDocs docs = indexSearcher.search(query, 10);
			ScoreDoc[] hits = docs.scoreDocs;
			// These two headers are printed without a line break between them.
			System.out.print("Total Hits: " + docs.totalHits);
			System.out.print("Results: ");
			for (ScoreDoc hit : hits) {
				Document d = indexSearcher.doc(hit.doc);

				System.out.println("\nTitle: " + d.get("title"));
				System.out.println("Score : " + hit.score);

				// "author" may hold several values, so print every field instance.
				for (IndexableField field : d.getFields("author")) {
					System.out.println("Author: " + field.stringValue());
				}

				System.out.println();
			}

			System.out.println("**************************************\n");
		}
	}

	/**
	 * Indexes the sample documents and prints the results of the two boosted queries.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException if the index cannot be written or read
	 */
	public static void main(String[] args) throws IOException {

		// Analyzer, Directory and IndexWriter are all closeable; try-with-resources
		// releases them in reverse order (writer, then directory, then analyzer).
		// NOTE(review): the writer config uses its default open mode, so running the
		// program again against the same path presumably adds the documents a second
		// time - confirm whether the index should be recreated on each run.
		try (Analyzer analyzer = new StandardAnalyzer();
				Directory directory = new MMapDirectory(new File("/Users/Shared/lucene").toPath(),
						NoLockFactory.INSTANCE);
				IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {

			List<Document> documents = DocumentUtil.getDocuments();

			indexWriter.addDocuments(documents);
			// Commit so that the readers opened in printQueryResults can see the documents.
			indexWriter.commit();

			QueryBuilder queryBuilder = new QueryBuilder(analyzer);
			Query authorQuery = new TermQuery(new Term("author", "Ram"));
			Query titleQuery = queryBuilder.createPhraseQuery("title", "Java");

			Query authorBoostQuery = new BoostQuery(authorQuery, 3f);
			Query titleBoostQuery = new BoostQuery(titleQuery, 123.5f);

			System.out.println("\nBoosting based on author");
			// Instantiate the builder directly instead of via reflective Class.newInstance().
			BooleanQuery boolQuery = new BooleanQuery.Builder()
					.add(authorBoostQuery, BooleanClause.Occur.SHOULD)
					.add(titleQuery, BooleanClause.Occur.SHOULD)
					.build();

			printQueryResults(boolQuery, directory);

			System.out.println("\nBoosting based on title");
			boolQuery = new BooleanQuery.Builder()
					.add(authorQuery, BooleanClause.Occur.SHOULD)
					.add(titleBoostQuery, BooleanClause.Occur.SHOULD)
					.build();

			printQueryResults(boolQuery, directory);

		}

	}
}

 

Output

Boosting based on author
Total Hits: 4 hitsResults: 
Title: Java World
Score : 1.0093331
Author: Ram


Title: Oracle Blogs | The Java Source
Score : 0.9940433
Author: Ram


Title: Self Learning for Java Developers By Krishna (Java)
Score : 0.057394832
Author: Krishna


Title: A Java geek Professionals and experts
Score : 0.045246847
Author: John

**************************************


Boosting based on title
Total Hits: 4 hitsResults: 
Title: Java World
Score : 8.23543
Author: Ram


Title: Self Learning for Java Developers By Krishna (Java)
Score : 7.088262
Author: Krishna


Title: Oracle Blogs | The Java Source
Score : 6.347131
Author: Ram


Title: A Java geek Professionals and experts
Score : 5.5879855
Author: John

**************************************

 

 

 

  

Previous                                                    Next                                                    Home

No comments:

Post a Comment