Monday 5 July 2021

Lucene: Documents are heterogeneous

Lucene documents do not need to follow a fixed schema. For example, you can store an employee document with the fields id, firstName and lastName, and you can store a book document with the fields id, title and price in the same index. Documents of the same kind do not need to share the same fields either. For example, the emp1 document can have 3 fields (id, firstName and lastName), whereas the emp2 document can have 10 fields.

 

Example

// Book document: id, title, description and price fields.
Document bookDoc = new Document();
bookDoc.add(new TextField("id", "1", Field.Store.YES));
bookDoc.add(new TextField("title", "Java for Beginners", Field.Store.YES));
bookDoc.add(new TextField("description", "All the Java specific topics covered here", Field.Store.NO));
bookDoc.add(new DoublePoint("price", 2.145));

// First employee document: id, firstName and lastName fields.
Document emp1Doc = new Document();
emp1Doc.add(new TextField("id", "1", Field.Store.YES));
emp1Doc.add(new TextField("firstName", "Gopi", Field.Store.YES));
emp1Doc.add(new TextField("lastName", "Battu", Field.Store.NO));

// Second employee document: has a mail field and no lastName field,
// so its field set differs from emp1Doc.
Document emp2Doc = new Document();
emp2Doc.add(new TextField("id", "2", Field.Store.YES));
emp2Doc.add(new TextField("firstName", "Rama Krishna", Field.Store.YES));
emp2Doc.add(new TextField("mail", "ram@abcdef.com", Field.Store.YES));

 

You can add all three of the above documents to the same index.

 

App.java

package com.sample.app;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.util.QueryBuilder;

/**
 * Demonstrates that a single Lucene index can hold documents with different sets of fields.
 *
 * <p>Indexes one book document and two employee documents (whose fields differ from each other)
 * into the same index, then searches the {@code firstName} field and prints the hits.
 */
public class App {

	/**
	 * Writes three documents to the index under {@code /Users/Shared/lucene}, queries the
	 * {@code firstName} field for "Krishna" and prints the total hit count followed by the
	 * {@code firstName} and {@code lastName} values of each hit.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException if the index cannot be written or read
	 */
	public static void main(String[] args) throws IOException {

		// The analyzer and the directory are opened in try-with-resources so that both are
		// closed when main finishes, including when indexing or searching throws.
		try (Analyzer analyzer = new StandardAnalyzer();
				Directory directory = new MMapDirectory(new File("/Users/Shared/lucene").toPath(),
						NoLockFactory.INSTANCE)) {

			IndexWriterConfig config = new IndexWriterConfig(analyzer);

			// Index phase: the writer is closed at the end of this block, before the reader is opened.
			try (IndexWriter indexWriter = new IndexWriter(directory, config)) {

				// Book document: id, title, description and price fields.
				Document bookDoc = new Document();
				bookDoc.add(new TextField("id", "1", Field.Store.YES));
				bookDoc.add(new TextField("title", "Java for Beginners", Field.Store.YES));
				bookDoc.add(new TextField("description", "All the Java specific topics covered here", Field.Store.NO));
				bookDoc.add(new DoublePoint("price", 2.145));

				// First employee document: id, firstName and lastName fields.
				Document emp1Doc = new Document();
				emp1Doc.add(new TextField("id", "1", Field.Store.YES));
				emp1Doc.add(new TextField("firstName", "Gopi", Field.Store.YES));
				emp1Doc.add(new TextField("lastName", "Battu", Field.Store.NO));

				// Second employee document: has a mail field and no lastName field.
				Document emp2Doc = new Document();
				emp2Doc.add(new TextField("id", "2", Field.Store.YES));
				emp2Doc.add(new TextField("firstName", "Rama Krishna", Field.Store.YES));
				emp2Doc.add(new TextField("mail", "ram@abcdef.com", Field.Store.YES));

				// All three documents go into the same index even though their fields differ.
				indexWriter.addDocument(bookDoc);
				indexWriter.addDocument(emp1Doc);
				indexWriter.addDocument(emp2Doc);
			}

			// Search phase: query the firstName field for "Krishna"; 0.2f is the fraction argument
			// of createMinShouldMatchQuery.
			QueryBuilder queryBuilder = new QueryBuilder(analyzer);
			Query query = queryBuilder.createMinShouldMatchQuery("firstName", "Krishna", 0.2f);
			int maxHitsPerPage = 10;

			try (IndexReader indexReader = DirectoryReader.open(directory)) {
				IndexSearcher indexSearcher = new IndexSearcher(indexReader);

				TopDocs docs = indexSearcher.search(query, maxHitsPerPage);
				ScoreDoc[] hits = docs.scoreDocs;
				System.out.println("Total Hits: " + docs.totalHits);
				System.out.println("Results: ");
				for (ScoreDoc hit : hits) {
					Document document = indexSearcher.doc(hit.doc);
					System.out.println("First Name: " + document.get("firstName"));
					// emp2Doc was indexed without a lastName field, so this prints "null" for it.
					System.out.println("Last Name: " + document.get("lastName"));
				}
			}
		}

	}

}

 

Output

Total Hits: 1 hits
Results: 
First Name: Rama Krishna
Last Name: null

 

 

 

 

 

Previous                                                    Next                                                    Home

No comments:

Post a Comment