Monday 12 July 2021

Lucene: Multivalued Fields

There are situations where one field has multiple values associated with it.

 

For example, a book can have multiple authors, and an employee may work on one or more projects.

 

How to add multiple values to a field?

Just add each value to the document under the same field name, as shown below.

// Add one "author" field per value; every add() reuses the same field name,
// so the document ends up holding several values for "author".
for (String author : authors) {
	// Field.Store.YES keeps the value retrievable from the stored document.
	doc.add(new StringField("author", author, Field.Store.YES));
}

How to retrieve all the values associated with a multivalued field?

Use the ‘getFields’ method of Document.

// Walk every field instance stored under the name "author" and print its value on its own line.
for (IndexableField authorField : doc.getFields("author")) {
	String authorName = authorField.stringValue();
	System.out.println("\t" + authorName);
}


DocumentUtil.java

package com.sample.app.util;

import java.util.Arrays;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

/**
 * Helper that builds the sample Lucene documents used by the demo.
 */
public class DocumentUtil {

	/**
	 * Builds a single document from the given values.
	 *
	 * <p>
	 * "id" and each "author" value are added as {@link StringField}s, while
	 * "title", "description" and "blog" are added as {@link TextField}s. All
	 * fields are stored except "description". One "author" field is added per
	 * element of {@code authors}, which makes "author" a multivalued field.
	 *
	 * @param id          value of the "id" field
	 * @param title       value of the "title" field
	 * @param description value of the "description" field (not stored)
	 * @param blog        value of the "blog" field
	 * @param authors     author names, one "author" field is added for each
	 * @return the populated document
	 */
	public static Document getDocument(String id, String title, String description, String blog, List<String> authors) {
		final Document document = new Document();

		document.add(new StringField("id", id, Field.Store.YES));
		document.add(new TextField("title", title, Field.Store.YES));
		document.add(new TextField("description", description, Field.Store.NO));
		document.add(new TextField("blog", blog, Field.Store.YES));

		// Same field name for every author: the values accumulate on the document.
		authors.forEach(name -> document.add(new StringField("author", name, Field.Store.YES)));

		return document;
	}

	/**
	 * Builds the four sample documents used by the demo.
	 *
	 * @return the sample documents, in id order
	 */
	public static List<Document> getDocuments() {
		return Arrays.asList(
				getDocument("1", "JavaWorld",
						"The original independent resource for Java developers, architects, and managers.",
						" javaworld.com", Arrays.asList("John", "Robert")),
				getDocument("2", "Oracle Blogs | The Java Source",
						" Java powers more than 4.5 billion devices including 800 million computers and 1.5 billion cell phones. If you love Java, this is the blog you must follow.",
						"blogs.oracle.com/java", Arrays.asList("James", "Stewart")),
				getDocument("3", "A Java geek",
						"Nicolas Fränkel's blog. IT architect focusing on Java, Java EE, and their surrounding ecosystems. He is a trainer, book writer, speaker & blogger.",
						"blog.frankel.ch", Arrays.asList("Ram", "Krishna")),
				getDocument("4", "Self Learning Java", "Learn Java fundamentals and other java libraries",
						"self-learning-java-tutorial.blogspot.com", Arrays.asList("Ram", "Gopi", "Siva")));
	}
}


App.java

package com.sample.app;

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.util.Bits;

import com.sample.app.util.DocumentUtil;

/**
 * Demo entry point: indexes the sample documents and prints every document
 * together with all values of its multivalued "author" field.
 */
public class App {

	/**
	 * Prints the "id", the "title" and every "author" value of each live
	 * document found in the given index directory.
	 *
	 * @param directory index directory to read from
	 * @throws IOException if the index cannot be opened or read
	 */
	private static void printAllDocuments(Directory directory) throws IOException {

		try (IndexReader indexReader = DirectoryReader.open(directory)) {
			System.out.println("All Documents in Lucene Index");

			// A null liveDocs is treated as "no document needs to be skipped".
			Bits liveDocs = MultiBits.getLiveDocs(indexReader);
			final int maxDoc = indexReader.maxDoc();

			for (int docId = 0; docId < maxDoc; docId++) {
				// Skip documents that are marked as not live (deleted).
				if (liveDocs != null && !liveDocs.get(docId)) {
					continue;
				}

				Document doc = indexReader.document(docId);
				System.out.println(doc.get("id") + ", " + doc.get("title"));

				// getFields returns one entry per value added under "author".
				for (IndexableField field : doc.getFields("author")) {
					System.out.println("\t" + field.stringValue());
				}
			}

			System.out.println();
		}

	}

	/**
	 * Writes the sample documents to the index at {@code /Users/Shared/lucene},
	 * commits them and prints the index content.
	 *
	 * <p>
	 * The analyzer, the directory and the writer are all opened in one
	 * try-with-resources statement so each of them is closed (writer first,
	 * analyzer last) even when indexing or printing fails.
	 *
	 * <p>
	 * NOTE(review): the writer configuration is left at its defaults, so running
	 * the program repeatedly against the same path presumably appends the sample
	 * documents again - confirm against the IndexWriterConfig open-mode docs.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException if the index cannot be written or read
	 */
	public static void main(String[] args) throws IOException {

		try (Analyzer analyzer = new StandardAnalyzer();
				Directory directory = new MMapDirectory(new File("/Users/Shared/lucene").toPath(),
						NoLockFactory.INSTANCE);
				IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {

			List<Document> documents = DocumentUtil.getDocuments();

			indexWriter.addDocuments(documents);
			// Commit before reading so the freshly added documents are visible to the reader.
			indexWriter.commit();

			printAllDocuments(directory);
		}

	}
}


Output

All Documents in Lucene Index
1, JavaWorld
	John
	Robert
2, Oracle Blogs | The Java Source
	James
	Stewart
3, A Java geek
	Ram
	Krishna
4, Self Learning Java
	Ram
	Gopi
	Siva






 

 

Previous                                                    Next                                                    Home

No comments:

Post a Comment