Saturday, 19 June 2021

Lucene: Print all the documents

The snippet below prints all the live (non-deleted) documents from a Lucene index.

/**
 * Prints the stored "id" and "title" fields of every live document visible
 * through the given reader, preceded by a header line and followed by a
 * blank line.
 *
 * @param indexReader reader over the index whose documents are printed
 * @throws IOException if reading the index fails
 */
private static void printAllDocuments(IndexReader indexReader) throws IOException {
	System.out.println("All Documents in Lucene Index");

	// A null Bits instance is treated as "every document is live".
	final Bits liveDocs = MultiBits.getLiveDocs(indexReader);

	int docId = 0;
	while (docId < indexReader.maxDoc()) {
		boolean isLive = (liveDocs == null) || liveDocs.get(docId);
		if (isLive) {
			Document storedDoc = indexReader.document(docId);
			String id = storedDoc.get("id");
			String title = storedDoc.get("title");
			System.out.println(id + ", " + title);
		}
		docId++;
	}

	System.out.println();
}

 

The 'indexReader.maxDoc()' method returns one greater than the largest possible document number.

 

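The 'MultiBits.getLiveDocs' method returns a Bits instance that tells whether each document is live or has been deleted. It returns null when the index has no deletions, in which case every document is live.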

 

Find the complete working application below.

 

App.java

package com.sample.app;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.util.Bits;

public class App {

	private static void printAllDocuments(IndexReader indexReader) throws IOException {
		System.out.println("All Documents in Lucene Index");
		Bits liveDocs = MultiBits.getLiveDocs(indexReader);
		for (int i = 0; i < indexReader.maxDoc(); i++) {
			if (liveDocs != null && !liveDocs.get(i))
				continue;

			Document doc = indexReader.document(i);
			System.out.println(doc.get("id") + ", " + doc.get("title"));
		}

		System.out.println();

	}

	public static void main(String args[]) throws IOException {

		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig indexWriterConfig1 = new IndexWriterConfig(analyzer);
		IndexWriterConfig indexWriterConfig2 = new IndexWriterConfig(analyzer);

		Directory directory = new MMapDirectory(new File("/Users/Shared/lucene").toPath(), NoLockFactory.INSTANCE);

		Document doc1 = new Document();
		doc1.add(new TextField("id", "1", Field.Store.YES));
		doc1.add(new TextField("title", "Lucene in Action", Field.Store.YES));
		doc1.add(new TextField("description", "Lucene is a platform where we can index our data to make it searchable.",
				Field.Store.YES));

		Document doc2 = new Document();
		doc2.add(new TextField("id", "2", Field.Store.YES));
		doc2.add(new TextField("title", "Java in Action", Field.Store.YES));
		doc2.add(new TextField("description",
				"Java is a platform and programming language to build Enterprise Applications", Field.Store.YES));

		try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig1)) {
			indexWriter.addDocument(doc1);
			indexWriter.addDocument(doc2);
		}

		try (IndexReader indexReader = DirectoryReader.open(directory)) {
			printAllDocuments(indexReader);
		}

		System.out.println("\nDelete the document with id 1\n");
		try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig2)) {
			indexWriter.deleteDocuments(new Term("id", "1"));
		}

		try (IndexReader indexReader = DirectoryReader.open(directory)) {
			printAllDocuments(indexReader);
		}

	}

}

 

Output

All Documents in Lucene Index
1, Lucene in Action
2, Java in Action


Delete the document with id 1

All Documents in Lucene Index
2, Java in Action

 

 

 

 

 

Previous                                                    Next                                                    Home

No comments:

Post a Comment