Monday, 7 March 2016

PDFBox: validate PDF document

Apache PDFBox provides the PreflightParser class, which can be used to validate a PDF document. PreflightParser is compliant with the ISO-19005 specification (aka PDF/A-1).

The following link explains the PDF/A-1 standard.

The following step-by-step procedure shows how to validate a PDF document.


Step 1: Instantiate the parser with the given PDF file.
// fileName identifies the PDF file that is going to be validated.
PreflightParser parser = new PreflightParser(fileName);

Step 2: Call the parse method of the PreflightParser.
// Parse the file first; the PreflightDocument is obtained from the parser afterwards.
parser.parse();


Step 3: Get PreflightDocument and validate.
// try-with-resources: the PreflightDocument is closed automatically when the block exits.
try (PreflightDocument document = parser.getPreflightDocument()) {
 // Run the validation, then read its outcome back from the document.
 document.validate();
 ValidationResult result = document.getResult();
 // Hand the outcome to the caller wrapped in an Optional.
 return Optional.of(result);
}


The following snippet validates a PDF document.
/**
 * Parses the given file with {@code PreflightParser}, validates the resulting
 * {@code PreflightDocument} and returns the validation outcome.
 *
 * @param fileName
 *            name of the PDF file to validate; must not be null
 * @return the validation result, or an empty Optional when an IOException
 *         is raised while parsing or validating the file
 * @throws NullPointerException
 *             if fileName is null
 */
public static Optional<ValidationResult> getValidationResult(String fileName) {
 Objects.requireNonNull(fileName, "fileName shouldn't be null");

 try {
  PreflightParser preflightParser = new PreflightParser(fileName);
  preflightParser.parse();

  // The document is closed automatically once the result has been read.
  try (PreflightDocument preflightDocument = preflightParser.getPreflightDocument()) {
   preflightDocument.validate();
   return Optional.of(preflightDocument.getResult());
  }
 } catch (IOException ignored) {
  // Deliberate best-effort: any I/O failure is reported as "no result".
  return Optional.empty();
 }
}

/**
 * Tells whether the given file is a valid PDF/A-1b file.
 *
 * @param fileName
 *            name of the file to check
 * @return true when a validation result is available and it reports the
 *         file as valid; false otherwise, including when no validation
 *         result could be obtained
 */
public static boolean isValidPDF(String fileName) {
 // An absent result is treated the same as an invalid file.
 return getValidationResult(fileName)
   .map(ValidationResult::isValid)
   .orElse(false);
}

import java.io.IOException;
import java.util.Optional;

import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;

public class PDFTextStripperUtilTest {
 public static void main(String args[]) throws IOException {
  String fileName = "/Users/harikrishna_gurram/Downloads/Saurabh.pdf";

  if (PDFTextStripperUtil.isValidPDF(fileName)) {
   System.out.println("The file " + fileName
     + " is a valid PDF/A-1b file");
  } else {
   System.out.println("Not a valid PDF/A-1b file");
   Optional<ValidationResult> validationResult = PDFTextStripperUtil
     .getValidationResult(fileName);
   if (!validationResult.isPresent()) {
    return;
   }
   ValidationResult result = validationResult.get();
   for (ValidationError error : result.getErrorsList()) {
    System.out.println(error.getErrorCode() + " : "
      + error.getDetails());
   }
  }
 }
}


Find the complete utility class at the following link.

Previous                                                 Next                                                 Home

No comments:

Post a Comment