Monday, 7 March 2016

PDFBox: embed files into PDF document

A PDF can contain references to external files via the file system or a URL to a remote location. It is also possible to embed a binary file into a PDF document.

Apache PDFBox provides the following classes to embed documents into a PDF file.

Class
Description
PDSimpleFileSpecification
By using this class, we can embed a simple string reference to a file (e.g. "./photos/trekking/krishna.jpg").
PDComplexFileSpecification
It is more feature-rich and allows advanced settings on the file reference.

The following step-by-step procedure explains how to add attachments to a PDF file.


Step 1: Load PDF Document.
PDDocument doc = PDDocument.load(new File(fileName))


Step 2: Instantiate PDDocumentNameDictionary. Attachments are stored as part of the "names" dictionary in the document catalog.
/* Name dictionary built from the document catalog; attachments are stored under it. */
PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());


Step 3: First we need to get all the existing attachments; after that we can add new attachments.
PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
Map existedNames = efTree.getNames();


Step 4: Create the file specification, which holds the embedded file.
/* File specification that will hold the embedded file; attachement is the path of the file being attached. */
PDComplexFileSpecification fs = new PDComplexFileSpecification();
fs.setFile(attachement);

for (String attachement : attachements) {
 /* One file specification per attachment, labelled with the attachment path */
 PDComplexFileSpecification spec = new PDComplexFileSpecification();
 spec.setFile(attachement);

 try (InputStream source = new FileInputStream(attachement)) {
  /* Embedded file created from the attachment's input stream */
  PDEmbeddedFile embedded = new PDEmbeddedFile(doc, source);
  embedded.setCreationDate(new GregorianCalendar());

  /* Hand the embedded file over to its specification */
  spec.setEmbeddedFile(embedded);

  /*
   * Register the specification under the attachment path; the map is
   * stored in the embedded file tree later on.
   */
  efMap.put(attachement, spec);
 }
}


Following is the complete code to attach a file.
/**
 * Embeds the given files as attachments into a PDF and saves the PDF back
 * to the same path.
 *
 * @param fileName
 *            path of the PDF document to modify
 * @param attachements
 *            paths of the files to embed; each path is also used as the
 *            attachment name
 * @return true if the document was saved, false if an IOException occurred
 *         (its message is printed to standard output)
 * @throws NullPointerException
 *             if fileName or attachements is null
 */
public static boolean addAtachement(final String fileName,
   final String... attachements) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");
  Objects.requireNonNull(attachements, "attachements shouldn't be null");

  Map<String, PDComplexFileSpecification> efMap = new HashMap<>();

  try (PDDocument doc = PDDocument.load(new File(fileName))) {

   /*
    * Attachments are stored as part of the "names" dictionary in the
    * document catalog
    */
   PDDocumentNameDictionary names = new PDDocumentNameDictionary(
     doc.getDocumentCatalog());

   PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
   if (Objects.isNull(efTree)) {
    efTree = new PDEmbeddedFilesNameTreeNode();
   }

   /*
    * getNames() returns null when the node has no names (always the case
    * for the freshly created tree above), so only copy when present.
    * NOTE(review): only names stored directly on this node are carried
    * over; confirm whether trees that keep entries in child nodes need
    * to be supported.
    */
   Map<String, PDComplexFileSpecification> existedNames = efTree.getNames();
   if (existedNames != null) {
    efMap.putAll(existedNames);
   }

   for (String attachement : attachements) {
    /* Create the file specification, which holds the embedded file */
    PDComplexFileSpecification fs = new PDComplexFileSpecification();

    fs.setFile(attachement);

    try (InputStream is = new FileInputStream(attachement)) {
     /* This represents an embedded file in a file specification */
     PDEmbeddedFile ef = new PDEmbeddedFile(doc, is);

     /* Set some relevant properties of embedded file */
     ef.setCreationDate(new GregorianCalendar());
     fs.setEmbeddedFile(ef);

     /*
      * now add the entry to the embedded file tree and set in
      * the document.
      */
     efMap.put(attachement, fs);
    }
   }

   efTree.setNames(efMap);
   names.setEmbeddedFiles(efTree);
   doc.getDocumentCatalog().setNames(names);

   /*
    * NOTE(review): the document is written over the file it was loaded
    * from; confirm the PDFBox version in use supports this.
    */
   doc.save(fileName);
   return true;
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return false;
  }

 }


Following is the updated utility class.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Static helpers around Apache PDFBox: text extraction, metadata access,
 * encryption and file attachments.
 *
 * Every public method takes the path of a PDF file, loads it, and reports an
 * IOException by printing its message to standard output and returning an
 * empty Optional (or false).
 */
public class PDFTextStripperUtil {

 /**
  * Extracts the text of all pages of a PDF file.
  *
  * @param fileName
  *            path of the PDF file
  * @return complete file data as string, or an empty Optional if an
  *         IOException occurred
  * @throws NullPointerException
  *             if fileName is null
  */
 public static Optional<String> getDataAsString(final String fileName) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {
   return Optional.of(newStripper().getText(pdDoc));
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }

 }

 /**
  * Extracts the text of the pages startPage..endPage (both 1-based).
  *
  * @param fileName
  *            path of the PDF file
  * @param startPage
  *            first page to extract, must be >= 1
  * @param endPage
  *            last page to extract, must be >= startPage
  * @return the extracted text, or an empty Optional if an IOException
  *         occurred
  * @throws NullPointerException
  *             if fileName is null
  * @throws IllegalArgumentException
  *             if the page range is invalid
  */
 public static Optional<String> getDataAsString(final String fileName,
   final int startPage, final int endPage) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");
  checkPageRange(startPage, endPage);

  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {
   return Optional.of(extractText(pdDoc, startPage, endPage));
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }
 }

 /**
  * Extracts the text from startPage (1-based) up to the last page.
  *
  * @param fileName
  *            path of the PDF file
  * @param startPage
  *            first page to extract, must be >= 1
  * @return the extracted text, or an empty Optional if an IOException
  *         occurred
  * @throws NullPointerException
  *             if fileName is null
  * @throws IllegalArgumentException
  *             if startPage is < 1 or greater than the number of pages
  */
 public static Optional<String> getDataAsStringFromStartPage(
   String fileName, int startPage) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  if (startPage < 1) {
   throw new IllegalArgumentException("startPage must >= 1");
  }

  /* Load the document once and reuse it for both the count and the text */
  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {
   final int noOfPages = pdDoc.getNumberOfPages();
   checkPageRange(startPage, noOfPages);
   return Optional.of(extractText(pdDoc, startPage, noOfPages));
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }
 }

 /**
  * Extracts the text from the first page up to endPage (1-based).
  *
  * @param fileName
  *            path of the PDF file
  * @param endPage
  *            last page to extract, must be >= 1
  * @return the extracted text, or an empty Optional if an IOException
  *         occurred
  * @throws NullPointerException
  *             if fileName is null
  * @throws IllegalArgumentException
  *             if endPage is < 1
  */
 public static Optional<String> getDataAsStringTillEndPage(String fileName,
   int endPage) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  if (endPage < 1) {
   throw new IllegalArgumentException("endPage must >= 1");
  }

  return getDataAsString(fileName, 1, endPage);
 }

 /**
  * @param fileName
  *            path of the PDF file
  * @return number of pages in the document, or an empty Optional if an
  *         IOException occurred
  * @throws NullPointerException
  *             if fileName is null
  */
 public static Optional<Integer> getNumberOfPages(String fileName) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {
   return Optional.of(pdDoc.getNumberOfPages());
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }
 }

 /**
  * Reads the document information dictionary.
  *
  * @param fileName
  *            path of the PDF file
  * @return map from each metadata key to its property value, or an empty
  *         Optional if an IOException occurred
  * @throws NullPointerException
  *             if fileName is null
  */
 public static Optional<Map<String, Object>> getDocumentBasicMetaData(
   final String fileName) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   Set<String> keys = docInfo.getMetadataKeys();

   Map<String, Object> map = new HashMap<>();

   for (String key : keys) {
    map.put(key, docInfo.getPropertyStringValue(key));
   }

   return Optional.of(map);

  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }
 }

 /**
  * Reads the metadata stream attached to the document catalog.
  *
  * @param fileName
  *            path of the PDF file
  * @return the metadata stream split into lines, or an empty Optional if
  *         there is no metadata or an IOException occurred
  * @throws NullPointerException
  *             if fileName is null
  */
 public static Optional<List<String>> getCatalogMetaData(
   final String fileName) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {
   PDDocumentCatalog catalog = pdDoc.getDocumentCatalog();
   PDMetadata metadata = catalog.getMetadata();
   return getMeatData(metadata);
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }

 }

 /**
  * Reads the metadata stream attached to a single page.
  *
  * @param fileName
  *            path of the PDF file
  * @param pageIndex
  *            1-based page number
  * @return the metadata stream split into lines, or an empty Optional if
  *         the page has no metadata or an IOException occurred
  * @throws NullPointerException
  *             if fileName is null
  * @throws IllegalArgumentException
  *             if pageIndex is < 1 or greater than the number of pages
  */
 public static Optional<List<String>> getPDPageMetaData(
   final String fileName, int pageIndex) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  if (pageIndex < 1) {
   throw new IllegalArgumentException("pageIndex must >= 1");
  }

  try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) {

   if (pageIndex > pdDoc.getNumberOfPages()) {
    throw new IllegalArgumentException("pageIndex : " + pageIndex
      + " must <= " + pdDoc.getNumberOfPages());
   }

   /* pageIndex is 1-based, PDDocument.getPage expects a 0-based index */
   PDPage pdPage = pdDoc.getPage(pageIndex - 1);
   PDMetadata metadata = pdPage.getMetadata();
   return getMeatData(metadata);
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }
 }

 /**
  * Encrypts a PDF with the given key length and passwords and writes the
  * result to encryptedFileName. Printing is disabled in the encrypted
  * document, everything else is allowed.
  *
  * @param fileName
  *            path of the PDF file to encrypt
  * @param encryptedFileName
  *            path the encrypted document is saved to
  * @param keyLength
  *            length of the encryption key, must be > 0
  * @param ownerPassword
  *            owner password
  * @param userPassword
  *            user password
  * @return true if the encrypted document was saved, false if an
  *         IOException occurred
  * @throws NullPointerException
  *             if fileName, encryptedFileName, ownerPassword or
  *             userPassword is null
  * @throws IllegalArgumentException
  *             if keyLength is < 1
  */
 public static boolean encryptDocument(final String fileName,
   final String encryptedFileName, final int keyLength,
   final String ownerPassword, final String userPassword) {

  Objects.requireNonNull(fileName, "fileName shouldn't be null");
  Objects.requireNonNull(encryptedFileName,
    "encryptedFileName shouldn't be null");

  if (keyLength < 1) {
   throw new IllegalArgumentException("keyLength should > 0");
  }

  Objects.requireNonNull(ownerPassword, "ownerPassword shouldn't be null");
  Objects.requireNonNull(userPassword, "userPassword shouldn't be null");

  try (PDDocument doc = PDDocument.load(new File(fileName))) {
   AccessPermission ap = new AccessPermission();

   /* disable printing, everything else is allowed */
   ap.setCanPrint(false);

   StandardProtectionPolicy spp = new StandardProtectionPolicy(
     ownerPassword, userPassword, ap);

   /*
    * Define the length of the encryption key. Possible values are 40,
    * 128 256
    */
   spp.setEncryptionKeyLength(keyLength);
   spp.setPermissions(ap);
   doc.protect(spp);

   doc.save(encryptedFileName);
   return true;
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return false;
  }
 }

 /**
  * Encrypts a PDF with a 128 bit key; see
  * {@link #encryptDocument(String, String, int, String, String)}.
  */
 public static boolean encryptDocument(final String fileName,
   final String encryptedFileName, final String ownerPassword,
   final String userPassword) {
  return encryptDocument(fileName, encryptedFileName, 128, ownerPassword,
    userPassword);
 }

 /**
  * Encrypts a PDF with a 128 bit key and saves it as
  * "encrypted_" + the source file's name. The target name carries no
  * directory part, so it is a relative path.
  *
  * @throws NullPointerException
  *             if fileName, ownerPassword or userPassword is null
  */
 public static boolean encryptDocument(final String fileName,
   final String ownerPassword, final String userPassword) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");

  File file = new File(fileName);
  String encryptedFileName = "encrypted_" + file.getName();
  return encryptDocument(fileName, encryptedFileName, 128, ownerPassword,
    userPassword);
 }

 /**
  * Embeds the given files as attachments into a PDF and saves the PDF back
  * to the same path.
  *
  * @param fileName
  *            path of the PDF document to modify
  * @param attachements
  *            paths of the files to embed; each path is also used as the
  *            attachment name
  * @return true if the document was saved, false if an IOException
  *         occurred
  * @throws NullPointerException
  *             if fileName or attachements is null
  */
 public static boolean addAtachement(final String fileName,
   final String... attachements) {
  Objects.requireNonNull(fileName, "fileName shouldn't be null");
  Objects.requireNonNull(attachements, "attachements shouldn't be null");

  Map<String, PDComplexFileSpecification> efMap = new HashMap<>();

  try (PDDocument doc = PDDocument.load(new File(fileName))) {

   /*
    * Attachments are stored as part of the "names" dictionary in the
    * document catalog
    */
   PDDocumentNameDictionary names = new PDDocumentNameDictionary(
     doc.getDocumentCatalog());

   PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
   if (Objects.isNull(efTree)) {
    efTree = new PDEmbeddedFilesNameTreeNode();
   }

   /*
    * getNames() returns null when the node has no names (always the
    * case for the freshly created tree above), so only copy when
    * present. NOTE(review): only names stored directly on this node
    * are carried over; confirm whether trees that keep entries in
    * child nodes need to be supported.
    */
   Map<String, PDComplexFileSpecification> existedNames = efTree
     .getNames();
   if (existedNames != null) {
    efMap.putAll(existedNames);
   }

   for (String attachement : attachements) {
    /* Create the file specification, which holds the embedded file */
    PDComplexFileSpecification fs = new PDComplexFileSpecification();

    fs.setFile(attachement);

    try (InputStream is = new FileInputStream(attachement)) {
     /* This represents an embedded file in a file specification */
     PDEmbeddedFile ef = new PDEmbeddedFile(doc, is);

     /* Set some relevant properties of embedded file */
     ef.setCreationDate(new GregorianCalendar());
     fs.setEmbeddedFile(ef);

     /*
      * now add the entry to the embedded file tree and set in
      * the document.
      */
     efMap.put(attachement, fs);
    }
   }

   efTree.setNames(efMap);
   names.setEmbeddedFiles(efTree);
   doc.getDocumentCatalog().setNames(names);

   /*
    * NOTE(review): the document is written over the file it was
    * loaded from; confirm the PDFBox version in use supports this.
    */
   doc.save(fileName);
   return true;
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return false;
  }

 }

 /* Rejects page ranges that are not 1-based or that end before they start. */
 private static void checkPageRange(final int startPage, final int endPage) {
  if (startPage < 1 || endPage < 1 || endPage < startPage) {
   throw new IllegalArgumentException(
     "startPage, endPage must >= 1 and  endPage >= startPage");
  }
 }

 /* Creates a text stripper with the formatting options shared by all extractions. */
 private static PDFTextStripper newStripper() throws IOException {
  PDFTextStripper stripper = new PDFTextStripper();
  stripper.setLineSeparator("\n");
  stripper.setAddMoreFormatting(true);
  return stripper;
 }

 /* Extracts the text of pages startPage..endPage from an already loaded document. */
 private static String extractText(final PDDocument pdDoc,
   final int startPage, final int endPage) throws IOException {
  PDFTextStripper stripper = newStripper();
  stripper.setStartPage(startPage);
  stripper.setEndPage(endPage);
  return stripper.getText(pdDoc);
 }

 /* Reads the stream line by line; an IOException yields an empty Optional. */
 private static Optional<List<String>> getDataFromStream(InputStream in) {

  /*
   * Decode explicitly as UTF-8 instead of the platform default charset.
   * NOTE(review): assumes the metadata streams are UTF-8 encoded XMP -
   * confirm for the documents being processed.
   */
  try (BufferedReader br = new BufferedReader(
    new InputStreamReader(in, StandardCharsets.UTF_8))) {
   List<String> data = new ArrayList<>();
   String str;

   while ((str = br.readLine()) != null) {
    data.add(str);
   }
   return Optional.of(data);
  } catch (IOException e) {
   System.out.println(e.getMessage());
   return Optional.empty();
  }

 }

 /* Returns the lines of the metadata stream, or empty if metadata is null or unreadable. */
 private static Optional<List<String>> getMeatData(PDMetadata metadata) {
  if (metadata == null) {
   System.out.println("There is no meta data associated");
   return Optional.empty();
  }

  try (InputStream in = metadata.createInputStream()) {
   return getDataFromStream(in);
  } catch (IOException e) {
   /* Report the failure like every other method in this class does */
   System.out.println(e.getMessage());
   return Optional.empty();
  }
 }
}

import java.io.IOException;

/**
 * Small driver that attaches two sample files to a PDF and prints whether
 * the operation succeeded.
 */
public class PDFTextStripperUtilTest {
 public static void main(String[] args) throws IOException {
  final String pdfPath = "/Users/harikrishna_gurram/Downloads/Saurabh.pdf";

  /* Files to embed; the second one is the PDF itself */
  final String[] toAttach = {
    "/Users/harikrishna_gurram/b.txt",
    "/Users/harikrishna_gurram/Downloads/Saurabh.pdf" };

  final boolean added = PDFTextStripperUtil.addAtachement(pdfPath,
    toAttach);

  System.out.println(added ? "Attachements are added"
    : "Operation failed");
 }
}


How to see attachments in PDF file?
View -> Show/Hide -> Navigation Panes -> Attachments




Previous                                                 Next                                                 Home

No comments:

Post a Comment