A PDF can
contain references to external files via the file system or a URL to a remote
location. It is also possible to embed a binary file into a PDF document.
Apache
PDFBox provides the following classes to embed documents into a PDF file.
Class
|
Description
|
PDSimpleFileSpecification
|
By using
this class we can embed a simple string reference to a file (e.g.
"./photos/trekking/krishna.jpg").
|
PDComplexFileSpecification
|
It is more
feature rich and allows for advanced settings on the file reference.
|
The following
step-by-step procedure explains how to add attachments to a PDF file.
Step 1: Load PDF Document.
PDDocument doc = PDDocument.load(new File(fileName))
Step 2: Instantiate PDDocumentNameDictionary. Attachments
are stored as part of the "names" dictionary in the document catalog.
// Build the name dictionary from the document catalog of the loaded PDF.
PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
Step 3: First we need to get all the existing attachments; after that we can add new attachments.
PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles(); Map existedNames = efTree.getNames();
Step 4: Create the file specification, which holds the
embedded file.
// efMap is the Map<String, PDComplexFileSpecification> that collects the
// attachments to store in the embedded-files tree (see the complete code).
for (String attachement : attachements) {
    /* Create the file specification, which holds the embedded file */
    PDComplexFileSpecification fs = new PDComplexFileSpecification();
    fs.setFile(attachement);

    try (InputStream is = new FileInputStream(attachement)) {
        /* This represents an embedded file in a file specification */
        PDEmbeddedFile ef = new PDEmbeddedFile(doc, is);

        /* Set some relevant properties of embedded file */
        ef.setCreationDate(new GregorianCalendar());
        fs.setEmbeddedFile(ef);

        /* Register the specification under the attachment's path. */
        efMap.put(attachement, fs);
    }
}
The following is the complete code to attach files to a PDF document.
/**
 * Embeds the given files as attachments in a PDF and saves the result back
 * to the same file.
 *
 * @param fileName
 *            path of the PDF document to modify
 * @param attachements
 *            paths of the files to embed; each path is also used as the
 *            attachment's name
 * @return {@code true} when the document was saved, {@code false} when an
 *         {@link IOException} occurred (its message is printed to stdout)
 * @throws NullPointerException
 *             if fileName or attachements is null
 */
public static boolean addAtachement(final String fileName,
        final String... attachements) {
    Objects.requireNonNull(fileName, "fileName shouldn't be null");
    Objects.requireNonNull(attachements, "attachements shouldn't be null");

    Map<String, PDComplexFileSpecification> efMap = new HashMap<>();

    try (PDDocument doc = PDDocument.load(new File(fileName))) {
        /*
         * Attachments are stored as part of the "names" dictionary in the
         * document catalog
         */
        PDDocumentNameDictionary names = new PDDocumentNameDictionary(
                doc.getDocumentCatalog());

        PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
        if (efTree == null) {
            efTree = new PDEmbeddedFilesNameTreeNode();
        }

        /*
         * getNames() yields null when the node has no names (always the
         * case for the freshly created node above), so guard the merge
         * instead of letting putAll(null) throw.
         * NOTE(review): only names stored directly on this node are
         * merged; entries kept in child nodes are not visited - confirm
         * whether documents with nested name trees must be supported.
         */
        Map<String, PDComplexFileSpecification> existedNames = efTree.getNames();
        if (existedNames != null) {
            efMap.putAll(existedNames);
        }

        for (String attachement : attachements) {
            /* Create the file specification, which holds the embedded file */
            PDComplexFileSpecification fs = new PDComplexFileSpecification();
            fs.setFile(attachement);

            try (InputStream is = new FileInputStream(attachement)) {
                /* This represents an embedded file in a file specification */
                PDEmbeddedFile ef = new PDEmbeddedFile(doc, is);

                /* Set some relevant properties of embedded file */
                ef.setCreationDate(new GregorianCalendar());
                fs.setEmbeddedFile(ef);

                /* Register the specification under the attachment's path. */
                efMap.put(attachement, fs);
            }
        }

        efTree.setNames(efMap);
        names.setEmbeddedFiles(efTree);
        doc.getDocumentCatalog().setNames(names);

        // NOTE(review): the document is written over the file it was loaded
        // from - confirm this is safe for the PDFBox version in use.
        doc.save(fileName);
        return true;
    } catch (IOException e) {
        System.out.println(e.getMessage());
        return false;
    }
}
The following is
the updated utility class.
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.GregorianCalendar; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDMetadata; import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy; import org.apache.pdfbox.text.PDFTextStripper; public class PDFTextStripperUtil { /** * @param fileName * @return complete file data as string * @throws NullPointerException * if fileName is null */ public static Optional<String> getDataAsString(final String fileName) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stripper.setAddMoreFormatting(true); return Optional.of(stripper.getText(pdDoc)); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } public static Optional<String> getDataAsString(final String fileName, final int startPage, final int endPage) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } if (startPage < 1 || endPage < 1 || endPage < startPage) 
{ throw new IllegalArgumentException( "startPage, endPage must >= 1 and endPage >= startPage"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stripper.setAddMoreFormatting(true); stripper.setStartPage(startPage); stripper.setEndPage(endPage); return Optional.of(stripper.getText(pdDoc)); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } public static Optional<String> getDataAsStringFromStartPage( String fileName, int startPage) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } if (startPage < 1) { throw new IllegalArgumentException("startPage must >= 1"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { int noOfPages = pdDoc.getNumberOfPages(); return getDataAsString(fileName, startPage, noOfPages); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } public static Optional<String> getDataAsStringTillEndPage(String fileName, int endPage) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } if (endPage < 1) { throw new IllegalArgumentException("endPage must >= 1"); } return getDataAsString(fileName, 1, endPage); } public static Optional<Integer> getNumberOfPages(String fileName) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { return Optional.of(pdDoc.getNumberOfPages()); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } public static Optional<Map<String, Object>> getDocumentBasicMetaData( final String fileName) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { PDDocumentInformation docInfo = 
pdDoc.getDocumentInformation(); Set<String> keys = docInfo.getMetadataKeys(); Map<String, Object> map = new HashMap<>(); for (String key : keys) { map.put(key, docInfo.getPropertyStringValue(key)); } return Optional.of(map); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } public static Optional<List<String>> getCatalogMetaData( final String fileName) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { PDDocumentCatalog catalog = pdDoc.getDocumentCatalog(); PDMetadata metadata = catalog.getMetadata(); return getMeatData(metadata); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } private static Optional<List<String>> getDataFromStream(InputStream in) { try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) { List<String> data = new ArrayList<>(); String str; while ((str = br.readLine()) != null) { data.add(str); } return Optional.of(data); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } private static Optional<List<String>> getMeatData(PDMetadata metadata) { if (metadata == null) { System.out.println("There is no meta data associated"); return Optional.empty(); } try (InputStream in = metadata.createInputStream()) { return getDataFromStream(in); } catch (IOException e) { return Optional.empty(); } } public static Optional<List<String>> getPDPageMetaData( final String fileName, int pageIndex) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } if (pageIndex < 1) { throw new IllegalArgumentException("pageIndex must >= 1"); } try (final PDDocument pdDoc = PDDocument.load(new File(fileName))) { if (pageIndex > pdDoc.getNumberOfPages()) { throw new IllegalArgumentException("pageIndex : " + pageIndex + " must <= " + pdDoc.getNumberOfPages()); } PDPage pdPage = 
pdDoc.getPage(pageIndex); PDMetadata metadata = pdPage.getMetadata(); return getMeatData(metadata); } catch (IOException e) { System.out.println(e.getMessage()); return Optional.empty(); } } public static boolean encryptDocument(final String fileName, final String encryptedFileName, final int keyLength, final String ownerPassword, final String userPassword) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } if (Objects.isNull(encryptedFileName)) { throw new NullPointerException( "encryptedFileName shouldn't be null"); } if (keyLength < 1) { throw new IllegalArgumentException("keyLength should > 0"); } if (Objects.isNull(ownerPassword)) { throw new NullPointerException("ownerPassword shouldn't be null"); } if (Objects.isNull(userPassword)) { throw new NullPointerException("userPassword shouldn't be null"); } try (PDDocument doc = PDDocument.load(new File(fileName))) { AccessPermission ap = new AccessPermission(); /* disable printing, everything else is allowed */ ap.setCanPrint(false); StandardProtectionPolicy spp = new StandardProtectionPolicy( ownerPassword, userPassword, ap); /* * Define the length of the encryption key. 
Possible values are 40, * 128 256 */ spp.setEncryptionKeyLength(keyLength); spp.setPermissions(ap); doc.protect(spp); doc.save(encryptedFileName); return true; } catch (IOException e) { System.out.println(e.getMessage()); return false; } } public static boolean encryptDocument(final String fileName, final String encryptedFileName, final String ownerPassword, final String userPassword) { return encryptDocument(fileName, encryptedFileName, 128, ownerPassword, userPassword); } public static boolean encryptDocument(final String fileName, final String ownerPassword, final String userPassword) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } File file = new File(fileName); String encryptedFileName = "encrypted_" + file.getName(); return encryptDocument(fileName, encryptedFileName, 128, ownerPassword, userPassword); } public static boolean addAtachement(final String fileName, final String... attachements) { if (Objects.isNull(fileName)) { throw new NullPointerException("fileName shouldn't be null"); } if (Objects.isNull(attachements)) { throw new NullPointerException("attachements shouldn't be null"); } Map<String, PDComplexFileSpecification> efMap = new HashMap<>(); try (PDDocument doc = PDDocument.load(new File(fileName))) { /* * Attachments are stored as part of the "names" dictionary in the * document catalog */ PDDocumentNameDictionary names = new PDDocumentNameDictionary( doc.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles(); if (Objects.isNull(efTree)) { efTree = new PDEmbeddedFilesNameTreeNode(); } Map<String, PDComplexFileSpecification> existedNames = efTree .getNames(); efMap.putAll(existedNames); for (String attachement : attachements) { /* Create the file specification, which holds the embedded file */ PDComplexFileSpecification fs = new PDComplexFileSpecification(); fs.setFile(attachement); try (InputStream is = new FileInputStream(attachement)) { /* This represents an 
embedded file in a file specification */ PDEmbeddedFile ef = new PDEmbeddedFile(doc, is); /* Set some relevant properties of embedded file */ ef.setCreationDate(new GregorianCalendar()); fs.setEmbeddedFile(ef); /* * now add the entry to the embedded file tree and set in * the document. */ efMap.put(attachement, fs); } } efTree.setNames(efMap); names.setEmbeddedFiles(efTree); doc.getDocumentCatalog().setNames(names); doc.save(fileName); return true; } catch (IOException e) { System.out.println(e.getMessage()); return false; } } }
import java.io.IOException;

/**
 * Small driver that attaches two files to a PDF through
 * PDFTextStripperUtil.addAtachement and prints the outcome.
 */
public class PDFTextStripperUtilTest {

    public static void main(String[] args) throws IOException {
        // Target document and the files to embed in it.
        final String pdfPath = "/Users/harikrishna_gurram/Downloads/Saurabh.pdf";
        final String textAttachment = "/Users/harikrishna_gurram/b.txt";
        final String pdfAttachment = "/Users/harikrishna_gurram/Downloads/Saurabh.pdf";

        final boolean added = PDFTextStripperUtil.addAtachement(pdfPath,
                textAttachment, pdfAttachment);

        System.out.println(added ? "Attachements are added" : "Operation failed");
    }
}
How to see attachments in a PDF file?
View ->
Show/Hide -> Navigation Panes -> Attachments
No comments:
Post a Comment