| 1 | /* |
|---|
| 2 | * Copyright 2007-2008, Plutext Pty Ltd. |
|---|
| 3 | * |
|---|
| 4 | * This file is part of docx4j. |
|---|
| 5 | |
|---|
| 6 | docx4j is licensed under the Apache License, Version 2.0 (the "License"); |
|---|
| 7 | you may not use this file except in compliance with the License. |
|---|
| 8 | |
|---|
| 9 | You may obtain a copy of the License at |
|---|
| 10 | |
|---|
| 11 | http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 12 | |
|---|
| 13 | Unless required by applicable law or agreed to in writing, software |
|---|
| 14 | distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 16 | See the License for the specific language governing permissions and |
|---|
| 17 | limitations under the License. |
|---|
| 18 | |
|---|
| 19 | */ |
|---|
| 20 | |
|---|
| 21 | package org.docx4j.openpackaging.packages; |
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | import java.io.File; |
|---|
| 25 | import java.io.FileInputStream; |
|---|
| 26 | import java.io.FileNotFoundException; |
|---|
| 27 | import java.io.OutputStream; |
|---|
| 28 | import java.util.Iterator; |
|---|
| 29 | import java.util.Map; |
|---|
| 30 | |
|---|
| 31 | import javax.xml.bind.JAXBContext; |
|---|
| 32 | import javax.xml.bind.JAXBElement; |
|---|
| 33 | import javax.xml.bind.Marshaller; |
|---|
| 34 | import javax.xml.bind.Unmarshaller; |
|---|
| 35 | import javax.xml.parsers.DocumentBuilderFactory; |
|---|
| 36 | import javax.xml.transform.Source; |
|---|
| 37 | import javax.xml.transform.Templates; |
|---|
| 38 | import javax.xml.transform.stream.StreamSource; |
|---|
| 39 | |
|---|
| 40 | import org.apache.log4j.Logger; |
|---|
| 41 | import org.docx4j.XmlUtils; |
|---|
| 42 | import org.docx4j.convert.out.flatOpcXml.FlatOpcXmlCreator; |
|---|
| 43 | import org.docx4j.fonts.BestMatchingMapper; |
|---|
| 44 | import org.docx4j.fonts.IdentityPlusMapper; |
|---|
| 45 | import org.docx4j.fonts.Mapper; |
|---|
| 46 | import org.docx4j.fonts.FontUtils; |
|---|
| 47 | import org.docx4j.jaxb.Context; |
|---|
| 48 | import org.docx4j.model.HeaderFooterPolicy; |
|---|
| 49 | import org.docx4j.openpackaging.contenttype.ContentType; |
|---|
| 50 | import org.docx4j.openpackaging.contenttype.ContentTypeManager; |
|---|
| 51 | import org.docx4j.openpackaging.contenttype.ContentTypeManagerImpl; |
|---|
| 52 | import org.docx4j.openpackaging.contenttype.ContentTypes; |
|---|
| 53 | import org.docx4j.openpackaging.exceptions.Docx4JException; |
|---|
| 54 | import org.docx4j.openpackaging.exceptions.InvalidFormatException; |
|---|
| 55 | import org.docx4j.openpackaging.io.LoadFromZipFile; |
|---|
| 56 | import org.docx4j.openpackaging.io.LoadFromZipNG; |
|---|
| 57 | import org.docx4j.openpackaging.io.SaveToZipFile; |
|---|
| 58 | import org.docx4j.openpackaging.parts.DocPropsCorePart; |
|---|
| 59 | import org.docx4j.openpackaging.parts.DocPropsCustomPart; |
|---|
| 60 | import org.docx4j.openpackaging.parts.DocPropsExtendedPart; |
|---|
| 61 | import org.docx4j.openpackaging.parts.JaxbXmlPart; |
|---|
| 62 | import org.docx4j.openpackaging.parts.Part; |
|---|
| 63 | import org.docx4j.openpackaging.parts.WordprocessingML.FontTablePart; |
|---|
| 64 | import org.docx4j.openpackaging.parts.WordprocessingML.GlossaryDocumentPart; |
|---|
| 65 | import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart; |
|---|
| 66 | import org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart; |
|---|
| 67 | import org.docx4j.openpackaging.parts.relationships.Namespaces; |
|---|
| 68 | |
|---|
| 69 | import com.lowagie.text.pdf.BaseFont; |
|---|
| 70 | |
|---|
| 71 | |
|---|
| 72 | |
|---|
| 73 | |
|---|
| 74 | |
|---|
| 75 | |
|---|
| 76 | |
|---|
| 77 | /** |
|---|
| 78 | * @author jharrop |
|---|
| 79 | * |
|---|
| 80 | */ |
|---|
| 81 | public class WordprocessingMLPackage extends Package { |
|---|
| 82 | |
|---|
| 83 | // What is a Word document these days? |
|---|
| 84 | // |
|---|
| 85 | // Well, a package is a logical entity which holds a collection of parts |
|---|
| 86 | // And a word document is exactly a WordProcessingML package |
|---|
| 87 | // Which has a Main Document Part, and optionally, a Glossary Document Part |
|---|
| 88 | |
|---|
| 89 | /* So its a Word doc if: |
|---|
| 90 | * 1. _rels/.rels tells you where to find an office document |
|---|
| 91 | * 2. [Content_Types].xml tells you that office document is |
|---|
| 92 | * of content type application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml |
|---|
| 93 | |
|---|
| 94 | * A minimal docx has: |
|---|
| 95 | * |
|---|
| 96 | * [Content_Types].xml containing: |
|---|
| 97 | * 1. <Default Extension="rels" ... |
|---|
| 98 | * 2. <Override PartName="/word/document.xml"... |
|---|
| 99 | * |
|---|
| 100 | * _rels/.rels with a target for word/document.xml |
|---|
| 101 | * |
|---|
| 102 | * word/document.xml |
|---|
| 103 | */ |
|---|
| 104 | |
|---|
| 105 | protected static Logger log = Logger.getLogger(WordprocessingMLPackage.class); |
|---|
| 106 | |
|---|
| 107 | |
|---|
| 108 | // Main document |
|---|
| 109 | protected MainDocumentPart mainDoc; |
|---|
| 110 | |
|---|
| 111 | // (optional) Glossary document |
|---|
| 112 | protected GlossaryDocumentPart glossaryDoc; |
|---|
| 113 | |
|---|
| 114 | private HeaderFooterPolicy headerFooterPolicy; |
|---|
| 115 | public HeaderFooterPolicy getHeaderFooterPolicy() { |
|---|
| 116 | if (headerFooterPolicy==null) { |
|---|
| 117 | headerFooterPolicy = new HeaderFooterPolicy(this); |
|---|
| 118 | } |
|---|
| 119 | return headerFooterPolicy; |
|---|
| 120 | } |
|---|
| 121 | public void setHeaderFooterPolicy(HeaderFooterPolicy headerFooterPolicy) { |
|---|
| 122 | this.headerFooterPolicy = headerFooterPolicy; |
|---|
| 123 | } |
|---|
| 124 | |
|---|
| 125 | |
|---|
| 126 | /** |
|---|
| 127 | * Constructor. Also creates a new content type manager |
|---|
| 128 | * |
|---|
| 129 | */ |
|---|
| 130 | public WordprocessingMLPackage() { |
|---|
| 131 | super(); |
|---|
| 132 | setContentType(new ContentType(ContentTypes.WORDPROCESSINGML_DOCUMENT)); |
|---|
| 133 | } |
|---|
| 134 | /** |
|---|
| 135 | * Constructor. |
|---|
| 136 | * |
|---|
| 137 | * @param contentTypeManager |
|---|
| 138 | * The content type manager to use |
|---|
| 139 | */ |
|---|
| 140 | public WordprocessingMLPackage(ContentTypeManager contentTypeManager) { |
|---|
| 141 | super(contentTypeManager); |
|---|
| 142 | setContentType(new ContentType(ContentTypes.WORDPROCESSINGML_DOCUMENT)); |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | /** |
|---|
| 146 | * Convenience method to create a WordprocessingMLPackage |
|---|
| 147 | * from an existing File. |
|---|
| 148 | * |
|---|
| 149 | * @param docxFile |
|---|
| 150 | * The docx file |
|---|
| 151 | */ |
|---|
| 152 | public static WordprocessingMLPackage load(java.io.File docxFile) throws Docx4JException { |
|---|
| 153 | |
|---|
| 154 | // LoadFromZipFile loader = new LoadFromZipFile(); |
|---|
| 155 | LoadFromZipNG loader = new LoadFromZipNG(); |
|---|
| 156 | // return (WordprocessingMLPackage)loader.get(docxFile); |
|---|
| 157 | FileInputStream fis = null; |
|---|
| 158 | try { |
|---|
| 159 | fis = new FileInputStream(docxFile); |
|---|
| 160 | } catch (FileNotFoundException e) { |
|---|
| 161 | e.printStackTrace(); |
|---|
| 162 | } |
|---|
| 163 | return (WordprocessingMLPackage)loader.get(fis); |
|---|
| 164 | } |
|---|
| 165 | |
|---|
| 166 | /** |
|---|
| 167 | * Convenience method to save a WordprocessingMLPackage |
|---|
| 168 | * to a File. |
|---|
| 169 | * |
|---|
| 170 | * @param docxFile |
|---|
| 171 | * The docx file |
|---|
| 172 | */ |
|---|
| 173 | public void save(java.io.File docxFile) throws Docx4JException { |
|---|
| 174 | |
|---|
| 175 | SaveToZipFile saver = new SaveToZipFile(this); |
|---|
| 176 | saver.save(docxFile); |
|---|
| 177 | } |
|---|
| 178 | |
|---|
| 179 | |
|---|
| 180 | public boolean setPartShortcut(Part part, String relationshipType) { |
|---|
| 181 | if (relationshipType.equals(Namespaces.PROPERTIES_CORE)) { |
|---|
| 182 | docPropsCorePart = (DocPropsCorePart)part; |
|---|
| 183 | log.info("Set shortcut for docPropsCorePart"); |
|---|
| 184 | return true; |
|---|
| 185 | } else if (relationshipType.equals(Namespaces.PROPERTIES_EXTENDED)) { |
|---|
| 186 | docPropsExtendedPart = (DocPropsExtendedPart)part; |
|---|
| 187 | log.info("Set shortcut for docPropsExtendedPart"); |
|---|
| 188 | return true; |
|---|
| 189 | } else if (relationshipType.equals(Namespaces.PROPERTIES_CUSTOM)) { |
|---|
| 190 | docPropsCustomPart = (DocPropsCustomPart)part; |
|---|
| 191 | log.info("Set shortcut for docPropsCustomPart"); |
|---|
| 192 | return true; |
|---|
| 193 | } else if (relationshipType.equals(Namespaces.DOCUMENT)) { |
|---|
| 194 | mainDoc = (MainDocumentPart)part; |
|---|
| 195 | log.info("Set shortcut for mainDoc"); |
|---|
| 196 | return true; |
|---|
| 197 | } else { |
|---|
| 198 | return false; |
|---|
| 199 | } |
|---|
| 200 | } |
|---|
| 201 | |
|---|
| 202 | public MainDocumentPart getMainDocumentPart() { |
|---|
| 203 | return mainDoc; |
|---|
| 204 | } |
|---|
| 205 | |
|---|
| 206 | |
|---|
| 207 | /** |
|---|
| 208 | * Use an XSLT to alter the contents of this package. |
|---|
| 209 | * The output of the transformation must be valid |
|---|
| 210 | * pck:package/pck:part format, as emitted by Word 2007. |
|---|
| 211 | * |
|---|
| 212 | * @param is |
|---|
| 213 | * @param transformParameters |
|---|
| 214 | * @throws Exception |
|---|
| 215 | */ |
|---|
| 216 | public void transform(Templates xslt, |
|---|
| 217 | Map<String, Object> transformParameters) throws Exception { |
|---|
| 218 | |
|---|
| 219 | // Prepare in the input document |
|---|
| 220 | |
|---|
| 221 | FlatOpcXmlCreator worker = new FlatOpcXmlCreator(this); |
|---|
| 222 | org.docx4j.xmlPackage.Package pkg = worker.get(); |
|---|
| 223 | |
|---|
| 224 | JAXBContext jc = Context.jcXmlPackage; |
|---|
| 225 | Marshaller marshaller=jc.createMarshaller(); |
|---|
| 226 | org.w3c.dom.Document doc = org.docx4j.XmlUtils.neww3cDomDocument(); |
|---|
| 227 | marshaller.marshal(pkg, doc); |
|---|
| 228 | |
|---|
| 229 | javax.xml.bind.util.JAXBResult result = new javax.xml.bind.util.JAXBResult(jc ); |
|---|
| 230 | |
|---|
| 231 | // Perform the transformation |
|---|
| 232 | org.docx4j.XmlUtils.transform(doc, xslt, transformParameters, result); |
|---|
| 233 | |
|---|
| 234 | |
|---|
| 235 | //org.docx4j.xmlPackage.Package wmlPackageEl = (org.docx4j.xmlPackage.Package)result.getResult(); |
|---|
| 236 | javax.xml.bind.JAXBElement je = (javax.xml.bind.JAXBElement)result.getResult(); |
|---|
| 237 | org.docx4j.xmlPackage.Package wmlPackageEl = (org.docx4j.xmlPackage.Package)je.getValue(); |
|---|
| 238 | org.docx4j.convert.in.FlatOpcXmlImporter xmlPackage = new org.docx4j.convert.in.FlatOpcXmlImporter( wmlPackageEl); |
|---|
| 239 | |
|---|
| 240 | ContentTypeManager ctm = new ContentTypeManagerImpl(); |
|---|
| 241 | |
|---|
| 242 | Part tmpDocPart = xmlPackage.getRawPart(ctm, "/word/document.xml"); |
|---|
| 243 | Part tmpStylesPart = xmlPackage.getRawPart(ctm, "/word/styles.xml"); |
|---|
| 244 | |
|---|
| 245 | // This code assumes all the existing rels etc of |
|---|
| 246 | // the existing main document part are still relevant. |
|---|
| 247 | // if (wmlDocument==null) { |
|---|
| 248 | // log.warn("Couldn't get main document part from package transform result!"); |
|---|
| 249 | // } else { |
|---|
| 250 | // this.getMainDocumentPart().setJaxbElement(wmlDocument); |
|---|
| 251 | // } |
|---|
| 252 | this.getMainDocumentPart().setJaxbElement( ((JaxbXmlPart)tmpDocPart).getJaxbElement() ); |
|---|
| 253 | // |
|---|
| 254 | // if (wmlStyles==null) { |
|---|
| 255 | // log.warn("Couldn't get style definitions part from package transform result!"); |
|---|
| 256 | // } else { |
|---|
| 257 | // this.getMainDocumentPart().getStyleDefinitionsPart().setJaxbElement(wmlStyles); |
|---|
| 258 | // } |
|---|
| 259 | this.getMainDocumentPart().getStyleDefinitionsPart().setJaxbElement( ((JaxbXmlPart)tmpStylesPart).getJaxbElement() ); |
|---|
| 260 | |
|---|
| 261 | } |
|---|
| 262 | |
|---|
| 263 | public void filter( FilterSettings filterSettings ) throws Exception { |
|---|
| 264 | |
|---|
| 265 | if (filterTemplate==null) { // first use |
|---|
| 266 | Source xsltSource = new StreamSource( |
|---|
| 267 | org.docx4j.utils.ResourceUtils.getResource( |
|---|
| 268 | "org/docx4j/openpackaging/packages/filter.xslt")); |
|---|
| 269 | filterTemplate = XmlUtils.getTransformerTemplate(xsltSource); |
|---|
| 270 | } |
|---|
| 271 | transform(filterTemplate, filterSettings.getSettings() ); |
|---|
| 272 | |
|---|
| 273 | } |
|---|
| 274 | |
|---|
| 275 | static Templates filterTemplate; |
|---|
| 276 | |
|---|
| 277 | /* There should be a mapper per document, |
|---|
| 278 | * but PhysicalFonts should be system wide. |
|---|
| 279 | * |
|---|
| 280 | * The only way PhysicalFonts will change |
|---|
| 281 | * is if fonts are added/removed while |
|---|
| 282 | * docx4j is executing (which can happen eg if an |
|---|
| 283 | * obfuscated font part is read) |
|---|
| 284 | */ |
|---|
| 285 | |
|---|
| 286 | public void setFontMapper(Mapper fm) throws Exception { |
|---|
| 287 | if (fm == null) { |
|---|
| 288 | throw new IllegalArgumentException("Font Substituter cannot be null."); |
|---|
| 289 | } |
|---|
| 290 | fontMapper = fm; |
|---|
| 291 | org.docx4j.wml.Fonts fonts = null; |
|---|
| 292 | |
|---|
| 293 | // 1. Get a list of all the fonts in the document |
|---|
| 294 | java.util.Map fontsInUse = this.getMainDocumentPart().fontsInUse(); |
|---|
| 295 | |
|---|
| 296 | if ( fm instanceof BestMatchingMapper ) { |
|---|
| 297 | |
|---|
| 298 | |
|---|
| 299 | // 2. For each font, find the closest match on the system (use OO's VCL.xcu to do this) |
|---|
| 300 | // - do this in a general way, since docx4all needs this as well to display fonts |
|---|
| 301 | FontTablePart fontTablePart= this.getMainDocumentPart().getFontTablePart(); |
|---|
| 302 | |
|---|
| 303 | if (fontTablePart==null) { |
|---|
| 304 | log.warn("FontTable missing; creating default part."); |
|---|
| 305 | fontTablePart= new org.docx4j.openpackaging.parts.WordprocessingML.FontTablePart(); |
|---|
| 306 | fontTablePart.unmarshalDefaultFonts(); |
|---|
| 307 | fontTablePart.processEmbeddings(); |
|---|
| 308 | } |
|---|
| 309 | |
|---|
| 310 | fonts = (org.docx4j.wml.Fonts)fontTablePart.getJaxbElement(); |
|---|
| 311 | } |
|---|
| 312 | |
|---|
| 313 | fontMapper.populateFontMappings(fontsInUse, fonts); |
|---|
| 314 | |
|---|
| 315 | } |
|---|
| 316 | |
|---|
| 317 | public Mapper getFontMapper() { |
|---|
| 318 | if (fontMapper==null) { |
|---|
| 319 | fontMapper = new IdentityPlusMapper(); |
|---|
| 320 | } |
|---|
| 321 | return fontMapper; |
|---|
| 322 | } |
|---|
| 323 | |
|---|
| 324 | private Mapper fontMapper; |
|---|
| 325 | |
|---|
| 326 | |
|---|
| 327 | public static WordprocessingMLPackage createPackage() throws InvalidFormatException { |
|---|
| 328 | |
|---|
| 329 | |
|---|
| 330 | // Create a package |
|---|
| 331 | WordprocessingMLPackage wmlPack = new WordprocessingMLPackage(); |
|---|
| 332 | |
|---|
| 333 | // Create main document part |
|---|
| 334 | Part wordDocumentPart = new org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart(); |
|---|
| 335 | |
|---|
| 336 | // Create main document part content |
|---|
| 337 | org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory(); |
|---|
| 338 | |
|---|
| 339 | org.docx4j.wml.Body body = factory.createBody(); |
|---|
| 340 | |
|---|
| 341 | org.docx4j.wml.Document wmlDocumentEl = factory.createDocument(); |
|---|
| 342 | wmlDocumentEl.setBody(body); |
|---|
| 343 | |
|---|
| 344 | // Put the content in the part |
|---|
| 345 | ((org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart)wordDocumentPart).setJaxbElement(wmlDocumentEl); |
|---|
| 346 | |
|---|
| 347 | // Add the main document part to the package relationships |
|---|
| 348 | // (creating it if necessary) |
|---|
| 349 | wmlPack.addTargetPart(wordDocumentPart); |
|---|
| 350 | |
|---|
| 351 | // Create a styles part |
|---|
| 352 | Part stylesPart = new org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart(); |
|---|
| 353 | try { |
|---|
| 354 | ((org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart) stylesPart) |
|---|
| 355 | .unmarshalDefaultStyles(); |
|---|
| 356 | |
|---|
| 357 | // Add the styles part to the main document part relationships |
|---|
| 358 | // (creating it if necessary) |
|---|
| 359 | wordDocumentPart.addTargetPart(stylesPart); // NB - add it to main doc part, not package! |
|---|
| 360 | |
|---|
| 361 | } catch (Exception e) { |
|---|
| 362 | // TODO: handle exception |
|---|
| 363 | e.printStackTrace(); |
|---|
| 364 | } |
|---|
| 365 | // Return the new package |
|---|
| 366 | return wmlPack; |
|---|
| 367 | |
|---|
| 368 | } |
|---|
| 369 | |
|---|
| 370 | public static class FilterSettings { |
|---|
| 371 | |
|---|
| 372 | Boolean removeProofErrors = Boolean.FALSE; |
|---|
| 373 | public void setRemoveProofErrors(boolean val) { |
|---|
| 374 | removeProofErrors = new Boolean(val); |
|---|
| 375 | } |
|---|
| 376 | |
|---|
| 377 | Boolean removeContentControls = Boolean.FALSE; |
|---|
| 378 | public void setRemoveContentControls(boolean val) { |
|---|
| 379 | removeContentControls = new Boolean(val); |
|---|
| 380 | } |
|---|
| 381 | |
|---|
| 382 | Boolean removeRsids = Boolean.FALSE; |
|---|
| 383 | public void setRemoveRsids(boolean val) { |
|---|
| 384 | removeRsids = new Boolean(val); |
|---|
| 385 | } |
|---|
| 386 | |
|---|
| 387 | Boolean tidyForDocx4all = Boolean.FALSE; |
|---|
| 388 | public void setTidyForDocx4all(boolean val) { |
|---|
| 389 | tidyForDocx4all = new Boolean(val); |
|---|
| 390 | } |
|---|
| 391 | |
|---|
| 392 | |
|---|
| 393 | Map<String, Object> getSettings() { |
|---|
| 394 | Map<String, Object> settings = new java.util.HashMap<String, Object>(); |
|---|
| 395 | |
|---|
| 396 | settings.put("removeProofErrors", removeProofErrors); |
|---|
| 397 | settings.put("removeContentControls", removeContentControls); |
|---|
| 398 | settings.put("removeRsids", removeRsids); |
|---|
| 399 | settings.put("tidyForDocx4all", tidyForDocx4all); |
|---|
| 400 | |
|---|
| 401 | return settings; |
|---|
| 402 | } |
|---|
| 403 | |
|---|
| 404 | |
|---|
| 405 | } |
|---|
| 406 | |
|---|
| 407 | |
|---|
| 408 | |
|---|
| 409 | } |
|---|