Class | Line # | Actions | |||||
---|---|---|---|---|---|---|---|
LinkOfficeCleaningTest | 36 | 10 | 0% | 2 | 0 |
1 | /* | |
2 | * See the NOTICE file distributed with this work for additional | |
3 | * information regarding copyright ownership. | |
4 | * | |
5 | * This is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU Lesser General Public License as | |
7 | * published by the Free Software Foundation; either version 2.1 of | |
8 | * the License, or (at your option) any later version. | |
9 | * | |
10 | * This software is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this software; if not, write to the Free | |
17 | * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | |
18 | * 02110-1301 USA, or see the FSF site: http://www.fsf.org. | |
19 | */ | |
20 | package org.xwiki.officeimporter.internal.cleaner; | |
21 | ||
22 | import java.io.StringReader; | |
23 | ||
24 | import org.junit.Assert; | |
25 | import org.junit.Test; | |
26 | import org.w3c.dom.Document; | |
27 | import org.w3c.dom.Element; | |
28 | import org.w3c.dom.NodeList; | |
29 | ||
30 | /** | |
31 | * Test case for cleaning HTML links ({@code <a/>} elements) in {@link OfficeHTMLCleaner}. | |
32 | * | |
33 | * @version $Id: f28b7b0b01f6dc13b37afe831da7ed6538d5eed2 $ | |
34 | * @since 1.8 | |
35 | */ | |
36 | public class LinkOfficeCleaningTest extends AbstractHTMLCleaningTest | |
37 | { | |
38 | /** | |
39 | * The HTML generated by the OpenOffice server includes anchors of the form: | |
40 | * | |
41 | * <pre> | |
42 | * {@code <a name="table1"> | |
43 | * <h1>Sheet 1: <em>Hello</em></h1> | |
44 | * </a>} | |
45 | * </pre> | |
46 | * | |
47 | * and the default HTML cleaner converts them to: | |
48 | * | |
49 | * <pre> | |
50 | * {@code <a name="table1"/> | |
51 | * <h1> | |
52 | * <a name="table1">Sheet 1: <em>Hello</em></a> | |
53 | * </h1>} | |
54 | * </pre> | |
55 | * | |
56 | * This is because of the close-before-copy-inside behavior of the default HTML cleaner. Thus the additional | |
57 | * (copy-inside) anchor needs to be ripped off: the test feeds the converted form to the office HTML cleaner and expects exactly one {@code <a>} element to remain, whose parent node is named {@code p}. | |
58 | */ | |
59 | 1 | @Test |
60 | public void testDuplicateAnchorRemoving() | |
61 | { | |
62 | 1 | String html = header + "<a name=\"table1\"/><h1><a name=\"table1\">Sheet 1: <em>Hello</em></a></h1>" + footer; |
63 | 1 | Document doc = officeHTMLCleaner.clean(new StringReader(html)); |
64 | 1 | NodeList nodes = doc.getElementsByTagName("a"); |
65 | 1 | Assert.assertEquals(1, nodes.getLength()); |
66 | 1 | Element parent = (Element) nodes.item(0).getParentNode(); |
67 | 1 | Assert.assertEquals("p", parent.getNodeName()); |
68 | } | |
69 | ||
70 | /** | |
71 | * Tests duplicate anchor filtering with TOC structures: a heading nested in a {@code <div>} carries a single named anchor, and exactly one {@code <a>} element is expected after cleaning. See: http://jira.xwiki.org/jira/browse/XWIKI-3415 | |
72 | */ | |
73 | 1 | @Test |
74 | public void testAnchorFilteringWithTOC() | |
75 | { | |
76 | 1 | String html = header + "<div>some text<h1><a name=\"Topic1\"/>Topic1</h1></div>" + footer; |
77 | 1 | Document doc = officeHTMLCleaner.clean(new StringReader(html)); |
78 | 1 | NodeList nodes = doc.getElementsByTagName("a"); |
79 | 1 | Assert.assertEquals(1, nodes.getLength()); |
80 | } | |
81 | } |