Class | Line # | Actions | |||||
---|---|---|---|---|---|---|---|
SpaceNormalizerContentAlterer | 44 | 25 | 0% | 11 | 0 |
1 | /* | |
2 | * See the NOTICE file distributed with this work for additional | |
3 | * information regarding copyright ownership. | |
4 | * | |
5 | * This is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU Lesser General Public License as | |
7 | * published by the Free Software Foundation; either version 2.1 of | |
8 | * the License, or (at your option) any later version. | |
9 | * | |
10 | * This software is distributed in the hope that it will be useful, | |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * Lesser General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU Lesser General Public | |
16 | * License along with this software; if not, write to the Free | |
17 | * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | |
18 | * 02110-1301 USA, or see the FSF site: http://www.fsf.org. | |
19 | */ | |
20 | package org.xwiki.annotation.internal.content; | |
21 | ||
22 | import java.util.HashMap; | |
23 | import java.util.Map; | |
24 | ||
25 | import javax.inject.Inject; | |
26 | import javax.inject.Named; | |
27 | import javax.inject.Singleton; | |
28 | ||
29 | import org.xwiki.annotation.content.AlteredContent; | |
30 | import org.xwiki.annotation.content.filter.Filter; | |
31 | import org.xwiki.component.annotation.Component; | |
32 | ||
33 | /** | |
34 | * Space normalizer content alterer. Will trim all leading and trailing white spaces in the passed sequence along with | |
35 | * collapsing all the inner white spaces to a single space. It also replaces all sorts of white spaces such as | |
36 | * non-breakable spaces with regular spaces. | |
37 | * | |
38 | * @version $Id: 12861f580d031cbecfcf751c1b271937150b3420 $ | |
39 | * @since 2.3M1 | |
40 | */ | |
41 | @Component | |
42 | @Named("space-normalizer") | |
43 | @Singleton | |
44 | public class SpaceNormalizerContentAlterer extends AbstractContentAlterer | |
45 | { | |
46 | /** | |
47 | * The whitespace filter, to identify all characters which are whitespace. | |
48 | */ | |
49 | @Inject | |
50 | @Named("whitespace") | |
51 | private Filter whitespaceFilter; | |
52 | ||
53 | 15 | @Override |
54 | public AlteredContent alter(CharSequence sequence) | |
55 | { | |
56 | // same as filtering only that on encountering the first space in a series of whitespace, only print the first | |
57 | // one | |
58 | 15 | StringBuffer buffer = new StringBuffer(); |
59 | 15 | Map<Integer, Integer> initialToAltered = new HashMap<Integer, Integer>(); |
60 | 15 | Map<Integer, Integer> alteredToInitial = new HashMap<Integer, Integer>(); |
61 | ||
62 | // number of refused chars | |
63 | 15 | int removedChars = 0; |
64 | 15 | Character c; |
65 | // initially assume we're in whitespace printing, since we need to trim all leading spaces | |
66 | 15 | boolean isInWhitespace = true; |
67 | 343 | for (int i = 0; i < sequence.length(); ++i) { |
68 | 328 | c = sequence.charAt(i); |
69 | 328 | boolean isWhitespace = !whitespaceFilter.accept(c); |
70 | // if either it's a non-whitespace or it's a whitespace but it's the first whitespace after some characters | |
71 | 328 | if (!isWhitespace || (isWhitespace && !isInWhitespace)) { |
72 | // update the whitespace printing state according to the the type of the current character | |
73 | 310 | isInWhitespace = isWhitespace; |
74 | ||
75 | // if it's whitespace print a plain space, not the char itself | |
76 | 310 | buffer.append(isWhitespace ? " " : c); |
77 | // update the altered indexes for all the removed characters in this removed fragment to point to this | |
78 | // position | |
79 | 634 | for (int t = 0; t <= removedChars; ++t) { |
80 | // 1+0;1 // 1+1;1 | |
81 | 324 | initialToAltered.put(i - t, buffer.length() - 1); |
82 | } | |
83 | ||
84 | // restore the removed chars count | |
85 | 310 | removedChars = 0; |
86 | ||
87 | // map this altered index to the index in the original sequence | |
88 | 310 | alteredToInitial.put(buffer.length() - 1, i); |
89 | } else { | |
90 | 18 | removedChars++; |
91 | } | |
92 | } | |
93 | // if the last character is a space, remove it and add it to the removed chars | |
94 | 15 | if (buffer.length() > 0 && buffer.charAt(buffer.length() - 1) == ' ') { |
95 | 3 | buffer.deleteCharAt(buffer.length() - 1); |
96 | 3 | removedChars++; |
97 | // remove the mapping from the altered to initial mapping since it doesn't exist anymore. buffer.length is | |
98 | // now the old buffer length - 1 | |
99 | 3 | alteredToInitial.remove(buffer.length()); |
100 | } | |
101 | // finally update the indexes for the last stream of removed chars | |
102 | 15 | if (buffer.length() > 0) { |
103 | // add the offsets for the remaining removed chars | |
104 | 17 | for (int t = 0; t < removedChars; ++t) { |
105 | 4 | initialToAltered.put(sequence.length() - 1 - t, buffer.length() - 1 - 1); |
106 | } | |
107 | } | |
108 | ||
109 | 15 | return new OffsetsMapAlteredContent(buffer.toString(), sequence.length(), initialToAltered, alteredToInitial); |
110 | } | |
111 | } |