Skip to content

Commit f7e22ff

Browse files
authored
Merge pull request #136 from box/android-escape-html
escape html when there are variables between html tags
2 parents dc3d609 + a9ddf68 commit f7e22ff

4 files changed

Lines changed: 148 additions & 27 deletions

File tree

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,41 @@
11
package com.box.l10n.mojito.okapi.filters;
22

3+
import java.util.regex.Matcher;
4+
import java.util.regex.Pattern;
35
import net.sf.okapi.common.encoder.EncoderContext;
46
import net.sf.okapi.filters.its.Parameters;
57
import org.apache.commons.lang.StringUtils;
68

79
/**
8-
* This overrides the {@link net.sf.okapi.common.encoder.XMLEncoder}
9-
* to not to escape supported HTML elements for Android strings.
10+
* This overrides the {@link net.sf.okapi.common.encoder.XMLEncoder} for Android
11+
* strings.
12+
*
13+
* It does not escape supported HTML elements for Android strings unless there
14+
* are variables within the HTML elements.
15+
* For example, <b>songs</b> vs. &lt;b>%d songs&lt;/b>
16+
*
1017
* Also it overrides the default quotemode setting so the quotes do not get escaped.
11-
* According to Android specification in http://developer.android.com/guide/topics/resources/string-resource.html,
12-
* <b>bold</b>, <i>italian</i> and <u>underline</u> should be in localized file as-is.
13-
*
18+
*
19+
* For detailed information, see to Android specification in
20+
* http://developer.android.com/guide/topics/resources/string-resource.html,
21+
*
1422
* @author jyi
1523
*/
1624
public class XMLEncoder extends net.sf.okapi.common.encoder.XMLEncoder {
17-
25+
26+
// trying to match variables between html tags, for example, <b>%d</b>, <i>%1$s</i>, <u>%2$s</u>
27+
private static final Pattern ANDROID_VARIABLE_WITHIN_HTML = Pattern.compile("(&lt;[b|i|u]&gt;)((.*?)%(([-0+ #]?)[-0+ #]?)((\\d\\$)?)(([\\d\\*]*)(\\.[\\d\\*]*)?)[dioxXucsfeEgGpn](.*?))+(&lt;/[b|i|u]&gt;)");
28+
private static final Pattern ANDROID_HTML = Pattern.compile("(&lt;)(/?)(b|i|u)(&gt;)");
29+
private static final Pattern UNESCAPED_DOUBLE_QUOTE = Pattern.compile("([^\\\\])(\")");
30+
private static final Pattern START_WITH_DOUBLE_QUOTE = Pattern.compile("(^\")");
31+
private static final Pattern UNESCAPED_SINGLE_QUOTE = Pattern.compile("([^\\\\])(')");
32+
private static final Pattern START_WITH_SINGLE_QUOTE = Pattern.compile("(^')");
33+
1834
@Override
1935
public String encode(String text, EncoderContext context) {
2036
String encoded = super.encode(text, context);
2137
if (isAndroidStrings()) {
22-
encoded = escape(encoded);
38+
encoded = escapeAndroid(encoded);
2339
}
2440
return encoded;
2541
}
@@ -32,39 +48,44 @@ private boolean isAndroidStrings() {
3248
return false;
3349
}
3450
}
35-
36-
public String escape(String text) {
51+
52+
public String escapeAndroid(String text) {
3753
boolean enclosedInDoubleQuotes = StringUtils.startsWith(text, "\"") && StringUtils.endsWith(text, "\"");
3854
if (enclosedInDoubleQuotes) {
3955
text = text.substring(1, text.length() - 1);
4056
}
41-
42-
String pattern = "(&lt;)(/?)(b|i|u)(&gt;)";
43-
String replacement = "<$2$3>";
44-
text = text.replaceAll(pattern, replacement);
57+
58+
String replacement;
59+
if (needsAndroidEscapeHTML(text)) {
60+
replacement = "$1$2$3>";
61+
} else {
62+
replacement = "<$2$3>";
63+
}
64+
text = ANDROID_HTML.matcher(text).replaceAll(replacement);
4565
text = text.replaceAll("\n", "\\\\n");
4666
text = text.replaceAll("\r", "\\\\r");
4767
text = escapeDoubleQuotes(text);
4868
if (!enclosedInDoubleQuotes) {
4969
text = escapeSingleQuotes(text);
5070
}
5171
return enclosedInDoubleQuotes ? "\"" + text + "\"" : text;
52-
72+
5373
}
54-
74+
75+
private boolean needsAndroidEscapeHTML(String text) {
76+
Matcher matcher = ANDROID_VARIABLE_WITHIN_HTML.matcher(text);
77+
return matcher.find();
78+
}
79+
5580
private String escapeDoubleQuotes(String text) {
56-
String pattern1 = "([^\\\\])(\")";
57-
String pattern2 = "(^\")";
58-
String escaped = text.replaceAll(pattern1, "$1\\\\$2");
59-
escaped = escaped.replaceFirst(pattern2, "\\\\$1");
81+
String escaped = UNESCAPED_DOUBLE_QUOTE.matcher(text).replaceAll("$1\\\\$2");
82+
escaped = START_WITH_DOUBLE_QUOTE.matcher(escaped).replaceFirst("\\\\$1");
6083
return escaped;
6184
}
6285

6386
private String escapeSingleQuotes(String text) {
64-
String pattern1 = "([^\\\\])(')";
65-
String pattern2 = "(^')";
66-
String escaped = text.replaceAll(pattern1, "$1\\\\$2");
67-
escaped = escaped.replaceFirst(pattern2, "\\\\$1");
87+
String escaped = UNESCAPED_SINGLE_QUOTE.matcher(text).replaceAll("$1\\\\$2");
88+
escaped = START_WITH_SINGLE_QUOTE.matcher(escaped).replaceFirst("\\\\$1");
6889
return escaped;
6990
}
7091
}

webapp/src/main/resources/com/box/l10n/mojito/okapi/filters/AndroidStrings_mojito.fprm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
<its:preserveSpaceRule selector="resources//plurals//item" space="preserve"/>
3030

3131
<okp:options escapeQuotes="no"/>
32-
<okp:codeFinder useCodeFinder="yes">#v1
32+
<okp:options escapeGT="no"/>
33+
<okp:codeFinder useCodeFinder="no">#v1
3334
count.i=2
3435
rule0=%(([-0+ #]?)[-0+ #]?)((\d\$)?)(([\d\*]*)(\.[\d\*]*)?)[dioxXucsfeEgGpn]
3536
rule1=&lt;(/?)\w[^&lt;]*?&gt;

webapp/src/test/java/com/box/l10n/mojito/service/assetExtraction/AssetExtractionServiceTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,50 @@ public void testAndroidStringsWithNotTranslatable() throws Exception {
446446

447447
}
448448

449+
@Test
450+
public void testAndroidStringsWithEscapedHTMLTags() throws Exception {
451+
452+
Repository repository = repositoryService.createRepository(testIdWatcher.getEntityName("repository"));
453+
454+
String content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
455+
+ "<resources>\n"
456+
+ " <string name=\"Test\">Hello, %1$s! You have &lt;b>%2$d new messages&lt;/b>.</string>\n"
457+
+ "</resources>";
458+
Asset asset = assetService.createAsset(repository.getId(), content, "path/to/fake/res/strings.xml");
459+
460+
PollableFuture<Asset> processResult = assetExtractionService.processAsset(asset.getId(), null, PollableTask.INJECT_CURRENT_TASK);
461+
Asset processedAsset = processResult.get();
462+
463+
List<AssetTextUnit> assetTextUnits = assetTextUnitRepository.findByAssetExtraction(processedAsset.getLastSuccessfulAssetExtraction());
464+
465+
assertEquals("Processing should have extracted 1 text units", 1, assetTextUnits.size());
466+
assertEquals("Test", assetTextUnits.get(0).getName());
467+
assertEquals("Hello, %1$s! You have <b>%2$d new messages</b>.", assetTextUnits.get(0).getContent());
468+
469+
}
470+
471+
@Test
472+
public void testAndroidStringsWithCDATA() throws Exception {
473+
474+
Repository repository = repositoryService.createRepository(testIdWatcher.getEntityName("repository"));
475+
476+
String content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
477+
+ "<resources>\n"
478+
+ " <string name=\"Test\">Hello, %1$s! You have <![CDATA[<b>%2$d new messages</b>]]>.</string>\n"
479+
+ "</resources>";
480+
Asset asset = assetService.createAsset(repository.getId(), content, "path/to/fake/res/strings.xml");
481+
482+
PollableFuture<Asset> processResult = assetExtractionService.processAsset(asset.getId(), null, PollableTask.INJECT_CURRENT_TASK);
483+
Asset processedAsset = processResult.get();
484+
485+
List<AssetTextUnit> assetTextUnits = assetTextUnitRepository.findByAssetExtraction(processedAsset.getLastSuccessfulAssetExtraction());
486+
487+
assertEquals("Processing should have extracted 1 text units", 1, assetTextUnits.size());
488+
assertEquals("Test", assetTextUnits.get(0).getName());
489+
assertEquals("Hello, %1$s! You have <b>%2$d new messages</b>.", assetTextUnits.get(0).getContent());
490+
491+
}
492+
449493
@Test
450494
public void testXliffNoResname() throws Exception {
451495

webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -674,11 +674,66 @@ public void testLocalizeAndroidStringsWithSpecialCharacters() throws Exception {
674674
logger.debug("localized=\n{}", localizedAsset);
675675
assertEquals(assetContent, localizedAsset);
676676
}
677-
677+
678+
/**
679+
* This test is to test {@link XMLEncoder} with escaped HTML and CDATA
680+
*
681+
* @throws Exception
682+
*/
683+
@Test
684+
public void testLocalizeAndroidStringsWithEscapedHTMLAndCDATA() throws Exception {
685+
686+
Repository repo = repositoryService.createRepository(testIdWatcher.getEntityName("repository"));
687+
RepositoryLocale repoLocale;
688+
try {
689+
repoLocale = repositoryService.addRepositoryLocale(repo, "en-GB");
690+
} catch (RepositoryLocaleCreationException e) {
691+
throw new RuntimeException(e);
692+
}
693+
694+
String assetContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
695+
+ "<resources>\n"
696+
+ " <string name=\"welcome_messages0\">Hello, %1$s! You have <b>%2$d new messages</b>.</string>\n"
697+
+ " <string name=\"welcome_messages1\">Hello, %1$s! You have &lt;b>%2$d new messages&lt;/b>.</string>\n"
698+
+ " <string name=\"welcome_messages2\">Hello, %1$s! You have <![CDATA[<b>%2$d new messages</b>]]>.</string>\n"
699+
+ "</resources>";
700+
String expectedLocalizedAsset = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
701+
+ "<resources>\n"
702+
+ " <string name=\"welcome_messages0\">Hello, %1$s! You have &lt;b>%2$d new messages&lt;/b>.</string>\n"
703+
+ " <string name=\"welcome_messages1\">Hello, %1$s! You have &lt;b>%2$d new messages&lt;/b>.</string>\n"
704+
+ " <string name=\"welcome_messages2\">Hello, %1$s! You have &lt;b>%2$d new messages&lt;/b>.</string>\n"
705+
+ "</resources>";
706+
asset = assetService.createAsset(repo.getId(), assetContent, "res/values/strings.xml");
707+
asset = assetRepository.findOne(asset.getId());
708+
assetId = asset.getId();
709+
tmId = repo.getTm().getId();
710+
711+
PollableFuture<Asset> assetResult = assetService.addOrUpdateAssetAndProcessIfNeeded(repo.getId(), assetContent, asset.getPath());
712+
try {
713+
pollableTaskService.waitForPollableTask(assetResult.getPollableTask().getId());
714+
} catch (PollableTaskException | InterruptedException e) {
715+
throw new RuntimeException(e);
716+
}
717+
assetResult.get();
718+
719+
TextUnitSearcherParameters textUnitSearcherParameters = new TextUnitSearcherParameters();
720+
textUnitSearcherParameters.setRepositoryIds(repo.getId());
721+
textUnitSearcherParameters.setStatusFilter(StatusFilter.FOR_TRANSLATION);
722+
List<TextUnitDTO> textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters);
723+
for (TextUnitDTO textUnitDTO : textUnitDTOs) {
724+
logger.info("source=[{}]", textUnitDTO.getSource());
725+
assertEquals("Hello, %1$s! You have <b>%2$d new messages</b>.", textUnitDTO.getSource());
726+
}
727+
728+
String localizedAsset = tmService.generateLocalized(asset, assetContent, repoLocale, "en-GB");
729+
logger.info("localized=\n{}", localizedAsset);
730+
assertEquals(expectedLocalizedAsset, localizedAsset);
731+
}
732+
678733
/**
679734
* This test is to test AndroidStrings array with empty item
680-
*
681-
* @throws Exception
735+
*
736+
* @throws Exception
682737
*/
683738
@Test
684739
public void testLocalizeAndroidStringsArrayWithEmptyItem() throws Exception {

0 commit comments

Comments
 (0)