diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilter.java b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilter.java index 3232eca..4aadde9 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilter.java +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilter.java @@ -24,13 +24,25 @@ public SwedishBrailleFilter(String locale) { this(locale, false); } + /** + * Creates a new Swedish braille filter that does not use contracted braille. + * @param locale the locale + * @param strict if true the result is braille only, if false the result + * contains break point characters such as space, dash and soft hyphen. + */ + public SwedishBrailleFilter(String locale, boolean strict) { + this(locale, strict, false); + } + /** * Creates a new Swedish braille filter with the specified mode. * @param locale the locale * @param strict if true the result is braille only, if false the result * contains break point characters such as space, dash and soft hyphen. + * @param useContractedBraille if true, a {@link SwedishContractedBrailleFilter} is added to the filter chain ahead of the Pas 1 character filter, so that words are contracted before they are converted to braille. 
+ * */ - public SwedishBrailleFilter(String locale, boolean strict) { + public SwedishBrailleFilter(String locale, boolean strict, boolean useContractedBraille) { filters = new CombinationFilter(); // Remove zero width space filters.add(new RegexFilter("\\u200B", "")); @@ -42,6 +54,12 @@ public SwedishBrailleFilter(String locale, boolean strict) { filters.add(new CapitalizationMarkers()); Locale l = FilterLocale.parse(locale).toLocale(); + + if (useContractedBraille) { + // Word level contractions (shorthand); added after the capitalization markers and before Pas 1 + filters.add(new SwedishContractedBrailleFilter()); + } + // Text to braille, Pas 1 filters.add(new UCharFilter(getResource("sv_SE-pas1.xml"), l)); // Text to braille, Pas 2 diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactory.java b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactory.java index dd83668..72ef71b 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactory.java +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactory.java @@ -35,7 +35,16 @@ public BrailleFilter newFilter(String locale, String mode) throws TranslatorConf throw new SwedishFilterConfigurationException(e); } return new DefaultBrailleFilter(new SwedishBrailleFilter(loc.get(), true), loc.get(), sap, hyphenatorService); - } + } else if (loc.isPresent() && mode.equals(TranslatorType.CONTRACTED.toString())) { + /* Contracted mode: strict filter (strict=true) with word contractions enabled (useContractedBraille=true). */ + DefaultMarkerProcessor sap; + try { + sap = new SwedishMarkerProcessorFactory().newMarkerProcessor(loc.get(), mode); + } catch (SwedishMarkerProcessorConfigurationException e) { + throw new SwedishFilterConfigurationException(e); + } + return new DefaultBrailleFilter(new SwedishBrailleFilter(loc.get(), true, true), loc.get(), sap, hyphenatorService); + } throw new SwedishFilterConfigurationException("Factory does not support " + locale + "/" + mode); } diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactoryService.java 
b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactoryService.java index 141a092..0ed0ed4 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactoryService.java +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterFactoryService.java @@ -36,6 +36,8 @@ public SwedishBrailleFilterFactoryService() { this.specs = new ArrayList<>(); String displayName = RESOURCE_BUNDLE.getString("uncontracted-6-dot"); String desc = RESOURCE_BUNDLE.getString("uncontracted-description"); + String contractedDisplayName = RESOURCE_BUNDLE.getString("contracted-6-dot"); + String contractedDesc = RESOURCE_BUNDLE.getString("contracted-description"); specs.add(new TranslatorSpecification("sv", TranslatorMode.Builder .withType(TranslatorType.UNCONTRACTED) .displayName(displayName) @@ -46,11 +48,27 @@ public SwedishBrailleFilterFactoryService() { .displayName(displayName) .description(desc) .build())); - } - - @Override - public boolean supportsSpecification(String locale, String mode) { - return ("sv".equalsIgnoreCase(locale) || "sv-SE".equalsIgnoreCase(locale)) && mode.equals(TranslatorType.UNCONTRACTED.toString()); + specs.add(new TranslatorSpecification("sv", TranslatorMode.Builder + .withType(TranslatorType.CONTRACTED) + .displayName(contractedDisplayName) + .description(contractedDesc) + .build())); + specs.add(new TranslatorSpecification("sv-SE", TranslatorMode.Builder + .withType(TranslatorType.CONTRACTED) + .displayName(contractedDisplayName) + .description(contractedDesc) + .build())); + } + /** Returns true when the locale is "sv" or "sv-SE" (compared ignoring case) and the mode equals the string form of the UNCONTRACTED or CONTRACTED translator type; false otherwise. */ + @Override + public boolean supportsSpecification(String locale, String mode) { + if (("sv".equalsIgnoreCase(locale) || "sv-SE".equalsIgnoreCase(locale)) && mode.equals(TranslatorType.UNCONTRACTED.toString())) { + return true; + } else if (("sv".equalsIgnoreCase(locale) || "sv-SE".equalsIgnoreCase(locale)) && mode.equals(TranslatorType.CONTRACTED.toString())) { + return true; + } else { + return false; + } } @Override @@ -74,7 +92,7 @@ 
public void setHyphenator(HyphenatorFactoryMakerService hyphenator) { public void unsetHyphenator(HyphenatorFactoryMakerService hyphenator) { this.hyphenator = null; } - + @Override public Collection listSpecifications() { return specs; diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactory.java b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactory.java index 6e45443..380863c 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactory.java +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactory.java @@ -41,6 +41,18 @@ public BrailleTranslator newTranslator(String locale, String mode) throws Transl return new SimpleBrailleTranslator( new DefaultBrailleFilter(new SwedishBrailleFilter(loc.get()), loc.get(), sap, hyphenatorService), new DefaultBrailleFinalizer(), mode); + } else if (loc.isPresent() && mode.equals(TranslatorType.CONTRACTED.toString())) { + /* Contracted mode: non-strict filter (strict=false) with word contractions enabled (useContractedBraille=true). */ + DefaultMarkerProcessor sap; + try { + sap = new SwedishMarkerProcessorFactory().newMarkerProcessor(loc.get(), mode); + } catch (SwedishMarkerProcessorConfigurationException e) { + throw new SwedishTranslatorConfigurationException(e); + } + + return new SimpleBrailleTranslator( + new DefaultBrailleFilter(new SwedishBrailleFilter(loc.get(), false, true), loc.get(), sap, hyphenatorService), + new DefaultBrailleFinalizer(), mode); } else if (loc.isPresent() && mode.equals(TranslatorType.PRE_TRANSLATED.toString())) { return new SimpleBrailleTranslator( new PreTranslatedBrailleFilter(), diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactoryService.java b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactoryService.java index c097c59..98db11d 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactoryService.java +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleTranslatorFactoryService.java @@ -35,14 +35,21 @@ public 
class SwedishBrailleTranslatorFactoryService implements public SwedishBrailleTranslatorFactoryService() { this.specs = new ArrayList<>(); String uncontracted = RESOURCE_BUNDLE.getString("uncontracted-6-dot"); + String contracted = RESOURCE_BUNDLE.getString("contracted-6-dot"); String preTranslated = RESOURCE_BUNDLE.getString("pre-translated"); String descUncontracted = RESOURCE_BUNDLE.getString("uncontracted-description"); + String descContracted = RESOURCE_BUNDLE.getString("contracted-description"); String descPreTranslated = RESOURCE_BUNDLE.getString("pre-translated-description"); specs.add(new TranslatorSpecification("sv", TranslatorMode.Builder .withType(TranslatorType.UNCONTRACTED) .displayName(uncontracted) .description(descUncontracted) .build())); + specs.add(new TranslatorSpecification("sv", TranslatorMode.Builder + .withType(TranslatorType.CONTRACTED) + .displayName(contracted) + .description(descContracted) + .build())); specs.add(new TranslatorSpecification("sv", TranslatorMode.Builder .withType(TranslatorType.PRE_TRANSLATED) .displayName(preTranslated) @@ -53,6 +60,11 @@ public SwedishBrailleTranslatorFactoryService() { .displayName(uncontracted) .description(descUncontracted) .build())); + specs.add(new TranslatorSpecification("sv-SE", TranslatorMode.Builder + .withType(TranslatorType.CONTRACTED) + .displayName(contracted) + .description(descContracted) + .build())); specs.add(new TranslatorSpecification("sv-SE", TranslatorMode.Builder .withType(TranslatorType.PRE_TRANSLATED) .displayName(preTranslated) diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishContractedBrailleFilter.java b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishContractedBrailleFilter.java new file mode 100644 index 0000000..b6096f4 --- /dev/null +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishContractedBrailleFilter.java @@ -0,0 +1,110 @@
+package org.daisy.dotify.translator.impl.sv_SE;
+
+import org.daisy.dotify.common.text.StringFilter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Replaces whole words that have an entry in the contraction table with their contracted form.
+ * Only single character contractions are supported so far.
+ */
+public class SwedishContractedBrailleFilter implements StringFilter {
+
+    private static final Logger LOGGER = Logger.getLogger(SwedishContractedBrailleFilter.class.getCanonicalName());
+    private static final String CONTRACTED_BRAILLE_TABLE_PATH = "sv_SE-single-character-contracted-words.xml";
+
+    public static final String CAPITAL_CHAR_MARKER = "\u2820";
+    public static final String SOFT_HYPHEN = "\u00AD";
+
+    // Compiled once: Pattern instances are immutable and can be shared between calls.
+    private static final Pattern TOKEN = Pattern.compile("\\S+");
+    private static final Pattern LETTERS = Pattern.compile(
+            CAPITAL_CHAR_MARKER + "*([\\p{javaUpperCase}\\p{javaLowerCase}" + SOFT_HYPHEN + "]+)");
+
+    private final Map<String, String> contractedBrailleMap = new HashMap<>();
+
+    /**
+     * Creates a filter backed by the bundled table of single character contractions.
+     * TODO: add support for different grades of contraction.
+     *
+     * @throws IllegalStateException if the contraction table cannot be found
+     */
+    public SwedishContractedBrailleFilter() {
+        loadTable();
+    }
+
+    /**
+     * Replaces contractible words with their contracted form.
+     * The input is processed one token (a run of non-whitespace characters) at a time and
+     * all whitespace, including leading and trailing whitespace, is copied through unchanged.
+     *
+     * @param str the string to filter
+     * @return the filtered string
+     */
+    @Override
+    public String filter(String str) {
+        Matcher tokens = TOKEN.matcher(str);
+        StringBuilder sb = new StringBuilder(str.length());
+        int copied = 0;
+        while (tokens.find()) {
+            // Copy the whitespace in front of the token exactly as it was written.
+            sb.append(str, copied, tokens.start());
+            sb.append(contract(tokens.group()));
+            copied = tokens.end();
+        }
+        sb.append(str, copied, str.length());
+        return sb.toString();
+    }
+
+    /**
+     * Contracts a single token. The lookup key is the first run of letters in the token,
+     * lower cased and with soft hyphens removed. Capital markers in front of the letters
+     * and any characters after them are kept.
+     */
+    private String contract(String word) {
+        Matcher letters = LETTERS.matcher(word);
+        if (!letters.find()) {
+            return word;
+        }
+        // Locale.ROOT keeps the lookup independent of the default locale of the JVM.
+        String key = letters.group(1).replace(SOFT_HYPHEN, "").toLowerCase(Locale.ROOT);
+        String replacement = contractedBrailleMap.get(key);
+        if (replacement == null) {
+            return word;
+        }
+        return word.substring(0, letters.start(1)) + replacement + word.substring(letters.end(1));
+    }
+
+    /**
+     * Loads the contraction table, an XML properties file resolved relative to this class.
+     * A table that cannot be read is logged and leaves the filter without contractions.
+     */
+    private void loadTable() {
+        URL tableURL = getClass().getResource(CONTRACTED_BRAILLE_TABLE_PATH);
+        if (tableURL == null) {
+            throw new IllegalStateException("Contraction table not found: " + CONTRACTED_BRAILLE_TABLE_PATH);
+        }
+        Properties props = new Properties();
+        try (InputStream in = tableURL.openStream()) {
+            props.loadFromXML(in);
+        } catch (IOException e) {
+            LOGGER.log(Level.WARNING, "Could not load contraction table " + tableURL, e);
+            return;
+        }
+        for (String key : props.stringPropertyNames()) {
+            contractedBrailleMap.put(key, props.getProperty(key));
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishMarkerProcessorFactory.java b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishMarkerProcessorFactory.java index aa7d07c..0d6c378 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/SwedishMarkerProcessorFactory.java +++ b/src/org/daisy/dotify/translator/impl/sv_SE/SwedishMarkerProcessorFactory.java @@ -18,7 +18,7 @@ class SwedishMarkerProcessorFactory { public DefaultMarkerProcessor newMarkerProcessor(String locale, String mode) throws SwedishMarkerProcessorConfigurationException { if (FilterLocale.parse(locale).equals(sv)||FilterLocale.parse(locale).equals(sv_SE)) { - if (mode.equals(TranslatorType.UNCONTRACTED.toString())) { + if (mode.equals(TranslatorType.UNCONTRACTED.toString()) || mode.equals(TranslatorType.CONTRACTED.toString())) { // Svenska skrivregler för punktskrift 2009, page 34 RegexMarkerDictionary strong = new RegexMarkerDictionary.Builder(). 
diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/messages.properties b/src/org/daisy/dotify/translator/impl/sv_SE/messages.properties index b851dd6..91533b1 100644 --- a/src/org/daisy/dotify/translator/impl/sv_SE/messages.properties +++ b/src/org/daisy/dotify/translator/impl/sv_SE/messages.properties @@ -1,4 +1,6 @@ -uncontracted-6-dot=Uncontracted (6-dot) +uncontracted-6-dot=Uncontracted (6-dot) uncontracted-description=Uncontracted Swedish braille +contracted-6-dot=Contracted (6-dot) +contracted-description=Contracted Swedish braille pre-translated=Pre-translated pre-translated-description=Pre-translated braille \ No newline at end of file diff --git a/src/org/daisy/dotify/translator/impl/sv_SE/sv_SE-single-character-contracted-words.xml b/src/org/daisy/dotify/translator/impl/sv_SE/sv_SE-single-character-contracted-words.xml new file mode 100644 index 0000000..c0bc393 --- /dev/null +++ b/src/org/daisy/dotify/translator/impl/sv_SE/sv_SE-single-character-contracted-words.xml @@ -0,0 +1,42 @@ + + + + Braille translation table for Swedish short script words. 
+ + a + b + c + d + e + f + g + h + j + k + l + m + n + p + q + r + s + t + u + v + w + x + y + z + ä + + ê + î + û + § + ë + ü + ô + è + \ + à + \ No newline at end of file diff --git a/test/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterTest.java b/test/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterTest.java index ecf66e9..4673ec9 100644 --- a/test/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterTest.java +++ b/test/org/daisy/dotify/translator/impl/sv_SE/SwedishBrailleFilterTest.java @@ -237,4 +237,44 @@ public void testFractions_001() { public void testCapitalIWithDot() { assertEquals("⠠⠈⠊", filter.filter("İ")); } + /* The tests below run the complete SwedishBrailleFilter chain in non-strict mode with contracted braille enabled. */ + @Test + public void testFilterAppliesContractedBrailleFilterCorrectly(){ + SwedishBrailleFilter filter_1 = new SwedishBrailleFilter("sv-SE", false, true); + String test_string = "jag vet att"; + String filtered_string = filter_1.filter(test_string); + assertEquals("⠚ ⠧⠑⠞ ⠁", filtered_string); + } + + @Test + public void testFilterAppliesContractedBrailleFilterCorrectlyWithCharCapitalization() { + SwedishBrailleFilter filter_1 = new SwedishBrailleFilter("sv-SE", false, true); + String test_string = "Hon hade många husdjur"; + String filtered_string = filter_1.filter(test_string); + assertEquals("⠠⠓⠕⠝ ⠓⠁⠙⠑ ⠍⠡⠝⠛⠁ ⠓⠥⠎⠙⠚⠥⠗", filtered_string); + } + + @Test + public void testFilterDoesNotCrashWhensContractedBrailleFilterIsAppliedForEmptyStrings() { + SwedishBrailleFilter filter_1 = new SwedishBrailleFilter("sv-SE", false, true); + String test_string = ""; + String filtered_string = filter_1.filter(test_string); + assertEquals("", filtered_string); + } + + @Test + public void testFilterAppliesContractedBrailleFilterCorrectlyWithWordCapitalization(){ + SwedishBrailleFilter filter_1 = new SwedishBrailleFilter("sv-SE", false, true); + String test_string = "HEY jag heter java"; + String filtered_string = filter_1.filter(test_string); + assertEquals("⠠⠠⠓⠑⠽ ⠚ ⠓⠑⠞⠑⠗ ⠚⠁⠧⠁", filtered_string); + } + + @Test + public void 
testFilterAppliesContractedBrailleFilterCorrectlyWithContractionWithSequentialCaptalLetters(){ + SwedishBrailleFilter filter_1 = new SwedishBrailleFilter("sv-SE", false, true); + String test_string = "JAG SAKNAR HONOM MYCKET"; + String filtered_string = filter_1.filter(test_string); + assertEquals("⠠⠠⠠⠚ ⠎⠁⠅⠝⠁⠗ ⠓⠕⠝⠕⠍ ⠽⠱", filtered_string); + } } diff --git a/test/org/daisy/dotify/translator/impl/sv_SE/SwedishContractedBrailleFilterTest.java b/test/org/daisy/dotify/translator/impl/sv_SE/SwedishContractedBrailleFilterTest.java new file mode 100644 index 0000000..e85403c --- /dev/null +++ b/test/org/daisy/dotify/translator/impl/sv_SE/SwedishContractedBrailleFilterTest.java @@ -0,0 +1,77 @@ +package org.daisy.dotify.translator.impl.sv_SE; + +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +import java.net.URL; +/** Tests SwedishContractedBrailleFilter on its own, without the rest of the SwedishBrailleFilter chain. */ +public class SwedishContractedBrailleFilterTest { + + private final SwedishContractedBrailleFilter filter; + + public SwedishContractedBrailleFilterTest() { + + filter = new SwedishContractedBrailleFilter(); + } + + @Test + public void testFilter() { + String text = "jag gillar att dansa men inte att sjunga"; + String filteredString = this.filter.filter(text); + assertEquals("j gillar a dansa men ü a sjunga", filteredString); + } + + @Test + public void testFilterWithFirstCharacterCapitalMarker() { + String text = "⠠Test ⠠Testson ⠠Ett livs berättelse om saker som berör många"; + String filteredString = this.filter.filter(text); + assertEquals("⠠Test ⠠Testson ⠠§ livs berättelse om saker s berör många", filteredString); + } + + @Test + public void testFilterWithEmptyString() { + String text = ""; + String filteredString = this.filter.filter(text); + assertEquals("", filteredString); + } + + @Test + public void testFilterWithOnlySpace() { + String text = " "; + String filteredString = this.filter.filter(text); + assertEquals(" ", filteredString); + + // Test multiple spaces + text = " "; + filteredString = this.filter.filter(text); + 
assertEquals(" ", filteredString); + } + + @Test + public void testFilterWithWordCapitalMarker() { + String test = "⠠⠠JAG börjar att sjunga"; + String filteredString = this.filter.filter(test); + assertEquals("⠠⠠j börjar a sjunga", filteredString); + } + + @Test + public void testFilterWithDoubleBackSlashCharacter() { /* The word "den" is expected to become one backslash, written with its Java escape in the expected string. */ + String test = "Kan du ge mig den där"; + String filteredString = this.filter.filter(test); + assertEquals("k du ge mig \\ d", filteredString); + } + + @Test + public void testFilterWhenSequentialCapitalWordsExists() { + String test = "⠠⠠⠠JAG SAKNAR HONOM MYCKET⠱"; + String filteredString = this.filter.filter(test); + assertEquals("⠠⠠⠠j SAKNAR HONOM y⠱", filteredString); + } + + @Test + public void testFilterWhenStringContainsSoftHyphen() { + String test = "den här texten inne\u00ADhåller ord med och ut\u00ADan soft hyphen"; + String filteredString = this.filter.filter(test); + assertEquals("\\ här texten inne\u00ADhåller ord î c u soft hyphen", filteredString); + } +}