package com.literalice.docrdr;

import java.io.Serializable;
import java.text.BreakIterator;
import java.util.Locale;
import java.util.UUID;
import java.util.function.Consumer;

import software.amazon.awssdk.services.translate.TranslateClient;
import software.amazon.awssdk.services.translate.model.TranslateException;
import software.amazon.awssdk.services.translate.model.TranslateTextRequest;
import software.amazon.awssdk.services.translate.model.TranslateTextResponse;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist;

/**
 * A document with a title, URL and content, plus translations of the title
 * and content into a target language.
 *
 * <p>Translations are computed on first access through
 * {@link #getTranslatedTitle()} and {@link #getTranslatedContent()} and then
 * kept in the instance, unless they were supplied up front by the
 * constructor that accepts already-translated values.
 */
public class HTMLDocument implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Translate client shared by every instance; being static, it is not part of the serialized state. */
    private static final TranslateClient translateClient = TranslateClient.create();

    /** Identifier: either a generated random UUID string or the value handed to the constructor. */
    private String id;

    /** Original title; stays {@code null} when the instance is built from translated values. */
    private String title;

    /** Translated title; {@code null} until supplied or computed on first access. */
    private String translatedTitle;

    /** URL of the document as supplied by the caller. */
    private String url;

    /** Original content; stays {@code null} when the instance is built from translated values. */
    private String content;

    /** Translated content; {@code null} until supplied or computed on first access. */
    private String translatedContent;

    /** Language code passed as the translation target. */
    private String targetLanguage;

    /**
     * Creates a document from its original title and content and assigns it a
     * freshly generated random UUID as id. The translated title and content
     * are left unset so that they are computed on first access.
     *
     * @param title          original title
     * @param url            URL of the document
     * @param content        original content
     * @param targetLanguage language code to translate into
     */
    HTMLDocument(String title, String url, String content, String targetLanguage) {
        this.title = title;
        this.content = content;
        this.url = url;
        this.targetLanguage = targetLanguage;
        this.id = UUID.randomUUID().toString();
    }

    /**
     * Creates a document from an existing id and already-translated values.
     * The original title and content are not set, so {@link #getTitle()} and
     * {@link #getContent()} return {@code null} for such an instance.
     *
     * @param id                existing identifier
     * @param translatedTitle   title already translated into the target language
     * @param url               URL of the document
     * @param translatedContent content already translated into the target language
     * @param targetLanguage    language code of the translation
     */
    HTMLDocument(String id, String translatedTitle, String url, String translatedContent, String targetLanguage) {
        this.id = id;
        this.targetLanguage = targetLanguage;
        this.url = url;
        this.translatedContent = translatedContent;
        this.translatedTitle = translatedTitle;
    }

    /** @return the identifier of this document */
    public String getId() {
        return id;
    }

    /** @return the original content, or {@code null} if it was never set */
    public String getContent() {
        return content;
    }

    /** @return the original title, or {@code null} if it was never set */
    public String getTitle() {
        return title;
    }

    /** @return the URL of this document */
    public String getUrl() {
        return url;
    }

    /**
     * Returns the translated title, translating the original title on the
     * first call when no translated title is stored yet.
     *
     * @return the translated title
     */
    public String getTranslatedTitle() {
        if (translatedTitle != null) {
            return translatedTitle;
        }
        translatedTitle = translate(title, targetLanguage);
        return translatedTitle;
    }

    /**
     * Returns the translated content. When none is stored yet, the original
     * content is passed through {@code toText} and the result is handed to
     * {@code appendTranslatedText}; the outcome is stored for later calls.
     *
     * @return the translated content
     */
    public String getTranslatedContent() {
        if (translatedContent != null) {
            return translatedContent;
        }
        String plainText = toText(content);
        translatedContent = appendTranslatedText(plainText, targetLanguage);
        return translatedContent;
    }

    /**
     * Translates the given text from English ({@code "en"}) into the target
     * language using the shared translate client.
     *
     * @param text           text to translate
     * @param targetLanguage language code to translate into
     * @return the translated text reported by the service
     * @throws RuntimeException wrapping any {@link TranslateException} raised by the call
     */
    private static String
translate(String text, String targetLanguage) {
        // The source language is fixed to English; only the target language varies.
        TranslateTextRequest translateRequest = TranslateTextRequest.builder()
                .text(text)
                .sourceLanguageCode("en")
                .targetLanguageCode(targetLanguage)
                .build();
        try {
            return translateClient.translateText(translateRequest).translatedText();
        } catch (TranslateException cause) {
            // Rethrown as a plain RuntimeException with the service error kept as the cause.
            throw new RuntimeException(cause);
        }
    }

    private static String appendTranslatedText(String text, String targetLanguage) { StringBuilder translatedTextBuilder = new StringBuilder(); eachSegument(text, "en", (seg) -> { translatedTextBuilder .append("