feat: 资料库获取markdown等内容增加签名处理
This commit is contained in:
@@ -36,13 +36,6 @@
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<!-- tools end -->
|
||||
|
||||
<!-- Jsoup for HTML parsing -->
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.16.1</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
@@ -1,68 +1,124 @@
|
||||
package com.shuwen.groot.common.utils;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Regex-based helpers for locating and rewriting media URLs embedded in Markdown text.
 *
 * <p>Three kinds of references are recognised, always in this order:
 * <ol>
 *   <li>Markdown images, i.e. {@code ![alt](url)};</li>
 *   <li>{@code <video>} / {@code <audio>} tags carrying a double-quoted {@code src} attribute;</li>
 *   <li>the first {@code <source src="...">} that directly follows an opening
 *       {@code <video>} / {@code <audio>} tag (only whitespace may sit in between).</li>
 * </ol>
 *
 * <p>Known limits of the patterns: single-quoted or unquoted {@code src} values, HTML
 * {@code <img>} tags and second or later {@code <source>} children are not matched.
 */
public class MarkdownUtils {

    /** Markdown image; group 2 is everything between the parentheses. */
    private static final Pattern MARKDOWN_IMAGE =
            Pattern.compile("!\\[([^\\]]*)\\]\\(([^)]+)\\)");

    /** {@code <video>} / {@code <audio>} tag with its own {@code src}; group 2 is the URL. */
    private static final Pattern MEDIA_TAG_SRC =
            Pattern.compile("<(video|audio)\\s+[^>]*src=\"([^\"]+)\"[^>]*>");

    /** Opening media tag immediately followed by a {@code <source>}; group 2 is the URL. */
    private static final Pattern MEDIA_SOURCE_SRC =
            Pattern.compile("<(video|audio)[^>]*>\\s*<source\\s+[^>]*src=\"([^\"]+)\"[^>]*>");

    /** Patterns in the order they are applied; every one exposes the URL as {@link #URL_GROUP}. */
    private static final Pattern[] MEDIA_PATTERNS = {MARKDOWN_IMAGE, MEDIA_TAG_SRC, MEDIA_SOURCE_SRC};

    /** Capture-group index holding the URL in each entry of {@link #MEDIA_PATTERNS}. */
    private static final int URL_GROUP = 2;

    /**
     * Rewrites every recognised media URL in {@code markdown} through {@code replacer}.
     *
     * <p>Only the URL itself is substituted; the surrounding text (alt text, other
     * attributes) is copied verbatim, so characters such as {@code $} or {@code \} in
     * either the original text or the new URL are treated literally.
     *
     * @param markdown text to process; {@code null} or empty input is returned unchanged
     * @param replacer maps an original URL to its replacement; must not be {@code null}
     *                 when the text contains a media URL. A {@code null} result keeps
     *                 the original URL in place.
     * @return the text with media URLs replaced
     */
    public static String replaceMediaUrls(String markdown, UrlReplacer replacer) {
        if (markdown == null || markdown.isEmpty()) {
            return markdown;
        }
        String result = markdown;
        // Each pass works on the output of the previous one, mirroring the original pipeline.
        for (Pattern pattern : MEDIA_PATTERNS) {
            result = replaceUrls(result, pattern, replacer);
        }
        return result;
    }

    /**
     * Collects every recognised media URL found in {@code markdown}.
     *
     * <p>URLs are grouped by kind (Markdown images, then media tags with {@code src},
     * then {@code <source>} children), each group in order of appearance. Duplicates
     * are kept.
     *
     * @param markdown text to scan; {@code null} or empty input yields an empty list
     * @return a new mutable list of the URLs found
     */
    public static List<String> extractMediaUrls(String markdown) {
        List<String> urls = new ArrayList<>();
        if (markdown == null || markdown.isEmpty()) {
            return urls;
        }
        for (Pattern pattern : MEDIA_PATTERNS) {
            collectUrls(markdown, pattern, urls);
        }
        return urls;
    }

    /**
     * Replaces the {@link #URL_GROUP} capture of every match of {@code pattern}.
     *
     * <p>The output is assembled by copying the input around each captured URL instead of
     * using {@code Matcher.appendReplacement}, which would interpret {@code $} and
     * {@code \} in the replacement as group references / escapes.
     */
    private static String replaceUrls(String input, Pattern pattern, UrlReplacer replacer) {
        Matcher matcher = pattern.matcher(input);
        StringBuilder out = null;
        int copiedUpTo = 0;

        while (matcher.find()) {
            if (out == null) {
                out = new StringBuilder(input.length() + 64);
            }
            String originalUrl = matcher.group(URL_GROUP);
            String newUrl = replacer.replaceUrl(originalUrl);

            // Copy everything up to the URL untouched, then splice in the new URL.
            out.append(input, copiedUpTo, matcher.start(URL_GROUP));
            out.append(newUrl != null ? newUrl : originalUrl);
            copiedUpTo = matcher.end(URL_GROUP);
        }

        if (out == null) {
            // Nothing matched: hand back the very same string.
            return input;
        }
        out.append(input, copiedUpTo, input.length());
        return out.toString();
    }

    /** Appends the {@link #URL_GROUP} capture of every match of {@code pattern} to {@code urlList}. */
    private static void collectUrls(String input, Pattern pattern, List<String> urlList) {
        Matcher matcher = pattern.matcher(input);
        while (matcher.find()) {
            urlList.add(matcher.group(URL_GROUP));
        }
    }

    /** Strategy that maps an original media URL to the URL that should replace it. */
    @FunctionalInterface
    public interface UrlReplacer {

        /**
         * @param originalUrl the URL exactly as captured from the text
         * @return the URL to put in its place
         */
        String replaceUrl(String originalUrl);
    }

    /** Example usage: prints a sample document before and after URL replacement. */
    public static void main(String[] args) {
        String markdown = "# 示例文档\n\n" +
                "这是一张图片:\n\n" +
                "\n\n" +
                "# 测试1\n" +
                "\n" +
                "## 测试2\n" +
                "\n" +
                "###### 测试3\n" +
                "\n" +
                "* 格式1\n" +
                "* 格式2\n" +
                "\n" +
                "1. 项目1\n" +
                "2. 项目2\n" +
                "\n" +
                "测试markdown\n" +
                "\n" +
                "**加粗**\n" +
                "\n" +
                "*斜体*\n" +
                "这是一段HTML:\n\n" +
                "<div>\n" +
                "  <video controls src=\"https://old-domain.com/videos/1.mp4\"></video>\n" +
                "  <audio controls>\n" +
                "    <source src=\"https://old-domain.com/audio/1.mp3\" type=\"audio/mpeg\">\n" +
                "  </audio>\n" +
                "</div>";

        System.out.println("原始内容:\n" + markdown);

        String processed = replaceMediaUrls(markdown, originalUrl -> {
            // Plug the real URL replacement logic in here.
            return originalUrl.replace("old", "new");
        });

        System.out.println("\n处理后内容:\n" + processed);
    }
}
|
||||
Reference in New Issue
Block a user