feat:资料库增获取markdown等内容增加签名处理

This commit is contained in:
刘亮
2025-05-26 21:45:11 +08:00
parent 9f1b4d29fb
commit 87fd25c7d9
2 changed files with 98 additions and 49 deletions

View File

@@ -36,13 +36,6 @@
<scope>provided</scope>
</dependency>
<!-- tools end -->
<!-- Jsoup for HTML parsing -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.1</version>
</dependency>
</dependencies>
<build>

View File

@@ -1,68 +1,124 @@
package com.shuwen.groot.common.utils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.function.Function;
import java.util.regex.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utilities for locating and rewriting media URLs embedded in Markdown text.
 *
 * <p>Three kinds of references are recognised:
 * <ul>
 *   <li>Markdown images: {@code ![alt](url)} and {@code ![alt](url "title")};</li>
 *   <li>{@code <video>} / {@code <audio>} tags carrying a double-quoted {@code src} attribute;</li>
 *   <li>{@code <source>} tags carrying a double-quoted {@code src} attribute.</li>
 * </ul>
 *
 * <p>Matching is regex based; no HTML parser is involved. Only double-quoted
 * attribute values are recognised.
 *
 * <p>NOTE(review): HTML {@code <img src="...">} tags are not processed by this class.
 * Confirm with the callers whether they need to be.
 */
public class MarkdownUtils {

    /** Name of the capture group that holds the URL in every pattern below. */
    private static final String URL_GROUP = "url";

    /**
     * Markdown image. The URL stops at the first whitespace so that an optional
     * title ({@code ![alt](url "title")}) is not captured as part of the URL.
     */
    private static final Pattern MARKDOWN_IMAGE =
            Pattern.compile("!\\[[^\\]]*\\]\\(\\s*(?<url>[^)\\s]+)[^)]*\\)");

    /**
     * {@code <video>} / {@code <audio>} opening tag with a {@code src} attribute.
     * The attribute name must be preceded by whitespace so that attributes such as
     * {@code data-src} are not mistaken for {@code src}.
     */
    private static final Pattern MEDIA_TAG_SRC = Pattern.compile(
            "<(?:video|audio)\\b[^>]*?\\ssrc\\s*=\\s*\"(?<url>[^\"]+)\"[^>]*>",
            Pattern.CASE_INSENSITIVE);

    /**
     * {@code <source>} tag with a {@code src} attribute. Every {@code <source>} is
     * matched, not only the first child of a media element.
     */
    private static final Pattern SOURCE_TAG_SRC = Pattern.compile(
            "<source\\b[^>]*?\\ssrc\\s*=\\s*\"(?<url>[^\"]+)\"[^>]*>",
            Pattern.CASE_INSENSITIVE);

    /** All patterns, in the order in which they are applied. */
    private static final Pattern[] MEDIA_PATTERNS = {
        MARKDOWN_IMAGE, MEDIA_TAG_SRC, SOURCE_TAG_SRC
    };

    /**
     * Replaces every recognised media URL in {@code markdown} with the value
     * produced by {@code replacer}. Everything other than the URL itself
     * (alt text, titles, other attributes, surrounding text) is left untouched.
     *
     * @param markdown the Markdown text; {@code null} or empty is returned as is
     * @param replacer maps an original URL to its replacement; if it returns
     *                 {@code null} the original URL is kept
     * @return the text with the media URLs replaced
     */
    public static String replaceMediaUrls(String markdown, UrlReplacer replacer) {
        if (markdown == null || markdown.isEmpty()) {
            return markdown;
        }
        String result = markdown;
        for (Pattern pattern : MEDIA_PATTERNS) {
            result = replaceUrls(result, pattern, replacer);
        }
        return result;
    }

    /**
     * Collects every recognised media URL in {@code markdown}.
     *
     * @param markdown the Markdown text; may be {@code null}
     * @return a new mutable list of URLs, grouped by kind (Markdown images first,
     *         then video/audio {@code src}, then {@code <source>} tags) and in
     *         document order within each group; empty when nothing is found
     */
    public static List<String> extractMediaUrls(String markdown) {
        List<String> urls = new ArrayList<>();
        if (markdown == null || markdown.isEmpty()) {
            return urls;
        }
        for (Pattern pattern : MEDIA_PATTERNS) {
            Matcher matcher = pattern.matcher(markdown);
            while (matcher.find()) {
                urls.add(matcher.group(URL_GROUP));
            }
        }
        return urls;
    }

    /**
     * Rewrites the URL group of every match of {@code pattern} in {@code input}.
     *
     * <p>The output is assembled from match offsets instead of
     * {@link Matcher#appendReplacement}, so {@code $} and {@code \} in the new URL
     * are copied literally, and only the URL span is replaced even when the same
     * text also occurs elsewhere inside the match.
     */
    private static String replaceUrls(String input, Pattern pattern, UrlReplacer replacer) {
        Matcher matcher = pattern.matcher(input);
        StringBuilder out = new StringBuilder(input.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            String originalUrl = matcher.group(URL_GROUP);
            String newUrl = replacer.replaceUrl(originalUrl);
            out.append(input, copiedUpTo, matcher.start(URL_GROUP));
            // A null replacement means "leave this URL alone".
            out.append(newUrl != null ? newUrl : originalUrl);
            copiedUpTo = matcher.end(URL_GROUP);
        }
        out.append(input, copiedUpTo, input.length());
        return out.toString();
    }

    /** Strategy that maps an original media URL to its replacement. */
    @FunctionalInterface
    public interface UrlReplacer {
        /**
         * @param originalUrl the URL exactly as it appears in the text
         * @return the URL to put in its place
         */
        String replaceUrl(String originalUrl);
    }

    /** Small demo: prints a sample document before and after URL replacement. */
    public static void main(String[] args) {
        String markdown = "# 示例文档\n\n" +
                "这是一张图片:\n\n" +
                "![示例图片](https://old-domain.com/images/1.jpg \"图片标题\")\n\n" +
                "# 测试1\n" +
                "\n" +
                "## 测试2\n" +
                "\n" +
                "###### 测试3\n" +
                "\n" +
                "* 格式1\n" +
                "* 格式2\n" +
                "\n" +
                "1. 项目1\n" +
                "2. 项目2\n" +
                "\n" +
                "测试markdown\n" +
                "\n" +
                "**加粗**\n" +
                "\n" +
                "*斜体*\n" +
                "这是一段HTML:\n\n" +
                "<div>\n" +
                " <video controls src=\"https://old-domain.com/videos/1.mp4\"></video>\n" +
                " <audio controls>\n" +
                " <source src=\"https://old-domain.com/audio/1.mp3\" type=\"audio/mpeg\">\n" +
                " </audio>\n" +
                "</div>";

        System.out.println("原始内容:\n" + markdown);

        // Put the real URL replacement logic here.
        String processed = replaceMediaUrls(markdown, originalUrl -> originalUrl.replace("old", "new"));

        System.out.println("\n处理后内容:\n" + processed);
    }
}