first commit
Some checks failed
Backend Tests / Static Checks (push) Has been cancelled
Backend Tests / Tests (other) (push) Has been cancelled
Backend Tests / Tests (plugin) (push) Has been cancelled
Backend Tests / Tests (server) (push) Has been cancelled
Backend Tests / Tests (store) (push) Has been cancelled
Build Canary Image / build-frontend (push) Has been cancelled
Build Canary Image / build-push (linux/amd64) (push) Has been cancelled
Build Canary Image / build-push (linux/arm64) (push) Has been cancelled
Build Canary Image / merge (push) Has been cancelled
Frontend Tests / Lint (push) Has been cancelled
Frontend Tests / Build (push) Has been cancelled
Proto Linter / Lint Protos (push) Has been cancelled
Some checks failed
Backend Tests / Static Checks (push) Has been cancelled
Backend Tests / Tests (other) (push) Has been cancelled
Backend Tests / Tests (plugin) (push) Has been cancelled
Backend Tests / Tests (server) (push) Has been cancelled
Backend Tests / Tests (store) (push) Has been cancelled
Build Canary Image / build-frontend (push) Has been cancelled
Build Canary Image / build-push (linux/amd64) (push) Has been cancelled
Build Canary Image / build-push (linux/arm64) (push) Has been cancelled
Build Canary Image / merge (push) Has been cancelled
Frontend Tests / Lint (push) Has been cancelled
Frontend Tests / Build (push) Has been cancelled
Proto Linter / Lint Protos (push) Has been cancelled
This commit is contained in:
28
plugin/markdown/ast/tag.go
Normal file
28
plugin/markdown/ast/tag.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package ast
|
||||
|
||||
import (
|
||||
gast "github.com/yuin/goldmark/ast"
|
||||
)
|
||||
|
||||
// TagNode represents a #tag in the markdown AST.
|
||||
type TagNode struct {
|
||||
gast.BaseInline
|
||||
|
||||
// Tag name without the # prefix
|
||||
Tag []byte
|
||||
}
|
||||
|
||||
// KindTag is the NodeKind for TagNode.
|
||||
var KindTag = gast.NewNodeKind("Tag")
|
||||
|
||||
// Kind returns KindTag.
|
||||
func (*TagNode) Kind() gast.NodeKind {
|
||||
return KindTag
|
||||
}
|
||||
|
||||
// Dump implements Node.Dump for debugging.
|
||||
func (n *TagNode) Dump(source []byte, level int) {
|
||||
gast.DumpHelper(n, source, level, map[string]string{
|
||||
"Tag": string(n.Tag),
|
||||
}, nil)
|
||||
}
|
||||
24
plugin/markdown/extensions/tag.go
Normal file
24
plugin/markdown/extensions/tag.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package extensions
|
||||
|
||||
import (
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/util"
|
||||
|
||||
mparser "github.com/usememos/memos/plugin/markdown/parser"
|
||||
)
|
||||
|
||||
type tagExtension struct{}
|
||||
|
||||
// TagExtension is a goldmark extension for #tag syntax.
|
||||
var TagExtension = &tagExtension{}
|
||||
|
||||
// Extend extends the goldmark parser with tag support.
|
||||
func (*tagExtension) Extend(m goldmark.Markdown) {
|
||||
m.Parser().AddOptions(
|
||||
parser.WithInlineParsers(
|
||||
// Priority 200 - run before standard link parser (500)
|
||||
util.Prioritized(mparser.NewTagParser(), 200),
|
||||
),
|
||||
)
|
||||
}
|
||||
409
plugin/markdown/markdown.go
Normal file
409
plugin/markdown/markdown.go
Normal file
@@ -0,0 +1,409 @@
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
gast "github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
east "github.com/yuin/goldmark/extension/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
|
||||
mast "github.com/usememos/memos/plugin/markdown/ast"
|
||||
"github.com/usememos/memos/plugin/markdown/extensions"
|
||||
"github.com/usememos/memos/plugin/markdown/renderer"
|
||||
storepb "github.com/usememos/memos/proto/gen/store"
|
||||
)
|
||||
|
||||
// ExtractedData contains all metadata extracted from markdown in a single pass.
|
||||
type ExtractedData struct {
|
||||
Tags []string
|
||||
Property *storepb.MemoPayload_Property
|
||||
}
|
||||
|
||||
// Service handles markdown metadata extraction.
|
||||
// It uses goldmark to parse markdown and extract tags, properties, and snippets.
|
||||
// HTML rendering is primarily done on frontend using markdown-it, but backend provides
|
||||
// RenderHTML for RSS feeds and other server-side rendering needs.
|
||||
type Service interface {
|
||||
// ExtractAll extracts tags, properties, and references in a single parse (most efficient)
|
||||
ExtractAll(content []byte) (*ExtractedData, error)
|
||||
|
||||
// ExtractTags returns all #tags found in content
|
||||
ExtractTags(content []byte) ([]string, error)
|
||||
|
||||
// ExtractProperties computes boolean properties
|
||||
ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error)
|
||||
|
||||
// RenderMarkdown renders goldmark AST back to markdown text
|
||||
RenderMarkdown(content []byte) (string, error)
|
||||
|
||||
// RenderHTML renders markdown content to HTML
|
||||
RenderHTML(content []byte) (string, error)
|
||||
|
||||
// GenerateSnippet creates plain text summary
|
||||
GenerateSnippet(content []byte, maxLength int) (string, error)
|
||||
|
||||
// ValidateContent checks for syntax errors
|
||||
ValidateContent(content []byte) error
|
||||
|
||||
// RenameTag renames all occurrences of oldTag to newTag in content
|
||||
RenameTag(content []byte, oldTag, newTag string) (string, error)
|
||||
}
|
||||
|
||||
// service implements the Service interface.
|
||||
type service struct {
|
||||
md goldmark.Markdown
|
||||
}
|
||||
|
||||
// Option mutates the service configuration before the service is built.
type Option func(*config)

// config holds the switches that NewService consults.
type config struct {
	// enableTags turns on the #tag inline parser.
	enableTags bool
}

// WithTagExtension returns an Option that turns on #tag parsing.
func WithTagExtension() Option {
	enable := func(cfg *config) {
		cfg.enableTags = true
	}
	return enable
}
|
||||
|
||||
// NewService creates a new markdown service with the given options.
|
||||
func NewService(opts ...Option) Service {
|
||||
cfg := &config{}
|
||||
for _, opt := range opts {
|
||||
opt(cfg)
|
||||
}
|
||||
|
||||
exts := []goldmark.Extender{
|
||||
extension.GFM, // GitHub Flavored Markdown (tables, strikethrough, task lists, autolinks)
|
||||
}
|
||||
|
||||
// Add custom extensions based on config
|
||||
if cfg.enableTags {
|
||||
exts = append(exts, extensions.TagExtension)
|
||||
}
|
||||
|
||||
md := goldmark.New(
|
||||
goldmark.WithExtensions(exts...),
|
||||
goldmark.WithParserOptions(
|
||||
parser.WithAutoHeadingID(), // Generate heading IDs
|
||||
),
|
||||
)
|
||||
|
||||
return &service{
|
||||
md: md,
|
||||
}
|
||||
}
|
||||
|
||||
// parse is an internal helper to parse content into AST.
|
||||
func (s *service) parse(content []byte) (gast.Node, error) {
|
||||
reader := text.NewReader(content)
|
||||
doc := s.md.Parser().Parse(reader)
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
// ExtractTags returns all #tags found in content.
|
||||
func (s *service) ExtractTags(content []byte) ([]string, error) {
|
||||
root, err := s.parse(content)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var tags []string
|
||||
|
||||
// Walk the AST to find tag nodes
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return gast.WalkContinue, nil
|
||||
}
|
||||
|
||||
// Check for custom TagNode
|
||||
if tagNode, ok := n.(*mast.TagNode); ok {
|
||||
tags = append(tags, string(tagNode.Tag))
|
||||
}
|
||||
|
||||
return gast.WalkContinue, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Deduplicate tags while preserving original case
|
||||
return uniquePreserveCase(tags), nil
|
||||
}
|
||||
|
||||
// ExtractProperties computes boolean properties about the content.
|
||||
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
|
||||
root, err := s.parse(content)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
prop := &storepb.MemoPayload_Property{}
|
||||
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return gast.WalkContinue, nil
|
||||
}
|
||||
|
||||
switch n.Kind() {
|
||||
case gast.KindLink:
|
||||
prop.HasLink = true
|
||||
|
||||
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
|
||||
prop.HasCode = true
|
||||
|
||||
case east.KindTaskCheckBox:
|
||||
prop.HasTaskList = true
|
||||
if checkBox, ok := n.(*east.TaskCheckBox); ok {
|
||||
if !checkBox.IsChecked {
|
||||
prop.HasIncompleteTasks = true
|
||||
}
|
||||
}
|
||||
default:
|
||||
// No special handling for other node types
|
||||
}
|
||||
|
||||
return gast.WalkContinue, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return prop, nil
|
||||
}
|
||||
|
||||
// RenderMarkdown renders goldmark AST back to markdown text.
|
||||
func (s *service) RenderMarkdown(content []byte) (string, error) {
|
||||
root, err := s.parse(content)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
mdRenderer := renderer.NewMarkdownRenderer()
|
||||
return mdRenderer.Render(root, content), nil
|
||||
}
|
||||
|
||||
// RenderHTML renders markdown content to HTML using goldmark's built-in HTML renderer.
|
||||
func (s *service) RenderHTML(content []byte) (string, error) {
|
||||
var buf bytes.Buffer
|
||||
if err := s.md.Convert(content, &buf); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
// GenerateSnippet creates a plain text summary from markdown content.
|
||||
func (s *service) GenerateSnippet(content []byte, maxLength int) (string, error) {
|
||||
root, err := s.parse(content)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var buf strings.Builder
|
||||
var lastNodeWasBlock bool
|
||||
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if entering {
|
||||
// Skip code blocks and code spans entirely
|
||||
switch n.Kind() {
|
||||
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
|
||||
return gast.WalkSkipChildren, nil
|
||||
default:
|
||||
// Continue walking for other node types
|
||||
}
|
||||
|
||||
// Add space before block elements (except first)
|
||||
switch n.Kind() {
|
||||
case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
|
||||
if buf.Len() > 0 && lastNodeWasBlock {
|
||||
buf.WriteByte(' ')
|
||||
}
|
||||
default:
|
||||
// No space needed for other node types
|
||||
}
|
||||
}
|
||||
|
||||
if !entering {
|
||||
// Mark that we just exited a block element
|
||||
switch n.Kind() {
|
||||
case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
|
||||
lastNodeWasBlock = true
|
||||
default:
|
||||
// Not a block element
|
||||
}
|
||||
return gast.WalkContinue, nil
|
||||
}
|
||||
|
||||
lastNodeWasBlock = false
|
||||
|
||||
// Only extract plain text nodes
|
||||
if textNode, ok := n.(*gast.Text); ok {
|
||||
segment := textNode.Segment
|
||||
buf.Write(segment.Value(content))
|
||||
|
||||
// Add space if this is a soft line break
|
||||
if textNode.SoftLineBreak() {
|
||||
buf.WriteByte(' ')
|
||||
}
|
||||
}
|
||||
|
||||
// Stop walking if we've exceeded double the max length
|
||||
// (we'll truncate precisely later)
|
||||
if buf.Len() > maxLength*2 {
|
||||
return gast.WalkStop, nil
|
||||
}
|
||||
|
||||
return gast.WalkContinue, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
snippet := buf.String()
|
||||
|
||||
// Truncate at word boundary if needed
|
||||
if len(snippet) > maxLength {
|
||||
snippet = truncateAtWord(snippet, maxLength)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(snippet), nil
|
||||
}
|
||||
|
||||
// ValidateContent checks if the markdown content is valid.
|
||||
func (s *service) ValidateContent(content []byte) error {
|
||||
// Try to parse the content
|
||||
_, err := s.parse(content)
|
||||
return err
|
||||
}
|
||||
|
||||
// ExtractAll extracts tags, properties, and references in a single parse for efficiency.
|
||||
func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
|
||||
root, err := s.parse(content)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data := &ExtractedData{
|
||||
Tags: []string{},
|
||||
Property: &storepb.MemoPayload_Property{},
|
||||
}
|
||||
|
||||
// Single walk to collect all data
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return gast.WalkContinue, nil
|
||||
}
|
||||
|
||||
// Extract tags
|
||||
if tagNode, ok := n.(*mast.TagNode); ok {
|
||||
data.Tags = append(data.Tags, string(tagNode.Tag))
|
||||
}
|
||||
|
||||
// Extract properties based on node kind
|
||||
switch n.Kind() {
|
||||
case gast.KindLink:
|
||||
data.Property.HasLink = true
|
||||
|
||||
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
|
||||
data.Property.HasCode = true
|
||||
|
||||
case east.KindTaskCheckBox:
|
||||
data.Property.HasTaskList = true
|
||||
if checkBox, ok := n.(*east.TaskCheckBox); ok {
|
||||
if !checkBox.IsChecked {
|
||||
data.Property.HasIncompleteTasks = true
|
||||
}
|
||||
}
|
||||
default:
|
||||
// No special handling for other node types
|
||||
}
|
||||
|
||||
return gast.WalkContinue, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Deduplicate tags while preserving original case
|
||||
data.Tags = uniquePreserveCase(data.Tags)
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// RenameTag renames all occurrences of oldTag to newTag in content.
|
||||
func (s *service) RenameTag(content []byte, oldTag, newTag string) (string, error) {
|
||||
root, err := s.parse(content)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Walk the AST to find and rename tag nodes
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return gast.WalkContinue, nil
|
||||
}
|
||||
|
||||
// Check for custom TagNode and rename if it matches
|
||||
if tagNode, ok := n.(*mast.TagNode); ok {
|
||||
if string(tagNode.Tag) == oldTag {
|
||||
tagNode.Tag = []byte(newTag)
|
||||
}
|
||||
}
|
||||
|
||||
return gast.WalkContinue, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Render back to markdown using the already-parsed AST
|
||||
mdRenderer := renderer.NewMarkdownRenderer()
|
||||
return mdRenderer.Render(root, content), nil
|
||||
}
|
||||
|
||||
// uniquePreserveCase returns the distinct strings of strs in first-seen
// order. Comparison is exact, so values that differ only by case are all
// kept with their original spelling.
//
// The result is always a non-nil slice, even for empty or nil input, so a
// caller that initialized an empty slice does not get nil back.
func uniquePreserveCase(strs []string) []string {
	// Both containers are bounded by the input length; size them up front.
	seen := make(map[string]struct{}, len(strs))
	result := make([]string, 0, len(strs))

	for _, s := range strs {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		result = append(result, s)
	}

	return result
}
|
||||
|
||||
// truncateAtWord shortens s to at most maxLength runes, cutting at the last
// whitespace before the limit, and appends " ..." to mark the truncation.
//
// maxLength is a rune (character) count, not a byte count, so multi-byte
// UTF-8 text is never split inside a character. If s already fits, it is
// returned unchanged. If the kept prefix contains no whitespace (for example
// CJK text or one very long word), the cut is made exactly at maxLength. A
// negative maxLength is treated as zero instead of panicking.
func truncateAtWord(s string, maxLength int) string {
	if maxLength < 0 {
		maxLength = 0
	}

	// Convert to runes to properly handle multi-byte UTF-8 characters
	runes := []rune(s)
	if len(runes) <= maxLength {
		return s
	}

	// Truncate to max length (by character count, not byte count)
	truncated := string(runes[:maxLength])

	// Back up to the last whitespace to avoid cutting in the middle of a
	// word. LastIndexAny yields a byte offset, which is what slicing needs.
	if lastSpace := strings.LastIndexAny(truncated, " \t\n\r"); lastSpace > 0 {
		truncated = truncated[:lastSpace]
	}

	return truncated + " ..."
}
|
||||
448
plugin/markdown/markdown_test.go
Normal file
448
plugin/markdown/markdown_test.go
Normal file
@@ -0,0 +1,448 @@
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewService(t *testing.T) {
|
||||
svc := NewService()
|
||||
assert.NotNil(t, svc)
|
||||
}
|
||||
|
||||
func TestValidateContent(t *testing.T) {
|
||||
svc := NewService()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid markdown",
|
||||
content: "# Hello\n\nThis is **bold** text.",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty content",
|
||||
content: "",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "complex markdown",
|
||||
content: "# Title\n\n- List item 1\n- List item 2\n\n```go\ncode block\n```",
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := svc.ValidateContent([]byte(tt.content))
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateSnippet(t *testing.T) {
|
||||
svc := NewService()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
maxLength int
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "simple text",
|
||||
content: "Hello world",
|
||||
maxLength: 100,
|
||||
expected: "Hello world",
|
||||
},
|
||||
{
|
||||
name: "text with formatting",
|
||||
content: "This is **bold** and *italic* text.",
|
||||
maxLength: 100,
|
||||
expected: "This is bold and italic text.",
|
||||
},
|
||||
{
|
||||
name: "truncate long text",
|
||||
content: "This is a very long piece of text that should be truncated at a word boundary.",
|
||||
maxLength: 30,
|
||||
expected: "This is a very long piece of ...",
|
||||
},
|
||||
{
|
||||
name: "heading and paragraph",
|
||||
content: "# My Title\n\nThis is the first paragraph.",
|
||||
maxLength: 100,
|
||||
expected: "My Title This is the first paragraph.",
|
||||
},
|
||||
{
|
||||
name: "code block removed",
|
||||
content: "Text before\n\n```go\ncode\n```\n\nText after",
|
||||
maxLength: 100,
|
||||
expected: "Text before Text after",
|
||||
},
|
||||
{
|
||||
name: "list items",
|
||||
content: "- Item 1\n- Item 2\n- Item 3",
|
||||
maxLength: 100,
|
||||
expected: "Item 1 Item 2 Item 3",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
snippet, err := svc.GenerateSnippet([]byte(tt.content), tt.maxLength)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tt.expected, snippet)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractProperties(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
hasLink bool
|
||||
hasCode bool
|
||||
hasTasks bool
|
||||
hasInc bool
|
||||
}{
|
||||
{
|
||||
name: "plain text",
|
||||
content: "Just plain text",
|
||||
hasLink: false,
|
||||
hasCode: false,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
},
|
||||
{
|
||||
name: "with link",
|
||||
content: "Check out [this link](https://example.com)",
|
||||
hasLink: true,
|
||||
hasCode: false,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
},
|
||||
{
|
||||
name: "with inline code",
|
||||
content: "Use `console.log()` to debug",
|
||||
hasLink: false,
|
||||
hasCode: true,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
},
|
||||
{
|
||||
name: "with code block",
|
||||
content: "```go\nfunc main() {}\n```",
|
||||
hasLink: false,
|
||||
hasCode: true,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
},
|
||||
{
|
||||
name: "with completed task",
|
||||
content: "- [x] Completed task",
|
||||
hasLink: false,
|
||||
hasCode: false,
|
||||
hasTasks: true,
|
||||
hasInc: false,
|
||||
},
|
||||
{
|
||||
name: "with incomplete task",
|
||||
content: "- [ ] Todo item",
|
||||
hasLink: false,
|
||||
hasCode: false,
|
||||
hasTasks: true,
|
||||
hasInc: true,
|
||||
},
|
||||
{
|
||||
name: "mixed tasks",
|
||||
content: "- [x] Done\n- [ ] Not done",
|
||||
hasLink: false,
|
||||
hasCode: false,
|
||||
hasTasks: true,
|
||||
hasInc: true,
|
||||
},
|
||||
{
|
||||
name: "everything",
|
||||
content: "# Title\n\n[Link](url)\n\n`code`\n\n- [ ] Task",
|
||||
hasLink: true,
|
||||
hasCode: true,
|
||||
hasTasks: true,
|
||||
hasInc: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
svc := NewService()
|
||||
|
||||
props, err := svc.ExtractProperties([]byte(tt.content))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tt.hasLink, props.HasLink, "HasLink")
|
||||
assert.Equal(t, tt.hasCode, props.HasCode, "HasCode")
|
||||
assert.Equal(t, tt.hasTasks, props.HasTaskList, "HasTaskList")
|
||||
assert.Equal(t, tt.hasInc, props.HasIncompleteTasks, "HasIncompleteTasks")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractTags(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
withExt bool
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "no tags",
|
||||
content: "Just plain text",
|
||||
withExt: false,
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "single tag",
|
||||
content: "Text with #tag",
|
||||
withExt: true,
|
||||
expected: []string{"tag"},
|
||||
},
|
||||
{
|
||||
name: "multiple tags",
|
||||
content: "Text with #tag1 and #tag2",
|
||||
withExt: true,
|
||||
expected: []string{"tag1", "tag2"},
|
||||
},
|
||||
{
|
||||
name: "duplicate tags",
|
||||
content: "#work is important. #Work #WORK",
|
||||
withExt: true,
|
||||
expected: []string{"work", "Work", "WORK"},
|
||||
},
|
||||
{
|
||||
name: "tags with hyphens and underscores",
|
||||
content: "Tags: #work-notes #2024_plans",
|
||||
withExt: true,
|
||||
expected: []string{"work-notes", "2024_plans"},
|
||||
},
|
||||
{
|
||||
name: "tags at end of sentence",
|
||||
content: "This is important #urgent.",
|
||||
withExt: true,
|
||||
expected: []string{"urgent"},
|
||||
},
|
||||
{
|
||||
name: "headings not tags",
|
||||
content: "## Heading\n\n# Title\n\nText with #realtag",
|
||||
withExt: true,
|
||||
expected: []string{"realtag"},
|
||||
},
|
||||
{
|
||||
name: "numeric tag",
|
||||
content: "Issue #123",
|
||||
withExt: true,
|
||||
expected: []string{"123"},
|
||||
},
|
||||
{
|
||||
name: "tag in list",
|
||||
content: "- Item 1 #todo\n- Item 2 #done",
|
||||
withExt: true,
|
||||
expected: []string{"todo", "done"},
|
||||
},
|
||||
{
|
||||
name: "no extension enabled",
|
||||
content: "Text with #tag",
|
||||
withExt: false,
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Chinese tag",
|
||||
content: "Text with #测试",
|
||||
withExt: true,
|
||||
expected: []string{"测试"},
|
||||
},
|
||||
{
|
||||
name: "Chinese tag followed by punctuation",
|
||||
content: "Text #测试。 More text",
|
||||
withExt: true,
|
||||
expected: []string{"测试"},
|
||||
},
|
||||
{
|
||||
name: "mixed Chinese and ASCII tag",
|
||||
content: "#测试test123 content",
|
||||
withExt: true,
|
||||
expected: []string{"测试test123"},
|
||||
},
|
||||
{
|
||||
name: "Japanese tag",
|
||||
content: "#日本語 content",
|
||||
withExt: true,
|
||||
expected: []string{"日本語"},
|
||||
},
|
||||
{
|
||||
name: "Korean tag",
|
||||
content: "#한국어 content",
|
||||
withExt: true,
|
||||
expected: []string{"한국어"},
|
||||
},
|
||||
{
|
||||
name: "hierarchical tag with Chinese",
|
||||
content: "#work/测试/项目",
|
||||
withExt: true,
|
||||
expected: []string{"work/测试/项目"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var svc Service
|
||||
if tt.withExt {
|
||||
svc = NewService(WithTagExtension())
|
||||
} else {
|
||||
svc = NewService()
|
||||
}
|
||||
|
||||
tags, err := svc.ExtractTags([]byte(tt.content))
|
||||
require.NoError(t, err)
|
||||
assert.ElementsMatch(t, tt.expected, tags)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUniquePreserveCase(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input []string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
input: []string{},
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "unique items",
|
||||
input: []string{"tag1", "tag2", "tag3"},
|
||||
expected: []string{"tag1", "tag2", "tag3"},
|
||||
},
|
||||
{
|
||||
name: "duplicates",
|
||||
input: []string{"tag", "TAG", "Tag"},
|
||||
expected: []string{"tag", "TAG", "Tag"},
|
||||
},
|
||||
{
|
||||
name: "mixed",
|
||||
input: []string{"Work", "work", "Important", "work"},
|
||||
expected: []string{"Work", "work", "Important"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := uniquePreserveCase(tt.input)
|
||||
assert.ElementsMatch(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateAtWord(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
maxLength int
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "no truncation needed",
|
||||
input: "short",
|
||||
maxLength: 10,
|
||||
expected: "short",
|
||||
},
|
||||
{
|
||||
name: "exact length",
|
||||
input: "exactly ten",
|
||||
maxLength: 11,
|
||||
expected: "exactly ten",
|
||||
},
|
||||
{
|
||||
name: "truncate at word",
|
||||
input: "this is a long sentence",
|
||||
maxLength: 10,
|
||||
expected: "this is a ...",
|
||||
},
|
||||
{
|
||||
name: "truncate very long word",
|
||||
input: "supercalifragilisticexpialidocious",
|
||||
maxLength: 10,
|
||||
expected: "supercalif ...",
|
||||
},
|
||||
{
|
||||
name: "CJK characters without spaces",
|
||||
input: "这是一个很长的中文句子没有空格的情况下也要正确处理",
|
||||
maxLength: 15,
|
||||
expected: "这是一个很长的中文句子没有空格 ...",
|
||||
},
|
||||
{
|
||||
name: "mixed CJK and Latin",
|
||||
input: "这是中文mixed with English文字",
|
||||
maxLength: 10,
|
||||
expected: "这是中文mixed ...",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := truncateAtWord(tt.input, tt.maxLength)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark tests.
|
||||
func BenchmarkGenerateSnippet(b *testing.B) {
|
||||
svc := NewService()
|
||||
content := []byte(`# Large Document
|
||||
|
||||
This is a large document with multiple paragraphs and formatting.
|
||||
|
||||
## Section 1
|
||||
|
||||
Here is some **bold** text and *italic* text with [links](https://example.com).
|
||||
|
||||
- List item 1
|
||||
- List item 2
|
||||
- List item 3
|
||||
|
||||
## Section 2
|
||||
|
||||
More content here with ` + "`inline code`" + ` and other elements.
|
||||
|
||||
` + "```go\nfunc example() {\n return true\n}\n```")
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := svc.GenerateSnippet(content, 200)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkExtractProperties(b *testing.B) {
|
||||
svc := NewService()
|
||||
content := []byte("# Title\n\n[Link](url)\n\n`code`\n\n- [ ] Task\n- [x] Done")
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := svc.ExtractProperties(content)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
139
plugin/markdown/parser/tag.go
Normal file
139
plugin/markdown/parser/tag.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
gast "github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
|
||||
mast "github.com/usememos/memos/plugin/markdown/ast"
|
||||
)
|
||||
|
||||
// MaxTagLength is the largest number of runes a parsed tag name may contain.
const MaxTagLength = 100
|
||||
|
||||
type tagParser struct{}
|
||||
|
||||
// NewTagParser creates a new inline parser for #tag syntax.
|
||||
func NewTagParser() parser.InlineParser {
|
||||
return &tagParser{}
|
||||
}
|
||||
|
||||
// Trigger returns the characters that trigger this parser.
|
||||
func (*tagParser) Trigger() []byte {
|
||||
return []byte{'#'}
|
||||
}
|
||||
|
||||
// isValidTagRune reports whether r may appear inside a tag name.
//
// Accepted runes:
//   - Unicode letters of any script (Latin, CJK, Arabic, Cyrillic, etc.)
//   - Unicode numbers
//   - the structural ASCII characters '_' (snake_case), '-' (kebab-case),
//     '/' (hierarchical tags) and '&' (compound tags such as science&tech)
//   - non-ASCII symbols, which is what admits emoji
//
// Every other ASCII character is rejected. In particular the ASCII symbols
// ($ + < = > ^ ` | ~) end a tag: unicode.IsSymbol classifies them as
// symbols, but they are markdown/HTML syntax and must not be swallowed into
// a tag name.
func isValidTagRune(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsNumber(r):
		return true
	case r == '_', r == '-', r == '/', r == '&':
		return true
	case r <= unicode.MaxASCII:
		// Remaining ASCII is whitespace, punctuation, control characters or
		// syntax-bearing symbols.
		return false
	default:
		// Emoji and other non-ASCII symbols.
		return unicode.IsSymbol(r)
	}
}
|
||||
|
||||
// Parse parses #tag syntax using Unicode-aware validation.
|
||||
// Tags support international characters and follow these rules:
|
||||
// - Must start with # followed by valid tag characters
|
||||
// - Valid characters: Unicode letters, Unicode digits, underscore (_), hyphen (-), forward slash (/)
|
||||
// - Maximum length: 100 runes (Unicode characters)
|
||||
// - Stops at: whitespace, punctuation, or other invalid characters
|
||||
func (*tagParser) Parse(_ gast.Node, block text.Reader, _ parser.Context) gast.Node {
|
||||
line, _ := block.PeekLine()
|
||||
|
||||
// Must start with #
|
||||
if len(line) == 0 || line[0] != '#' {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if it's a heading (## or space after #)
|
||||
if len(line) > 1 {
|
||||
if line[1] == '#' {
|
||||
// It's a heading (##), not a tag
|
||||
return nil
|
||||
}
|
||||
if line[1] == ' ' {
|
||||
// Space after # - heading or just a hash
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
// Just a lone #
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse tag using UTF-8 aware rune iteration
|
||||
tagStart := 1
|
||||
pos := tagStart
|
||||
runeCount := 0
|
||||
|
||||
for pos < len(line) {
|
||||
r, size := utf8.DecodeRune(line[pos:])
|
||||
|
||||
// Stop at invalid UTF-8
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
break
|
||||
}
|
||||
|
||||
// Validate character using Unicode categories
|
||||
if !isValidTagRune(r) {
|
||||
break
|
||||
}
|
||||
|
||||
// Enforce max length (by rune count, not byte count)
|
||||
runeCount++
|
||||
if runeCount > MaxTagLength {
|
||||
break
|
||||
}
|
||||
|
||||
pos += size
|
||||
}
|
||||
|
||||
// Must have at least one character after #
|
||||
if pos <= tagStart {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Extract tag (without #)
|
||||
tagName := line[tagStart:pos]
|
||||
|
||||
// Make a copy of the tag name
|
||||
tagCopy := make([]byte, len(tagName))
|
||||
copy(tagCopy, tagName)
|
||||
|
||||
// Advance reader
|
||||
block.Advance(pos)
|
||||
|
||||
// Create node
|
||||
node := &mast.TagNode{
|
||||
Tag: tagCopy,
|
||||
}
|
||||
|
||||
return node
|
||||
}
|
||||
251
plugin/markdown/parser/tag_test.go
Normal file
251
plugin/markdown/parser/tag_test.go
Normal file
@@ -0,0 +1,251 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
|
||||
mast "github.com/usememos/memos/plugin/markdown/ast"
|
||||
)
|
||||
|
||||
func TestTagParser(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expectedTag string
|
||||
shouldParse bool
|
||||
}{
|
||||
{
|
||||
name: "basic tag",
|
||||
input: "#tag",
|
||||
expectedTag: "tag",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "tag with hyphen",
|
||||
input: "#work-notes",
|
||||
expectedTag: "work-notes",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "tag with ampersand",
|
||||
input: "#science&tech",
|
||||
expectedTag: "science&tech",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "tag with underscore",
|
||||
input: "#2024_plans",
|
||||
expectedTag: "2024_plans",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "numeric tag",
|
||||
input: "#123",
|
||||
expectedTag: "123",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "tag followed by space",
|
||||
input: "#tag ",
|
||||
expectedTag: "tag",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "tag followed by punctuation",
|
||||
input: "#tag.",
|
||||
expectedTag: "tag",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "tag in sentence",
|
||||
input: "#important task",
|
||||
expectedTag: "important",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "heading (##)",
|
||||
input: "## Heading",
|
||||
expectedTag: "",
|
||||
shouldParse: false,
|
||||
},
|
||||
{
|
||||
name: "space after hash",
|
||||
input: "# heading",
|
||||
expectedTag: "",
|
||||
shouldParse: false,
|
||||
},
|
||||
{
|
||||
name: "lone hash",
|
||||
input: "#",
|
||||
expectedTag: "",
|
||||
shouldParse: false,
|
||||
},
|
||||
{
|
||||
name: "hash with space",
|
||||
input: "# ",
|
||||
expectedTag: "",
|
||||
shouldParse: false,
|
||||
},
|
||||
{
|
||||
name: "special characters",
|
||||
input: "#tag@special",
|
||||
expectedTag: "tag",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "mixed case",
|
||||
input: "#WorkNotes",
|
||||
expectedTag: "WorkNotes",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "hierarchical tag with slash",
|
||||
input: "#tag1/subtag",
|
||||
expectedTag: "tag1/subtag",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "hierarchical tag with multiple levels",
|
||||
input: "#tag1/subtag/subtag2",
|
||||
expectedTag: "tag1/subtag/subtag2",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "hierarchical tag followed by space",
|
||||
input: "#work/notes ",
|
||||
expectedTag: "work/notes",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "hierarchical tag followed by punctuation",
|
||||
input: "#project/2024.",
|
||||
expectedTag: "project/2024",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "hierarchical tag with numbers and dashes",
|
||||
input: "#work-log/2024/q1",
|
||||
expectedTag: "work-log/2024/q1",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "Chinese characters",
|
||||
input: "#测试",
|
||||
expectedTag: "测试",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "Chinese tag followed by space",
|
||||
input: "#测试 some text",
|
||||
expectedTag: "测试",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "Chinese tag followed by punctuation",
|
||||
input: "#测试。",
|
||||
expectedTag: "测试",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "mixed Chinese and ASCII",
|
||||
input: "#测试test123",
|
||||
expectedTag: "测试test123",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "Japanese characters",
|
||||
input: "#テスト",
|
||||
expectedTag: "テスト",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "Korean characters",
|
||||
input: "#테스트",
|
||||
expectedTag: "테스트",
|
||||
shouldParse: true,
|
||||
},
|
||||
{
|
||||
name: "emoji",
|
||||
input: "#test🚀",
|
||||
expectedTag: "test🚀",
|
||||
shouldParse: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := NewTagParser()
|
||||
reader := text.NewReader([]byte(tt.input))
|
||||
ctx := parser.NewContext()
|
||||
|
||||
node := p.Parse(nil, reader, ctx)
|
||||
|
||||
if tt.shouldParse {
|
||||
require.NotNil(t, node, "Expected tag to be parsed")
|
||||
require.IsType(t, &mast.TagNode{}, node)
|
||||
|
||||
tagNode, ok := node.(*mast.TagNode)
|
||||
require.True(t, ok, "Expected node to be *mast.TagNode")
|
||||
assert.Equal(t, tt.expectedTag, string(tagNode.Tag))
|
||||
} else {
|
||||
assert.Nil(t, node, "Expected tag NOT to be parsed")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTagParser_Trigger(t *testing.T) {
|
||||
p := NewTagParser()
|
||||
triggers := p.Trigger()
|
||||
|
||||
assert.Equal(t, []byte{'#'}, triggers)
|
||||
}
|
||||
|
||||
func TestTagParser_MultipleTags(t *testing.T) {
|
||||
// Test that parser correctly handles multiple tags in sequence
|
||||
input := "#tag1 #tag2"
|
||||
|
||||
p := NewTagParser()
|
||||
reader := text.NewReader([]byte(input))
|
||||
ctx := parser.NewContext()
|
||||
|
||||
// Parse first tag
|
||||
node1 := p.Parse(nil, reader, ctx)
|
||||
require.NotNil(t, node1)
|
||||
tagNode1, ok := node1.(*mast.TagNode)
|
||||
require.True(t, ok, "Expected node1 to be *mast.TagNode")
|
||||
assert.Equal(t, "tag1", string(tagNode1.Tag))
|
||||
|
||||
// Advance past the space
|
||||
reader.Advance(1)
|
||||
|
||||
// Parse second tag
|
||||
node2 := p.Parse(nil, reader, ctx)
|
||||
require.NotNil(t, node2)
|
||||
tagNode2, ok := node2.(*mast.TagNode)
|
||||
require.True(t, ok, "Expected node2 to be *mast.TagNode")
|
||||
assert.Equal(t, "tag2", string(tagNode2.Tag))
|
||||
}
|
||||
|
||||
func TestTagNode_Kind(t *testing.T) {
|
||||
node := &mast.TagNode{
|
||||
Tag: []byte("test"),
|
||||
}
|
||||
|
||||
assert.Equal(t, mast.KindTag, node.Kind())
|
||||
}
|
||||
|
||||
func TestTagNode_Dump(t *testing.T) {
|
||||
node := &mast.TagNode{
|
||||
Tag: []byte("test"),
|
||||
}
|
||||
|
||||
// Should not panic
|
||||
assert.NotPanics(t, func() {
|
||||
node.Dump([]byte("#test"), 0)
|
||||
})
|
||||
}
|
||||
266
plugin/markdown/renderer/markdown_renderer.go
Normal file
266
plugin/markdown/renderer/markdown_renderer.go
Normal file
@@ -0,0 +1,266 @@
|
||||
package renderer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
gast "github.com/yuin/goldmark/ast"
|
||||
east "github.com/yuin/goldmark/extension/ast"
|
||||
|
||||
mast "github.com/usememos/memos/plugin/markdown/ast"
|
||||
)
|
||||
|
||||
// MarkdownRenderer serializes a goldmark AST back into markdown text.
type MarkdownRenderer struct {
	// buf accumulates the markdown output while the AST is walked.
	buf *bytes.Buffer
}
|
||||
|
||||
// NewMarkdownRenderer creates a new markdown renderer.
|
||||
func NewMarkdownRenderer() *MarkdownRenderer {
|
||||
return &MarkdownRenderer{
|
||||
buf: &bytes.Buffer{},
|
||||
}
|
||||
}
|
||||
|
||||
// Render renders the AST node to markdown and returns the result.
|
||||
func (r *MarkdownRenderer) Render(node gast.Node, source []byte) string {
|
||||
r.buf.Reset()
|
||||
r.renderNode(node, source, 0)
|
||||
return r.buf.String()
|
||||
}
|
||||
|
||||
// renderNode renders a single node and its children.
|
||||
func (r *MarkdownRenderer) renderNode(node gast.Node, source []byte, depth int) {
|
||||
switch n := node.(type) {
|
||||
case *gast.Document:
|
||||
r.renderChildren(n, source, depth)
|
||||
|
||||
case *gast.Paragraph:
|
||||
r.renderChildren(n, source, depth)
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
|
||||
case *gast.Text:
|
||||
// Text nodes store their content as segments in the source
|
||||
segment := n.Segment
|
||||
r.buf.Write(segment.Value(source))
|
||||
if n.SoftLineBreak() {
|
||||
r.buf.WriteByte('\n')
|
||||
} else if n.HardLineBreak() {
|
||||
r.buf.WriteString(" \n")
|
||||
}
|
||||
|
||||
case *gast.CodeSpan:
|
||||
r.buf.WriteByte('`')
|
||||
r.renderChildren(n, source, depth)
|
||||
r.buf.WriteByte('`')
|
||||
|
||||
case *gast.Emphasis:
|
||||
symbol := "*"
|
||||
if n.Level == 2 {
|
||||
symbol = "**"
|
||||
}
|
||||
r.buf.WriteString(symbol)
|
||||
r.renderChildren(n, source, depth)
|
||||
r.buf.WriteString(symbol)
|
||||
|
||||
case *gast.Link:
|
||||
r.buf.WriteString("[")
|
||||
r.renderChildren(n, source, depth)
|
||||
r.buf.WriteString("](")
|
||||
r.buf.Write(n.Destination)
|
||||
if len(n.Title) > 0 {
|
||||
r.buf.WriteString(` "`)
|
||||
r.buf.Write(n.Title)
|
||||
r.buf.WriteString(`"`)
|
||||
}
|
||||
r.buf.WriteString(")")
|
||||
|
||||
case *gast.AutoLink:
|
||||
url := n.URL(source)
|
||||
if n.AutoLinkType == gast.AutoLinkEmail {
|
||||
r.buf.WriteString("<")
|
||||
r.buf.Write(url)
|
||||
r.buf.WriteString(">")
|
||||
} else {
|
||||
r.buf.Write(url)
|
||||
}
|
||||
|
||||
case *gast.Image:
|
||||
r.buf.WriteString("
|
||||
r.buf.Write(n.Destination)
|
||||
if len(n.Title) > 0 {
|
||||
r.buf.WriteString(` "`)
|
||||
r.buf.Write(n.Title)
|
||||
r.buf.WriteString(`"`)
|
||||
}
|
||||
r.buf.WriteString(")")
|
||||
|
||||
case *gast.Heading:
|
||||
r.buf.WriteString(strings.Repeat("#", n.Level))
|
||||
r.buf.WriteByte(' ')
|
||||
r.renderChildren(n, source, depth)
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
|
||||
case *gast.CodeBlock, *gast.FencedCodeBlock:
|
||||
r.renderCodeBlock(n, source)
|
||||
|
||||
case *gast.Blockquote:
|
||||
// Render each child line with "> " prefix
|
||||
r.renderBlockquote(n, source, depth)
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
|
||||
case *gast.List:
|
||||
r.renderChildren(n, source, depth)
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
|
||||
case *gast.ListItem:
|
||||
r.renderListItem(n, source, depth)
|
||||
|
||||
case *gast.ThematicBreak:
|
||||
r.buf.WriteString("---")
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
|
||||
case *east.Strikethrough:
|
||||
r.buf.WriteString("~~")
|
||||
r.renderChildren(n, source, depth)
|
||||
r.buf.WriteString("~~")
|
||||
|
||||
case *east.TaskCheckBox:
|
||||
if n.IsChecked {
|
||||
r.buf.WriteString("[x] ")
|
||||
} else {
|
||||
r.buf.WriteString("[ ] ")
|
||||
}
|
||||
|
||||
case *east.Table:
|
||||
r.renderTable(n, source)
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
|
||||
// Custom Memos nodes
|
||||
case *mast.TagNode:
|
||||
r.buf.WriteByte('#')
|
||||
r.buf.Write(n.Tag)
|
||||
|
||||
default:
|
||||
// For unknown nodes, try to render children
|
||||
r.renderChildren(n, source, depth)
|
||||
}
|
||||
}
|
||||
|
||||
// renderChildren renders all children of a node.
|
||||
func (r *MarkdownRenderer) renderChildren(node gast.Node, source []byte, depth int) {
|
||||
child := node.FirstChild()
|
||||
for child != nil {
|
||||
r.renderNode(child, source, depth+1)
|
||||
child = child.NextSibling()
|
||||
}
|
||||
}
|
||||
|
||||
// renderCodeBlock renders a code block.
|
||||
func (r *MarkdownRenderer) renderCodeBlock(node gast.Node, source []byte) {
|
||||
if fenced, ok := node.(*gast.FencedCodeBlock); ok {
|
||||
// Fenced code block with language
|
||||
r.buf.WriteString("```")
|
||||
if lang := fenced.Language(source); len(lang) > 0 {
|
||||
r.buf.Write(lang)
|
||||
}
|
||||
r.buf.WriteByte('\n')
|
||||
|
||||
// Write all lines
|
||||
lines := fenced.Lines()
|
||||
for i := 0; i < lines.Len(); i++ {
|
||||
line := lines.At(i)
|
||||
r.buf.Write(line.Value(source))
|
||||
}
|
||||
|
||||
r.buf.WriteString("```")
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
} else if codeBlock, ok := node.(*gast.CodeBlock); ok {
|
||||
// Indented code block
|
||||
lines := codeBlock.Lines()
|
||||
for i := 0; i < lines.Len(); i++ {
|
||||
line := lines.At(i)
|
||||
r.buf.WriteString(" ")
|
||||
r.buf.Write(line.Value(source))
|
||||
}
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteString("\n\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// renderBlockquote renders a blockquote with "> " prefix.
|
||||
func (r *MarkdownRenderer) renderBlockquote(node *gast.Blockquote, source []byte, depth int) {
|
||||
// Create a temporary buffer for the blockquote content
|
||||
tempBuf := &bytes.Buffer{}
|
||||
tempRenderer := &MarkdownRenderer{buf: tempBuf}
|
||||
tempRenderer.renderChildren(node, source, depth)
|
||||
|
||||
// Add "> " prefix to each line
|
||||
content := tempBuf.String()
|
||||
lines := strings.Split(strings.TrimRight(content, "\n"), "\n")
|
||||
for i, line := range lines {
|
||||
r.buf.WriteString("> ")
|
||||
r.buf.WriteString(line)
|
||||
if i < len(lines)-1 {
|
||||
r.buf.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// renderListItem renders a list item with proper indentation and markers.
|
||||
func (r *MarkdownRenderer) renderListItem(node *gast.ListItem, source []byte, depth int) {
|
||||
parent := node.Parent()
|
||||
list, ok := parent.(*gast.List)
|
||||
if !ok {
|
||||
r.renderChildren(node, source, depth)
|
||||
return
|
||||
}
|
||||
|
||||
// Add indentation only for nested lists
|
||||
// Document=0, List=1, ListItem=2 (no indent), nested ListItem=3+ (indent)
|
||||
if depth > 2 {
|
||||
indent := strings.Repeat(" ", depth-2)
|
||||
r.buf.WriteString(indent)
|
||||
}
|
||||
|
||||
// Add list marker
|
||||
if list.IsOrdered() {
|
||||
fmt.Fprintf(r.buf, "%d. ", list.Start)
|
||||
list.Start++ // Increment for next item
|
||||
} else {
|
||||
r.buf.WriteString("- ")
|
||||
}
|
||||
|
||||
// Render content
|
||||
r.renderChildren(node, source, depth)
|
||||
|
||||
// Add newline if there's a next sibling
|
||||
if node.NextSibling() != nil {
|
||||
r.buf.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
|
||||
// renderTable renders a table in markdown format.
|
||||
func (r *MarkdownRenderer) renderTable(table *east.Table, source []byte) {
|
||||
// This is a simplified table renderer
|
||||
// A full implementation would need to handle alignment, etc.
|
||||
r.renderChildren(table, source, 0)
|
||||
}
|
||||
176
plugin/markdown/renderer/markdown_renderer_test.go
Normal file
176
plugin/markdown/renderer/markdown_renderer_test.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package renderer
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
|
||||
"github.com/usememos/memos/plugin/markdown/extensions"
|
||||
)
|
||||
|
||||
func TestMarkdownRenderer(t *testing.T) {
|
||||
// Create goldmark instance with all extensions
|
||||
md := goldmark.New(
|
||||
goldmark.WithExtensions(
|
||||
extension.GFM,
|
||||
extensions.TagExtension,
|
||||
),
|
||||
goldmark.WithParserOptions(
|
||||
parser.WithAutoHeadingID(),
|
||||
),
|
||||
)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "simple text",
|
||||
input: "Hello world",
|
||||
expected: "Hello world",
|
||||
},
|
||||
{
|
||||
name: "paragraph with newlines",
|
||||
input: "First paragraph\n\nSecond paragraph",
|
||||
expected: "First paragraph\n\nSecond paragraph",
|
||||
},
|
||||
{
|
||||
name: "emphasis",
|
||||
input: "This is *italic* and **bold** text",
|
||||
expected: "This is *italic* and **bold** text",
|
||||
},
|
||||
{
|
||||
name: "headings",
|
||||
input: "# Heading 1\n\n## Heading 2\n\n### Heading 3",
|
||||
expected: "# Heading 1\n\n## Heading 2\n\n### Heading 3",
|
||||
},
|
||||
{
|
||||
name: "link",
|
||||
input: "Check [this link](https://example.com)",
|
||||
expected: "Check [this link](https://example.com)",
|
||||
},
|
||||
{
|
||||
name: "image",
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "code inline",
|
||||
input: "This is `inline code` here",
|
||||
expected: "This is `inline code` here",
|
||||
},
|
||||
{
|
||||
name: "code block fenced",
|
||||
input: "```go\nfunc main() {\n}\n```",
|
||||
expected: "```go\nfunc main() {\n}\n```",
|
||||
},
|
||||
{
|
||||
name: "unordered list",
|
||||
input: "- Item 1\n- Item 2\n- Item 3",
|
||||
expected: "- Item 1\n- Item 2\n- Item 3",
|
||||
},
|
||||
{
|
||||
name: "ordered list",
|
||||
input: "1. First\n2. Second\n3. Third",
|
||||
expected: "1. First\n2. Second\n3. Third",
|
||||
},
|
||||
{
|
||||
name: "blockquote",
|
||||
input: "> This is a quote\n> Second line",
|
||||
expected: "> This is a quote\n> Second line",
|
||||
},
|
||||
{
|
||||
name: "horizontal rule",
|
||||
input: "Text before\n\n---\n\nText after",
|
||||
expected: "Text before\n\n---\n\nText after",
|
||||
},
|
||||
{
|
||||
name: "strikethrough",
|
||||
input: "This is ~~deleted~~ text",
|
||||
expected: "This is ~~deleted~~ text",
|
||||
},
|
||||
{
|
||||
name: "task list",
|
||||
input: "- [x] Completed task\n- [ ] Incomplete task",
|
||||
expected: "- [x] Completed task\n- [ ] Incomplete task",
|
||||
},
|
||||
{
|
||||
name: "tag",
|
||||
input: "This has #tag in it",
|
||||
expected: "This has #tag in it",
|
||||
},
|
||||
{
|
||||
name: "multiple tags",
|
||||
input: "#work #important meeting notes",
|
||||
expected: "#work #important meeting notes",
|
||||
},
|
||||
{
|
||||
name: "complex mixed content",
|
||||
input: "# Meeting Notes\n\n**Date**: 2024-01-01\n\n## Attendees\n- Alice\n- Bob\n\n## Discussion\n\nWe discussed #project status.\n\n```python\nprint('hello')\n```",
|
||||
expected: "# Meeting Notes\n\n**Date**: 2024-01-01\n\n## Attendees\n\n- Alice\n- Bob\n\n## Discussion\n\nWe discussed #project status.\n\n```python\nprint('hello')\n```",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Parse the input
|
||||
source := []byte(tt.input)
|
||||
reader := text.NewReader(source)
|
||||
doc := md.Parser().Parse(reader)
|
||||
require.NotNil(t, doc)
|
||||
|
||||
// Render back to markdown
|
||||
renderer := NewMarkdownRenderer()
|
||||
result := renderer.Render(doc, source)
|
||||
|
||||
// For debugging
|
||||
if result != tt.expected {
|
||||
t.Logf("Input: %q", tt.input)
|
||||
t.Logf("Expected: %q", tt.expected)
|
||||
t.Logf("Got: %q", result)
|
||||
}
|
||||
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarkdownRendererPreservesStructure(t *testing.T) {
|
||||
// Test that parsing and rendering preserves structure
|
||||
md := goldmark.New(
|
||||
goldmark.WithExtensions(
|
||||
extension.GFM,
|
||||
extensions.TagExtension,
|
||||
),
|
||||
)
|
||||
|
||||
inputs := []string{
|
||||
"# Title\n\nParagraph",
|
||||
"**Bold** and *italic*",
|
||||
"- List\n- Items",
|
||||
"#tag #another",
|
||||
"> Quote",
|
||||
}
|
||||
|
||||
renderer := NewMarkdownRenderer()
|
||||
|
||||
for _, input := range inputs {
|
||||
t.Run(input, func(t *testing.T) {
|
||||
source := []byte(input)
|
||||
reader := text.NewReader(source)
|
||||
doc := md.Parser().Parse(reader)
|
||||
|
||||
result := renderer.Render(doc, source)
|
||||
|
||||
// The result should be structurally similar
|
||||
// (may have minor formatting differences)
|
||||
assert.NotEmpty(t, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user