first commit
Some checks failed
Backend Tests / Static Checks (push) Has been cancelled
Backend Tests / Tests (other) (push) Has been cancelled
Backend Tests / Tests (plugin) (push) Has been cancelled
Backend Tests / Tests (server) (push) Has been cancelled
Backend Tests / Tests (store) (push) Has been cancelled
Build Canary Image / build-frontend (push) Has been cancelled
Build Canary Image / build-push (linux/amd64) (push) Has been cancelled
Build Canary Image / build-push (linux/arm64) (push) Has been cancelled
Build Canary Image / merge (push) Has been cancelled
Frontend Tests / Lint (push) Has been cancelled
Frontend Tests / Build (push) Has been cancelled
Proto Linter / Lint Protos (push) Has been cancelled

This commit is contained in:
2026-03-04 06:30:47 +00:00
commit bb402d4ccc
777 changed files with 135661 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
package ast
import (
gast "github.com/yuin/goldmark/ast"
)
// TagNode represents a #tag in the markdown AST.
type TagNode struct {
gast.BaseInline
// Tag name without the # prefix
Tag []byte
}
// KindTag is the NodeKind for TagNode.
var KindTag = gast.NewNodeKind("Tag")
// Kind returns KindTag.
func (*TagNode) Kind() gast.NodeKind {
return KindTag
}
// Dump implements Node.Dump for debugging.
func (n *TagNode) Dump(source []byte, level int) {
gast.DumpHelper(n, source, level, map[string]string{
"Tag": string(n.Tag),
}, nil)
}

View File

@@ -0,0 +1,24 @@
package extensions
import (
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util"
mparser "github.com/usememos/memos/plugin/markdown/parser"
)
type tagExtension struct{}
// TagExtension is a goldmark extension for #tag syntax.
var TagExtension = &tagExtension{}
// Extend extends the goldmark parser with tag support.
func (*tagExtension) Extend(m goldmark.Markdown) {
m.Parser().AddOptions(
parser.WithInlineParsers(
// Priority 200 - run before standard link parser (500)
util.Prioritized(mparser.NewTagParser(), 200),
),
)
}

409
plugin/markdown/markdown.go Normal file
View File

@@ -0,0 +1,409 @@
package markdown
import (
"bytes"
"strings"
"github.com/yuin/goldmark"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
mast "github.com/usememos/memos/plugin/markdown/ast"
"github.com/usememos/memos/plugin/markdown/extensions"
"github.com/usememos/memos/plugin/markdown/renderer"
storepb "github.com/usememos/memos/proto/gen/store"
)
// ExtractedData contains all metadata extracted from markdown in a single pass.
type ExtractedData struct {
Tags []string
Property *storepb.MemoPayload_Property
}
// Service handles markdown metadata extraction.
// It uses goldmark to parse markdown and extract tags, properties, and snippets.
// HTML rendering is primarily done on frontend using markdown-it, but backend provides
// RenderHTML for RSS feeds and other server-side rendering needs.
type Service interface {
// ExtractAll extracts tags, properties, and references in a single parse (most efficient)
ExtractAll(content []byte) (*ExtractedData, error)
// ExtractTags returns all #tags found in content
ExtractTags(content []byte) ([]string, error)
// ExtractProperties computes boolean properties
ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error)
// RenderMarkdown renders goldmark AST back to markdown text
RenderMarkdown(content []byte) (string, error)
// RenderHTML renders markdown content to HTML
RenderHTML(content []byte) (string, error)
// GenerateSnippet creates plain text summary
GenerateSnippet(content []byte, maxLength int) (string, error)
// ValidateContent checks for syntax errors
ValidateContent(content []byte) error
// RenameTag renames all occurrences of oldTag to newTag in content
RenameTag(content []byte, oldTag, newTag string) (string, error)
}
// service implements the Service interface.
type service struct {
md goldmark.Markdown
}
// Option configures the markdown service.
type Option func(*config)
type config struct {
enableTags bool
}
// WithTagExtension enables #tag parsing.
func WithTagExtension() Option {
return func(c *config) {
c.enableTags = true
}
}
// NewService creates a new markdown service with the given options.
func NewService(opts ...Option) Service {
cfg := &config{}
for _, opt := range opts {
opt(cfg)
}
exts := []goldmark.Extender{
extension.GFM, // GitHub Flavored Markdown (tables, strikethrough, task lists, autolinks)
}
// Add custom extensions based on config
if cfg.enableTags {
exts = append(exts, extensions.TagExtension)
}
md := goldmark.New(
goldmark.WithExtensions(exts...),
goldmark.WithParserOptions(
parser.WithAutoHeadingID(), // Generate heading IDs
),
)
return &service{
md: md,
}
}
// parse is an internal helper to parse content into AST.
func (s *service) parse(content []byte) (gast.Node, error) {
reader := text.NewReader(content)
doc := s.md.Parser().Parse(reader)
return doc, nil
}
// ExtractTags returns all #tags found in content.
func (s *service) ExtractTags(content []byte) ([]string, error) {
root, err := s.parse(content)
if err != nil {
return nil, err
}
var tags []string
// Walk the AST to find tag nodes
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering {
return gast.WalkContinue, nil
}
// Check for custom TagNode
if tagNode, ok := n.(*mast.TagNode); ok {
tags = append(tags, string(tagNode.Tag))
}
return gast.WalkContinue, nil
})
if err != nil {
return nil, err
}
// Deduplicate tags while preserving original case
return uniquePreserveCase(tags), nil
}
// ExtractProperties computes boolean properties about the content.
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
root, err := s.parse(content)
if err != nil {
return nil, err
}
prop := &storepb.MemoPayload_Property{}
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering {
return gast.WalkContinue, nil
}
switch n.Kind() {
case gast.KindLink:
prop.HasLink = true
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
prop.HasCode = true
case east.KindTaskCheckBox:
prop.HasTaskList = true
if checkBox, ok := n.(*east.TaskCheckBox); ok {
if !checkBox.IsChecked {
prop.HasIncompleteTasks = true
}
}
default:
// No special handling for other node types
}
return gast.WalkContinue, nil
})
if err != nil {
return nil, err
}
return prop, nil
}
// RenderMarkdown renders goldmark AST back to markdown text.
func (s *service) RenderMarkdown(content []byte) (string, error) {
root, err := s.parse(content)
if err != nil {
return "", err
}
mdRenderer := renderer.NewMarkdownRenderer()
return mdRenderer.Render(root, content), nil
}
// RenderHTML renders markdown content to HTML using goldmark's built-in HTML renderer.
func (s *service) RenderHTML(content []byte) (string, error) {
var buf bytes.Buffer
if err := s.md.Convert(content, &buf); err != nil {
return "", err
}
return buf.String(), nil
}
// GenerateSnippet creates a plain text summary from markdown content.
func (s *service) GenerateSnippet(content []byte, maxLength int) (string, error) {
root, err := s.parse(content)
if err != nil {
return "", err
}
var buf strings.Builder
var lastNodeWasBlock bool
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if entering {
// Skip code blocks and code spans entirely
switch n.Kind() {
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
return gast.WalkSkipChildren, nil
default:
// Continue walking for other node types
}
// Add space before block elements (except first)
switch n.Kind() {
case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
if buf.Len() > 0 && lastNodeWasBlock {
buf.WriteByte(' ')
}
default:
// No space needed for other node types
}
}
if !entering {
// Mark that we just exited a block element
switch n.Kind() {
case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
lastNodeWasBlock = true
default:
// Not a block element
}
return gast.WalkContinue, nil
}
lastNodeWasBlock = false
// Only extract plain text nodes
if textNode, ok := n.(*gast.Text); ok {
segment := textNode.Segment
buf.Write(segment.Value(content))
// Add space if this is a soft line break
if textNode.SoftLineBreak() {
buf.WriteByte(' ')
}
}
// Stop walking if we've exceeded double the max length
// (we'll truncate precisely later)
if buf.Len() > maxLength*2 {
return gast.WalkStop, nil
}
return gast.WalkContinue, nil
})
if err != nil {
return "", err
}
snippet := buf.String()
// Truncate at word boundary if needed
if len(snippet) > maxLength {
snippet = truncateAtWord(snippet, maxLength)
}
return strings.TrimSpace(snippet), nil
}
// ValidateContent checks if the markdown content is valid.
func (s *service) ValidateContent(content []byte) error {
// Try to parse the content
_, err := s.parse(content)
return err
}
// ExtractAll extracts tags, properties, and references in a single parse for efficiency.
func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
root, err := s.parse(content)
if err != nil {
return nil, err
}
data := &ExtractedData{
Tags: []string{},
Property: &storepb.MemoPayload_Property{},
}
// Single walk to collect all data
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering {
return gast.WalkContinue, nil
}
// Extract tags
if tagNode, ok := n.(*mast.TagNode); ok {
data.Tags = append(data.Tags, string(tagNode.Tag))
}
// Extract properties based on node kind
switch n.Kind() {
case gast.KindLink:
data.Property.HasLink = true
case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
data.Property.HasCode = true
case east.KindTaskCheckBox:
data.Property.HasTaskList = true
if checkBox, ok := n.(*east.TaskCheckBox); ok {
if !checkBox.IsChecked {
data.Property.HasIncompleteTasks = true
}
}
default:
// No special handling for other node types
}
return gast.WalkContinue, nil
})
if err != nil {
return nil, err
}
// Deduplicate tags while preserving original case
data.Tags = uniquePreserveCase(data.Tags)
return data, nil
}
// RenameTag renames all occurrences of oldTag to newTag in content.
func (s *service) RenameTag(content []byte, oldTag, newTag string) (string, error) {
root, err := s.parse(content)
if err != nil {
return "", err
}
// Walk the AST to find and rename tag nodes
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering {
return gast.WalkContinue, nil
}
// Check for custom TagNode and rename if it matches
if tagNode, ok := n.(*mast.TagNode); ok {
if string(tagNode.Tag) == oldTag {
tagNode.Tag = []byte(newTag)
}
}
return gast.WalkContinue, nil
})
if err != nil {
return "", err
}
// Render back to markdown using the already-parsed AST
mdRenderer := renderer.NewMarkdownRenderer()
return mdRenderer.Render(root, content), nil
}
// uniquePreserveCase returns unique strings from input while preserving case.
func uniquePreserveCase(strs []string) []string {
seen := make(map[string]struct{})
var result []string
for _, s := range strs {
if _, exists := seen[s]; !exists {
seen[s] = struct{}{}
result = append(result, s)
}
}
return result
}
// truncateAtWord truncates a string at the last word boundary before maxLength.
// maxLength is treated as a rune (character) count to properly handle UTF-8 multi-byte characters.
func truncateAtWord(s string, maxLength int) string {
// Convert to runes to properly handle multi-byte UTF-8 characters
runes := []rune(s)
if len(runes) <= maxLength {
return s
}
// Truncate to max length (by character count, not byte count)
truncated := string(runes[:maxLength])
// Find last space to avoid cutting in the middle of a word
lastSpace := strings.LastIndexAny(truncated, " \t\n\r")
if lastSpace > 0 {
truncated = truncated[:lastSpace]
}
return truncated + " ..."
}

View File

@@ -0,0 +1,448 @@
package markdown
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestNewService(t *testing.T) {
svc := NewService()
assert.NotNil(t, svc)
}
func TestValidateContent(t *testing.T) {
svc := NewService()
tests := []struct {
name string
content string
wantErr bool
}{
{
name: "valid markdown",
content: "# Hello\n\nThis is **bold** text.",
wantErr: false,
},
{
name: "empty content",
content: "",
wantErr: false,
},
{
name: "complex markdown",
content: "# Title\n\n- List item 1\n- List item 2\n\n```go\ncode block\n```",
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := svc.ValidateContent([]byte(tt.content))
if tt.wantErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
func TestGenerateSnippet(t *testing.T) {
svc := NewService()
tests := []struct {
name string
content string
maxLength int
expected string
}{
{
name: "simple text",
content: "Hello world",
maxLength: 100,
expected: "Hello world",
},
{
name: "text with formatting",
content: "This is **bold** and *italic* text.",
maxLength: 100,
expected: "This is bold and italic text.",
},
{
name: "truncate long text",
content: "This is a very long piece of text that should be truncated at a word boundary.",
maxLength: 30,
expected: "This is a very long piece of ...",
},
{
name: "heading and paragraph",
content: "# My Title\n\nThis is the first paragraph.",
maxLength: 100,
expected: "My Title This is the first paragraph.",
},
{
name: "code block removed",
content: "Text before\n\n```go\ncode\n```\n\nText after",
maxLength: 100,
expected: "Text before Text after",
},
{
name: "list items",
content: "- Item 1\n- Item 2\n- Item 3",
maxLength: 100,
expected: "Item 1 Item 2 Item 3",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
snippet, err := svc.GenerateSnippet([]byte(tt.content), tt.maxLength)
require.NoError(t, err)
assert.Equal(t, tt.expected, snippet)
})
}
}
func TestExtractProperties(t *testing.T) {
tests := []struct {
name string
content string
hasLink bool
hasCode bool
hasTasks bool
hasInc bool
}{
{
name: "plain text",
content: "Just plain text",
hasLink: false,
hasCode: false,
hasTasks: false,
hasInc: false,
},
{
name: "with link",
content: "Check out [this link](https://example.com)",
hasLink: true,
hasCode: false,
hasTasks: false,
hasInc: false,
},
{
name: "with inline code",
content: "Use `console.log()` to debug",
hasLink: false,
hasCode: true,
hasTasks: false,
hasInc: false,
},
{
name: "with code block",
content: "```go\nfunc main() {}\n```",
hasLink: false,
hasCode: true,
hasTasks: false,
hasInc: false,
},
{
name: "with completed task",
content: "- [x] Completed task",
hasLink: false,
hasCode: false,
hasTasks: true,
hasInc: false,
},
{
name: "with incomplete task",
content: "- [ ] Todo item",
hasLink: false,
hasCode: false,
hasTasks: true,
hasInc: true,
},
{
name: "mixed tasks",
content: "- [x] Done\n- [ ] Not done",
hasLink: false,
hasCode: false,
hasTasks: true,
hasInc: true,
},
{
name: "everything",
content: "# Title\n\n[Link](url)\n\n`code`\n\n- [ ] Task",
hasLink: true,
hasCode: true,
hasTasks: true,
hasInc: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
svc := NewService()
props, err := svc.ExtractProperties([]byte(tt.content))
require.NoError(t, err)
assert.Equal(t, tt.hasLink, props.HasLink, "HasLink")
assert.Equal(t, tt.hasCode, props.HasCode, "HasCode")
assert.Equal(t, tt.hasTasks, props.HasTaskList, "HasTaskList")
assert.Equal(t, tt.hasInc, props.HasIncompleteTasks, "HasIncompleteTasks")
})
}
}
func TestExtractTags(t *testing.T) {
tests := []struct {
name string
content string
withExt bool
expected []string
}{
{
name: "no tags",
content: "Just plain text",
withExt: false,
expected: []string{},
},
{
name: "single tag",
content: "Text with #tag",
withExt: true,
expected: []string{"tag"},
},
{
name: "multiple tags",
content: "Text with #tag1 and #tag2",
withExt: true,
expected: []string{"tag1", "tag2"},
},
{
name: "duplicate tags",
content: "#work is important. #Work #WORK",
withExt: true,
expected: []string{"work", "Work", "WORK"},
},
{
name: "tags with hyphens and underscores",
content: "Tags: #work-notes #2024_plans",
withExt: true,
expected: []string{"work-notes", "2024_plans"},
},
{
name: "tags at end of sentence",
content: "This is important #urgent.",
withExt: true,
expected: []string{"urgent"},
},
{
name: "headings not tags",
content: "## Heading\n\n# Title\n\nText with #realtag",
withExt: true,
expected: []string{"realtag"},
},
{
name: "numeric tag",
content: "Issue #123",
withExt: true,
expected: []string{"123"},
},
{
name: "tag in list",
content: "- Item 1 #todo\n- Item 2 #done",
withExt: true,
expected: []string{"todo", "done"},
},
{
name: "no extension enabled",
content: "Text with #tag",
withExt: false,
expected: []string{},
},
{
name: "Chinese tag",
content: "Text with #测试",
withExt: true,
expected: []string{"测试"},
},
{
name: "Chinese tag followed by punctuation",
content: "Text #测试。 More text",
withExt: true,
expected: []string{"测试"},
},
{
name: "mixed Chinese and ASCII tag",
content: "#测试test123 content",
withExt: true,
expected: []string{"测试test123"},
},
{
name: "Japanese tag",
content: "#日本語 content",
withExt: true,
expected: []string{"日本語"},
},
{
name: "Korean tag",
content: "#한국어 content",
withExt: true,
expected: []string{"한국어"},
},
{
name: "hierarchical tag with Chinese",
content: "#work/测试/项目",
withExt: true,
expected: []string{"work/测试/项目"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var svc Service
if tt.withExt {
svc = NewService(WithTagExtension())
} else {
svc = NewService()
}
tags, err := svc.ExtractTags([]byte(tt.content))
require.NoError(t, err)
assert.ElementsMatch(t, tt.expected, tags)
})
}
}
func TestUniquePreserveCase(t *testing.T) {
tests := []struct {
name string
input []string
expected []string
}{
{
name: "empty",
input: []string{},
expected: []string{},
},
{
name: "unique items",
input: []string{"tag1", "tag2", "tag3"},
expected: []string{"tag1", "tag2", "tag3"},
},
{
name: "duplicates",
input: []string{"tag", "TAG", "Tag"},
expected: []string{"tag", "TAG", "Tag"},
},
{
name: "mixed",
input: []string{"Work", "work", "Important", "work"},
expected: []string{"Work", "work", "Important"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := uniquePreserveCase(tt.input)
assert.ElementsMatch(t, tt.expected, result)
})
}
}
func TestTruncateAtWord(t *testing.T) {
tests := []struct {
name string
input string
maxLength int
expected string
}{
{
name: "no truncation needed",
input: "short",
maxLength: 10,
expected: "short",
},
{
name: "exact length",
input: "exactly ten",
maxLength: 11,
expected: "exactly ten",
},
{
name: "truncate at word",
input: "this is a long sentence",
maxLength: 10,
expected: "this is a ...",
},
{
name: "truncate very long word",
input: "supercalifragilisticexpialidocious",
maxLength: 10,
expected: "supercalif ...",
},
{
name: "CJK characters without spaces",
input: "这是一个很长的中文句子没有空格的情况下也要正确处理",
maxLength: 15,
expected: "这是一个很长的中文句子没有空格 ...",
},
{
name: "mixed CJK and Latin",
input: "这是中文mixed with English文字",
maxLength: 10,
expected: "这是中文mixed ...",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := truncateAtWord(tt.input, tt.maxLength)
assert.Equal(t, tt.expected, result)
})
}
}
// Benchmark tests.
func BenchmarkGenerateSnippet(b *testing.B) {
svc := NewService()
content := []byte(`# Large Document
This is a large document with multiple paragraphs and formatting.
## Section 1
Here is some **bold** text and *italic* text with [links](https://example.com).
- List item 1
- List item 2
- List item 3
## Section 2
More content here with ` + "`inline code`" + ` and other elements.
` + "```go\nfunc example() {\n return true\n}\n```")
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := svc.GenerateSnippet(content, 200)
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkExtractProperties(b *testing.B) {
svc := NewService()
content := []byte("# Title\n\n[Link](url)\n\n`code`\n\n- [ ] Task\n- [x] Done")
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := svc.ExtractProperties(content)
if err != nil {
b.Fatal(err)
}
}
}

View File

@@ -0,0 +1,139 @@
package parser
import (
"unicode"
"unicode/utf8"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
mast "github.com/usememos/memos/plugin/markdown/ast"
)
const (
// MaxTagLength defines the maximum number of runes allowed in a tag.
MaxTagLength = 100
)
type tagParser struct{}
// NewTagParser creates a new inline parser for #tag syntax.
func NewTagParser() parser.InlineParser {
return &tagParser{}
}
// Trigger returns the characters that trigger this parser.
func (*tagParser) Trigger() []byte {
return []byte{'#'}
}
// isValidTagRune checks if a Unicode rune is valid in a tag.
// Uses Unicode categories for proper international character support.
func isValidTagRune(r rune) bool {
// Allow Unicode letters (any script: Latin, CJK, Arabic, Cyrillic, etc.)
if unicode.IsLetter(r) {
return true
}
// Allow Unicode digits
if unicode.IsNumber(r) {
return true
}
// Allow emoji and symbols (So category: Symbol, Other)
// This includes emoji, which are essential for social media-style tagging
if unicode.IsSymbol(r) {
return true
}
// Allow specific ASCII symbols for tag structure
// Underscore: word separation (snake_case)
// Hyphen: word separation (kebab-case)
// Forward slash: hierarchical tags (category/subcategory)
// Ampersand: compound tags (science&tech)
if r == '_' || r == '-' || r == '/' || r == '&' {
return true
}
return false
}
// Parse parses #tag syntax using Unicode-aware validation.
// Tags support international characters and follow these rules:
// - Must start with # followed by valid tag characters
// - Valid characters: Unicode letters, Unicode digits, underscore (_), hyphen (-), forward slash (/)
// - Maximum length: 100 runes (Unicode characters)
// - Stops at: whitespace, punctuation, or other invalid characters
func (*tagParser) Parse(_ gast.Node, block text.Reader, _ parser.Context) gast.Node {
line, _ := block.PeekLine()
// Must start with #
if len(line) == 0 || line[0] != '#' {
return nil
}
// Check if it's a heading (## or space after #)
if len(line) > 1 {
if line[1] == '#' {
// It's a heading (##), not a tag
return nil
}
if line[1] == ' ' {
// Space after # - heading or just a hash
return nil
}
} else {
// Just a lone #
return nil
}
// Parse tag using UTF-8 aware rune iteration
tagStart := 1
pos := tagStart
runeCount := 0
for pos < len(line) {
r, size := utf8.DecodeRune(line[pos:])
// Stop at invalid UTF-8
if r == utf8.RuneError && size == 1 {
break
}
// Validate character using Unicode categories
if !isValidTagRune(r) {
break
}
// Enforce max length (by rune count, not byte count)
runeCount++
if runeCount > MaxTagLength {
break
}
pos += size
}
// Must have at least one character after #
if pos <= tagStart {
return nil
}
// Extract tag (without #)
tagName := line[tagStart:pos]
// Make a copy of the tag name
tagCopy := make([]byte, len(tagName))
copy(tagCopy, tagName)
// Advance reader
block.Advance(pos)
// Create node
node := &mast.TagNode{
Tag: tagCopy,
}
return node
}

View File

@@ -0,0 +1,251 @@
package parser
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
mast "github.com/usememos/memos/plugin/markdown/ast"
)
func TestTagParser(t *testing.T) {
tests := []struct {
name string
input string
expectedTag string
shouldParse bool
}{
{
name: "basic tag",
input: "#tag",
expectedTag: "tag",
shouldParse: true,
},
{
name: "tag with hyphen",
input: "#work-notes",
expectedTag: "work-notes",
shouldParse: true,
},
{
name: "tag with ampersand",
input: "#science&tech",
expectedTag: "science&tech",
shouldParse: true,
},
{
name: "tag with underscore",
input: "#2024_plans",
expectedTag: "2024_plans",
shouldParse: true,
},
{
name: "numeric tag",
input: "#123",
expectedTag: "123",
shouldParse: true,
},
{
name: "tag followed by space",
input: "#tag ",
expectedTag: "tag",
shouldParse: true,
},
{
name: "tag followed by punctuation",
input: "#tag.",
expectedTag: "tag",
shouldParse: true,
},
{
name: "tag in sentence",
input: "#important task",
expectedTag: "important",
shouldParse: true,
},
{
name: "heading (##)",
input: "## Heading",
expectedTag: "",
shouldParse: false,
},
{
name: "space after hash",
input: "# heading",
expectedTag: "",
shouldParse: false,
},
{
name: "lone hash",
input: "#",
expectedTag: "",
shouldParse: false,
},
{
name: "hash with space",
input: "# ",
expectedTag: "",
shouldParse: false,
},
{
name: "special characters",
input: "#tag@special",
expectedTag: "tag",
shouldParse: true,
},
{
name: "mixed case",
input: "#WorkNotes",
expectedTag: "WorkNotes",
shouldParse: true,
},
{
name: "hierarchical tag with slash",
input: "#tag1/subtag",
expectedTag: "tag1/subtag",
shouldParse: true,
},
{
name: "hierarchical tag with multiple levels",
input: "#tag1/subtag/subtag2",
expectedTag: "tag1/subtag/subtag2",
shouldParse: true,
},
{
name: "hierarchical tag followed by space",
input: "#work/notes ",
expectedTag: "work/notes",
shouldParse: true,
},
{
name: "hierarchical tag followed by punctuation",
input: "#project/2024.",
expectedTag: "project/2024",
shouldParse: true,
},
{
name: "hierarchical tag with numbers and dashes",
input: "#work-log/2024/q1",
expectedTag: "work-log/2024/q1",
shouldParse: true,
},
{
name: "Chinese characters",
input: "#测试",
expectedTag: "测试",
shouldParse: true,
},
{
name: "Chinese tag followed by space",
input: "#测试 some text",
expectedTag: "测试",
shouldParse: true,
},
{
name: "Chinese tag followed by punctuation",
input: "#测试。",
expectedTag: "测试",
shouldParse: true,
},
{
name: "mixed Chinese and ASCII",
input: "#测试test123",
expectedTag: "测试test123",
shouldParse: true,
},
{
name: "Japanese characters",
input: "#テスト",
expectedTag: "テスト",
shouldParse: true,
},
{
name: "Korean characters",
input: "#테스트",
expectedTag: "테스트",
shouldParse: true,
},
{
name: "emoji",
input: "#test🚀",
expectedTag: "test🚀",
shouldParse: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := NewTagParser()
reader := text.NewReader([]byte(tt.input))
ctx := parser.NewContext()
node := p.Parse(nil, reader, ctx)
if tt.shouldParse {
require.NotNil(t, node, "Expected tag to be parsed")
require.IsType(t, &mast.TagNode{}, node)
tagNode, ok := node.(*mast.TagNode)
require.True(t, ok, "Expected node to be *mast.TagNode")
assert.Equal(t, tt.expectedTag, string(tagNode.Tag))
} else {
assert.Nil(t, node, "Expected tag NOT to be parsed")
}
})
}
}
func TestTagParser_Trigger(t *testing.T) {
p := NewTagParser()
triggers := p.Trigger()
assert.Equal(t, []byte{'#'}, triggers)
}
func TestTagParser_MultipleTags(t *testing.T) {
// Test that parser correctly handles multiple tags in sequence
input := "#tag1 #tag2"
p := NewTagParser()
reader := text.NewReader([]byte(input))
ctx := parser.NewContext()
// Parse first tag
node1 := p.Parse(nil, reader, ctx)
require.NotNil(t, node1)
tagNode1, ok := node1.(*mast.TagNode)
require.True(t, ok, "Expected node1 to be *mast.TagNode")
assert.Equal(t, "tag1", string(tagNode1.Tag))
// Advance past the space
reader.Advance(1)
// Parse second tag
node2 := p.Parse(nil, reader, ctx)
require.NotNil(t, node2)
tagNode2, ok := node2.(*mast.TagNode)
require.True(t, ok, "Expected node2 to be *mast.TagNode")
assert.Equal(t, "tag2", string(tagNode2.Tag))
}
func TestTagNode_Kind(t *testing.T) {
node := &mast.TagNode{
Tag: []byte("test"),
}
assert.Equal(t, mast.KindTag, node.Kind())
}
func TestTagNode_Dump(t *testing.T) {
node := &mast.TagNode{
Tag: []byte("test"),
}
// Should not panic
assert.NotPanics(t, func() {
node.Dump([]byte("#test"), 0)
})
}

View File

@@ -0,0 +1,266 @@
package renderer
import (
"bytes"
"fmt"
"strings"
gast "github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
mast "github.com/usememos/memos/plugin/markdown/ast"
)
// MarkdownRenderer renders goldmark AST back to markdown text.
type MarkdownRenderer struct {
buf *bytes.Buffer
}
// NewMarkdownRenderer creates a new markdown renderer.
func NewMarkdownRenderer() *MarkdownRenderer {
return &MarkdownRenderer{
buf: &bytes.Buffer{},
}
}
// Render renders the AST node to markdown and returns the result.
func (r *MarkdownRenderer) Render(node gast.Node, source []byte) string {
r.buf.Reset()
r.renderNode(node, source, 0)
return r.buf.String()
}
// renderNode renders a single node and its children.
func (r *MarkdownRenderer) renderNode(node gast.Node, source []byte, depth int) {
switch n := node.(type) {
case *gast.Document:
r.renderChildren(n, source, depth)
case *gast.Paragraph:
r.renderChildren(n, source, depth)
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
case *gast.Text:
// Text nodes store their content as segments in the source
segment := n.Segment
r.buf.Write(segment.Value(source))
if n.SoftLineBreak() {
r.buf.WriteByte('\n')
} else if n.HardLineBreak() {
r.buf.WriteString(" \n")
}
case *gast.CodeSpan:
r.buf.WriteByte('`')
r.renderChildren(n, source, depth)
r.buf.WriteByte('`')
case *gast.Emphasis:
symbol := "*"
if n.Level == 2 {
symbol = "**"
}
r.buf.WriteString(symbol)
r.renderChildren(n, source, depth)
r.buf.WriteString(symbol)
case *gast.Link:
r.buf.WriteString("[")
r.renderChildren(n, source, depth)
r.buf.WriteString("](")
r.buf.Write(n.Destination)
if len(n.Title) > 0 {
r.buf.WriteString(` "`)
r.buf.Write(n.Title)
r.buf.WriteString(`"`)
}
r.buf.WriteString(")")
case *gast.AutoLink:
url := n.URL(source)
if n.AutoLinkType == gast.AutoLinkEmail {
r.buf.WriteString("<")
r.buf.Write(url)
r.buf.WriteString(">")
} else {
r.buf.Write(url)
}
case *gast.Image:
r.buf.WriteString("![")
r.renderChildren(n, source, depth)
r.buf.WriteString("](")
r.buf.Write(n.Destination)
if len(n.Title) > 0 {
r.buf.WriteString(` "`)
r.buf.Write(n.Title)
r.buf.WriteString(`"`)
}
r.buf.WriteString(")")
case *gast.Heading:
r.buf.WriteString(strings.Repeat("#", n.Level))
r.buf.WriteByte(' ')
r.renderChildren(n, source, depth)
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
case *gast.CodeBlock, *gast.FencedCodeBlock:
r.renderCodeBlock(n, source)
case *gast.Blockquote:
// Render each child line with "> " prefix
r.renderBlockquote(n, source, depth)
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
case *gast.List:
r.renderChildren(n, source, depth)
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
case *gast.ListItem:
r.renderListItem(n, source, depth)
case *gast.ThematicBreak:
r.buf.WriteString("---")
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
case *east.Strikethrough:
r.buf.WriteString("~~")
r.renderChildren(n, source, depth)
r.buf.WriteString("~~")
case *east.TaskCheckBox:
if n.IsChecked {
r.buf.WriteString("[x] ")
} else {
r.buf.WriteString("[ ] ")
}
case *east.Table:
r.renderTable(n, source)
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
// Custom Memos nodes
case *mast.TagNode:
r.buf.WriteByte('#')
r.buf.Write(n.Tag)
default:
// For unknown nodes, try to render children
r.renderChildren(n, source, depth)
}
}
// renderChildren renders all children of a node.
func (r *MarkdownRenderer) renderChildren(node gast.Node, source []byte, depth int) {
child := node.FirstChild()
for child != nil {
r.renderNode(child, source, depth+1)
child = child.NextSibling()
}
}
// renderCodeBlock renders a code block.
func (r *MarkdownRenderer) renderCodeBlock(node gast.Node, source []byte) {
if fenced, ok := node.(*gast.FencedCodeBlock); ok {
// Fenced code block with language
r.buf.WriteString("```")
if lang := fenced.Language(source); len(lang) > 0 {
r.buf.Write(lang)
}
r.buf.WriteByte('\n')
// Write all lines
lines := fenced.Lines()
for i := 0; i < lines.Len(); i++ {
line := lines.At(i)
r.buf.Write(line.Value(source))
}
r.buf.WriteString("```")
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
} else if codeBlock, ok := node.(*gast.CodeBlock); ok {
// Indented code block
lines := codeBlock.Lines()
for i := 0; i < lines.Len(); i++ {
line := lines.At(i)
r.buf.WriteString(" ")
r.buf.Write(line.Value(source))
}
if node.NextSibling() != nil {
r.buf.WriteString("\n\n")
}
}
}
// renderBlockquote renders a blockquote with "> " prefix.
func (r *MarkdownRenderer) renderBlockquote(node *gast.Blockquote, source []byte, depth int) {
// Create a temporary buffer for the blockquote content
tempBuf := &bytes.Buffer{}
tempRenderer := &MarkdownRenderer{buf: tempBuf}
tempRenderer.renderChildren(node, source, depth)
// Add "> " prefix to each line
content := tempBuf.String()
lines := strings.Split(strings.TrimRight(content, "\n"), "\n")
for i, line := range lines {
r.buf.WriteString("> ")
r.buf.WriteString(line)
if i < len(lines)-1 {
r.buf.WriteByte('\n')
}
}
}
// renderListItem renders a list item with proper indentation and markers.
func (r *MarkdownRenderer) renderListItem(node *gast.ListItem, source []byte, depth int) {
parent := node.Parent()
list, ok := parent.(*gast.List)
if !ok {
r.renderChildren(node, source, depth)
return
}
// Add indentation only for nested lists
// Document=0, List=1, ListItem=2 (no indent), nested ListItem=3+ (indent)
if depth > 2 {
indent := strings.Repeat(" ", depth-2)
r.buf.WriteString(indent)
}
// Add list marker
if list.IsOrdered() {
fmt.Fprintf(r.buf, "%d. ", list.Start)
list.Start++ // Increment for next item
} else {
r.buf.WriteString("- ")
}
// Render content
r.renderChildren(node, source, depth)
// Add newline if there's a next sibling
if node.NextSibling() != nil {
r.buf.WriteByte('\n')
}
}
// renderTable renders a table in markdown format.
func (r *MarkdownRenderer) renderTable(table *east.Table, source []byte) {
// This is a simplified table renderer
// A full implementation would need to handle alignment, etc.
r.renderChildren(table, source, 0)
}

View File

@@ -0,0 +1,176 @@
package renderer
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/usememos/memos/plugin/markdown/extensions"
)
func TestMarkdownRenderer(t *testing.T) {
// Create goldmark instance with all extensions
md := goldmark.New(
goldmark.WithExtensions(
extension.GFM,
extensions.TagExtension,
),
goldmark.WithParserOptions(
parser.WithAutoHeadingID(),
),
)
tests := []struct {
name string
input string
expected string
}{
{
name: "simple text",
input: "Hello world",
expected: "Hello world",
},
{
name: "paragraph with newlines",
input: "First paragraph\n\nSecond paragraph",
expected: "First paragraph\n\nSecond paragraph",
},
{
name: "emphasis",
input: "This is *italic* and **bold** text",
expected: "This is *italic* and **bold** text",
},
{
name: "headings",
input: "# Heading 1\n\n## Heading 2\n\n### Heading 3",
expected: "# Heading 1\n\n## Heading 2\n\n### Heading 3",
},
{
name: "link",
input: "Check [this link](https://example.com)",
expected: "Check [this link](https://example.com)",
},
{
name: "image",
input: "![alt text](image.png)",
expected: "![alt text](image.png)",
},
{
name: "code inline",
input: "This is `inline code` here",
expected: "This is `inline code` here",
},
{
name: "code block fenced",
input: "```go\nfunc main() {\n}\n```",
expected: "```go\nfunc main() {\n}\n```",
},
{
name: "unordered list",
input: "- Item 1\n- Item 2\n- Item 3",
expected: "- Item 1\n- Item 2\n- Item 3",
},
{
name: "ordered list",
input: "1. First\n2. Second\n3. Third",
expected: "1. First\n2. Second\n3. Third",
},
{
name: "blockquote",
input: "> This is a quote\n> Second line",
expected: "> This is a quote\n> Second line",
},
{
name: "horizontal rule",
input: "Text before\n\n---\n\nText after",
expected: "Text before\n\n---\n\nText after",
},
{
name: "strikethrough",
input: "This is ~~deleted~~ text",
expected: "This is ~~deleted~~ text",
},
{
name: "task list",
input: "- [x] Completed task\n- [ ] Incomplete task",
expected: "- [x] Completed task\n- [ ] Incomplete task",
},
{
name: "tag",
input: "This has #tag in it",
expected: "This has #tag in it",
},
{
name: "multiple tags",
input: "#work #important meeting notes",
expected: "#work #important meeting notes",
},
{
name: "complex mixed content",
input: "# Meeting Notes\n\n**Date**: 2024-01-01\n\n## Attendees\n- Alice\n- Bob\n\n## Discussion\n\nWe discussed #project status.\n\n```python\nprint('hello')\n```",
expected: "# Meeting Notes\n\n**Date**: 2024-01-01\n\n## Attendees\n\n- Alice\n- Bob\n\n## Discussion\n\nWe discussed #project status.\n\n```python\nprint('hello')\n```",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Parse the input
source := []byte(tt.input)
reader := text.NewReader(source)
doc := md.Parser().Parse(reader)
require.NotNil(t, doc)
// Render back to markdown
renderer := NewMarkdownRenderer()
result := renderer.Render(doc, source)
// For debugging
if result != tt.expected {
t.Logf("Input: %q", tt.input)
t.Logf("Expected: %q", tt.expected)
t.Logf("Got: %q", result)
}
assert.Equal(t, tt.expected, result)
})
}
}
func TestMarkdownRendererPreservesStructure(t *testing.T) {
// Test that parsing and rendering preserves structure
md := goldmark.New(
goldmark.WithExtensions(
extension.GFM,
extensions.TagExtension,
),
)
inputs := []string{
"# Title\n\nParagraph",
"**Bold** and *italic*",
"- List\n- Items",
"#tag #another",
"> Quote",
}
renderer := NewMarkdownRenderer()
for _, input := range inputs {
t.Run(input, func(t *testing.T) {
source := []byte(input)
reader := text.NewReader(source)
doc := md.Parser().Parse(reader)
result := renderer.Render(doc, source)
// The result should be structurally similar
// (may have minor formatting differences)
assert.NotEmpty(t, result)
})
}
}