first commit

2026-03-04 06:30:47 +00:00
commit bb402d4ccc
777 changed files with 135661 additions and 0 deletions
--- a/plugin/markdown/ast/tag.go
+++ b/plugin/markdown/ast/tag.go
@@ -0,0 +1,28 @@
+package ast
+
+import (
+	gast "github.com/yuin/goldmark/ast"
+)
+
+// TagNode is the inline AST node that represents a #tag in the markdown AST.
+// It embeds goldmark's BaseInline and carries the tag name in Tag.
+type TagNode struct {
+	gast.BaseInline
+
+	// Tag name without the # prefix
+	Tag []byte
+}
+
+// KindTag is the NodeKind for TagNode, created via goldmark's NewNodeKind
+// under the name "Tag".
+var KindTag = gast.NewNodeKind("Tag")
+
+// Kind returns KindTag, identifying the node as a tag when callers inspect
+// the node kind.
+func (*TagNode) Kind() gast.NodeKind {
+	return KindTag
+}
+
+// Dump implements Node.Dump for debugging. It delegates to goldmark's
+// DumpHelper and exposes the tag name under the "Tag" key.
+func (n *TagNode) Dump(source []byte, level int) {
+	attrs := map[string]string{"Tag": string(n.Tag)}
+	gast.DumpHelper(n, source, level, attrs, nil)
+}
--- a/plugin/markdown/extensions/tag.go
+++ b/plugin/markdown/extensions/tag.go
@@ -0,0 +1,24 @@
+package extensions
+
+import (
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/util"
+
+	mparser "github.com/usememos/memos/plugin/markdown/parser"
+)
+
+// tagExtension is the stateless goldmark.Extender behind TagExtension.
+type tagExtension struct{}
+
+// TagExtension is a goldmark extension for #tag syntax.
+// It is a shared package-level instance of the (empty) tagExtension type.
+var TagExtension = &tagExtension{}
+
+// Extend adds the #tag inline parser to m's parser options.
+func (*tagExtension) Extend(m goldmark.Markdown) {
+	// The tag parser is registered with priority 200.
+	// NOTE(review): the value was chosen so the parser runs before the
+	// standard link parser (documented here as 500) — confirm against
+	// goldmark's default inline parser priorities.
+	tagInline := util.Prioritized(mparser.NewTagParser(), 200)
+	m.Parser().AddOptions(parser.WithInlineParsers(tagInline))
+}
--- a/plugin/markdown/markdown.go
+++ b/plugin/markdown/markdown.go
@@ -0,0 +1,409 @@
+package markdown
+
+import (
+	"bytes"
+	"strings"
+
+	"github.com/yuin/goldmark"
+	gast "github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/extension"
+	east "github.com/yuin/goldmark/extension/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+
+	mast "github.com/usememos/memos/plugin/markdown/ast"
+	"github.com/usememos/memos/plugin/markdown/extensions"
+	"github.com/usememos/memos/plugin/markdown/renderer"
+	storepb "github.com/usememos/memos/proto/gen/store"
+)
+
+// ExtractedData contains all metadata extracted from markdown in a single pass.
+// Tags holds the de-duplicated tag names (without the '#'), and Property holds
+// the boolean content flags (link, code, task list, incomplete tasks) that
+// ExtractAll fills in.
+type ExtractedData struct {
+	Tags     []string
+	Property *storepb.MemoPayload_Property
+}
+
+// Service handles markdown metadata extraction.
+// It uses goldmark to parse markdown and extract tags, properties, and snippets.
+// HTML rendering is primarily done on frontend using markdown-it, but backend provides
+// RenderHTML for RSS feeds and other server-side rendering needs.
+type Service interface {
+	// ExtractAll extracts tags and properties in a single parse (most efficient
+	// when both are needed).
+	ExtractAll(content []byte) (*ExtractedData, error)
+
+	// ExtractTags returns all #tags found in content, without the '#' prefix.
+	// Exact duplicates are removed; tags differing only in case are kept.
+	ExtractTags(content []byte) ([]string, error)
+
+	// ExtractProperties computes boolean properties (has link, has code,
+	// has task list, has incomplete tasks).
+	ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error)
+
+	// RenderMarkdown parses content and renders the goldmark AST back to markdown text
+	RenderMarkdown(content []byte) (string, error)
+
+	// RenderHTML renders markdown content to HTML
+	RenderHTML(content []byte) (string, error)
+
+	// GenerateSnippet creates a plain text summary limited to maxLength characters
+	// (a " ..." marker is appended when the text is truncated).
+	GenerateSnippet(content []byte, maxLength int) (string, error)
+
+	// ValidateContent reports whether content can be parsed.
+	// NOTE(review): the implementation in this file never returns a parse error,
+	// so this currently accepts any input — confirm whether that is intended.
+	ValidateContent(content []byte) error
+
+	// RenameTag renames all occurrences of oldTag to newTag in content and
+	// returns the re-rendered markdown.
+	RenameTag(content []byte, oldTag, newTag string) (string, error)
+}
+
+// service implements the Service interface.
+// md is the goldmark instance configured by NewService; it is used both for
+// parsing to an AST and for HTML conversion.
+type service struct {
+	md goldmark.Markdown
+}
+
+// Option configures the markdown service.
+// Options are applied in order by NewService to a fresh config.
+type Option func(*config)
+
+// config collects the settings that Options can toggle.
+// enableTags controls whether the #tag extension is installed.
+type config struct {
+	enableTags bool
+}
+
+// WithTagExtension returns an Option that turns on #tag parsing.
+func WithTagExtension() Option {
+	return func(cfg *config) { cfg.enableTags = true }
+}
+
+// NewService creates a new markdown service with the given options.
+func NewService(opts ...Option) Service {
+	cfg := &config{}
+	for _, opt := range opts {
+		opt(cfg)
+	}
+
+	exts := []goldmark.Extender{
+		extension.GFM, // GitHub Flavored Markdown (tables, strikethrough, task lists, autolinks)
+	}
+
+	// Add custom extensions based on config
+	if cfg.enableTags {
+		exts = append(exts, extensions.TagExtension)
+	}
+
+	md := goldmark.New(
+		goldmark.WithExtensions(exts...),
+		goldmark.WithParserOptions(
+			parser.WithAutoHeadingID(), // Generate heading IDs
+		),
+	)
+
+	return &service{
+		md: md,
+	}
+}
+
+// parse runs the configured goldmark parser over content and returns the
+// document root. The error result is always nil in this implementation; it
+// exists so callers share one error-handling shape.
+func (s *service) parse(content []byte) (gast.Node, error) {
+	return s.md.Parser().Parse(text.NewReader(content)), nil
+}
+
+// ExtractTags parses content and returns the names of all TagNodes found,
+// in document order, with exact duplicates removed (case is significant).
+func (s *service) ExtractTags(content []byte) ([]string, error) {
+	doc, err := s.parse(content)
+	if err != nil {
+		return nil, err
+	}
+
+	var found []string
+
+	// Visitor: record the name of every tag node on the way down.
+	collect := func(n gast.Node, entering bool) (gast.WalkStatus, error) {
+		if entering {
+			if tag, isTag := n.(*mast.TagNode); isTag {
+				found = append(found, string(tag.Tag))
+			}
+		}
+		return gast.WalkContinue, nil
+	}
+	if walkErr := gast.Walk(doc, collect); walkErr != nil {
+		return nil, walkErr
+	}
+
+	// Drop repeated tags; differently-cased spellings stay distinct.
+	return uniquePreserveCase(found), nil
+}
+
+// ExtractProperties parses content and reports which kinds of nodes it
+// contains: links, code (blocks or spans), task checkboxes, and whether any
+// task checkbox is unchecked.
+func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
+	doc, err := s.parse(content)
+	if err != nil {
+		return nil, err
+	}
+
+	props := &storepb.MemoPayload_Property{}
+
+	inspect := func(n gast.Node, entering bool) (gast.WalkStatus, error) {
+		if entering {
+			switch n.Kind() {
+			case east.KindTaskCheckBox:
+				props.HasTaskList = true
+				// Any unchecked box marks the content as having open tasks.
+				if box, isBox := n.(*east.TaskCheckBox); isBox && !box.IsChecked {
+					props.HasIncompleteTasks = true
+				}
+			case gast.KindCodeSpan, gast.KindCodeBlock, gast.KindFencedCodeBlock:
+				props.HasCode = true
+			case gast.KindLink:
+				props.HasLink = true
+			}
+		}
+		return gast.WalkContinue, nil
+	}
+	if walkErr := gast.Walk(doc, inspect); walkErr != nil {
+		return nil, walkErr
+	}
+
+	return props, nil
+}
+
+// RenderMarkdown parses content and serializes the resulting AST back to
+// markdown text using a fresh renderer.MarkdownRenderer.
+func (s *service) RenderMarkdown(content []byte) (string, error) {
+	doc, err := s.parse(content)
+	if err != nil {
+		return "", err
+	}
+
+	return renderer.NewMarkdownRenderer().Render(doc, content), nil
+}
+
+// RenderHTML renders markdown content to HTML using goldmark's built-in HTML renderer.
+func (s *service) RenderHTML(content []byte) (string, error) {
+	var buf bytes.Buffer
+	if err := s.md.Convert(content, &buf); err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}
+
+// GenerateSnippet creates a plain text summary from markdown content.
+//
+// Text nodes are concatenated in document order. Code blocks and code spans
+// are skipped entirely, and consecutive paragraphs, headings and list items
+// are separated by a single space. If the collected text is longer than
+// maxLength characters (runes) it is cut by truncateAtWord, which appends
+// " ..."; the result is trimmed of surrounding whitespace.
+//
+// A negative maxLength is treated as 0 instead of panicking.
+func (s *service) GenerateSnippet(content []byte, maxLength int) (string, error) {
+	// A UTF-8 encoded rune occupies at most 4 bytes (utf8.UTFMax). The early
+	// exit below works on the buffer's byte length, so it must be scaled by
+	// this factor to guarantee that more than maxLength runes were collected.
+	const maxBytesPerRune = 4
+
+	if maxLength < 0 {
+		maxLength = 0
+	}
+
+	root, err := s.parse(content)
+	if err != nil {
+		return "", err
+	}
+
+	var buf strings.Builder
+	var lastNodeWasBlock bool
+
+	err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
+		if entering {
+			// Skip code blocks and code spans entirely
+			switch n.Kind() {
+			case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
+				return gast.WalkSkipChildren, nil
+			default:
+				// Continue walking for other node types
+			}
+
+			// Add space before block elements (except first)
+			switch n.Kind() {
+			case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
+				if buf.Len() > 0 && lastNodeWasBlock {
+					buf.WriteByte(' ')
+				}
+			default:
+				// No space needed for other node types
+			}
+		}
+
+		if !entering {
+			// Mark that we just exited a block element
+			switch n.Kind() {
+			case gast.KindParagraph, gast.KindHeading, gast.KindListItem:
+				lastNodeWasBlock = true
+			default:
+				// Not a block element
+			}
+			return gast.WalkContinue, nil
+		}
+
+		lastNodeWasBlock = false
+
+		// Only extract plain text nodes
+		if textNode, ok := n.(*gast.Text); ok {
+			segment := textNode.Segment
+			buf.Write(segment.Value(content))
+
+			// Add space if this is a soft line break
+			if textNode.SoftLineBreak() {
+				buf.WriteByte(' ')
+			}
+		}
+
+		// Stop walking once the buffer is certain to hold more than maxLength
+		// runes (we'll truncate precisely later). Comparing raw bytes against
+		// a small multiple of maxLength would stop too early for multi-byte
+		// text and silently drop content without the " ..." marker.
+		if buf.Len()/maxBytesPerRune > maxLength {
+			return gast.WalkStop, nil
+		}
+
+		return gast.WalkContinue, nil
+	})
+
+	if err != nil {
+		return "", err
+	}
+
+	snippet := buf.String()
+
+	// Truncate at word boundary if needed. The byte-length test is only a
+	// cheap pre-filter; truncateAtWord itself compares rune counts.
+	if len(snippet) > maxLength {
+		snippet = truncateAtWord(snippet, maxLength)
+	}
+
+	return strings.TrimSpace(snippet), nil
+}
+
+// ValidateContent attempts to parse content and returns any error from parse.
+// The parse helper in this file always reports a nil error, so at present
+// every input is accepted.
+func (s *service) ValidateContent(content []byte) error {
+	if _, err := s.parse(content); err != nil {
+		return err
+	}
+	return nil
+}
+
+// ExtractAll extracts tags, properties, and references in a single parse for efficiency.
+func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
+	root, err := s.parse(content)
+	if err != nil {
+		return nil, err
+	}
+
+	data := &ExtractedData{
+		Tags:     []string{},
+		Property: &storepb.MemoPayload_Property{},
+	}
+
+	// Single walk to collect all data
+	err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
+		if !entering {
+			return gast.WalkContinue, nil
+		}
+
+		// Extract tags
+		if tagNode, ok := n.(*mast.TagNode); ok {
+			data.Tags = append(data.Tags, string(tagNode.Tag))
+		}
+
+		// Extract properties based on node kind
+		switch n.Kind() {
+		case gast.KindLink:
+			data.Property.HasLink = true
+
+		case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan:
+			data.Property.HasCode = true
+
+		case east.KindTaskCheckBox:
+			data.Property.HasTaskList = true
+			if checkBox, ok := n.(*east.TaskCheckBox); ok {
+				if !checkBox.IsChecked {
+					data.Property.HasIncompleteTasks = true
+				}
+			}
+		default:
+			// No special handling for other node types
+		}
+
+		return gast.WalkContinue, nil
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	// Deduplicate tags while preserving original case
+	data.Tags = uniquePreserveCase(data.Tags)
+
+	return data, nil
+}
+
+// RenameTag parses content, replaces the name of every TagNode whose tag is
+// exactly oldTag (case-sensitive) with newTag, and returns the AST rendered
+// back to markdown by renderer.MarkdownRenderer.
+func (s *service) RenameTag(content []byte, oldTag, newTag string) (string, error) {
+	doc, err := s.parse(content)
+	if err != nil {
+		return "", err
+	}
+
+	// Visitor: mutate matching tag nodes in place.
+	rename := func(n gast.Node, entering bool) (gast.WalkStatus, error) {
+		tag, isTag := n.(*mast.TagNode)
+		if entering && isTag && string(tag.Tag) == oldTag {
+			tag.Tag = []byte(newTag)
+		}
+		return gast.WalkContinue, nil
+	}
+	if walkErr := gast.Walk(doc, rename); walkErr != nil {
+		return "", walkErr
+	}
+
+	// Serialize the modified tree; no second parse is needed.
+	return renderer.NewMarkdownRenderer().Render(doc, content), nil
+}
+
+// uniquePreserveCase returns the distinct strings of strs in first-seen order.
+// Comparison is exact, so spellings that differ only in case are all kept.
+// The result is nil when strs contains no elements.
+func uniquePreserveCase(strs []string) []string {
+	var unique []string
+	seen := map[string]bool{}
+
+	for _, candidate := range strs {
+		if seen[candidate] {
+			continue
+		}
+		seen[candidate] = true
+		unique = append(unique, candidate)
+	}
+
+	return unique
+}
+
+// truncateAtWord truncates a string at the last word boundary before maxLength.
+// maxLength is treated as a rune (character) count to properly handle UTF-8 multi-byte characters.
+//
+// If s has at most maxLength runes it is returned unchanged. Otherwise the
+// first maxLength runes are kept, cut back to the last ASCII whitespace
+// character (space, tab, newline, carriage return) when one exists past the
+// first byte, and " ..." is appended. A negative maxLength is treated as 0 so
+// that the slice expression below cannot panic.
+func truncateAtWord(s string, maxLength int) string {
+	if maxLength < 0 {
+		maxLength = 0
+	}
+
+	// Convert to runes to properly handle multi-byte UTF-8 characters
+	runes := []rune(s)
+	if len(runes) <= maxLength {
+		return s
+	}
+
+	// Truncate to max length (by character count, not byte count)
+	truncated := string(runes[:maxLength])
+
+	// Find last space to avoid cutting in the middle of a word. The index is
+	// a byte offset, which is safe to slice on because every separator in the
+	// set is a single-byte ASCII character.
+	lastSpace := strings.LastIndexAny(truncated, " \t\n\r")
+	if lastSpace > 0 {
+		truncated = truncated[:lastSpace]
+	}
+
+	return truncated + " ..."
+}
--- a/plugin/markdown/markdown_test.go
+++ b/plugin/markdown/markdown_test.go
@@ -0,0 +1,448 @@
+package markdown
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestNewService checks that the default constructor yields a usable value.
+func TestNewService(t *testing.T) {
+	assert.NotNil(t, NewService())
+}
+
+// TestValidateContent feeds several markdown documents to ValidateContent and
+// checks whether an error is reported.
+func TestValidateContent(t *testing.T) {
+	svc := NewService()
+
+	cases := []struct {
+		name    string
+		content string
+		wantErr bool
+	}{
+		{name: "valid markdown", content: "# Hello\n\nThis is **bold** text.", wantErr: false},
+		{name: "empty content", content: "", wantErr: false},
+		{name: "complex markdown", content: "# Title\n\n- List item 1\n- List item 2\n\n```go\ncode block\n```", wantErr: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			err := svc.ValidateContent([]byte(tc.content))
+			if !tc.wantErr {
+				assert.NoError(t, err)
+				return
+			}
+			assert.Error(t, err)
+		})
+	}
+}
+
+// TestGenerateSnippet covers plain text, inline formatting, truncation,
+// block joining, code removal and list items.
+func TestGenerateSnippet(t *testing.T) {
+	svc := NewService()
+
+	cases := []struct {
+		name    string
+		content string
+		limit   int
+		want    string
+	}{
+		{name: "simple text", content: "Hello world", limit: 100, want: "Hello world"},
+		{name: "text with formatting", content: "This is **bold** and *italic* text.", limit: 100, want: "This is bold and italic text."},
+		{name: "truncate long text", content: "This is a very long piece of text that should be truncated at a word boundary.", limit: 30, want: "This is a very long piece of ..."},
+		{name: "heading and paragraph", content: "# My Title\n\nThis is the first paragraph.", limit: 100, want: "My Title This is the first paragraph."},
+		{name: "code block removed", content: "Text before\n\n```go\ncode\n```\n\nText after", limit: 100, want: "Text before Text after"},
+		{name: "list items", content: "- Item 1\n- Item 2\n- Item 3", limit: 100, want: "Item 1 Item 2 Item 3"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := svc.GenerateSnippet([]byte(tc.content), tc.limit)
+			require.NoError(t, err)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestExtractProperties checks each boolean property flag against small
+// documents. Flags omitted from a case are expected to be false.
+func TestExtractProperties(t *testing.T) {
+	cases := []struct {
+		name       string
+		content    string
+		link       bool
+		code       bool
+		tasks      bool
+		incomplete bool
+	}{
+		{name: "plain text", content: "Just plain text"},
+		{name: "with link", content: "Check out [this link](https://example.com)", link: true},
+		{name: "with inline code", content: "Use `console.log()` to debug", code: true},
+		{name: "with code block", content: "```go\nfunc main() {}\n```", code: true},
+		{name: "with completed task", content: "- [x] Completed task", tasks: true},
+		{name: "with incomplete task", content: "- [ ] Todo item", tasks: true, incomplete: true},
+		{name: "mixed tasks", content: "- [x] Done\n- [ ] Not done", tasks: true, incomplete: true},
+		{name: "everything", content: "# Title\n\n[Link](url)\n\n`code`\n\n- [ ] Task", link: true, code: true, tasks: true, incomplete: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := NewService().ExtractProperties([]byte(tc.content))
+			require.NoError(t, err)
+
+			assert.Equal(t, tc.link, got.HasLink, "HasLink")
+			assert.Equal(t, tc.code, got.HasCode, "HasCode")
+			assert.Equal(t, tc.tasks, got.HasTaskList, "HasTaskList")
+			assert.Equal(t, tc.incomplete, got.HasIncompleteTasks, "HasIncompleteTasks")
+		})
+	}
+}
+
+// TestExtractTags exercises tag extraction with and without the tag extension,
+// including punctuation boundaries, headings, and CJK tag names.
+func TestExtractTags(t *testing.T) {
+	cases := []struct {
+		name    string
+		content string
+		withExt bool
+		want    []string
+	}{
+		{name: "no tags", content: "Just plain text", withExt: false, want: []string{}},
+		{name: "single tag", content: "Text with #tag", withExt: true, want: []string{"tag"}},
+		{name: "multiple tags", content: "Text with #tag1 and #tag2", withExt: true, want: []string{"tag1", "tag2"}},
+		{name: "duplicate tags", content: "#work is important. #Work #WORK", withExt: true, want: []string{"work", "Work", "WORK"}},
+		{name: "tags with hyphens and underscores", content: "Tags: #work-notes #2024_plans", withExt: true, want: []string{"work-notes", "2024_plans"}},
+		{name: "tags at end of sentence", content: "This is important #urgent.", withExt: true, want: []string{"urgent"}},
+		{name: "headings not tags", content: "## Heading\n\n# Title\n\nText with #realtag", withExt: true, want: []string{"realtag"}},
+		{name: "numeric tag", content: "Issue #123", withExt: true, want: []string{"123"}},
+		{name: "tag in list", content: "- Item 1 #todo\n- Item 2 #done", withExt: true, want: []string{"todo", "done"}},
+		{name: "no extension enabled", content: "Text with #tag", withExt: false, want: []string{}},
+		{name: "Chinese tag", content: "Text with #测试", withExt: true, want: []string{"测试"}},
+		{name: "Chinese tag followed by punctuation", content: "Text #测试。 More text", withExt: true, want: []string{"测试"}},
+		{name: "mixed Chinese and ASCII tag", content: "#测试test123 content", withExt: true, want: []string{"测试test123"}},
+		{name: "Japanese tag", content: "#日本語 content", withExt: true, want: []string{"日本語"}},
+		{name: "Korean tag", content: "#한국어 content", withExt: true, want: []string{"한국어"}},
+		{name: "hierarchical tag with Chinese", content: "#work/测试/项目", withExt: true, want: []string{"work/测试/项目"}},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Only pass the tag option when the case asks for it.
+			var opts []Option
+			if tc.withExt {
+				opts = append(opts, WithTagExtension())
+			}
+			svc := NewService(opts...)
+
+			got, err := svc.ExtractTags([]byte(tc.content))
+			require.NoError(t, err)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
+// TestUniquePreserveCase checks de-duplication of exact repeats while
+// differently-cased spellings are retained.
+func TestUniquePreserveCase(t *testing.T) {
+	cases := []struct {
+		name string
+		in   []string
+		want []string
+	}{
+		{name: "empty", in: []string{}, want: []string{}},
+		{name: "unique items", in: []string{"tag1", "tag2", "tag3"}, want: []string{"tag1", "tag2", "tag3"}},
+		{name: "duplicates", in: []string{"tag", "TAG", "Tag"}, want: []string{"tag", "TAG", "Tag"}},
+		{name: "mixed", in: []string{"Work", "work", "Important", "work"}, want: []string{"Work", "work", "Important"}},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.ElementsMatch(t, tc.want, uniquePreserveCase(tc.in))
+		})
+	}
+}
+
+// TestTruncateAtWord checks rune-based truncation for Latin, CJK and mixed
+// input, with and without a word boundary to cut at.
+func TestTruncateAtWord(t *testing.T) {
+	cases := []struct {
+		name  string
+		in    string
+		limit int
+		want  string
+	}{
+		{name: "no truncation needed", in: "short", limit: 10, want: "short"},
+		{name: "exact length", in: "exactly ten", limit: 11, want: "exactly ten"},
+		{name: "truncate at word", in: "this is a long sentence", limit: 10, want: "this is a ..."},
+		{name: "truncate very long word", in: "supercalifragilisticexpialidocious", limit: 10, want: "supercalif ..."},
+		{name: "CJK characters without spaces", in: "这是一个很长的中文句子没有空格的情况下也要正确处理", limit: 15, want: "这是一个很长的中文句子没有空格 ..."},
+		{name: "mixed CJK and Latin", in: "这是中文mixed with English文字", limit: 10, want: "这是中文mixed ..."},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, truncateAtWord(tc.in, tc.limit))
+		})
+	}
+}
+
+// Benchmark tests.
+
+// BenchmarkGenerateSnippet measures GenerateSnippet with a 200-character
+// limit on a fixed document containing headings, lists, a link and code.
+func BenchmarkGenerateSnippet(b *testing.B) {
+	doc := []byte(`# Large Document
+
+This is a large document with multiple paragraphs and formatting.
+
+## Section 1
+
+Here is some **bold** text and *italic* text with [links](https://example.com).
+
+- List item 1
+- List item 2
+- List item 3
+
+## Section 2
+
+More content here with ` + "`inline code`" + ` and other elements.
+
+` + "```go\nfunc example() {\n    return true\n}\n```")
+	svc := NewService()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if _, err := svc.GenerateSnippet(doc, 200); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkExtractProperties measures ExtractProperties on a short document
+// that contains a link, inline code and both task states.
+func BenchmarkExtractProperties(b *testing.B) {
+	doc := []byte("# Title\n\n[Link](url)\n\n`code`\n\n- [ ] Task\n- [x] Done")
+	svc := NewService()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if _, err := svc.ExtractProperties(doc); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
--- a/plugin/markdown/parser/tag.go
+++ b/plugin/markdown/parser/tag.go
@@ -0,0 +1,139 @@
+package parser
+
+import (
+	"unicode"
+	"unicode/utf8"
+
+	gast "github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+
+	mast "github.com/usememos/memos/plugin/markdown/ast"
+)
+
+const (
+	// MaxTagLength defines the maximum number of runes allowed in a tag.
+	// The parser stops consuming once this many runes are collected, so a
+	// longer run of tag characters yields a tag of exactly this length.
+	MaxTagLength = 100
+)
+
+// tagParser is the stateless inline parser for #tag syntax.
+type tagParser struct{}
+
+// NewTagParser creates a new inline parser for #tag syntax.
+// The result is returned as goldmark's parser.InlineParser interface.
+func NewTagParser() parser.InlineParser {
+	return &tagParser{}
+}
+
+// Trigger returns the characters that trigger this parser: only '#'.
+func (*tagParser) Trigger() []byte {
+	return []byte{'#'}
+}
+
+// isValidTagRune reports whether r may appear inside a tag name.
+//
+// Accepted runes:
+//   - the structural ASCII characters '_' (snake_case), '-' (kebab-case),
+//     '/' (hierarchical tags such as category/subcategory) and '&'
+//     (compound tags such as science&tech);
+//   - any Unicode letter, in any script (unicode.IsLetter);
+//   - any Unicode number (unicode.IsNumber, which covers decimal digits as
+//     well as letter-like and other numerals);
+//   - any Unicode symbol (unicode.IsSymbol, i.e. the math, currency, modifier
+//     and other-symbol categories), which is what admits emoji.
+//
+// Everything else, including whitespace and punctuation, is rejected.
+func isValidTagRune(r rune) bool {
+	switch r {
+	case '_', '-', '/', '&':
+		return true
+	}
+	return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsSymbol(r)
+}
+
+// Parse parses #tag syntax using Unicode-aware validation.
+// Tags support international characters and follow these rules:
+//   - Must start with # followed by valid tag characters
+//   - Valid characters: Unicode letters, Unicode digits, underscore (_), hyphen (-), forward slash (/)
+//   - Maximum length: 100 runes (Unicode characters)
+//   - Stops at: whitespace, punctuation, or other invalid characters
+func (*tagParser) Parse(_ gast.Node, block text.Reader, _ parser.Context) gast.Node {
+	line, _ := block.PeekLine()
+
+	// Must start with #
+	if len(line) == 0 || line[0] != '#' {
+		return nil
+	}
+
+	// Check if it's a heading (## or space after #)
+	if len(line) > 1 {
+		if line[1] == '#' {
+			// It's a heading (##), not a tag
+			return nil
+		}
+		if line[1] == ' ' {
+			// Space after # - heading or just a hash
+			return nil
+		}
+	} else {
+		// Just a lone #
+		return nil
+	}
+
+	// Parse tag using UTF-8 aware rune iteration
+	tagStart := 1
+	pos := tagStart
+	runeCount := 0
+
+	for pos < len(line) {
+		r, size := utf8.DecodeRune(line[pos:])
+
+		// Stop at invalid UTF-8
+		if r == utf8.RuneError && size == 1 {
+			break
+		}
+
+		// Validate character using Unicode categories
+		if !isValidTagRune(r) {
+			break
+		}
+
+		// Enforce max length (by rune count, not byte count)
+		runeCount++
+		if runeCount > MaxTagLength {
+			break
+		}
+
+		pos += size
+	}
+
+	// Must have at least one character after #
+	if pos <= tagStart {
+		return nil
+	}
+
+	// Extract tag (without #)
+	tagName := line[tagStart:pos]
+
+	// Make a copy of the tag name
+	tagCopy := make([]byte, len(tagName))
+	copy(tagCopy, tagName)
+
+	// Advance reader
+	block.Advance(pos)
+
+	// Create node
+	node := &mast.TagNode{
+		Tag: tagCopy,
+	}
+
+	return node
+}
--- a/plugin/markdown/parser/tag_test.go
+++ b/plugin/markdown/parser/tag_test.go
@@ -0,0 +1,251 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+
+	mast "github.com/usememos/memos/plugin/markdown/ast"
+)
+
+// TestTagParser runs the tag parser directly on single-line inputs. Cases
+// with wantNode false expect no node; the others expect a TagNode whose name
+// equals wantTag.
+func TestTagParser(t *testing.T) {
+	cases := []struct {
+		name     string
+		input    string
+		wantTag  string
+		wantNode bool
+	}{
+		{name: "basic tag", input: "#tag", wantTag: "tag", wantNode: true},
+		{name: "tag with hyphen", input: "#work-notes", wantTag: "work-notes", wantNode: true},
+		{name: "tag with ampersand", input: "#science&tech", wantTag: "science&tech", wantNode: true},
+		{name: "tag with underscore", input: "#2024_plans", wantTag: "2024_plans", wantNode: true},
+		{name: "numeric tag", input: "#123", wantTag: "123", wantNode: true},
+		{name: "tag followed by space", input: "#tag ", wantTag: "tag", wantNode: true},
+		{name: "tag followed by punctuation", input: "#tag.", wantTag: "tag", wantNode: true},
+		{name: "tag in sentence", input: "#important task", wantTag: "important", wantNode: true},
+		{name: "heading (##)", input: "## Heading", wantTag: "", wantNode: false},
+		{name: "space after hash", input: "# heading", wantTag: "", wantNode: false},
+		{name: "lone hash", input: "#", wantTag: "", wantNode: false},
+		{name: "hash with space", input: "# ", wantTag: "", wantNode: false},
+		{name: "special characters", input: "#tag@special", wantTag: "tag", wantNode: true},
+		{name: "mixed case", input: "#WorkNotes", wantTag: "WorkNotes", wantNode: true},
+		{name: "hierarchical tag with slash", input: "#tag1/subtag", wantTag: "tag1/subtag", wantNode: true},
+		{name: "hierarchical tag with multiple levels", input: "#tag1/subtag/subtag2", wantTag: "tag1/subtag/subtag2", wantNode: true},
+		{name: "hierarchical tag followed by space", input: "#work/notes ", wantTag: "work/notes", wantNode: true},
+		{name: "hierarchical tag followed by punctuation", input: "#project/2024.", wantTag: "project/2024", wantNode: true},
+		{name: "hierarchical tag with numbers and dashes", input: "#work-log/2024/q1", wantTag: "work-log/2024/q1", wantNode: true},
+		{name: "Chinese characters", input: "#测试", wantTag: "测试", wantNode: true},
+		{name: "Chinese tag followed by space", input: "#测试 some text", wantTag: "测试", wantNode: true},
+		{name: "Chinese tag followed by punctuation", input: "#测试。", wantTag: "测试", wantNode: true},
+		{name: "mixed Chinese and ASCII", input: "#测试test123", wantTag: "测试test123", wantNode: true},
+		{name: "Japanese characters", input: "#テスト", wantTag: "テスト", wantNode: true},
+		{name: "Korean characters", input: "#테스트", wantTag: "테스트", wantNode: true},
+		{name: "emoji", input: "#test🚀", wantTag: "test🚀", wantNode: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			node := NewTagParser().Parse(nil, text.NewReader([]byte(tc.input)), parser.NewContext())
+
+			if !tc.wantNode {
+				assert.Nil(t, node, "Expected tag NOT to be parsed")
+				return
+			}
+
+			require.NotNil(t, node, "Expected tag to be parsed")
+			require.IsType(t, &mast.TagNode{}, node)
+
+			tag, isTag := node.(*mast.TagNode)
+			require.True(t, isTag, "Expected node to be *mast.TagNode")
+			assert.Equal(t, tc.wantTag, string(tag.Tag))
+		})
+	}
+}
+
+// TestTagParser_Trigger verifies that '#' is the parser's only trigger byte.
+func TestTagParser_Trigger(t *testing.T) {
+	assert.Equal(t, []byte{'#'}, NewTagParser().Trigger())
+}
+
+// TestTagParser_MultipleTags checks that, after one tag has been parsed, the
+// reader is positioned so that a following tag on the same line parses too.
+func TestTagParser_MultipleTags(t *testing.T) {
+	tagParser := NewTagParser()
+	reader := text.NewReader([]byte("#tag1 #tag2"))
+	pctx := parser.NewContext()
+
+	// First tag.
+	first := tagParser.Parse(nil, reader, pctx)
+	require.NotNil(t, first)
+	firstTag, ok := first.(*mast.TagNode)
+	require.True(t, ok, "Expected node1 to be *mast.TagNode")
+	assert.Equal(t, "tag1", string(firstTag.Tag))
+
+	// Step over the separating space.
+	reader.Advance(1)
+
+	// Second tag.
+	second := tagParser.Parse(nil, reader, pctx)
+	require.NotNil(t, second)
+	secondTag, ok := second.(*mast.TagNode)
+	require.True(t, ok, "Expected node2 to be *mast.TagNode")
+	assert.Equal(t, "tag2", string(secondTag.Tag))
+}
+
+// TestTagNode_Kind verifies that a TagNode reports KindTag.
+func TestTagNode_Kind(t *testing.T) {
+	tag := &mast.TagNode{Tag: []byte("test")}
+	assert.Equal(t, mast.KindTag, tag.Kind())
+}
+
+// TestTagNode_Dump verifies that dumping a TagNode does not panic.
+func TestTagNode_Dump(t *testing.T) {
+	tag := &mast.TagNode{Tag: []byte("test")}
+	dump := func() { tag.Dump([]byte("#test"), 0) }
+
+	// Should not panic
+	assert.NotPanics(t, dump)
+}
--- a/plugin/markdown/renderer/markdown_renderer.go
+++ b/plugin/markdown/renderer/markdown_renderer.go
@@ -0,0 +1,266 @@
+package renderer
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+
+	gast "github.com/yuin/goldmark/ast"
+	east "github.com/yuin/goldmark/extension/ast"
+
+	mast "github.com/usememos/memos/plugin/markdown/ast"
+)
+
+// MarkdownRenderer renders goldmark AST back to markdown text.
+//
+// A renderer owns a single output buffer that every render helper appends
+// to. Render resets that buffer before each run, so one instance can be
+// reused for consecutive documents.
+// NOTE(review): nothing here synchronizes access to buf, so sharing one
+// instance between goroutines looks unsafe — confirm before doing so.
+type MarkdownRenderer struct {
+	// buf accumulates the markdown produced during the current Render call.
+	buf *bytes.Buffer
+}
+
+// NewMarkdownRenderer returns a renderer backed by a fresh, empty output buffer.
+func NewMarkdownRenderer() *MarkdownRenderer {
+	r := new(MarkdownRenderer)
+	r.buf = new(bytes.Buffer)
+	return r
+}
+
+// Render renders the AST node to markdown and returns the result.
+//
+// The internal buffer is cleared first, so the returned string contains only
+// the output produced for node. source must be the byte slice the AST was
+// parsed from, because text segments are resolved against it. A nil node
+// yields an empty string instead of panicking on the first child lookup.
+func (r *MarkdownRenderer) Render(node gast.Node, source []byte) string {
+	r.buf.Reset()
+	if node == nil {
+		return ""
+	}
+	r.renderNode(node, source, 0)
+	return r.buf.String()
+}
+
+// renderNode writes the markdown for one node, recursing into its children
+// where the node kind has any.
+//
+// Block-level kinds (paragraph, heading, blockquote, list, thematic break,
+// table) are followed by a blank line when another sibling comes after them.
+// Node kinds without a dedicated case only have their children rendered.
+// NOTE(review): a childless node kind that is not listed below therefore
+// produces no output at all — confirm that no such kind (e.g. raw HTML) can
+// reach this renderer.
+func (r *MarkdownRenderer) renderNode(node gast.Node, source []byte, depth int) {
+	// blockGap emits the blank line that separates this block from the next one.
+	blockGap := func() {
+		if node.NextSibling() != nil {
+			r.buf.WriteString("\n\n")
+		}
+	}
+	// wrap surrounds the rendered children of parent with the same marker on both sides.
+	wrap := func(parent gast.Node, marker string) {
+		r.buf.WriteString(marker)
+		r.renderChildren(parent, source, depth)
+		r.buf.WriteString(marker)
+	}
+	// linkTail closes a link/image label and writes its destination and optional title.
+	linkTail := func(destination, title []byte) {
+		r.buf.WriteString("](")
+		r.buf.Write(destination)
+		if len(title) > 0 {
+			r.buf.WriteString(` "`)
+			r.buf.Write(title)
+			r.buf.WriteString(`"`)
+		}
+		r.buf.WriteString(")")
+	}
+
+	switch n := node.(type) {
+	case *gast.Document:
+		r.renderChildren(n, source, depth)
+
+	case *gast.Paragraph:
+		r.renderChildren(n, source, depth)
+		blockGap()
+
+	case *gast.Text:
+		// The text itself lives in the source; the node only carries its segment.
+		r.buf.Write(n.Segment.Value(source))
+		switch {
+		case n.SoftLineBreak():
+			r.buf.WriteByte('\n')
+		case n.HardLineBreak():
+			r.buf.WriteString("  \n")
+		}
+
+	case *gast.CodeSpan:
+		wrap(n, "`")
+
+	case *gast.Emphasis:
+		// Level 2 is strong emphasis; every other level is written with a single asterisk.
+		marker := "*"
+		if n.Level == 2 {
+			marker = "**"
+		}
+		wrap(n, marker)
+
+	case *gast.Link:
+		r.buf.WriteString("[")
+		r.renderChildren(n, source, depth)
+		linkTail(n.Destination, n.Title)
+
+	case *gast.AutoLink:
+		// Only e-mail autolinks are wrapped in angle brackets; URLs are written bare.
+		isEmail := n.AutoLinkType == gast.AutoLinkEmail
+		if isEmail {
+			r.buf.WriteString("<")
+		}
+		r.buf.Write(n.URL(source))
+		if isEmail {
+			r.buf.WriteString(">")
+		}
+
+	case *gast.Image:
+		r.buf.WriteString("![")
+		r.renderChildren(n, source, depth)
+		linkTail(n.Destination, n.Title)
+
+	case *gast.Heading:
+		r.buf.WriteString(strings.Repeat("#", n.Level) + " ")
+		r.renderChildren(n, source, depth)
+		blockGap()
+
+	case *gast.CodeBlock, *gast.FencedCodeBlock:
+		// renderCodeBlock writes its own trailing blank line.
+		r.renderCodeBlock(n, source)
+
+	case *gast.Blockquote:
+		r.renderBlockquote(n, source, depth)
+		blockGap()
+
+	case *gast.List:
+		r.renderChildren(n, source, depth)
+		blockGap()
+
+	case *gast.ListItem:
+		r.renderListItem(n, source, depth)
+
+	case *gast.ThematicBreak:
+		r.buf.WriteString("---")
+		blockGap()
+
+	case *east.Strikethrough:
+		wrap(n, "~~")
+
+	case *east.TaskCheckBox:
+		box := "[ ] "
+		if n.IsChecked {
+			box = "[x] "
+		}
+		r.buf.WriteString(box)
+
+	case *east.Table:
+		r.renderTable(n, source)
+		blockGap()
+
+	// Custom Memos nodes
+	case *mast.TagNode:
+		// Tag holds the name without its '#' prefix, so put it back.
+		r.buf.WriteByte('#')
+		r.buf.Write(n.Tag)
+
+	default:
+		// Unknown kinds contribute nothing themselves; only their children are rendered.
+		r.renderChildren(n, source, depth)
+	}
+}
+
+// renderChildren renders every direct child of node, in order, one depth
+// level below the given depth.
+func (r *MarkdownRenderer) renderChildren(node gast.Node, source []byte, depth int) {
+	for c := node.FirstChild(); c != nil; c = c.NextSibling() {
+		r.renderNode(c, source, depth+1)
+	}
+}
+
+// renderCodeBlock renders a code block.
+func (r *MarkdownRenderer) renderCodeBlock(node gast.Node, source []byte) {
+	if fenced, ok := node.(*gast.FencedCodeBlock); ok {
+		// Fenced code block with language
+		r.buf.WriteString("```")
+		if lang := fenced.Language(source); len(lang) > 0 {
+			r.buf.Write(lang)
+		}
+		r.buf.WriteByte('\n')
+
+		// Write all lines
+		lines := fenced.Lines()
+		for i := 0; i < lines.Len(); i++ {
+			line := lines.At(i)
+			r.buf.Write(line.Value(source))
+		}
+
+		r.buf.WriteString("```")
+		if node.NextSibling() != nil {
+			r.buf.WriteString("\n\n")
+		}
+	} else if codeBlock, ok := node.(*gast.CodeBlock); ok {
+		// Indented code block
+		lines := codeBlock.Lines()
+		for i := 0; i < lines.Len(); i++ {
+			line := lines.At(i)
+			r.buf.WriteString("    ")
+			r.buf.Write(line.Value(source))
+		}
+		if node.NextSibling() != nil {
+			r.buf.WriteString("\n\n")
+		}
+	}
+}
+
+// renderBlockquote renders a blockquote with "> " prefix.
+func (r *MarkdownRenderer) renderBlockquote(node *gast.Blockquote, source []byte, depth int) {
+	// Create a temporary buffer for the blockquote content
+	tempBuf := &bytes.Buffer{}
+	tempRenderer := &MarkdownRenderer{buf: tempBuf}
+	tempRenderer.renderChildren(node, source, depth)
+
+	// Add "> " prefix to each line
+	content := tempBuf.String()
+	lines := strings.Split(strings.TrimRight(content, "\n"), "\n")
+	for i, line := range lines {
+		r.buf.WriteString("> ")
+		r.buf.WriteString(line)
+		if i < len(lines)-1 {
+			r.buf.WriteByte('\n')
+		}
+	}
+}
+
+// renderListItem renders a list item with proper indentation and markers.
+//
+// Items whose parent is not a list are rendered as their bare children.
+// Otherwise the item is written as optional indentation, a marker ("- " for
+// bullet lists, "N. " for ordered lists), its content, and a newline when
+// another item follows.
+//
+// The ordered-list number is derived from the item's position within its
+// list rather than by incrementing list.Start, so rendering never mutates
+// the AST and the same document can be rendered repeatedly with the same
+// result.
+func (r *MarkdownRenderer) renderListItem(node *gast.ListItem, source []byte, depth int) {
+	list, ok := node.Parent().(*gast.List)
+	if !ok {
+		r.renderChildren(node, source, depth)
+		return
+	}
+
+	// Top-level items arrive with depth 2 (Document=0, List=1, ListItem=2)
+	// and get no indent; deeper items get two spaces per extra depth level.
+	if depth > 2 {
+		r.buf.WriteString(strings.Repeat("  ", depth-2))
+	}
+
+	if list.IsOrdered() {
+		// Number = list start + count of items preceding this one.
+		ordinal := list.Start
+		for prev := node.PreviousSibling(); prev != nil; prev = prev.PreviousSibling() {
+			ordinal++
+		}
+		fmt.Fprintf(r.buf, "%d. ", ordinal)
+	} else {
+		r.buf.WriteString("- ")
+	}
+
+	// Render content. A tight item keeps its text in a TextBlock, which emits
+	// no line break of its own; when another block (typically a nested list)
+	// follows it, break the line so that block does not end up glued to the
+	// item's text.
+	for child := node.FirstChild(); child != nil; child = child.NextSibling() {
+		r.renderNode(child, source, depth+1)
+		if _, isTextBlock := child.(*gast.TextBlock); isTextBlock && child.NextSibling() != nil {
+			r.buf.WriteByte('\n')
+		}
+	}
+
+	// Separate this item from the next one.
+	if node.NextSibling() != nil {
+		r.buf.WriteByte('\n')
+	}
+}
+
+// renderTable renders the contents of a table.
+//
+// This is a simplified renderer: it only walks the table's children, always
+// at depth 1 regardless of where the table sits, and does not itself emit
+// cell separators, a delimiter row, or alignment markers.
+func (r *MarkdownRenderer) renderTable(table *east.Table, source []byte) {
+	for row := table.FirstChild(); row != nil; row = row.NextSibling() {
+		r.renderNode(row, source, 1)
+	}
+}
--- a/plugin/markdown/renderer/markdown_renderer_test.go
+++ b/plugin/markdown/renderer/markdown_renderer_test.go
@@ -0,0 +1,176 @@
+package renderer
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/extension"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+
+	"github.com/usememos/memos/plugin/markdown/extensions"
+)
+
+// TestMarkdownRenderer parses each input with GFM plus the tag extension,
+// renders the AST back to markdown, and compares it with the expected text.
+func TestMarkdownRenderer(t *testing.T) {
+	md := goldmark.New(
+		goldmark.WithExtensions(extension.GFM, extensions.TagExtension),
+		goldmark.WithParserOptions(parser.WithAutoHeadingID()),
+	)
+
+	// Each case lists: name, input, expected.
+	cases := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			"simple text",
+			"Hello world",
+			"Hello world",
+		},
+		{
+			"paragraph with newlines",
+			"First paragraph\n\nSecond paragraph",
+			"First paragraph\n\nSecond paragraph",
+		},
+		{
+			"emphasis",
+			"This is *italic* and **bold** text",
+			"This is *italic* and **bold** text",
+		},
+		{
+			"headings",
+			"# Heading 1\n\n## Heading 2\n\n### Heading 3",
+			"# Heading 1\n\n## Heading 2\n\n### Heading 3",
+		},
+		{
+			"link",
+			"Check [this link](https://example.com)",
+			"Check [this link](https://example.com)",
+		},
+		{
+			"image",
+			"![alt text](image.png)",
+			"![alt text](image.png)",
+		},
+		{
+			"code inline",
+			"This is `inline code` here",
+			"This is `inline code` here",
+		},
+		{
+			"code block fenced",
+			"```go\nfunc main() {\n}\n```",
+			"```go\nfunc main() {\n}\n```",
+		},
+		{
+			"unordered list",
+			"- Item 1\n- Item 2\n- Item 3",
+			"- Item 1\n- Item 2\n- Item 3",
+		},
+		{
+			"ordered list",
+			"1. First\n2. Second\n3. Third",
+			"1. First\n2. Second\n3. Third",
+		},
+		{
+			"blockquote",
+			"> This is a quote\n> Second line",
+			"> This is a quote\n> Second line",
+		},
+		{
+			"horizontal rule",
+			"Text before\n\n---\n\nText after",
+			"Text before\n\n---\n\nText after",
+		},
+		{
+			"strikethrough",
+			"This is ~~deleted~~ text",
+			"This is ~~deleted~~ text",
+		},
+		{
+			"task list",
+			"- [x] Completed task\n- [ ] Incomplete task",
+			"- [x] Completed task\n- [ ] Incomplete task",
+		},
+		{
+			"tag",
+			"This has #tag in it",
+			"This has #tag in it",
+		},
+		{
+			"multiple tags",
+			"#work #important meeting notes",
+			"#work #important meeting notes",
+		},
+		{
+			// The only case whose output differs from its input: a blank line
+			// is expected between the "## Attendees" heading and the list.
+			"complex mixed content",
+			"# Meeting Notes\n\n**Date**: 2024-01-01\n\n## Attendees\n- Alice\n- Bob\n\n## Discussion\n\nWe discussed #project status.\n\n```python\nprint('hello')\n```",
+			"# Meeting Notes\n\n**Date**: 2024-01-01\n\n## Attendees\n\n- Alice\n- Bob\n\n## Discussion\n\nWe discussed #project status.\n\n```python\nprint('hello')\n```",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			src := []byte(tc.input)
+			doc := md.Parser().Parse(text.NewReader(src))
+			require.NotNil(t, doc)
+
+			got := NewMarkdownRenderer().Render(doc, src)
+
+			// Extra context in the test log when the round trip does not match.
+			if got != tc.expected {
+				t.Logf("Input:    %q", tc.input)
+				t.Logf("Expected: %q", tc.expected)
+				t.Logf("Got:      %q", got)
+			}
+
+			assert.Equal(t, tc.expected, got)
+		})
+	}
+}
+
+// TestMarkdownRendererPreservesStructure is a smoke test: every sample must
+// render to non-empty markdown when parsed and rendered with one shared
+// renderer instance. It does not compare the output with the input.
+func TestMarkdownRendererPreservesStructure(t *testing.T) {
+	md := goldmark.New(
+		goldmark.WithExtensions(extension.GFM, extensions.TagExtension),
+	)
+	shared := NewMarkdownRenderer()
+
+	samples := []string{
+		"# Title\n\nParagraph",
+		"**Bold** and *italic*",
+		"- List\n- Items",
+		"#tag #another",
+		"> Quote",
+	}
+
+	for _, sample := range samples {
+		t.Run(sample, func(t *testing.T) {
+			src := []byte(sample)
+			doc := md.Parser().Parse(text.NewReader(src))
+
+			// Minor formatting differences are tolerated; only emptiness fails.
+			got := shared.Render(doc, src)
+			assert.NotEmpty(t, got)
+		})
+	}
+}