first commit
Some checks failed
Backend Tests / Static Checks (push) Has been cancelled
Backend Tests / Tests (other) (push) Has been cancelled
Backend Tests / Tests (plugin) (push) Has been cancelled
Backend Tests / Tests (server) (push) Has been cancelled
Backend Tests / Tests (store) (push) Has been cancelled
Build Canary Image / build-frontend (push) Has been cancelled
Build Canary Image / build-push (linux/amd64) (push) Has been cancelled
Build Canary Image / build-push (linux/arm64) (push) Has been cancelled
Build Canary Image / merge (push) Has been cancelled
Frontend Tests / Lint (push) Has been cancelled
Frontend Tests / Build (push) Has been cancelled
Proto Linter / Lint Protos (push) Has been cancelled

This commit is contained in:
2026-03-04 06:30:47 +00:00
commit bb402d4ccc
777 changed files with 135661 additions and 0 deletions

View File

@@ -0,0 +1,139 @@
package parser
import (
"unicode"
"unicode/utf8"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
mast "github.com/usememos/memos/plugin/markdown/ast"
)
// MaxTagLength is the upper bound on tag length, measured in runes.
const MaxTagLength = 100
// tagParser is the inline parser for #tag syntax. It has no fields, so it
// carries no state between calls.
type tagParser struct{}

// NewTagParser returns a new inline parser that recognizes #tag syntax.
func NewTagParser() parser.InlineParser {
	return new(tagParser)
}
// Trigger returns the bytes that trigger this parser; for tags that is the
// single byte '#'.
func (*tagParser) Trigger() []byte {
	return []byte("#")
}
// isValidTagRune reports whether r may appear inside a tag.
//
// Accepted runes are:
//   - the structural ASCII characters '_' (snake_case), '-' (kebab-case),
//     '/' (hierarchical tags such as category/subcategory) and '&'
//     (compound tags such as science&tech);
//   - any Unicode letter, in any script;
//   - any Unicode number (unicode.IsNumber, which is wider than decimal digits);
//   - any Unicode symbol (unicode.IsSymbol). This covers emoji in the So
//     category, and also the math, currency and modifier symbol categories,
//     so characters such as '+' and '$' are accepted as well.
//
// Everything else, including whitespace and other punctuation, is rejected.
func isValidTagRune(r rune) bool {
	switch r {
	case '_', '-', '/', '&':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsSymbol(r)
}
// Parse recognizes a #tag at the start of the reader's current line.
//
// On success it advances the reader past the '#' and the tag body and returns
// a TagNode whose Tag holds a private copy of the body (without the '#').
// It returns nil without advancing the reader when no tag is found.
//
// Rules:
//   - The line must begin with '#', followed by at least one more byte.
//   - "##" and "# " are rejected up front; those forms are treated as
//     headings or a bare hash rather than tags.
//   - The tag body is the longest leading run of runes accepted by
//     isValidTagRune, so it ends at whitespace, punctuation or any other
//     rejected rune, and also at the first invalid UTF-8 byte.
//   - The body is capped at MaxTagLength runes (not bytes); anything beyond
//     the cap is left unconsumed.
//   - An empty body yields nil.
func (*tagParser) Parse(_ gast.Node, block text.Reader, _ parser.Context) gast.Node {
	line, _ := block.PeekLine()

	// Need the '#' trigger plus at least one byte after it.
	if len(line) < 2 || line[0] != '#' {
		return nil
	}

	// "##" is a heading and "# " is a heading or a bare hash; neither is a tag.
	switch line[1] {
	case '#', ' ':
		return nil
	}

	// Walk the body rune by rune so multi-byte characters are never split.
	const bodyStart = 1
	end := bodyStart
	for runes := 0; end < len(line) && runes < MaxTagLength; runes++ {
		r, width := utf8.DecodeRune(line[end:])
		// A width of 1 together with RuneError marks a malformed byte, as
		// opposed to a correctly encoded U+FFFD.
		if r == utf8.RuneError && width == 1 {
			break
		}
		if !isValidTagRune(r) {
			break
		}
		end += width
	}

	// Nothing usable followed the '#'.
	if end == bodyStart {
		return nil
	}

	// Copy the body so the node does not alias the reader's buffer.
	tag := append([]byte(nil), line[bodyStart:end]...)

	// Consume the '#' and the tag body.
	block.Advance(end)

	return &mast.TagNode{Tag: tag}
}

View File

@@ -0,0 +1,251 @@
package parser
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
mast "github.com/usememos/memos/plugin/markdown/ast"
)
// TestTagParser feeds single-line inputs to the tag parser and checks either
// the extracted tag text or that the input is rejected.
func TestTagParser(t *testing.T) {
	cases := []struct {
		name   string
		input  string
		want   string // expected tag text; unused when parses is false
		parses bool
	}{
		// Plain ASCII tags.
		{name: "basic tag", input: "#tag", want: "tag", parses: true},
		{name: "tag with hyphen", input: "#work-notes", want: "work-notes", parses: true},
		{name: "tag with ampersand", input: "#science&tech", want: "science&tech", parses: true},
		{name: "tag with underscore", input: "#2024_plans", want: "2024_plans", parses: true},
		{name: "numeric tag", input: "#123", want: "123", parses: true},

		// Tag terminators.
		{name: "tag followed by space", input: "#tag ", want: "tag", parses: true},
		{name: "tag followed by punctuation", input: "#tag.", want: "tag", parses: true},
		{name: "tag in sentence", input: "#important task", want: "important", parses: true},

		// Inputs that are not tags.
		{name: "heading (##)", input: "## Heading"},
		{name: "space after hash", input: "# heading"},
		{name: "lone hash", input: "#"},
		{name: "hash with space", input: "# "},

		{name: "special characters", input: "#tag@special", want: "tag", parses: true},
		{name: "mixed case", input: "#WorkNotes", want: "WorkNotes", parses: true},

		// Hierarchical tags.
		{name: "hierarchical tag with slash", input: "#tag1/subtag", want: "tag1/subtag", parses: true},
		{name: "hierarchical tag with multiple levels", input: "#tag1/subtag/subtag2", want: "tag1/subtag/subtag2", parses: true},
		{name: "hierarchical tag followed by space", input: "#work/notes ", want: "work/notes", parses: true},
		{name: "hierarchical tag followed by punctuation", input: "#project/2024.", want: "project/2024", parses: true},
		{name: "hierarchical tag with numbers and dashes", input: "#work-log/2024/q1", want: "work-log/2024/q1", parses: true},

		// Non-ASCII scripts and emoji.
		{name: "Chinese characters", input: "#测试", want: "测试", parses: true},
		{name: "Chinese tag followed by space", input: "#测试 some text", want: "测试", parses: true},
		{name: "Chinese tag followed by punctuation", input: "#测试。", want: "测试", parses: true},
		{name: "mixed Chinese and ASCII", input: "#测试test123", want: "测试test123", parses: true},
		{name: "Japanese characters", input: "#テスト", want: "テスト", parses: true},
		{name: "Korean characters", input: "#테스트", want: "테스트", parses: true},
		{name: "emoji", input: "#test🚀", want: "test🚀", parses: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			node := NewTagParser().Parse(nil, text.NewReader([]byte(tc.input)), parser.NewContext())

			if !tc.parses {
				assert.Nil(t, node, "Expected tag NOT to be parsed")
				return
			}

			require.NotNil(t, node, "Expected tag to be parsed")
			require.IsType(t, &mast.TagNode{}, node)

			tagNode, ok := node.(*mast.TagNode)
			require.True(t, ok, "Expected node to be *mast.TagNode")
			assert.Equal(t, tc.want, string(tagNode.Tag))
		})
	}
}
func TestTagParser_Trigger(t *testing.T) {
p := NewTagParser()
triggers := p.Trigger()
assert.Equal(t, []byte{'#'}, triggers)
}
// TestTagParser_MultipleTags checks that two tags on the same line can be
// parsed one after the other from a single reader.
func TestTagParser_MultipleTags(t *testing.T) {
	inline := NewTagParser()
	src := text.NewReader([]byte("#tag1 #tag2"))
	pctx := parser.NewContext()

	// parseNext runs the parser at the reader's current position and checks
	// the text of the tag it produces.
	parseNext := func(want, typeMsg string) {
		node := inline.Parse(nil, src, pctx)
		require.NotNil(t, node)
		tagNode, ok := node.(*mast.TagNode)
		require.True(t, ok, typeMsg)
		assert.Equal(t, want, string(tagNode.Tag))
	}

	parseNext("tag1", "Expected node1 to be *mast.TagNode")

	// Skip the space separating the two tags.
	src.Advance(1)

	parseNext("tag2", "Expected node2 to be *mast.TagNode")
}
// TestTagNode_Kind checks that a TagNode reports mast.KindTag as its kind.
func TestTagNode_Kind(t *testing.T) {
	got := (&mast.TagNode{Tag: []byte("test")}).Kind()
	assert.Equal(t, mast.KindTag, got)
}
// TestTagNode_Dump checks that dumping a TagNode does not panic.
func TestTagNode_Dump(t *testing.T) {
	tag := &mast.TagNode{Tag: []byte("test")}
	dump := func() {
		tag.Dump([]byte("#test"), 0)
	}
	assert.NotPanics(t, dump)
}