// diff --git a/parser/frontmatter.go b/parser/frontmatter.go
// new file mode 100644
// index 0000000..881fb94
// --- /dev/null
// +++ b/parser/frontmatter.go
// @@ -0,0 +1,108 @@

package parser

import (
	"errors"
	"fmt"
	"regexp"
	"strings"
)

// Sentinel errors reported while parsing FrontMatter. Callers can test for
// them with errors.Is, since ExtractFrontMatter wraps them with %w.
var (
	// ErrInvalidKeyValuePair is returned when a FrontMatter entry has no ":"
	// delimiter separating the key from the value.
	ErrInvalidKeyValuePair = errors.New("the line does not contain a valid entry for FrontMatter")

	// ErrBlankKey is returned when the key of a FrontMatter entry is empty
	// once the surrounding whitespace has been trimmed.
	ErrBlankKey = errors.New("FrontMatter key is empty")

	// ErrDuplicateKey is returned when the same key appears more than once
	// in the FrontMatter section.
	ErrDuplicateKey = errors.New("the provided key has already been defined")

	// ErrEOF is returned when the input ends before the closing line of
	// dashes that terminates the FrontMatter section.
	ErrEOF = errors.New("unexpected EOF. expected closing dashes for FrontMatter")
)

// FrontMatter is the key-value structure that holds metadata about a Markdown
// file, such as its author and description.
type FrontMatter map[string]string

// ParseKeyValueLine parses a line of the form "The Key: The Value" and returns
// the key and the value, each with surrounding whitespace trimmed.
//
// The line is split on the first colon only; any further colons are kept as
// part of the value. The value may be empty.
//
// If the line contains no colon, ErrInvalidKeyValuePair is returned. If the
// key is empty after trimming, ErrBlankKey is returned.
+func ParseKeyValueLine(line string) (string, string, error) { + split := strings.SplitN(line, ":", 2) + + if len(split) != 2 { + return "", "", ErrInvalidKeyValuePair + } + + key := strings.TrimSpace(split[0]) + if key == "" { + return "", "", ErrBlankKey + } + + return key, strings.TrimSpace(split[1]), nil +} + +// delimiterRegex denotes the regex for a line that matches on when the +// FrontMatter section is delimited from the rest of the content. +// It must be a minimum of 3 dashes (-), and no other content. +var delimiterRegex = regexp.MustCompile("$-{3,}^") + +// ExtractFrontMatter will take an entire MarkDown file, and return a map that +// contains key-value pairs. The key-value pairs must end with an extra line +// with the content of "---". If this is not found, an ErrEOF is returned. +// Optionally, the FrontMatter can start with a ---. This is to have support +// with older template files, which follow this format. +// +// If the first line is not "---" and is not parsed as a valid FrontMatter +// entry, then the entire file is skipped and interpreted as having an empty +// FrontMatter. +// Duplicate keys will return a ErrDuplicateKey. +// Invalid FrontMatter entries will return a ErrInvalidKeyValuePair. +// Example Front Matter Format: +// +// --- +// Some Key: Some Value +// Another Key:Another Value +// A Key : A Value +// --- +// # Markdown Content +// ... 
+func ExtractFrontMatter(contents []string) (FrontMatter, error) { + matter := FrontMatter{} + if len(contents) == 0 { + return matter, nil + } + + for i, line := range contents { + if i == 0 && delimiterRegex.MatchString(line) { + continue + } + + if delimiterRegex.MatchString(line) { + return matter, nil + } + + key, value, err := ParseKeyValueLine(line) + if err != nil && i == 0 { + return matter, nil + } + if err != nil { + return matter, fmt.Errorf("error parsing line %d: %w", i+1, err) + } + + if _, ok := matter[key]; ok { + return matter, fmt.Errorf("error on parsing line %d: %w", i+1, ErrDuplicateKey) + } + matter[key] = value + } + return matter, fmt.Errorf("error on parsing: %w", ErrEOF) +} diff --git a/parser/frontmatter_test.go b/parser/frontmatter_test.go new file mode 100644 index 0000000..be9c73e --- /dev/null +++ b/parser/frontmatter_test.go @@ -0,0 +1,149 @@ +package parser_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + + "gitea.teamortix.com/Team-Ortix/blgo/parser" +) + +type keyValueResult struct { + Key string + Value string + Error error +} + +func TestParseKeyValueLineWithValidContent(t *testing.T) { + asrt := assert.New(t) + + k, v, e := parser.ParseKeyValueLine("A: B") + asrt.EqualValues( + keyValueResult{"A", "B", nil}, + keyValueResult{k, v, e}, + "parsing valid key-value line gave invalid result", + ) + + k, v, e = parser.ParseKeyValueLine(" A : B : C") + asrt.EqualValues( + keyValueResult{"A", "B : C", nil}, + keyValueResult{k, v, e}, + "parsing valid key-value line gave invalid result", + ) + + k, v, e = parser.ParseKeyValueLine(" A B: B") + asrt.EqualValues( + keyValueResult{"A B", "B", nil}, + keyValueResult{k, v, e}, + "parsing valid key-value line gave invalid result", + ) + + k, v, e = parser.ParseKeyValueLine("A:") + asrt.EqualValues( + keyValueResult{"A", "", nil}, + keyValueResult{k, v, e}, + "parsing valid key-value line gave invalid result", + ) +} + +func TestParseKeyValueWithInvalidEntry(t 
*testing.T) { + asrt := assert.New(t) + + k, v, e := parser.ParseKeyValueLine("") + asrt.EqualValues( + keyValueResult{"", "", parser.ErrInvalidKeyValuePair}, + keyValueResult{k, v, e}, + "parsing invalid key-value with no content gave unexpected result", + ) + + k, v, e = parser.ParseKeyValueLine("A B") + asrt.EqualValues( + keyValueResult{"", "", parser.ErrInvalidKeyValuePair}, + keyValueResult{k, v, e}, + "parsing invalid key-value with no delimiter gave unexpected result", + ) + + k, v, e = parser.ParseKeyValueLine(": B") + asrt.EqualValues( + keyValueResult{"", "", parser.ErrBlankKey}, + keyValueResult{k, v, e}, + "parsing invalid key-value with no key gave unexpected result", + ) +} + +type extractResult struct { + FrontMatter parser.FrontMatter + Error error +} + +func TestExtractFrontMatterWithValidContent(t *testing.T) { + asrt := assert.New(t) + + fm, e := parser.ExtractFrontMatter([]string{}) + asrt.EqualValues( + extractResult{map[string]string{}, nil}, + extractResult{fm, e}, + "parsing empty input yields unexpected result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"Key: Value", "---"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value"}, nil}, + extractResult{fm, e}, + "parsing valid FrontMatter yields invalid result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"Key: Value", "---", "# Content", "Other content"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value"}, nil}, + extractResult{fm, e}, + "parsing valid FrontMatter yields invalid result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"---", "Key: Value", "---"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value"}, nil}, + extractResult{fm, e}, + "parsing valid FrontMatter yields invalid result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"Key: Value", "Another Key: Another Value", "---"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value", "Another Key": "Another Value"}, nil}, + 
extractResult{fm, e}, + "parsing valid FrontMatter yields invalid result", + ) +} + +func TestExtractFrontMatterWithBadKeys(t *testing.T) { + asrt := assert.New(t) + + fm, e := parser.ExtractFrontMatter([]string{"---", "Key Value", "---"}) + asrt.EqualValues( + extractResult{map[string]string{}, fmt.Errorf("error parsing line 2: %w", parser.ErrInvalidKeyValuePair)}, + extractResult{fm, e}, + "parsing invalid FrontMatter with no delimiter yields invalid result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"Key: Value", ": Another Value", "---"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value"}, fmt.Errorf("error parsing line 2: %w", parser.ErrBlankKey)}, + extractResult{fm, e}, + "parsing invalid FrontMatter with blank key yields invalid result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"Key: Value", "Key: Dupe Value"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value"}, fmt.Errorf("error on parsing line 2: %w", parser.ErrDuplicateKey)}, + extractResult{fm, e}, + "parsing invalid FrontMatter with no final dashes yields invalid result", + ) + + fm, e = parser.ExtractFrontMatter([]string{"Key: Value", "Another Key: Another Value"}) + asrt.EqualValues( + extractResult{map[string]string{"Key": "Value", "Another Key": "Another Value"}, fmt.Errorf("error on parsing: %w", parser.ErrEOF)}, + extractResult{fm, e}, + "parsing invalid FrontMatter with no final dashes yields invalid result", + ) +}