blgo/parser/frontmatter.go

116 lines
3.7 KiB
Go

package parser
import (
"errors"
"fmt"
"regexp"
"strings"
)
// Sentinel errors produced while parsing FrontMatter. Callers should compare
// against them with errors.Is, because ExtractFrontMatter wraps them with
// line information.
var (
	// ErrBlankKey reports a FrontMatter entry whose key is empty once the
	// surrounding whitespace has been trimmed.
	ErrBlankKey = errors.New("FrontMatter key is empty")

	// ErrDuplicateKey reports that the same key was defined more than once
	// within a single FrontMatter section.
	ErrDuplicateKey = errors.New("the provided key has already been defined")

	// ErrEOF reports that the input ended before the closing dashes of the
	// FrontMatter section were found.
	ErrEOF = errors.New("unexpected EOF. expected closing dashes for FrontMatter")

	// ErrInvalidKeyValuePair reports a FrontMatter line that has no ":"
	// delimiter and therefore cannot be split into a key and a value.
	ErrInvalidKeyValuePair = errors.New("the line does not contain a valid entry for FrontMatter")
)
// FrontMatter is the key-value structure that holds metadata about a
// Markdown file, such as its author and description. Both keys and values
// are plain strings.
type FrontMatter map[string]string
// ParseKeyValueLine splits a single line of the form "The Key: The Value"
// into its key and its value, trimming surrounding whitespace from both.
//
// Only the first colon acts as the separator; any further colons are kept
// as part of the value. The value may be empty.
//
// It returns ErrInvalidKeyValuePair when the line contains no colon at all,
// and ErrBlankKey when the text before the first colon is empty after
// trimming.
func ParseKeyValueLine(line string) (string, string, error) {
	// Locate the first colon; everything before it is the key and
	// everything after it is the value.
	sep := strings.Index(line, ":")
	if sep < 0 {
		return "", "", ErrInvalidKeyValuePair
	}

	name := strings.TrimSpace(line[:sep])
	if name == "" {
		return "", "", ErrBlankKey
	}

	content := strings.TrimSpace(line[sep+1:])
	return name, content, nil
}
// delimiterRegex matches a line that separates the FrontMatter section from
// the rest of the content. Such a line consists of three or more dashes (-)
// and nothing else.
var delimiterRegex = regexp.MustCompile(`^-{3,}$`)
// ExtractFrontMatter reads the FrontMatter section from the start of a
// Markdown file, given as one element of contents per line. It returns the
// parsed key-value pairs and the lines that follow the section.
//
// The section ends at a delimiter line of three or more dashes. A delimiter
// on the very first line is treated as an optional opening marker and is
// skipped; this keeps older template files working. Lines that are blank
// after trimming are ignored inside the section.
//
// Example FrontMatter format:
//
//	---
//	Some Key: Some Value
//	Another Key:Another Value
//	A Key : A Value
//	---
//	# Markdown Content
//	...
//
// Results by case:
//   - contents is empty: an empty FrontMatter, contents and a nil error.
//   - closing delimiter found: the parsed FrontMatter, the lines after the
//     delimiter and a nil error.
//   - the first line (index 0) is neither a delimiter nor blank and does not
//     parse as an entry: the file is treated as having no FrontMatter, so an
//     empty FrontMatter, the unmodified contents and a nil error are
//     returned.
//   - any later line fails to parse: an error wrapping the error from
//     ParseKeyValueLine (ErrInvalidKeyValuePair or ErrBlankKey) with the
//     1-based line number.
//   - a key appears twice: an error wrapping ErrDuplicateKey with the
//     1-based line number.
//   - the input ends without a closing delimiter: an error wrapping ErrEOF.
//
// On a parse or duplicate-key error, the entries parsed so far and the lines
// after the offending line are returned alongside the error.
//
// NOTE: the "no FrontMatter" fallback applies only to index 0. Because blank
// lines are skipped, a blank first line followed by a line that is not a
// valid entry results in an error rather than an empty FrontMatter.
func ExtractFrontMatter(contents []string) (FrontMatter, []string, error) {
	parsed := make(FrontMatter)
	if len(contents) == 0 {
		return parsed, contents, nil
	}

	for idx := 0; idx < len(contents); idx++ {
		current := contents[idx]
		remaining := contents[idx+1:]
		lineNo := idx + 1

		if delimiterRegex.MatchString(current) {
			// A delimiter on the first line only opens the section; any
			// other delimiter closes it.
			if idx == 0 {
				continue
			}
			return parsed, remaining, nil
		}

		// Blank lines inside the section carry no entry.
		if strings.TrimSpace(current) == "" {
			continue
		}

		k, v, parseErr := ParseKeyValueLine(current)
		switch {
		case parseErr != nil && idx == 0:
			// The file does not start with FrontMatter at all: hand the
			// input back untouched.
			return parsed, contents, nil
		case parseErr != nil:
			return parsed, remaining, fmt.Errorf("error parsing line %d: %w", lineNo, parseErr)
		}

		if _, seen := parsed[k]; seen {
			return parsed, remaining, fmt.Errorf("error on parsing line %d: %w", lineNo, ErrDuplicateKey)
		}
		parsed[k] = v
	}

	// Ran out of lines before reaching the closing delimiter.
	return parsed, contents, fmt.Errorf("error on parsing: %w", ErrEOF)
}