// Forked from Team-Ortix/blgo.

package parser

import (
	"errors"
	"fmt"
	"regexp"
	"strings"
)

// Sentinel errors produced while parsing FrontMatter. ExtractFrontMatter
// wraps them with positional context, so compare using errors.Is rather
// than ==.
var (
	// ErrInvalidKeyValuePair is returned when a FrontMatter entry does not
	// contain a ":" delimiter separating the key from the value.
	ErrInvalidKeyValuePair = errors.New("the line does not contain a valid entry for FrontMatter")

	// ErrBlankKey is returned when the key of a FrontMatter entry is empty
	// once the surrounding whitespace has been trimmed.
	ErrBlankKey = errors.New("FrontMatter key is empty")

	// ErrDuplicateKey is returned when the same key appears more than once
	// within a single FrontMatter section.
	ErrDuplicateKey = errors.New("the provided key has already been defined")

	// ErrEOF is returned when the end of the content is reached before the
	// closing dashes of the FrontMatter section are found.
	ErrEOF = errors.New("unexpected EOF. expected closing dashes for FrontMatter")
)

// FrontMatter is the key-value structure that holds the metadata of a
// MarkDown file, such as its author and description. Keys and values are
// both plain strings.
type FrontMatter map[string]string

// ParseKeyValueLine will take a string in the format of "The Key: The Value".
|
|
// The input is split on ":", and space is trimmed before returning. If there
|
|
// are no colons, ErrInvalidKeyValuePair is returned. If the key is empty,
|
|
// ErrBlankKey is returned.
|
|
//
|
|
// The input will only split on the first colon. More colons will be part
|
|
// of the value.
|
|
func ParseKeyValueLine(line string) (string, string, error) {
|
|
split := strings.SplitN(line, ":", 2)
|
|
|
|
if len(split) != 2 {
|
|
return "", "", ErrInvalidKeyValuePair
|
|
}
|
|
|
|
key := strings.TrimSpace(split[0])
|
|
if key == "" {
|
|
return "", "", ErrBlankKey
|
|
}
|
|
|
|
return key, strings.TrimSpace(split[1]), nil
|
|
}
|
|
|
|
// delimiterRegex matches a line that delimits the FrontMatter section from
// the rest of the content. The line must consist of at least 3 dashes (-)
// and nothing else.
var delimiterRegex = regexp.MustCompile(`^-{3,}$`)

// ExtractFrontMatter will take an entire MarkDown file, and return a map that
|
|
// contains key-value pairs. The key-value pairs must end with an extra line
|
|
// with the content of "---". If this is not found, an ErrEOF is returned.
|
|
// Optionally, the FrontMatter can start with a ---. This is to have support
|
|
// with older template files, which follow this format.
|
|
//
|
|
// If the first line is not "---" and is not parsed as a valid FrontMatter
|
|
// entry, then the entire file is skipped and interpreted as having an empty
|
|
// FrontMatter.
|
|
// Duplicate keys will return a ErrDuplicateKey.
|
|
// Invalid FrontMatter entries will return a ErrInvalidKeyValuePair.
|
|
// Example Front Matter Format:
|
|
//
|
|
// ---
|
|
// Some Key: Some Value
|
|
// Another Key:Another Value
|
|
// A Key : A Value
|
|
// ---
|
|
// # Markdown Content
|
|
// ...
|
|
func ExtractFrontMatter(contents []string) (FrontMatter, []string, error) {
|
|
matter := FrontMatter{}
|
|
if len(contents) == 0 {
|
|
return matter, contents, nil
|
|
}
|
|
|
|
for i, line := range contents {
|
|
// Ignore first line if it matches the delimiter
|
|
if i == 0 && delimiterRegex.MatchString(line) {
|
|
continue
|
|
}
|
|
|
|
// Matches on the ending delimiter.
|
|
if delimiterRegex.MatchString(line) {
|
|
return matter, contents[i+1:], nil
|
|
}
|
|
|
|
// Now parsing FrontMatter. If the content is emptystring, skip the line
|
|
if strings.TrimSpace(line) == "" {
|
|
continue
|
|
}
|
|
|
|
key, value, err := ParseKeyValueLine(line)
|
|
if err != nil && i == 0 {
|
|
return matter, contents, nil
|
|
}
|
|
if err != nil {
|
|
return matter, contents[i+1:], fmt.Errorf("error parsing line %d: %w", i+1, err)
|
|
}
|
|
|
|
if _, ok := matter[key]; ok {
|
|
return matter, contents[i+1:], fmt.Errorf("error on parsing line %d: %w", i+1, ErrDuplicateKey)
|
|
}
|
|
matter[key] = value
|
|
}
|
|
return matter, contents, fmt.Errorf("error on parsing: %w", ErrEOF)
|
|
}
|