package parser

import (
	"errors"
	"fmt"
	"regexp"
	"strings"
)

var (
	// ErrInvalidKeyValuePair is returned when a FrontMatter entry does not
	// contain a ":" delimiter.
	ErrInvalidKeyValuePair = errors.New("the line does not contain a valid entry for FrontMatter")

	// ErrBlankKey is returned when the key of a FrontMatter entry is empty
	// once surrounding space has been trimmed.
	ErrBlankKey = errors.New("FrontMatter key is empty")

	// ErrDuplicateKey is returned when the same key appears more than once
	// in the FrontMatter of a single document.
	ErrDuplicateKey = errors.New("the provided key has already been defined")

	// ErrEOF is returned when the end of the content is reached before the
	// closing dashes of the FrontMatter section.
	ErrEOF = errors.New("unexpected EOF. expected closing dashes for FrontMatter")
)

// FrontMatter is the key-value structure that is used to contain metadata about
// the MarkDown file, containing information like author and description.
type FrontMatter map[string]string

// ParseKeyValueLine takes a string in the format "The Key: The Value" and
// returns the key and the value, both with surrounding space trimmed.
//
// The input is split on the first ":" only; any further colons are part of
// the value. If the line contains no colon, ErrInvalidKeyValuePair is
// returned. If the trimmed key is empty, ErrBlankKey is returned. On error
// both returned strings are empty.
func ParseKeyValueLine(line string) (string, string, error) {
	split := strings.SplitN(line, ":", 2)
	if len(split) != 2 {
		return "", "", ErrInvalidKeyValuePair
	}

	key := strings.TrimSpace(split[0])
	if key == "" {
		return "", "", ErrBlankKey
	}

	return key, strings.TrimSpace(split[1]), nil
}

// delimiterRegex denotes the regex for a line that matches on when the
// FrontMatter section is delimited from the rest of the content.
// It must be a minimum of 3 dashes (-), and no other content.
var delimiterRegex = regexp.MustCompile("^-{3,}$")

// ExtractFrontMatter takes the lines of an entire MarkDown file and returns
// the FrontMatter key-value pairs together with the lines that follow the
// FrontMatter section.
//
// The key-value pairs must be terminated by a line consisting only of three
// or more dashes ("---"). Optionally, the very first line may also be such a
// delimiter; this supports older template files which follow that format.
// Blank lines are ignored while looking for entries.
//
// If the first non-blank line is neither that opening delimiter nor a valid
// FrontMatter entry, the document is treated as having no FrontMatter: an
// empty FrontMatter and the unmodified contents are returned with a nil
// error. This also holds when the document starts with blank lines.
//
// Errors:
//   - An invalid entry after parsing has started returns the entries parsed
//     so far, the lines after the offending line, and an error wrapping the
//     one from ParseKeyValueLine.
//   - A repeated key returns the entries parsed so far, the lines after the
//     offending line, and an error wrapping ErrDuplicateKey.
//   - Reaching the end of contents without a closing delimiter returns the
//     entries parsed so far, the unmodified contents, and an error wrapping
//     ErrEOF.
//
// Example Front Matter Format:
//
//	---
//	Some Key: Some Value
//	Another Key:Another Value
//	A Key : A Value
//	---
//	# Markdown Content
//	...
func ExtractFrontMatter(contents []string) (FrontMatter, []string, error) {
	matter := FrontMatter{}
	if len(contents) == 0 {
		return matter, contents, nil
	}

	// started records whether anything has committed us to a FrontMatter
	// section: either the opening delimiter or one successfully parsed entry.
	// Until then, an unparsable line means "this file has no FrontMatter"
	// rather than "this FrontMatter is malformed".
	started := false

	for i, line := range contents {
		if delimiterRegex.MatchString(line) {
			// A delimiter on the very first line opens the section; any
			// other delimiter closes it.
			if i == 0 {
				started = true
				continue
			}
			return matter, contents[i+1:], nil
		}

		// Blank lines carry no entry and never decide whether a FrontMatter
		// section is present.
		if strings.TrimSpace(line) == "" {
			continue
		}

		key, value, err := ParseKeyValueLine(line)
		if err != nil {
			if !started {
				// First meaningful line is not an entry: no FrontMatter.
				return matter, contents, nil
			}
			return matter, contents[i+1:], fmt.Errorf("error parsing line %d: %w", i+1, err)
		}
		started = true

		if _, ok := matter[key]; ok {
			return matter, contents[i+1:], fmt.Errorf("error on parsing line %d: %w", i+1, ErrDuplicateKey)
		}
		matter[key] = value
	}

	return matter, contents, fmt.Errorf("error on parsing: %w", ErrEOF)
}