|
- // Package parser contains the types and functions used to parse Markdown text.
- package parser
-
- import (
- "fmt"
- "strings"
- "sync"
-
- "github.com/yuin/goldmark/ast"
- "github.com/yuin/goldmark/text"
- "github.com/yuin/goldmark/util"
- )
-
// Reference describes a single link reference found in Markdown text.
type Reference interface {
	// String returns a printable form of the reference (fmt.Stringer).
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}

// reference is the package's Reference implementation. It simply keeps the
// three byte slices it was constructed with.
type reference struct {
	label       []byte
	destination []byte
	title       []byte
}

// NewReference returns a new Reference holding the given label, destination
// and title. The slices are stored as passed in; they are not copied.
func NewReference(label, destination, title []byte) Reference {
	ref := new(reference)
	ref.label = label
	ref.destination = destination
	ref.title = title
	return ref
}

// Label returns the label given to NewReference.
func (r *reference) Label() []byte {
	return r.label
}

// Destination returns the destination given to NewReference.
func (r *reference) Destination() []byte {
	return r.destination
}

// Title returns the title given to NewReference.
func (r *reference) Title() []byte {
	return r.title
}

// String formats the reference as "Reference{Label:…, Destination:…, Title:…}".
func (r *reference) String() string {
	const layout = "Reference{Label:%s, Destination:%s, Title:%s}"
	return fmt.Sprintf(layout, r.label, r.destination, r.title)
}
-
- // An IDs interface is a collection of the element ids.
- type IDs interface {
- // Generate generates a new element id.
- Generate(value []byte, kind ast.NodeKind) []byte
-
- // Put puts a given element id to the used ids table.
- Put(value []byte)
- }
-
- type ids struct {
- values map[string]bool
- }
-
- func newIDs() IDs {
- return &ids{
- values: map[string]bool{},
- }
- }
-
- func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
- value = util.TrimLeftSpace(value)
- value = util.TrimRightSpace(value)
- result := []byte{}
- for i := 0; i < len(value); {
- v := value[i]
- l := util.UTF8Len(v)
- i += int(l)
- if l != 1 {
- continue
- }
- if util.IsAlphaNumeric(v) {
- if 'A' <= v && v <= 'Z' {
- v += 'a' - 'A'
- }
- result = append(result, v)
- } else if util.IsSpace(v) || v == '-' || v == '_' {
- result = append(result, '-')
- }
- }
- if len(result) == 0 {
- if kind == ast.KindHeading {
- result = []byte("heading")
- } else {
- result = []byte("id")
- }
- }
- if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
- s.values[util.BytesToReadOnlyString(result)] = true
- return result
- }
- for i := 1; ; i++ {
- newResult := fmt.Sprintf("%s-%d", result, i)
- if _, ok := s.values[newResult]; !ok {
- s.values[newResult] = true
- return []byte(newResult)
- }
-
- }
- }
-
- func (s *ids) Put(value []byte) {
- s.values[util.BytesToReadOnlyString(value)] = true
- }
-
// ContextKey identifies a slot in which an arbitrary value can be stored in
// a Context.
type ContextKey int

// ContextKeyMax is the largest ContextKey value issued so far.
var ContextKeyMax ContextKey

// NewContextKey issues a new ContextKey: it increments ContextKeyMax and
// returns the incremented value.
func NewContextKey() ContextKey {
	next := ContextKeyMax + 1
	ContextKeyMax = next
	return next
}
-
- // A Context interface holds a information that are necessary to parse
- // Markdown text.
- type Context interface {
- // String implements Stringer.
- String() string
-
- // Get returns a value associated with the given key.
- Get(ContextKey) interface{}
-
- // Set sets the given value to the context.
- Set(ContextKey, interface{})
-
- // AddReference adds the given reference to this context.
- AddReference(Reference)
-
- // Reference returns (a reference, true) if a reference associated with
- // the given label exists, otherwise (nil, false).
- Reference(label string) (Reference, bool)
-
- // References returns a list of references.
- References() []Reference
-
- // IDs returns a collection of the element ids.
- IDs() IDs
-
- // BlockOffset returns a first non-space character position on current line.
- // This value is valid only for BlockParser.Open.
- // BlockOffset returns -1 if current line is blank.
- BlockOffset() int
-
- // BlockOffset sets a first non-space character position on current line.
- // This value is valid only for BlockParser.Open.
- SetBlockOffset(int)
-
- // BlockIndent returns an indent width on current line.
- // This value is valid only for BlockParser.Open.
- // BlockIndent returns -1 if current line is blank.
- BlockIndent() int
-
- // BlockIndent sets an indent width on current line.
- // This value is valid only for BlockParser.Open.
- SetBlockIndent(int)
-
- // FirstDelimiter returns a first delimiter of the current delimiter list.
- FirstDelimiter() *Delimiter
-
- // LastDelimiter returns a last delimiter of the current delimiter list.
- LastDelimiter() *Delimiter
-
- // PushDelimiter appends the given delimiter to the tail of the current
- // delimiter list.
- PushDelimiter(delimiter *Delimiter)
-
- // RemoveDelimiter removes the given delimiter from the current delimiter list.
- RemoveDelimiter(d *Delimiter)
-
- // ClearDelimiters clears the current delimiter list.
- ClearDelimiters(bottom ast.Node)
-
- // OpenedBlocks returns a list of nodes that are currently in parsing.
- OpenedBlocks() []Block
-
- // SetOpenedBlocks sets a list of nodes that are currently in parsing.
- SetOpenedBlocks([]Block)
-
- // LastOpenedBlock returns a last node that is currently in parsing.
- LastOpenedBlock() Block
-
- // IsInLinkLabel returns true if current position seems to be in link label.
- IsInLinkLabel() bool
- }
-
- // A ContextConfig struct is a data structure that holds configuration of the Context.
- type ContextConfig struct {
- IDs IDs
- }
-
- // An ContextOption is a functional option type for the Context.
- type ContextOption func(*ContextConfig)
-
- // WithIDs is a functional option for the Context.
- func WithIDs(ids IDs) ContextOption {
- return func(c *ContextConfig) {
- c.IDs = ids
- }
- }
-
- type parseContext struct {
- store []interface{}
- ids IDs
- refs map[string]Reference
- blockOffset int
- blockIndent int
- delimiters *Delimiter
- lastDelimiter *Delimiter
- openedBlocks []Block
- }
-
- // NewContext returns a new Context.
- func NewContext(options ...ContextOption) Context {
- cfg := &ContextConfig{
- IDs: newIDs(),
- }
- for _, option := range options {
- option(cfg)
- }
-
- return &parseContext{
- store: make([]interface{}, ContextKeyMax+1),
- refs: map[string]Reference{},
- ids: cfg.IDs,
- blockOffset: -1,
- blockIndent: -1,
- delimiters: nil,
- lastDelimiter: nil,
- openedBlocks: []Block{},
- }
- }
-
- func (p *parseContext) Get(key ContextKey) interface{} {
- return p.store[key]
- }
-
- func (p *parseContext) Set(key ContextKey, value interface{}) {
- p.store[key] = value
- }
-
- func (p *parseContext) IDs() IDs {
- return p.ids
- }
-
- func (p *parseContext) BlockOffset() int {
- return p.blockOffset
- }
-
- func (p *parseContext) SetBlockOffset(v int) {
- p.blockOffset = v
- }
-
- func (p *parseContext) BlockIndent() int {
- return p.blockIndent
- }
-
- func (p *parseContext) SetBlockIndent(v int) {
- p.blockIndent = v
- }
-
- func (p *parseContext) LastDelimiter() *Delimiter {
- return p.lastDelimiter
- }
-
- func (p *parseContext) FirstDelimiter() *Delimiter {
- return p.delimiters
- }
-
- func (p *parseContext) PushDelimiter(d *Delimiter) {
- if p.delimiters == nil {
- p.delimiters = d
- p.lastDelimiter = d
- } else {
- l := p.lastDelimiter
- p.lastDelimiter = d
- l.NextDelimiter = d
- d.PreviousDelimiter = l
- }
- }
-
- func (p *parseContext) RemoveDelimiter(d *Delimiter) {
- if d.PreviousDelimiter == nil {
- p.delimiters = d.NextDelimiter
- } else {
- d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
- if d.NextDelimiter != nil {
- d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
- }
- }
- if d.NextDelimiter == nil {
- p.lastDelimiter = d.PreviousDelimiter
- }
- if p.delimiters != nil {
- p.delimiters.PreviousDelimiter = nil
- }
- if p.lastDelimiter != nil {
- p.lastDelimiter.NextDelimiter = nil
- }
- d.NextDelimiter = nil
- d.PreviousDelimiter = nil
- if d.Length != 0 {
- ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
- } else {
- d.Parent().RemoveChild(d.Parent(), d)
- }
- }
-
- func (p *parseContext) ClearDelimiters(bottom ast.Node) {
- if p.lastDelimiter == nil {
- return
- }
- var c ast.Node
- for c = p.lastDelimiter; c != nil && c != bottom; {
- prev := c.PreviousSibling()
- if d, ok := c.(*Delimiter); ok {
- p.RemoveDelimiter(d)
- }
- c = prev
- }
- }
-
- func (p *parseContext) AddReference(ref Reference) {
- key := util.ToLinkReference(ref.Label())
- if _, ok := p.refs[key]; !ok {
- p.refs[key] = ref
- }
- }
-
- func (p *parseContext) Reference(label string) (Reference, bool) {
- v, ok := p.refs[label]
- return v, ok
- }
-
- func (p *parseContext) References() []Reference {
- ret := make([]Reference, 0, len(p.refs))
- for _, v := range p.refs {
- ret = append(ret, v)
- }
- return ret
- }
-
- func (p *parseContext) String() string {
- refs := []string{}
- for _, r := range p.refs {
- refs = append(refs, r.String())
- }
-
- return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
- }
-
- func (p *parseContext) OpenedBlocks() []Block {
- return p.openedBlocks
- }
-
- func (p *parseContext) SetOpenedBlocks(v []Block) {
- p.openedBlocks = v
- }
-
- func (p *parseContext) LastOpenedBlock() Block {
- if l := len(p.openedBlocks); l != 0 {
- return p.openedBlocks[l-1]
- }
- return Block{}
- }
-
- func (p *parseContext) IsInLinkLabel() bool {
- tlist := p.Get(linkLabelStateKey)
- return tlist != nil
- }
-
// State is the result a parser reports back to the driver.
// Its values are single bits so that several of them can be combined.
type State int

const (
	// none occupies the first bit so that the exported states start at the
	// second one.
	none State = 1 << iota

	// Continue means the parser can keep parsing.
	Continue

	// Close means the parser cannot parse any further.
	Close

	// HasChildren means the parsed block may contain child blocks.
	HasChildren

	// NoChildren means the parsed block does not contain child blocks.
	NoChildren

	// RequireParagraph means the parser needs the last node to be a
	// paragraph that ParagraphTransformers have not converted into
	// another node.
	RequireParagraph
)
-
- // A Config struct is a data structure that holds configuration of the Parser.
- type Config struct {
- Options map[OptionName]interface{}
- BlockParsers util.PrioritizedSlice /*<BlockParser>*/
- InlineParsers util.PrioritizedSlice /*<InlineParser>*/
- ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
- ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/
- }
-
- // NewConfig returns a new Config.
- func NewConfig() *Config {
- return &Config{
- Options: map[OptionName]interface{}{},
- BlockParsers: util.PrioritizedSlice{},
- InlineParsers: util.PrioritizedSlice{},
- ParagraphTransformers: util.PrioritizedSlice{},
- ASTTransformers: util.PrioritizedSlice{},
- }
- }
-
- // An Option interface is a functional option type for the Parser.
- type Option interface {
- SetParserOption(*Config)
- }
-
- // OptionName is a name of parser options.
- type OptionName string
-
- // Attribute is an option name that spacify attributes of elements.
- const optAttribute OptionName = "Attribute"
-
- type withAttribute struct {
- }
-
- func (o *withAttribute) SetParserOption(c *Config) {
- c.Options[optAttribute] = true
- }
-
- // WithAttribute is a functional option that enables custom attributes.
- func WithAttribute() Option {
- return &withAttribute{}
- }
-
- // A Parser interface parses Markdown text into AST nodes.
- type Parser interface {
- // Parse parses the given Markdown text into AST nodes.
- Parse(reader text.Reader, opts ...ParseOption) ast.Node
-
- // AddOption adds the given option to this parser.
- AddOptions(...Option)
- }
-
- // A SetOptioner interface sets the given option to the object.
- type SetOptioner interface {
- // SetOption sets the given option to the object.
- // Unacceptable options may be passed.
- // Thus implementations must ignore unacceptable options.
- SetOption(name OptionName, value interface{})
- }
-
- // A BlockParser interface parses a block level element like Paragraph, List,
- // Blockquote etc.
- type BlockParser interface {
- // Trigger returns a list of characters that triggers Parse method of
- // this parser.
- // If Trigger returns a nil, Open will be called with any lines.
- Trigger() []byte
-
- // Open parses the current line and returns a result of parsing.
- //
- // Open must not parse beyond the current line.
- // If Open has been able to parse the current line, Open must advance a reader
- // position by consumed byte length.
- //
- // If Open has not been able to parse the current line, Open should returns
- // (nil, NoChildren). If Open has been able to parse the current line, Open
- // should returns a new Block node and returns HasChildren or NoChildren.
- Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)
-
- // Continue parses the current line and returns a result of parsing.
- //
- // Continue must not parse beyond the current line.
- // If Continue has been able to parse the current line, Continue must advance
- // a reader position by consumed byte length.
- //
- // If Continue has not been able to parse the current line, Continue should
- // returns Close. If Continue has been able to parse the current line,
- // Continue should returns (Continue | NoChildren) or
- // (Continue | HasChildren)
- Continue(node ast.Node, reader text.Reader, pc Context) State
-
- // Close will be called when the parser returns Close.
- Close(node ast.Node, reader text.Reader, pc Context)
-
- // CanInterruptParagraph returns true if the parser can interrupt paragraphs,
- // otherwise false.
- CanInterruptParagraph() bool
-
- // CanAcceptIndentedLine returns true if the parser can open new node when
- // the given line is being indented more than 3 spaces.
- CanAcceptIndentedLine() bool
- }
-
- // An InlineParser interface parses an inline level element like CodeSpan, Link etc.
- type InlineParser interface {
- // Trigger returns a list of characters that triggers Parse method of
- // this parser.
- // Trigger characters must be a punctuation or a halfspace.
- // Halfspaces triggers this parser when character is any spaces characters or
- // a head of line
- Trigger() []byte
-
- // Parse parse the given block into an inline node.
- //
- // Parse can parse beyond the current line.
- // If Parse has been able to parse the current line, it must advance a reader
- // position by consumed byte length.
- Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
- }
-
- // A CloseBlocker interface is a callback function that will be
- // called when block is closed in the inline parsing.
- type CloseBlocker interface {
- // CloseBlock will be called when a block is closed.
- CloseBlock(parent ast.Node, block text.Reader, pc Context)
- }
-
- // A ParagraphTransformer transforms parsed Paragraph nodes.
- // For example, link references are searched in parsed Paragraphs.
- type ParagraphTransformer interface {
- // Transform transforms the given paragraph.
- Transform(node *ast.Paragraph, reader text.Reader, pc Context)
- }
-
- // ASTTransformer transforms entire Markdown document AST tree.
- type ASTTransformer interface {
- // Transform transforms the given AST tree.
- Transform(node *ast.Document, reader text.Reader, pc Context)
- }
-
- // DefaultBlockParsers returns a new list of default BlockParsers.
- // Priorities of default BlockParsers are:
- //
- // SetextHeadingParser, 100
- // ThematicBreakParser, 200
- // ListParser, 300
- // ListItemParser, 400
- // CodeBlockParser, 500
- // ATXHeadingParser, 600
- // FencedCodeBlockParser, 700
- // BlockquoteParser, 800
- // HTMLBlockParser, 900
- // ParagraphParser, 1000
- func DefaultBlockParsers() []util.PrioritizedValue {
- return []util.PrioritizedValue{
- util.Prioritized(NewSetextHeadingParser(), 100),
- util.Prioritized(NewThematicBreakParser(), 200),
- util.Prioritized(NewListParser(), 300),
- util.Prioritized(NewListItemParser(), 400),
- util.Prioritized(NewCodeBlockParser(), 500),
- util.Prioritized(NewATXHeadingParser(), 600),
- util.Prioritized(NewFencedCodeBlockParser(), 700),
- util.Prioritized(NewBlockquoteParser(), 800),
- util.Prioritized(NewHTMLBlockParser(), 900),
- util.Prioritized(NewParagraphParser(), 1000),
- }
- }
-
- // DefaultInlineParsers returns a new list of default InlineParsers.
- // Priorities of default InlineParsers are:
- //
- // CodeSpanParser, 100
- // LinkParser, 200
- // AutoLinkParser, 300
- // RawHTMLParser, 400
- // EmphasisParser, 500
- func DefaultInlineParsers() []util.PrioritizedValue {
- return []util.PrioritizedValue{
- util.Prioritized(NewCodeSpanParser(), 100),
- util.Prioritized(NewLinkParser(), 200),
- util.Prioritized(NewAutoLinkParser(), 300),
- util.Prioritized(NewRawHTMLParser(), 400),
- util.Prioritized(NewEmphasisParser(), 500),
- }
- }
-
- // DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
- // Priorities of default ParagraphTransformers are:
- //
- // LinkReferenceParagraphTransformer, 100
- func DefaultParagraphTransformers() []util.PrioritizedValue {
- return []util.PrioritizedValue{
- util.Prioritized(LinkReferenceParagraphTransformer, 100),
- }
- }
-
- // A Block struct holds a node and correspond parser pair.
- type Block struct {
- // Node is a BlockNode.
- Node ast.Node
- // Parser is a BlockParser.
- Parser BlockParser
- }
-
- type parser struct {
- options map[OptionName]interface{}
- blockParsers [256][]BlockParser
- freeBlockParsers []BlockParser
- inlineParsers [256][]InlineParser
- closeBlockers []CloseBlocker
- paragraphTransformers []ParagraphTransformer
- astTransformers []ASTTransformer
- config *Config
- initSync sync.Once
- }
-
- type withBlockParsers struct {
- value []util.PrioritizedValue
- }
-
- func (o *withBlockParsers) SetParserOption(c *Config) {
- c.BlockParsers = append(c.BlockParsers, o.value...)
- }
-
- // WithBlockParsers is a functional option that allow you to add
- // BlockParsers to the parser.
- func WithBlockParsers(bs ...util.PrioritizedValue) Option {
- return &withBlockParsers{bs}
- }
-
- type withInlineParsers struct {
- value []util.PrioritizedValue
- }
-
- func (o *withInlineParsers) SetParserOption(c *Config) {
- c.InlineParsers = append(c.InlineParsers, o.value...)
- }
-
- // WithInlineParsers is a functional option that allow you to add
- // InlineParsers to the parser.
- func WithInlineParsers(bs ...util.PrioritizedValue) Option {
- return &withInlineParsers{bs}
- }
-
- type withParagraphTransformers struct {
- value []util.PrioritizedValue
- }
-
- func (o *withParagraphTransformers) SetParserOption(c *Config) {
- c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
- }
-
- // WithParagraphTransformers is a functional option that allow you to add
- // ParagraphTransformers to the parser.
- func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
- return &withParagraphTransformers{ps}
- }
-
- type withASTTransformers struct {
- value []util.PrioritizedValue
- }
-
- func (o *withASTTransformers) SetParserOption(c *Config) {
- c.ASTTransformers = append(c.ASTTransformers, o.value...)
- }
-
- // WithASTTransformers is a functional option that allow you to add
- // ASTTransformers to the parser.
- func WithASTTransformers(ps ...util.PrioritizedValue) Option {
- return &withASTTransformers{ps}
- }
-
- type withOption struct {
- name OptionName
- value interface{}
- }
-
- func (o *withOption) SetParserOption(c *Config) {
- c.Options[o.name] = o.value
- }
-
- // WithOption is a functional option that allow you to set
- // an arbitrary option to the parser.
- func WithOption(name OptionName, value interface{}) Option {
- return &withOption{name, value}
- }
-
- // NewParser returns a new Parser with given options.
- func NewParser(options ...Option) Parser {
- config := NewConfig()
- for _, opt := range options {
- opt.SetParserOption(config)
- }
-
- p := &parser{
- options: map[OptionName]interface{}{},
- config: config,
- }
-
- return p
- }
-
- func (p *parser) AddOptions(opts ...Option) {
- for _, opt := range opts {
- opt.SetParserOption(p.config)
- }
- }
-
- func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
- bp, ok := v.Value.(BlockParser)
- if !ok {
- panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
- }
- tcs := bp.Trigger()
- so, ok := v.Value.(SetOptioner)
- if ok {
- for oname, ovalue := range options {
- so.SetOption(oname, ovalue)
- }
- }
- if tcs == nil {
- p.freeBlockParsers = append(p.freeBlockParsers, bp)
- } else {
- for _, tc := range tcs {
- if p.blockParsers[tc] == nil {
- p.blockParsers[tc] = []BlockParser{}
- }
- p.blockParsers[tc] = append(p.blockParsers[tc], bp)
- }
- }
- }
-
- func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
- ip, ok := v.Value.(InlineParser)
- if !ok {
- panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
- }
- tcs := ip.Trigger()
- so, ok := v.Value.(SetOptioner)
- if ok {
- for oname, ovalue := range options {
- so.SetOption(oname, ovalue)
- }
- }
- if cb, ok := ip.(CloseBlocker); ok {
- p.closeBlockers = append(p.closeBlockers, cb)
- }
- for _, tc := range tcs {
- if p.inlineParsers[tc] == nil {
- p.inlineParsers[tc] = []InlineParser{}
- }
- p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
- }
- }
-
- func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
- pt, ok := v.Value.(ParagraphTransformer)
- if !ok {
- panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
- }
- so, ok := v.Value.(SetOptioner)
- if ok {
- for oname, ovalue := range options {
- so.SetOption(oname, ovalue)
- }
- }
- p.paragraphTransformers = append(p.paragraphTransformers, pt)
- }
-
- func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
- at, ok := v.Value.(ASTTransformer)
- if !ok {
- panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
- }
- so, ok := v.Value.(SetOptioner)
- if ok {
- for oname, ovalue := range options {
- so.SetOption(oname, ovalue)
- }
- }
- p.astTransformers = append(p.astTransformers, at)
- }
-
- // A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
- type ParseConfig struct {
- Context Context
- }
-
- // A ParseOption is a functional option type for the Parser.Parse.
- type ParseOption func(c *ParseConfig)
-
- // WithContext is a functional option that allow you to override
- // a default context.
- func WithContext(context Context) ParseOption {
- return func(c *ParseConfig) {
- c.Context = context
- }
- }
-
- func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
- p.initSync.Do(func() {
- p.config.BlockParsers.Sort()
- for _, v := range p.config.BlockParsers {
- p.addBlockParser(v, p.config.Options)
- }
- for i := range p.blockParsers {
- if p.blockParsers[i] != nil {
- p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
- }
- }
-
- p.config.InlineParsers.Sort()
- for _, v := range p.config.InlineParsers {
- p.addInlineParser(v, p.config.Options)
- }
- p.config.ParagraphTransformers.Sort()
- for _, v := range p.config.ParagraphTransformers {
- p.addParagraphTransformer(v, p.config.Options)
- }
- p.config.ASTTransformers.Sort()
- for _, v := range p.config.ASTTransformers {
- p.addASTTransformer(v, p.config.Options)
- }
- p.config = nil
- })
- c := &ParseConfig{}
- for _, opt := range opts {
- opt(c)
- }
- if c.Context == nil {
- c.Context = NewContext()
- }
- pc := c.Context
- root := ast.NewDocument()
- p.parseBlocks(root, reader, pc)
-
- blockReader := text.NewBlockReader(reader.Source(), nil)
- p.walkBlock(root, func(node ast.Node) {
- p.parseBlock(blockReader, node, pc)
- })
- for _, at := range p.astTransformers {
- at.Transform(root, reader, pc)
- }
- // root.Dump(reader.Source(), 0)
- return root
- }
-
- func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
- for _, pt := range p.paragraphTransformers {
- pt.Transform(node, reader, pc)
- if node.Parent() == nil {
- return true
- }
- }
- return false
- }
-
- func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
- blocks := pc.OpenedBlocks()
- for i := from; i >= to; i-- {
- node := blocks[i].Node
- blocks[i].Parser.Close(blocks[i].Node, reader, pc)
- paragraph, ok := node.(*ast.Paragraph)
- if ok && node.Parent() != nil {
- p.transformParagraph(paragraph, reader, pc)
- }
- }
- if from == len(blocks)-1 {
- blocks = blocks[0:to]
- } else {
- blocks = append(blocks[0:to], blocks[from+1:]...)
- }
- pc.SetOpenedBlocks(blocks)
- }
-
// blockOpenResult is the outcome reported by openBlocks.
type blockOpenResult int

const (
	// paragraphContinuation: no block was opened, but the last opened
	// paragraph continued on the current line.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened: at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened: nothing was opened and no paragraph continued.
	noBlocksOpened
)
-
// openBlocks tries to open new blocks under parent for the current line.
//
// Candidate parsers are selected by the byte at the first non-space
// position of the line, falling back to the trigger-less parsers. Every
// node a parser opens is appended to parent and pushed onto the context's
// opened-blocks list; when the parser reports HasChildren the search is
// restarted with the new node as parent.
//
// It returns newBlocksOpened if at least one block was opened,
// paragraphContinuation if nothing was opened but the last opened block is
// a paragraph whose parser accepted the line, and noBlocksOpened otherwise.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable is true while the last opened block is a paragraph that
	// the current line may continue.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	// bps is declared ahead of the `goto continuable` statements because a
	// goto must not jump over a variable declaration in the same block.
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	if w >= len(line) {
		// The line is treated as blank: publish -1 for offset and indent.
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Prefer the parsers registered for the first non-space byte; use the
	// trigger-less parsers when there are none.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph is open and nothing has been opened on this
		// line yet, only parsers that can interrupt a paragraph may run.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previously last block was detached from the tree while
			// the new block was being opened: close its bookkeeping entry.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: let an open paragraph try to continue instead.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
-
// lineStat records, for one line and one nesting level of opened blocks,
// whether that line was blank.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether line lineNum counts as blank at the given
// nesting level, judging by the recorded stats.
//
// The stats are scanned backwards, starting level entries before the last
// one. An entry for lineNum decides the result if it is blank at a lower
// level (true) or if it is at exactly this level (its own flag). The scan
// gives up with false at the first entry that belongs to an earlier line.
// When there is nothing to scan at all the result is true; when the scan
// finishes without a decision it is false.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	start := len(stats) - 1 - level
	if start < 0 {
		return true
	}
	for idx := start; idx >= 0; idx-- {
		st := stats[idx]
		if st.lineNum == lineNum {
			switch {
			case st.level < level && st.isBlank:
				return true
			case st.level == level:
				return st.isBlank
			}
		}
		if st.lineNum < lineNum {
			return false
		}
	}
	return false
}
-
// parseBlocks builds the block structure of the document under parent.
//
// The outer loop handles one run of lines at a time: it skips blank lines,
// opens the first blocks of the run and then enters the inner loop. The
// inner loop feeds one line at a time to the blocks that are still open,
// outermost first, until none is left open or the input ends.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines collects, per line and nesting level, whether the line was
	// blank; isBlankLine consults it to tell whether a blank line precedes
	// a newly opened block.
	blankLines := make([]lineStat, 0, 128)
	isBlank := false
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: start over and record the line
			// before the current one as blank for every open level.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// No line left: close everything and stop.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// New blocks are opened under the block one level up, or
				// under the root when the outermost block is concerned.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
-
- func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
- for c := block.FirstChild(); c != nil; c = c.NextSibling() {
- p.walkBlock(c, cb)
- }
- cb(block)
- }
-
// parseBlock parses the inline content of the block node parent.
//
// Raw blocks are skipped. For every other block the reader is reset to the
// block's lines and each line is scanned byte by byte. At punctuation that
// is not escaped, at spaces and at the first byte of a line the
// InlineParsers registered for that byte are tried; text they do not
// consume is appended to parent as text segments. When all lines are done
// the delimiters are processed and the registered CloseBlockers are called.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true when the previous byte was an unescaped backslash.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		hardlineBreak := false
		softLinebreak := line[lineLength-1] == '\n'
		// A trailing backslash or two trailing spaces before the line
		// ending make a hard line break; lineLength is shortened so those
		// bytes and the line ending are not scanned below.
		if lineLength >= 2 && line[lineLength-2] == '\\' && softLinebreak { // ends with \\n
			lineLength -= 2
			hardlineBreak = true
		} else if lineLength >= 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && softLinebreak { // ends with \\r\n
			lineLength -= 3
			hardlineBreak = true
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && softLinebreak { // ends with [space][space]\n
			lineLength -= 3
			hardlineBreak = true
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && softLinebreak { // ends with [space][space]\r\n
			lineLength -= 4
			hardlineBreak = true
		}

		l, startPosition := block.Position()
		// n counts the bytes scanned since the reader was last advanced.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c)
			isPunct := util.IsPunct(c)
			if (isPunct && !escaped) || isSpace || i == 0 {
				// Spaces and a non-punctuation byte at the head of the line
				// are all looked up under the ' ' trigger.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Flush the plain text scanned so far.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						// The parser declined: rewind before trying the next.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						parent.AppendChild(parent, inlineNode)
						goto retry
					}
				}
			}
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		// The reader is on a different line than the one this pass started
		// on, so there is no trailing text of that line to emit here.
		if l != currentL {
			continue
		}
		diff := startPosition.Between(currentPosition)
		stop := diff.Stop
		rest := diff.WithStop(stop)
		text := ast.NewTextSegment(rest.TrimRightSpace(source))
		text.SetSoftLineBreak(softLinebreak)
		text.SetHardLineBreak(hardlineBreak)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}
}
|