You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mdstripper.go 3.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package mdstripper
  5. import (
  6. "bytes"
  7. "io"
  8. "github.com/russross/blackfriday/v2"
  9. )
  // MarkdownStripper is the renderer handed to blackfriday by StripMarkdownBytes.
  // It provides the RenderNode, RenderHeader and RenderFooter methods and keeps
  // the state needed while walking a parsed document.
  type MarkdownStripper struct {
  	// links holds the link destinations appended by processLink, in order.
  	links []string
  	// coallesce stores the flag passed to the most recent processString
  	// call; links and all non-text nodes reset it to false.
  	// empty is cleared by processString once text has been written; while
  	// it is set, doubleSpace emits no separator.
  	coallesce, empty bool
  }
  16. const (
  17. blackfridayExtensions = 0 |
  18. blackfriday.NoIntraEmphasis |
  19. blackfriday.Tables |
  20. blackfriday.FencedCode |
  21. blackfriday.Strikethrough |
  22. blackfriday.NoEmptyLineBeforeBlock |
  23. blackfriday.DefinitionLists |
  24. blackfriday.Footnotes |
  25. blackfriday.HeadingIDs |
  26. blackfriday.AutoHeadingIDs |
  27. // Not included in modules/markup/markdown/markdown.go;
  28. // required here to process inline links
  29. blackfriday.Autolink
  30. )
  31. // StripMarkdown parses markdown content by removing all markup and code blocks
  32. // in order to extract links and other references
  33. func StripMarkdown(rawBytes []byte) (string, []string) {
  34. buf, links := StripMarkdownBytes(rawBytes)
  35. return string(buf), links
  36. }
  37. // StripMarkdownBytes parses markdown content by removing all markup and code blocks
  38. // in order to extract links and other references
  39. func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
  40. stripper := &MarkdownStripper{
  41. links: make([]string, 0, 10),
  42. empty: true,
  43. }
  44. parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions))
  45. ast := parser.Parse(rawBytes)
  46. var buf bytes.Buffer
  47. stripper.RenderHeader(&buf, ast)
  48. ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
  49. return stripper.RenderNode(&buf, node, entering)
  50. })
  51. stripper.RenderFooter(&buf, ast)
  52. return buf.Bytes(), stripper.GetLinks()
  53. }
  54. // RenderNode is the main rendering method. It will be called once for
  55. // every leaf node and twice for every non-leaf node (first with
  56. // entering=true, then with entering=false). The method should write its
  57. // rendition of the node to the supplied writer w.
  58. func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
  59. if !entering {
  60. return blackfriday.GoToNext
  61. }
  62. switch node.Type {
  63. case blackfriday.Text:
  64. r.processString(w, node.Literal, node.Parent == nil)
  65. return blackfriday.GoToNext
  66. case blackfriday.Link:
  67. r.processLink(w, node.LinkData.Destination)
  68. r.coallesce = false
  69. return blackfriday.SkipChildren
  70. }
  71. r.coallesce = false
  72. return blackfriday.GoToNext
  73. }
  74. // RenderHeader is a method that allows the renderer to produce some
  75. // content preceding the main body of the output document.
  76. func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) {
  77. }
  78. // RenderFooter is a symmetric counterpart of RenderHeader.
  79. func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) {
  80. }
  81. func (r *MarkdownStripper) doubleSpace(w io.Writer) {
  82. if !r.empty {
  83. _, _ = w.Write([]byte{'\n'})
  84. }
  85. }
  86. func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) {
  87. // Always break-up words
  88. if !coallesce || !r.coallesce {
  89. r.doubleSpace(w)
  90. }
  91. _, _ = w.Write(text)
  92. r.coallesce = coallesce
  93. r.empty = false
  94. }
  95. func (r *MarkdownStripper) processLink(w io.Writer, link []byte) {
  96. // Links are processed out of band
  97. r.links = append(r.links, string(link))
  98. r.coallesce = false
  99. }
  100. // GetLinks returns the list of link data collected while parsing
  101. func (r *MarkdownStripper) GetLinks() []string {
  102. return r.links
  103. }