You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

XmlSnippetExtractor.cs 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. using System;
  2. using Projbook.Extension.Exception;
  3. using Projbook.Extension.Spi;
  4. using System.IO.Abstractions;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. using System.Xml;
  8. namespace Projbook.Extension.XmlExtractor
  9. {
  /// <summary>
  /// Snippet extractor for xml files: loads the file content, parses it as an xml document
  /// and extracts the nodes selected by an XPath member pattern.
  /// </summary>
  /// <remarks>
  /// The parsed document and its namespace manager are cached on the instance after the first
  /// successful load; later calls reuse them without looking at the file argument again.
  /// </remarks>
  [Syntax(name: "xml")]
  public class XmlSnippetExtractor : DefaultSnippetExtractor
  {
      /// <summary>
      /// The regex extracting the prefixed namespace declarations (xmlns:prefix="uri") of the document.
      /// Shared by all instances so the pattern is compiled only once.
      /// </summary>
      private static readonly Regex NamespaceDeclarationRegex = new Regex(@"xmlns:([^=]+)=""([^""]*)""", RegexOptions.Compiled);

      /// <summary>
      /// The lazy loaded xml document. Stays null until a document has been parsed successfully.
      /// </summary>
      private XmlDocument xmlDocument;

      /// <summary>
      /// The lazy loaded namespace manager. Assigned together with <see cref="xmlDocument"/>.
      /// </summary>
      private XmlNamespaceManager xmlNamespaceManager;

      /// <summary>
      /// Extracts a snippet from a given rule pattern.
      /// </summary>
      /// <param name="fileSystemInfo">The file system info.</param>
      /// <param name="memberPattern">The member pattern to extract, evaluated as an XPath query.</param>
      /// <returns>The extracted snippet.</returns>
      /// <exception cref="SnippetExtractionException">
      /// Thrown when the file cannot be parsed as xml, when the pattern is not a valid query
      /// or when the query selects no node.
      /// </exception>
      public override Extension.Model.Snippet Extract(FileSystemInfoBase fileSystemInfo, string memberPattern)
      {
          // Return the entire code if no member is specified
          if (string.IsNullOrWhiteSpace(memberPattern))
          {
              return base.Extract(fileSystemInfo, memberPattern);
          }

          // Load the xml document for xpath execution
          if (null == this.xmlDocument)
          {
              this.LoadDocument(fileSystemInfo);
          }

          // Execute Xpath query
          XmlNodeList xmlNodeList;
          try
          {
              xmlNodeList = this.xmlDocument.SelectNodes(memberPattern, this.xmlNamespaceManager);
          }
          catch
          {
              throw new SnippetExtractionException("Invalid extraction rule", memberPattern);
          }

          // Ensure we found a result
          if (xmlNodeList.Count <= 0)
          {
              throw new SnippetExtractionException("Cannot find member", memberPattern);
          }

          // Build a snippet for extracted nodes
          return this.BuildSnippet(xmlNodeList);
      }

      /// <summary>
      /// Loads and parses the file, then fills the cached document and namespace manager.
      /// </summary>
      /// <param name="fileSystemInfo">The file system info.</param>
      /// <exception cref="SnippetExtractionException">Thrown when the content cannot be parsed as xml.</exception>
      private void LoadDocument(FileSystemInfoBase fileSystemInfo)
      {
          // Load file content
          string sourceCode = base.LoadFile(this.ConvertToFile(fileSystemInfo));

          // Remove the default namespace to avoid having to define and use a prefix for it
          // This is not strictly correct from an xml point of view but it's closest to most needs
          sourceCode = Regex.Replace(sourceCode, @"xmlns\s*=\s*""[^""]*""", string.Empty);

          // Work on locals so that a parsing failure never leaves a half initialized state in the fields:
          // otherwise the next call would skip loading and query an empty document
          XmlDocument document = new XmlDocument();
          XmlNamespaceManager namespaceManager;
          try
          {
              // Initialize the document and the namespace manager
              document.LoadXml(sourceCode);
              namespaceManager = new XmlNamespaceManager(document.NameTable);

              // Match namespace declarations for filling the namespace manager
              for (Match match = NamespaceDeclarationRegex.Match(sourceCode); match.Success; match = match.NextMatch())
              {
                  // Collect prefix and namespace value
                  string prefix = match.Groups[1].Value.Trim();
                  string ns = match.Groups[2].Value.Trim();

                  // Add namespace declaration to the namespace manager
                  namespaceManager.AddNamespace(prefix, ns);
              }
          }

          // Throw an exception if the file is not loadable as xml document
          catch (System.Exception exception)
          {
              throw new SnippetExtractionException("Cannot parse xml file", exception.Message);
          }

          // Publish the state only once everything succeeded
          this.xmlDocument = document;
          this.xmlNamespaceManager = namespaceManager;
      }

      /// <summary>
      /// Builds a snippet from xml nodes.
      /// </summary>
      /// <param name="xmlNodeList">The xml node list.</param>
      /// <returns>The built snippet, containing every node separated by an empty line.</returns>
      /// <exception cref="ArgumentNullException">Thrown when <paramref name="xmlNodeList"/> is null.</exception>
      private Extension.Model.Snippet BuildSnippet(XmlNodeList xmlNodeList)
      {
          // Data validation
          if (xmlNodeList == null)
          {
              throw new ArgumentNullException(nameof(xmlNodeList));
          }

          // The writer settings are the same for every node: create them once
          XmlWriterSettings settings = new XmlWriterSettings();
          settings.Indent = true;
          settings.OmitXmlDeclaration = true;
          settings.NewLineOnAttributes = true;

          // Extract code from each snippet
          StringBuilder stringBuilder = new StringBuilder();
          for (int i = 0; i < xmlNodeList.Count; ++i)
          {
              // Write line returns between each snippet
              if (i > 0)
              {
                  stringBuilder.AppendLine();
                  stringBuilder.AppendLine();
              }

              // Write the current node, disposing the writer flushes it into the string builder
              XmlNode node = xmlNodeList.Item(i);
              using (XmlWriter xmlWriter = XmlWriter.Create(stringBuilder, settings))
              {
                  node.WriteTo(xmlWriter);
              }
          }

          // Remove all generated namespace declarations
          // This produces an output lacking namespace declarations but it's what is relevant for an xml document extraction
          string output = stringBuilder.ToString();
          output = Regex.Replace(output, @" ?xmlns\s*(:[^=]+)?\s*=\s*""[^""]*""", string.Empty);

          // Create the snippet from the extracted code
          return new Model.PlainTextSnippet(output);
      }
  }
  138. }