using System;
using Projbook.Extension.Exception;
using Projbook.Extension.Spi;
using System.IO.Abstractions;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;

namespace Projbook.Extension.XmlExtractor
{
    /// <summary>
    /// Snippet extractor for xml files: loads the file content and extracts the nodes
    /// selected by an XPath member pattern.
    /// </summary>
    /// <remarks>
    /// The parsed document and its namespace manager are cached in instance fields after the
    /// first successful load, so later calls on the same instance reuse them regardless of the
    /// <c>fileSystemInfo</c> argument they receive.
    /// </remarks>
    [Syntax(name: "xml")]
    public class XmlSnippetExtractor : DefaultSnippetExtractor
    {
        /// <summary>
        /// The regex extracting the prefixed namespace declarations (<c>xmlns:prefix="uri"</c>) of the document.
        /// </summary>
        private static readonly Regex NamespaceDeclarationRegex = new Regex(@"xmlns:([^=]+)=""([^""]*)""", RegexOptions.Compiled);

        /// <summary>
        /// The lazy loaded xml document. Only assigned once the file has been parsed successfully.
        /// </summary>
        private XmlDocument xmlDocument;

        /// <summary>
        /// The lazy loaded namespace manager. Assigned together with <see cref="xmlDocument"/>.
        /// </summary>
        private XmlNamespaceManager xmlNamespaceManager;

        /// <summary>
        /// Extracts a snippet from a given rule pattern.
        /// </summary>
        /// <param name="fileSystemInfo">The file system info.</param>
        /// <param name="memberPattern">The member pattern to extract, evaluated as an XPath query against the document.</param>
        /// <returns>The extracted snippet.</returns>
        /// <exception cref="SnippetExtractionException">
        /// Thrown when the file cannot be parsed as xml, when the XPath query cannot be executed,
        /// or when the query selects no node.
        /// </exception>
        public override Extension.Model.Snippet Extract(FileSystemInfoBase fileSystemInfo, string memberPattern)
        {
            // Return the entire code if no member is specified
            if (string.IsNullOrWhiteSpace(memberPattern))
            {
                return base.Extract(fileSystemInfo, memberPattern);
            }

            // Load the xml document for xpath execution
            if (null == this.xmlDocument)
            {
                this.LoadDocument(fileSystemInfo);
            }

            // Execute Xpath query
            XmlNodeList xmlNodeList;
            try
            {
                xmlNodeList = this.xmlDocument.SelectNodes(memberPattern, this.xmlNamespaceManager);
            }
            catch
            {
                // Any failure while evaluating the query is reported as an invalid rule
                throw new SnippetExtractionException("Invalid extraction rule", memberPattern);
            }

            // Ensure we found a result
            if (xmlNodeList.Count <= 0)
            {
                throw new SnippetExtractionException("Cannot find member", memberPattern);
            }

            // Build a snippet for extracted nodes
            return this.BuildSnippet(xmlNodeList);
        }

        /// <summary>
        /// Loads and parses the xml file, then publishes the document and its namespace manager
        /// to the instance fields.
        /// </summary>
        /// <param name="fileSystemInfo">The file system info of the file to load.</param>
        /// <exception cref="SnippetExtractionException">Thrown when the content cannot be loaded as an xml document.</exception>
        private void LoadDocument(FileSystemInfoBase fileSystemInfo)
        {
            // Load file content
            string sourceCode = base.LoadFile(this.ConvertToFile(fileSystemInfo));

            // Remove the default namespace declaration to avoid having to define and use a prefix for it.
            // This is not strictly correct from an xml point of view but it is closest to most needs.
            sourceCode = Regex.Replace(sourceCode, @"xmlns\s*=\s*""[^""]*""", string.Empty);

            try
            {
                // Build the document and the namespace manager in locals first: if anything below
                // throws, the fields stay null and the next call retries the load instead of
                // querying a half-initialized (empty) document.
                XmlDocument document = new XmlDocument();
                document.LoadXml(sourceCode);
                XmlNamespaceManager namespaceManager = new XmlNamespaceManager(document.NameTable);

                // Match namespace declarations for filling the namespace manager
                for (Match match = NamespaceDeclarationRegex.Match(sourceCode); match.Success; match = match.NextMatch())
                {
                    // Collect prefix and namespace value
                    string prefix = match.Groups[1].Value.Trim();
                    string ns = match.Groups[2].Value.Trim();

                    // Add namespace declaration to the namespace manager
                    namespaceManager.AddNamespace(prefix, ns);
                }

                // Everything succeeded: publish the fully initialized state
                this.xmlDocument = document;
                this.xmlNamespaceManager = namespaceManager;
            }

            // Throw an exception if the file is not loadable as xml document
            catch (System.Exception exception)
            {
                throw new SnippetExtractionException("Cannot parse xml file", exception.Message);
            }
        }

        /// <summary>
        /// Builds a snippet from an xml node list.
        /// </summary>
        /// <param name="xmlNodeList">The xml node list.</param>
        /// <returns>The built snippet.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="xmlNodeList"/> is null.</exception>
        private Extension.Model.Snippet BuildSnippet(XmlNodeList xmlNodeList)
        {
            // Data validation
            if (xmlNodeList == null)
            {
                throw new ArgumentNullException(nameof(xmlNodeList));
            }

            // The writer settings are the same for every node, so build them once
            XmlWriterSettings settings = new XmlWriterSettings();
            settings.Indent = true;
            settings.OmitXmlDeclaration = true;
            settings.NewLineOnAttributes = true;

            // Extract code from each node
            StringBuilder stringBuilder = new StringBuilder();
            for (int i = 0; i < xmlNodeList.Count; ++i)
            {
                // Get the current node
                XmlNode node = xmlNodeList.Item(i);

                // Write line returns between snippets (not before the first one)
                if (i > 0)
                {
                    stringBuilder.AppendLine();
                    stringBuilder.AppendLine();
                }

                // Write the snippet; disposing the writer flushes it into the string builder
                using (XmlWriter xmlWriter = XmlWriter.Create(stringBuilder, settings))
                {
                    node.WriteTo(xmlWriter);
                }
            }

            // Remove all generated namespace declarations.
            // This produces output lacking namespace declarations, but that is what is relevant for an xml document extraction.
            string output = stringBuilder.ToString();
            output = Regex.Replace(output, @" ?xmlns\s*(:[^=]+)?\s*=\s*""[^""]*""", string.Empty);

            // Create the snippet from the extracted code
            return new Model.PlainTextSnippet(output);
        }
    }
}