From 405f805ff3f3313fbff9c99275194ff8bbff0d5a Mon Sep 17 00:00:00 2001 From: Nevisinn Date: Wed, 5 Nov 2025 13:12:17 +0500 Subject: [PATCH 1/4] =?UTF-8?q?=D0=9F=D0=BE=D1=81=D1=82=D1=80=D0=BE=D0=B8?= =?UTF-8?q?=D0=BB=20=D0=BF=D1=80=D0=B5=D0=B4=D0=B2=D0=B0=D1=80=D0=B8=D1=82?= =?UTF-8?q?=D0=B5=D0=BB=D1=8C=D0=BD=D1=83=D1=8E=20=D0=B0=D1=80=D1=85=D0=B8?= =?UTF-8?q?=D1=82=D0=B5=D0=BA=D1=82=D1=83=D1=80=D1=83=20=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B5=D0=BA=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Markdown.csproj | 10 ++++++++ cs/Markdown/MarkdownDocument.cs | 25 +++++++++++++++++++ cs/Markdown/Md.cs | 19 ++++++++++++++ .../Nodes/Interfaces/InternalMarkdownNode.cs | 17 +++++++++++++ .../Nodes/Interfaces/LeafMarkdownNode.cs | 13 ++++++++++ cs/Markdown/Nodes/Interfaces/MarkdownNode.cs | 19 ++++++++++++++ cs/Markdown/Nodes/Internal/BoldNode.cs | 15 +++++++++++ cs/Markdown/Nodes/Internal/DocumentNode.cs | 14 +++++++++++ cs/Markdown/Nodes/Internal/HeaderNode.cs | 15 +++++++++++ cs/Markdown/Nodes/Internal/ItalicNode.cs | 15 +++++++++++ cs/Markdown/Nodes/Leaf/ImageNode.cs | 15 +++++++++++ cs/Markdown/Nodes/Leaf/TextNode.cs | 15 +++++++++++ cs/Markdown/Parsing/Interfaces/IParser.cs | 9 +++++++ cs/Markdown/Parsing/MarkdownParser.cs | 9 +++++++ cs/Markdown/Parsing/ParserSelector.cs | 11 ++++++++ cs/Markdown/Parsing/TagParsers/BoldParser.cs | 17 +++++++++++++ .../Parsing/TagParsers/HeaderParser.cs | 17 +++++++++++++ cs/Markdown/Parsing/TagParsers/ImageParser.cs | 17 +++++++++++++ .../Parsing/TagParsers/ItalicParser.cs | 17 +++++++++++++ cs/Markdown/Parsing/TagParsers/TextParser.cs | 17 +++++++++++++ cs/Markdown/Program.cs | 2 ++ 21 files changed, 308 insertions(+) create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/MarkdownDocument.cs create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs create mode 100644 
cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs create mode 100644 cs/Markdown/Nodes/Interfaces/MarkdownNode.cs create mode 100644 cs/Markdown/Nodes/Internal/BoldNode.cs create mode 100644 cs/Markdown/Nodes/Internal/DocumentNode.cs create mode 100644 cs/Markdown/Nodes/Internal/HeaderNode.cs create mode 100644 cs/Markdown/Nodes/Internal/ItalicNode.cs create mode 100644 cs/Markdown/Nodes/Leaf/ImageNode.cs create mode 100644 cs/Markdown/Nodes/Leaf/TextNode.cs create mode 100644 cs/Markdown/Parsing/Interfaces/IParser.cs create mode 100644 cs/Markdown/Parsing/MarkdownParser.cs create mode 100644 cs/Markdown/Parsing/ParserSelector.cs create mode 100644 cs/Markdown/Parsing/TagParsers/BoldParser.cs create mode 100644 cs/Markdown/Parsing/TagParsers/HeaderParser.cs create mode 100644 cs/Markdown/Parsing/TagParsers/ImageParser.cs create mode 100644 cs/Markdown/Parsing/TagParsers/ItalicParser.cs create mode 100644 cs/Markdown/Parsing/TagParsers/TextParser.cs create mode 100644 cs/Markdown/Program.cs diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..2f4fc7765 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,10 @@ + + + + Exe + net8.0 + enable + enable + + + diff --git a/cs/Markdown/MarkdownDocument.cs b/cs/Markdown/MarkdownDocument.cs new file mode 100644 index 000000000..f57cf0cdd --- /dev/null +++ b/cs/Markdown/MarkdownDocument.cs @@ -0,0 +1,25 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; + +namespace Markdown; + +public class MarkdownDocument +{ + private DocumentNode root; + private MarkdownNode currentNode; + + public MarkdownDocument() + { + root = new DocumentNode(null, ""); + } + + public void AddNode(MarkdownNode node) + { + + } + + public string ToHtml() + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..419ad3ae2 --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 
+1,19 @@ +using Markdown.Parsing; + +namespace Markdown; + +public class Md +{ + private MarkdownParser markdownParser; + + public Md(MarkdownParser markdownParser) + { + this.markdownParser = markdownParser; + } + + public string Render(string markdownText) + { + var markdownDocument = markdownParser.Parse(markdownText); + return markdownDocument.ToHtml(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs new file mode 100644 index 000000000..b09b0ade6 --- /dev/null +++ b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs @@ -0,0 +1,17 @@ +namespace Markdown.Nodes.Interfaces; + +public abstract class InternalMarkdownNode : MarkdownNode +{ + protected readonly List children = []; + public override void Add(MarkdownNode node) + { + + } + + public override List GetChildren() + => children; + + protected InternalMarkdownNode(MarkdownNode? parent, string value) : base(parent, value) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs new file mode 100644 index 000000000..0b4b0d927 --- /dev/null +++ b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs @@ -0,0 +1,13 @@ +namespace Markdown.Nodes.Interfaces; + +public abstract class LeafMarkdownNode : MarkdownNode +{ + protected LeafMarkdownNode(MarkdownNode? parent, string value) : base(parent, value) + { + } + + public override void Add(MarkdownNode node) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs new file mode 100644 index 000000000..9520ec172 --- /dev/null +++ b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs @@ -0,0 +1,19 @@ +namespace Markdown.Nodes.Interfaces; + +public abstract class MarkdownNode +{ + public MarkdownNode? 
Parent; + protected string value; + + protected MarkdownNode(MarkdownNode? parent, string value) + { + Parent = parent; + this.value = value; + } + + public virtual void Add(MarkdownNode node) {} + public virtual List GetChildren() => []; + public abstract string ToHtml(); + + +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/BoldNode.cs b/cs/Markdown/Nodes/Internal/BoldNode.cs new file mode 100644 index 000000000..8dbda9044 --- /dev/null +++ b/cs/Markdown/Nodes/Internal/BoldNode.cs @@ -0,0 +1,15 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class BoldNode : InternalMarkdownNode +{ + public override string ToHtml() + { + throw new NotImplementedException(); + } + + public BoldNode(MarkdownNode? parent, string value) : base(parent, value) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/DocumentNode.cs b/cs/Markdown/Nodes/Internal/DocumentNode.cs new file mode 100644 index 000000000..58dfd5909 --- /dev/null +++ b/cs/Markdown/Nodes/Internal/DocumentNode.cs @@ -0,0 +1,14 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class DocumentNode : InternalMarkdownNode +{ + public DocumentNode(MarkdownNode? parent, string value) : base(parent, value) + { + } + + public override string ToHtml() + => ""; + +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/HeaderNode.cs b/cs/Markdown/Nodes/Internal/HeaderNode.cs new file mode 100644 index 000000000..090ec4460 --- /dev/null +++ b/cs/Markdown/Nodes/Internal/HeaderNode.cs @@ -0,0 +1,15 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class HeaderNode : InternalMarkdownNode +{ + public override string ToHtml() + { + throw new NotImplementedException(); + } + + public HeaderNode(MarkdownNode? 
parent, string value) : base(parent, value) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/ItalicNode.cs b/cs/Markdown/Nodes/Internal/ItalicNode.cs new file mode 100644 index 000000000..a0e66dd3d --- /dev/null +++ b/cs/Markdown/Nodes/Internal/ItalicNode.cs @@ -0,0 +1,15 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class ItalicNode : InternalMarkdownNode +{ + public override string ToHtml() + { + throw new NotImplementedException(); + } + + public ItalicNode(MarkdownNode? parent, string value) : base(parent, value) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Leaf/ImageNode.cs b/cs/Markdown/Nodes/Leaf/ImageNode.cs new file mode 100644 index 000000000..c9cadd38e --- /dev/null +++ b/cs/Markdown/Nodes/Leaf/ImageNode.cs @@ -0,0 +1,15 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Leaf; + +public class ImageNode : LeafMarkdownNode +{ + public override string ToHtml() + { + throw new NotImplementedException(); + } + + public ImageNode(MarkdownNode? parent, string value) : base(parent, value) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Leaf/TextNode.cs b/cs/Markdown/Nodes/Leaf/TextNode.cs new file mode 100644 index 000000000..5836f22f5 --- /dev/null +++ b/cs/Markdown/Nodes/Leaf/TextNode.cs @@ -0,0 +1,15 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Leaf; + +public class TextNode : LeafMarkdownNode +{ + public override string ToHtml() + { + throw new NotImplementedException(); + } + + public TextNode(MarkdownNode? 
parent, string value) : base(parent, value) + { + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/Interfaces/IParser.cs b/cs/Markdown/Parsing/Interfaces/IParser.cs new file mode 100644 index 000000000..d09be4359 --- /dev/null +++ b/cs/Markdown/Parsing/Interfaces/IParser.cs @@ -0,0 +1,9 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Parsing.Interfaces; + +public interface IParser +{ + bool CanParse(string text); + MarkdownNode Parse(string text); +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/MarkdownParser.cs b/cs/Markdown/Parsing/MarkdownParser.cs new file mode 100644 index 000000000..688b1fa38 --- /dev/null +++ b/cs/Markdown/Parsing/MarkdownParser.cs @@ -0,0 +1,9 @@ +namespace Markdown.Parsing; + +public class MarkdownParser +{ + public MarkdownDocument Parse(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/ParserSelector.cs b/cs/Markdown/Parsing/ParserSelector.cs new file mode 100644 index 000000000..298213dbd --- /dev/null +++ b/cs/Markdown/Parsing/ParserSelector.cs @@ -0,0 +1,11 @@ +using Markdown.Parsing.Interfaces; + +namespace Markdown.Parsing; + +public static class ParserSelector +{ + public static IParser GetParser(char symbol) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/BoldParser.cs b/cs/Markdown/Parsing/TagParsers/BoldParser.cs new file mode 100644 index 000000000..fcd542bd2 --- /dev/null +++ b/cs/Markdown/Parsing/TagParsers/BoldParser.cs @@ -0,0 +1,17 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Parsing.Interfaces; + +namespace Markdown.Parsing.Parsers; + +public class BoldParser : IParser +{ + public bool CanParse(string text) + { + throw new NotImplementedException(); + } + + public MarkdownNode Parse(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git 
a/cs/Markdown/Parsing/TagParsers/HeaderParser.cs b/cs/Markdown/Parsing/TagParsers/HeaderParser.cs new file mode 100644 index 000000000..1482d37e9 --- /dev/null +++ b/cs/Markdown/Parsing/TagParsers/HeaderParser.cs @@ -0,0 +1,17 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Parsing.Interfaces; + +namespace Markdown.Parsing.Parsers; + +public class HeaderParser : IParser +{ + public bool CanParse(string text) + { + throw new NotImplementedException(); + } + + public MarkdownNode Parse(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/ImageParser.cs b/cs/Markdown/Parsing/TagParsers/ImageParser.cs new file mode 100644 index 000000000..5358f2ab0 --- /dev/null +++ b/cs/Markdown/Parsing/TagParsers/ImageParser.cs @@ -0,0 +1,17 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Parsing.Interfaces; + +namespace Markdown.Parsing.Parsers; + +public class ImageParser : IParser +{ + public bool CanParse(string text) + { + throw new NotImplementedException(); + } + + public MarkdownNode Parse(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/ItalicParser.cs b/cs/Markdown/Parsing/TagParsers/ItalicParser.cs new file mode 100644 index 000000000..74727609d --- /dev/null +++ b/cs/Markdown/Parsing/TagParsers/ItalicParser.cs @@ -0,0 +1,17 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Parsing.Interfaces; + +namespace Markdown.Parsing.Parsers; + +public class ItalicParser : IParser +{ + public bool CanParse(string text) + { + throw new NotImplementedException(); + } + + public MarkdownNode Parse(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/TextParser.cs b/cs/Markdown/Parsing/TagParsers/TextParser.cs new file mode 100644 index 000000000..76adf10d4 --- /dev/null +++ b/cs/Markdown/Parsing/TagParsers/TextParser.cs @@ -0,0 
+1,17 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Parsing.Interfaces; + +namespace Markdown.Parsing.Parsers; + +public class TextParser : IParser +{ + public bool CanParse(string text) + { + throw new NotImplementedException(); + } + + public MarkdownNode Parse(string text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..5e3d52a20 --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,2 @@ +using System.Text; + From a1b4f85bfbcdd64df9e31592c304e40452b67c91 Mon Sep 17 00:00:00 2001 From: Nevisinn Date: Fri, 7 Nov 2025 14:54:13 +0500 Subject: [PATCH 2/4] =?UTF-8?q?=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=20=D0=B0=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA=D1=82=D1=83=D1=80?= =?UTF-8?q?=D1=83=20=D0=BF=D1=80=D0=BE=D0=B5=D0=BA=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownDocument.cs | 25 ---------- cs/Markdown/MarkdownParser.cs | 48 +++++++++++++++++++ cs/Markdown/MarkdownTokenizer.cs | 35 ++++++++++++++ cs/Markdown/Md.cs | 18 +++---- .../Nodes/Interfaces/InternalMarkdownNode.cs | 2 +- .../Nodes/Interfaces/LeafMarkdownNode.cs | 2 +- cs/Markdown/Nodes/Interfaces/MarkdownNode.cs | 4 +- cs/Markdown/Nodes/Internal/DocumentNode.cs | 14 ------ .../Nodes/{Leaf => Internal}/ImageNode.cs | 4 +- .../Nodes/Internal/MarkdownDocumentNode.cs | 16 +++++++ cs/Markdown/Parsing/Interfaces/IParser.cs | 9 ---- cs/Markdown/Parsing/MarkdownParser.cs | 9 ---- cs/Markdown/Parsing/ParserSelector.cs | 11 ----- cs/Markdown/Parsing/TagParsers/BoldParser.cs | 17 ------- .../Parsing/TagParsers/HeaderParser.cs | 17 ------- cs/Markdown/Parsing/TagParsers/ImageParser.cs | 17 ------- .../Parsing/TagParsers/ItalicParser.cs | 17 ------- cs/Markdown/Parsing/TagParsers/TextParser.cs | 17 ------- cs/Markdown/Program.cs | 4 +- cs/Markdown/Token.cs | 13 +++++ cs/Markdown/TokenType.cs | 
16 +++++++ 21 files changed, 143 insertions(+), 172 deletions(-) delete mode 100644 cs/Markdown/MarkdownDocument.cs create mode 100644 cs/Markdown/MarkdownParser.cs create mode 100644 cs/Markdown/MarkdownTokenizer.cs delete mode 100644 cs/Markdown/Nodes/Internal/DocumentNode.cs rename cs/Markdown/Nodes/{Leaf => Internal}/ImageNode.cs (73%) create mode 100644 cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs delete mode 100644 cs/Markdown/Parsing/Interfaces/IParser.cs delete mode 100644 cs/Markdown/Parsing/MarkdownParser.cs delete mode 100644 cs/Markdown/Parsing/ParserSelector.cs delete mode 100644 cs/Markdown/Parsing/TagParsers/BoldParser.cs delete mode 100644 cs/Markdown/Parsing/TagParsers/HeaderParser.cs delete mode 100644 cs/Markdown/Parsing/TagParsers/ImageParser.cs delete mode 100644 cs/Markdown/Parsing/TagParsers/ItalicParser.cs delete mode 100644 cs/Markdown/Parsing/TagParsers/TextParser.cs create mode 100644 cs/Markdown/Token.cs create mode 100644 cs/Markdown/TokenType.cs diff --git a/cs/Markdown/MarkdownDocument.cs b/cs/Markdown/MarkdownDocument.cs deleted file mode 100644 index f57cf0cdd..000000000 --- a/cs/Markdown/MarkdownDocument.cs +++ /dev/null @@ -1,25 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Nodes.Internal; - -namespace Markdown; - -public class MarkdownDocument -{ - private DocumentNode root; - private MarkdownNode currentNode; - - public MarkdownDocument() - { - root = new DocumentNode(null, ""); - } - - public void AddNode(MarkdownNode node) - { - - } - - public string ToHtml() - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/MarkdownParser.cs b/cs/Markdown/MarkdownParser.cs new file mode 100644 index 000000000..2604b9f43 --- /dev/null +++ b/cs/Markdown/MarkdownParser.cs @@ -0,0 +1,48 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; + +namespace Markdown; + +public class MarkdownParser +{ + private List tokens; + private int position; + public 
MarkdownParser(List tokens) + { + this.tokens = tokens; + } + public MarkdownDocumentNode ParseTokens() + { + throw new NotImplementedException(); + } + + private MarkdownNode ParseBlock() + { + throw new NotImplementedException(); + } + + private MarkdownNode ParseInline() + { + throw new NotImplementedException(); + } + + private HeaderNode ParseHeader() + { + throw new NotImplementedException(); + } + + private ImageNode ParseImage() + { + throw new NotImplementedException(); + } + + private BoldNode ParseStrong() + { + throw new NotImplementedException(); + } + + private ItalicNode ParseEmphasis() + { + throw new NotImplementedException(); + } +} diff --git a/cs/Markdown/MarkdownTokenizer.cs b/cs/Markdown/MarkdownTokenizer.cs new file mode 100644 index 000000000..0610bc086 --- /dev/null +++ b/cs/Markdown/MarkdownTokenizer.cs @@ -0,0 +1,35 @@ +namespace Markdown; + +public class MarkdownTokenizer +{ + private int position; + private string markdownText; + public MarkdownTokenizer(string markdownText) + { + this.markdownText = markdownText; + } + public List Tokenize() + { + throw new NotImplementedException(); + } + + private Token TokenizeHeader() + { + throw new NotImplementedException(); + } + + private Token TokenizeUnderscore() + { + throw new NotImplementedException(); + } + + private Token TokenizeImage() + { + throw new NotImplementedException(); + } + + private Token TokenizeText() + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index 419ad3ae2..dd161355e 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -1,19 +1,15 @@ -using Markdown.Parsing; - namespace Markdown; public class Md { - private MarkdownParser markdownParser; - - public Md(MarkdownParser markdownParser) - { - this.markdownParser = markdownParser; - } - public string Render(string markdownText) { - var markdownDocument = markdownParser.Parse(markdownText); - return markdownDocument.ToHtml(); + var 
tokenizer = new MarkdownTokenizer(markdownText); + var tokens = tokenizer.Tokenize(); + var parser = new MarkdownParser(tokens); + var markdownDocument = parser.ParseTokens(); + var htmlText = markdownDocument.ToHtml(); + + return htmlText; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs index b09b0ade6..353f4b6b6 100644 --- a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs @@ -3,7 +3,7 @@ namespace Markdown.Nodes.Interfaces; public abstract class InternalMarkdownNode : MarkdownNode { protected readonly List children = []; - public override void Add(MarkdownNode node) + public override void AddChild(MarkdownNode node) { } diff --git a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs index 0b4b0d927..b325f2486 100644 --- a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs @@ -6,7 +6,7 @@ protected LeafMarkdownNode(MarkdownNode? parent, string value) : base(parent, va { } - public override void Add(MarkdownNode node) + public override void AddChild(MarkdownNode node) { throw new NotImplementedException(); } diff --git a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs index 9520ec172..57a31f2af 100644 --- a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs @@ -11,9 +11,7 @@ protected MarkdownNode(MarkdownNode? 
parent, string value) this.value = value; } - public virtual void Add(MarkdownNode node) {} + public virtual void AddChild(MarkdownNode node) {} public virtual List GetChildren() => []; public abstract string ToHtml(); - - } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/DocumentNode.cs b/cs/Markdown/Nodes/Internal/DocumentNode.cs deleted file mode 100644 index 58dfd5909..000000000 --- a/cs/Markdown/Nodes/Internal/DocumentNode.cs +++ /dev/null @@ -1,14 +0,0 @@ -using Markdown.Nodes.Interfaces; - -namespace Markdown.Nodes.Internal; - -public class DocumentNode : InternalMarkdownNode -{ - public DocumentNode(MarkdownNode? parent, string value) : base(parent, value) - { - } - - public override string ToHtml() - => ""; - -} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Leaf/ImageNode.cs b/cs/Markdown/Nodes/Internal/ImageNode.cs similarity index 73% rename from cs/Markdown/Nodes/Leaf/ImageNode.cs rename to cs/Markdown/Nodes/Internal/ImageNode.cs index c9cadd38e..828d3b432 100644 --- a/cs/Markdown/Nodes/Leaf/ImageNode.cs +++ b/cs/Markdown/Nodes/Internal/ImageNode.cs @@ -1,8 +1,8 @@ using Markdown.Nodes.Interfaces; -namespace Markdown.Nodes.Leaf; +namespace Markdown.Nodes.Internal; -public class ImageNode : LeafMarkdownNode +public class ImageNode : InternalMarkdownNode { public override string ToHtml() { diff --git a/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs new file mode 100644 index 000000000..e694a8f39 --- /dev/null +++ b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs @@ -0,0 +1,16 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class MarkdownDocumentNode : InternalMarkdownNode +{ + public MarkdownDocumentNode(MarkdownNode? 
parent, string value) : base(parent, value) + { + } + + public override string ToHtml() + { + throw new NotImplementedException(); + } + +} \ No newline at end of file diff --git a/cs/Markdown/Parsing/Interfaces/IParser.cs b/cs/Markdown/Parsing/Interfaces/IParser.cs deleted file mode 100644 index d09be4359..000000000 --- a/cs/Markdown/Parsing/Interfaces/IParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -using Markdown.Nodes.Interfaces; - -namespace Markdown.Parsing.Interfaces; - -public interface IParser -{ - bool CanParse(string text); - MarkdownNode Parse(string text); -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/MarkdownParser.cs b/cs/Markdown/Parsing/MarkdownParser.cs deleted file mode 100644 index 688b1fa38..000000000 --- a/cs/Markdown/Parsing/MarkdownParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Parsing; - -public class MarkdownParser -{ - public MarkdownDocument Parse(string text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/ParserSelector.cs b/cs/Markdown/Parsing/ParserSelector.cs deleted file mode 100644 index 298213dbd..000000000 --- a/cs/Markdown/Parsing/ParserSelector.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Parsing.Interfaces; - -namespace Markdown.Parsing; - -public static class ParserSelector -{ - public static IParser GetParser(char symbol) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/BoldParser.cs b/cs/Markdown/Parsing/TagParsers/BoldParser.cs deleted file mode 100644 index fcd542bd2..000000000 --- a/cs/Markdown/Parsing/TagParsers/BoldParser.cs +++ /dev/null @@ -1,17 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Parsing.Interfaces; - -namespace Markdown.Parsing.Parsers; - -public class BoldParser : IParser -{ - public bool CanParse(string text) - { - throw new NotImplementedException(); - } - - public MarkdownNode Parse(string text) - { - throw new 
NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/HeaderParser.cs b/cs/Markdown/Parsing/TagParsers/HeaderParser.cs deleted file mode 100644 index 1482d37e9..000000000 --- a/cs/Markdown/Parsing/TagParsers/HeaderParser.cs +++ /dev/null @@ -1,17 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Parsing.Interfaces; - -namespace Markdown.Parsing.Parsers; - -public class HeaderParser : IParser -{ - public bool CanParse(string text) - { - throw new NotImplementedException(); - } - - public MarkdownNode Parse(string text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/ImageParser.cs b/cs/Markdown/Parsing/TagParsers/ImageParser.cs deleted file mode 100644 index 5358f2ab0..000000000 --- a/cs/Markdown/Parsing/TagParsers/ImageParser.cs +++ /dev/null @@ -1,17 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Parsing.Interfaces; - -namespace Markdown.Parsing.Parsers; - -public class ImageParser : IParser -{ - public bool CanParse(string text) - { - throw new NotImplementedException(); - } - - public MarkdownNode Parse(string text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/ItalicParser.cs b/cs/Markdown/Parsing/TagParsers/ItalicParser.cs deleted file mode 100644 index 74727609d..000000000 --- a/cs/Markdown/Parsing/TagParsers/ItalicParser.cs +++ /dev/null @@ -1,17 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Parsing.Interfaces; - -namespace Markdown.Parsing.Parsers; - -public class ItalicParser : IParser -{ - public bool CanParse(string text) - { - throw new NotImplementedException(); - } - - public MarkdownNode Parse(string text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsing/TagParsers/TextParser.cs b/cs/Markdown/Parsing/TagParsers/TextParser.cs deleted file mode 100644 index 
76adf10d4..000000000 --- a/cs/Markdown/Parsing/TagParsers/TextParser.cs +++ /dev/null @@ -1,17 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Parsing.Interfaces; - -namespace Markdown.Parsing.Parsers; - -public class TextParser : IParser -{ - public bool CanParse(string text) - { - throw new NotImplementedException(); - } - - public MarkdownNode Parse(string text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs index 5e3d52a20..e02417453 100644 --- a/cs/Markdown/Program.cs +++ b/cs/Markdown/Program.cs @@ -1,2 +1,4 @@ -using System.Text; +using Markdown; +var md = new Md(); +md.Render(""); \ No newline at end of file diff --git a/cs/Markdown/Token.cs b/cs/Markdown/Token.cs new file mode 100644 index 000000000..7d91e08be --- /dev/null +++ b/cs/Markdown/Token.cs @@ -0,0 +1,13 @@ +namespace Markdown; + +public class Token +{ + public TokenType Type; + public string Value; + + public Token(string value, TokenType type) + { + Value = value; + Type = type; + } +} \ No newline at end of file diff --git a/cs/Markdown/TokenType.cs b/cs/Markdown/TokenType.cs new file mode 100644 index 000000000..212fcaae4 --- /dev/null +++ b/cs/Markdown/TokenType.cs @@ -0,0 +1,16 @@ +namespace Markdown; + +public enum TokenType +{ + Hash, + Underscore, + DoubleUnderscore, + Text, + NewLine, + Exclamation, + LBracket, + RBracket, + LParenthesis, + RParenthesis, + Escape +} \ No newline at end of file From f7c1be6960d92b8d996e8d28993045055897605f Mon Sep 17 00:00:00 2001 From: Nevisinn Date: Wed, 19 Nov 2025 10:40:12 +0500 Subject: [PATCH 3/4] =?UTF-8?q?=D0=94=D0=B5=D0=BC=D0=BE-=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D1=81=D0=B8=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MarkdownSpec.md | 9 +- cs/Markdown/Markdown.csproj | 6 + cs/Markdown/MarkdownParser.cs | 157 ++++++++--- cs/Markdown/MarkdownTokenizer.cs | 103 ++++++- cs/Markdown/Md.cs | 4 +- 
.../Nodes/Interfaces/InternalMarkdownNode.cs | 16 +- .../Nodes/Interfaces/LeafMarkdownNode.cs | 5 - cs/Markdown/Nodes/Interfaces/MarkdownNode.cs | 3 +- cs/Markdown/Nodes/Internal/AltNode.cs | 22 ++ cs/Markdown/Nodes/Internal/BoldNode.cs | 14 +- cs/Markdown/Nodes/Internal/HeaderNode.cs | 15 +- cs/Markdown/Nodes/Internal/ImageNode.cs | 10 +- cs/Markdown/Nodes/Internal/ItalicNode.cs | 13 +- .../Nodes/Internal/MarkdownDocumentNode.cs | 11 +- cs/Markdown/Nodes/Internal/UrlNode.cs | 22 ++ cs/Markdown/Nodes/Leaf/TextNode.cs | 2 +- cs/Markdown/ParseSelector.cs | 72 +++++ cs/Markdown/ParserContext.cs | 20 ++ cs/Markdown/Parsers/EscapeParser.cs | 40 +++ cs/Markdown/Parsers/HeaderParser.cs | 24 ++ cs/Markdown/Parsers/ImageParser.cs | 158 +++++++++++ .../Parsers/Interfaces/ICompletableParse.cs | 6 + .../Parsers/Interfaces/ITokenParser.cs | 6 + cs/Markdown/Parsers/NewLineEofParser.cs | 21 ++ cs/Markdown/Parsers/TextParser.cs | 8 + cs/Markdown/Parsers/UnderscoresParser.cs | 256 ++++++++++++++++++ cs/Markdown/Tests/MdTests.cs | 151 +++++++++++ cs/Markdown/TokenType.cs | 6 +- cs/clean-code.sln | 6 + 29 files changed, 1111 insertions(+), 75 deletions(-) create mode 100644 cs/Markdown/Nodes/Internal/AltNode.cs create mode 100644 cs/Markdown/Nodes/Internal/UrlNode.cs create mode 100644 cs/Markdown/ParseSelector.cs create mode 100644 cs/Markdown/ParserContext.cs create mode 100644 cs/Markdown/Parsers/EscapeParser.cs create mode 100644 cs/Markdown/Parsers/HeaderParser.cs create mode 100644 cs/Markdown/Parsers/ImageParser.cs create mode 100644 cs/Markdown/Parsers/Interfaces/ICompletableParse.cs create mode 100644 cs/Markdown/Parsers/Interfaces/ITokenParser.cs create mode 100644 cs/Markdown/Parsers/NewLineEofParser.cs create mode 100644 cs/Markdown/Parsers/TextParser.cs create mode 100644 cs/Markdown/Parsers/UnderscoresParser.cs create mode 100644 cs/Markdown/Tests/MdTests.cs diff --git a/MarkdownSpec.md b/MarkdownSpec.md index 886e99c95..d464aee6c 100644 --- a/MarkdownSpec.md +++ 
b/MarkdownSpec.md @@ -70,4 +70,11 @@ __Непарные_ символы в рамках одного абзаца н превратится в: -\

Заголовок \с \разными\ символами\\

\ No newline at end of file +\

Заголовок \с \разными\ символами\\

+ +# Image + +Конструкция, начинающаяся с ![](), преобразуется в тег \. +Внутри квадратных скобок [] указывается альтернативный текст (alt), а внутри круглых скобок () — ссылка на изображение (src). +Альтернативный текст и ссылка могут содержать остальные элементы разметки согласно общим правилам. В альтернативном тексте допускается использование символа [ только при наличии соответствующей закрывающей ]. +Аналогично, в URL допускается использование символа ( только при наличии соответствующей закрывающей ). \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 2f4fc7765..1d2c23e4c 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -7,4 +7,10 @@ enable + + + + + + diff --git a/cs/Markdown/MarkdownParser.cs b/cs/Markdown/MarkdownParser.cs index 2604b9f43..67fb8562d 100644 --- a/cs/Markdown/MarkdownParser.cs +++ b/cs/Markdown/MarkdownParser.cs @@ -1,48 +1,143 @@ using Markdown.Nodes.Interfaces; using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; namespace Markdown; public class MarkdownParser { - private List tokens; - private int position; - public MarkdownParser(List tokens) + private readonly ParserContext context; + private readonly ParseSelector selector; + public MarkdownParser(ParserContext context, ParseSelector selector) { - this.tokens = tokens; + this.context = context; + this.selector = selector; } + public MarkdownDocumentNode ParseTokens() - { - throw new NotImplementedException(); - } + { + while (context.Position < context.Tokens.Count) + { + var token = context.Tokens[context.Position]; + var parser = selector.GetParser(token.Type); + parser.Parse(); + context.IncreasePosition(); + } - private MarkdownNode ParseBlock() - { - throw new NotImplementedException(); + var markdownDocumentNode = BuildMarkdownDocument(); + + return markdownDocumentNode; } - private MarkdownNode ParseInline() - { - throw new NotImplementedException(); - } + private MarkdownDocumentNode 
BuildMarkdownDocument() + { + var markdownDocumentNode = new MarkdownDocumentNode(null, ""); + MarkdownNode currentNode = markdownDocumentNode; + var stack = new Stack(); + stack.Push(currentNode); + foreach (var token in context.Tokens) + { + switch (token.Type) + { + case TokenType.Text or TokenType.Escape or TokenType.NewLine or TokenType.Space: + { + if (token.Type is TokenType.NewLine) + { + var peeked = stack.Peek(); + if (peeked is not MarkdownDocumentNode) + { + stack.Pop(); + currentNode = peeked.Parent!; + } + } + + var textNode = new TextNode(currentNode, token.Value); + currentNode.AddChild(textNode); + break; + } + case TokenType.Underscore or TokenType.WordUnderscore: + { + var peeked = stack.Peek(); + if (peeked is ItalicNode) + { + stack.Pop(); + currentNode = peeked.Parent!; + } + else + { + var italicNode = new ItalicNode(currentNode, token.Value); + stack.Push(italicNode); + currentNode.AddChild(italicNode); + currentNode = italicNode; + } - private HeaderNode ParseHeader() - { - throw new NotImplementedException(); - } - - private ImageNode ParseImage() - { - throw new NotImplementedException(); - } - - private BoldNode ParseStrong() - { - throw new NotImplementedException(); - } + break; + } + case TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore: + { + var peeked = stack.Peek(); + if (peeked is BoldNode) + { + stack.Pop(); + currentNode = peeked.Parent!; + } + else + { + var boldNode = new BoldNode(currentNode, token.Value); + stack.Push(boldNode); + currentNode.AddChild(boldNode); + currentNode = boldNode; + } - private ItalicNode ParseEmphasis() - { - throw new NotImplementedException(); + break; + } + case TokenType.Exclamation: + { + var imageNode = new ImageNode(currentNode, token.Value); + stack.Push(imageNode); + currentNode.AddChild(imageNode); + currentNode = imageNode; + break; + } + case TokenType.Hash: + { + var headerNode = new HeaderNode(currentNode, token.Value); + currentNode.AddChild(headerNode); + currentNode = 
headerNode; + stack.Push(headerNode); + break; + } + case TokenType.RParenthesis: + { + var imageNode = stack.Pop(); + currentNode = imageNode.Parent!; + break; + } + case TokenType.RBracket: + { + var altNode = stack.Pop(); + currentNode = altNode.Parent!; + break; + } + case TokenType.LBracket: + { + var altNode = new AltNode(currentNode, token.Value); + currentNode.AddChild(altNode); + stack.Push(altNode); + currentNode = altNode; + break; + } + case TokenType.LParenthesis: + { + var urlNode = new UrlNode(currentNode, token.Value); + currentNode.AddChild(urlNode); + stack.Push(urlNode); + currentNode = urlNode; + break; + } + } + } + + return markdownDocumentNode; } } diff --git a/cs/Markdown/MarkdownTokenizer.cs b/cs/Markdown/MarkdownTokenizer.cs index 0610bc086..9323ffdcf 100644 --- a/cs/Markdown/MarkdownTokenizer.cs +++ b/cs/Markdown/MarkdownTokenizer.cs @@ -1,35 +1,122 @@ +using System.Text; + namespace Markdown; public class MarkdownTokenizer { private int position; - private string markdownText; + private readonly string markdownText; + private char Prev => position - 1 < 0 ? ' ' : markdownText[position - 1]; + private char Next => position + 1 == markdownText.Length ? ' ' : markdownText[position + 1]; public MarkdownTokenizer(string markdownText) { this.markdownText = markdownText; } public List Tokenize() + { + var tokens = new List(); + while (position < markdownText.Length) + { + var symbol = markdownText[position]; + var token = symbol switch + { + '#' => TokenizeHeader(), + '_' => TokenizeUnderscore(), + '!' 
=> TokenizeImage(), + '(' => TokenizeImage(), + ')' => TokenizeImage(), + '[' => TokenizeImage(), + ']' => TokenizeImage(), + '\\' => TokenizeEscape(), + ' ' => TokenizeSpace(), + '\n' => TokenizeNewLine(), + _ => TokenizeText() + }; + + tokens.Add(token); + position++; + } + + var eof = TokenizeEndOfFile(); + tokens.Add(eof); + return tokens; + } + + private Token TokenizeEndOfFile() { - throw new NotImplementedException(); + return new Token("", TokenType.Eof); + } + private Token TokenizeNewLine() + { + return new Token("\n", TokenType.NewLine); } - private Token TokenizeHeader() + private Token TokenizeSpace() { - throw new NotImplementedException(); + return new Token(" ", TokenType.Space); + } + private Token TokenizeEscape() + { + return new Token(@"\", TokenType.Escape); + } + + private Token TokenizeHeader() + { + return new Token("#", TokenType.Hash); } private Token TokenizeUnderscore() { - throw new NotImplementedException(); + var prevSymbol = Prev; + var nextSymbol = Next; + if (nextSymbol != '_') + { + if (prevSymbol != ' ' && prevSymbol != '_' && nextSymbol != ' ') + { + return new Token("_", TokenType.WordUnderscore); + } + return new Token("_", TokenType.Underscore); + + } + + position++; + nextSymbol = Next; + if (prevSymbol != ' ' && nextSymbol != ' ' && nextSymbol != '_') + { + return new Token("__", TokenType.WordDoubleUnderscore); + } + return new Token("__", TokenType.DoubleUnderscore); + } private Token TokenizeImage() - { - throw new NotImplementedException(); + { + var symbol = markdownText[position]; + return symbol switch + { + '!' 
=> new Token("!", TokenType.Exclamation), + '(' => new Token("(", TokenType.LParenthesis), + ')' => new Token(")", TokenType.RParenthesis), + '[' => new Token("[", TokenType.LBracket), + ']' => new Token("]", TokenType.RBracket), + _ => throw new ArgumentOutOfRangeException(nameof(symbol)) + }; } private Token TokenizeText() { - throw new NotImplementedException(); + var start = position; + while (position < markdownText.Length && !IsSpecialSymbol(markdownText[position])) + position++; + + var text = markdownText[start..position]; + position--; + + return new Token(text, TokenType.Text); } + + private bool IsSpecialSymbol(char symbol) + => specialsSymbols.Contains(symbol); + + private readonly HashSet specialsSymbols = ['#', '_', '!', '[', ']', '(', ')', ' ', '\n', '\\']; } \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index dd161355e..0d9fcfc2b 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -6,7 +6,9 @@ public string Render(string markdownText) { var tokenizer = new MarkdownTokenizer(markdownText); var tokens = tokenizer.Tokenize(); - var parser = new MarkdownParser(tokens); + var context = new ParserContext(tokens); + var parserSelector = new ParseSelector(context); + var parser = new MarkdownParser(context, parserSelector); var markdownDocument = parser.ParseTokens(); var htmlText = markdownDocument.ToHtml(); diff --git a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs index 353f4b6b6..5d80c23b5 100644 --- a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs @@ -1,17 +1,17 @@ +using System.Text; + namespace Markdown.Nodes.Interfaces; public abstract class InternalMarkdownNode : MarkdownNode { protected readonly List children = []; - public override void AddChild(MarkdownNode node) - { - - } - - public override List GetChildren() - => children; - + protected InternalMarkdownNode(MarkdownNode? 
parent, string value) : base(parent, value) + { } + + public override void AddChild(MarkdownNode node) { + children.Add(node); } + } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs index b325f2486..079210b7c 100644 --- a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs @@ -5,9 +5,4 @@ public abstract class LeafMarkdownNode : MarkdownNode protected LeafMarkdownNode(MarkdownNode? parent, string value) : base(parent, value) { } - - public override void AddChild(MarkdownNode node) - { - throw new NotImplementedException(); - } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs index 57a31f2af..39b9ad0c0 100644 --- a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs @@ -11,7 +11,6 @@ protected MarkdownNode(MarkdownNode? parent, string value) this.value = value; } - public virtual void AddChild(MarkdownNode node) {} - public virtual List GetChildren() => []; + public virtual void AddChild(MarkdownNode node) { } public abstract string ToHtml(); } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/AltNode.cs b/cs/Markdown/Nodes/Internal/AltNode.cs new file mode 100644 index 000000000..a14f37ad8 --- /dev/null +++ b/cs/Markdown/Nodes/Internal/AltNode.cs @@ -0,0 +1,22 @@ +using System.Text; +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class AltNode : InternalMarkdownNode +{ + public AltNode(MarkdownNode? 
parent, string value) : base(parent, value) + { + } + + public override string ToHtml() + { + var textBuilder = new StringBuilder(); + foreach (var child in children) + { + textBuilder.Append(child.ToHtml()); + } + + return textBuilder.ToString(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/BoldNode.cs b/cs/Markdown/Nodes/Internal/BoldNode.cs index 8dbda9044..da8b10a92 100644 --- a/cs/Markdown/Nodes/Internal/BoldNode.cs +++ b/cs/Markdown/Nodes/Internal/BoldNode.cs @@ -1,15 +1,21 @@ +using System.Text; using Markdown.Nodes.Interfaces; namespace Markdown.Nodes.Internal; public class BoldNode : InternalMarkdownNode { - public override string ToHtml() + public BoldNode(MarkdownNode? parent, string value) : base(parent, value) { - throw new NotImplementedException(); } - - public BoldNode(MarkdownNode? parent, string value) : base(parent, value) + public override string ToHtml() { + var textBuilder = new StringBuilder(); + foreach (var child in children) + { + textBuilder.Append(child.ToHtml()); + } + + return $"{textBuilder}"; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/HeaderNode.cs b/cs/Markdown/Nodes/Internal/HeaderNode.cs index 090ec4460..bb0116215 100644 --- a/cs/Markdown/Nodes/Internal/HeaderNode.cs +++ b/cs/Markdown/Nodes/Internal/HeaderNode.cs @@ -1,15 +1,22 @@ +using System.Text; using Markdown.Nodes.Interfaces; namespace Markdown.Nodes.Internal; public class HeaderNode : InternalMarkdownNode { - public override string ToHtml() + public HeaderNode(MarkdownNode? parent, string value) : base(parent, value) { - throw new NotImplementedException(); } - - public HeaderNode(MarkdownNode? parent, string value) : base(parent, value) + + public override string ToHtml() { + var textBuilder = new StringBuilder(); + foreach (var child in children) + { + textBuilder.Append(child.ToHtml()); + } + + return $"

{textBuilder}

"; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/ImageNode.cs b/cs/Markdown/Nodes/Internal/ImageNode.cs index 828d3b432..003132dca 100644 --- a/cs/Markdown/Nodes/Internal/ImageNode.cs +++ b/cs/Markdown/Nodes/Internal/ImageNode.cs @@ -4,12 +4,14 @@ namespace Markdown.Nodes.Internal; public class ImageNode : InternalMarkdownNode { - public override string ToHtml() + public ImageNode(MarkdownNode? parent, string value) : base(parent, value) { - throw new NotImplementedException(); } - public ImageNode(MarkdownNode? parent, string value) : base(parent, value) + public override string ToHtml() { - } + var alt = children[0].ToHtml(); + var url = children[1].ToHtml(); + return $"\"{alt}\""; + } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/ItalicNode.cs b/cs/Markdown/Nodes/Internal/ItalicNode.cs index a0e66dd3d..292b9efe5 100644 --- a/cs/Markdown/Nodes/Internal/ItalicNode.cs +++ b/cs/Markdown/Nodes/Internal/ItalicNode.cs @@ -1,15 +1,22 @@ +using System.Text; using Markdown.Nodes.Interfaces; namespace Markdown.Nodes.Internal; public class ItalicNode : InternalMarkdownNode { - public override string ToHtml() + public ItalicNode(MarkdownNode? parent, string value) : base(parent, value) { - throw new NotImplementedException(); } - public ItalicNode(MarkdownNode? 
parent, string value) : base(parent, value) + public override string ToHtml() { + var textBuilder = new StringBuilder(); + foreach (var child in children) + { + textBuilder.Append(child.ToHtml()); + } + + return $"{textBuilder}"; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs index e694a8f39..7ff01151a 100644 --- a/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs +++ b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs @@ -1,3 +1,4 @@ +using System.Text; using Markdown.Nodes.Interfaces; namespace Markdown.Nodes.Internal; @@ -6,11 +7,17 @@ public class MarkdownDocumentNode : InternalMarkdownNode { public MarkdownDocumentNode(MarkdownNode? parent, string value) : base(parent, value) { + } public override string ToHtml() { - throw new NotImplementedException(); - } + var textBuilder = new StringBuilder(); + foreach (var child in children) + { + textBuilder.Append(child.ToHtml()); + } + return textBuilder.ToString(); + } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/UrlNode.cs b/cs/Markdown/Nodes/Internal/UrlNode.cs new file mode 100644 index 000000000..f686326e2 --- /dev/null +++ b/cs/Markdown/Nodes/Internal/UrlNode.cs @@ -0,0 +1,22 @@ +using System.Text; +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Internal; + +public class UrlNode : InternalMarkdownNode +{ + public UrlNode(MarkdownNode? 
parent, string value) : base(parent, value) + { + } + + public override string ToHtml() + { + var textBuilder = new StringBuilder(); + foreach (var child in children) + { + textBuilder.Append(child.ToHtml()); + } + + return textBuilder.ToString(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Leaf/TextNode.cs b/cs/Markdown/Nodes/Leaf/TextNode.cs index 5836f22f5..3e09933a3 100644 --- a/cs/Markdown/Nodes/Leaf/TextNode.cs +++ b/cs/Markdown/Nodes/Leaf/TextNode.cs @@ -6,7 +6,7 @@ public class TextNode : LeafMarkdownNode { public override string ToHtml() { - throw new NotImplementedException(); + return value; } public TextNode(MarkdownNode? parent, string value) : base(parent, value) diff --git a/cs/Markdown/ParseSelector.cs b/cs/Markdown/ParseSelector.cs new file mode 100644 index 000000000..58458d290 --- /dev/null +++ b/cs/Markdown/ParseSelector.cs @@ -0,0 +1,72 @@ +using Markdown.Parsers; +using Markdown.Parsers.Interfaces; + +namespace Markdown; + +public class ParseSelector +{ + private readonly ImageParser imageParser; + private readonly TextParser textParser; + private readonly UnderscoresParser underscoresParser; + private readonly HeaderParser headerParser; + private readonly EscapeParser escapeParser; + private readonly NewLineEofParser newLineEofParser; + public ParseSelector(ParserContext context) + { + imageParser = new ImageParser(context); + textParser = new TextParser(); + underscoresParser = new UnderscoresParser(context); + headerParser = new HeaderParser(context); + escapeParser = new EscapeParser(context); + newLineEofParser = new NewLineEofParser([imageParser, underscoresParser]); + } + + public ITokenParser GetParser(TokenType tokenType) + { + ITokenParser parser; + switch (tokenType) + { + case TokenType.LBracket: + case TokenType.LParenthesis: + case TokenType.RBracket: + case TokenType.RParenthesis: + case TokenType.Exclamation: + { + parser = imageParser; + break; + } + case TokenType.Space: + case TokenType.Underscore: + 
case TokenType.DoubleUnderscore: + case TokenType.WordUnderscore: + case TokenType.WordDoubleUnderscore: + { + parser = underscoresParser; + break; + } + case TokenType.Hash: + { + parser = headerParser; + break; + } + case TokenType.Escape: + { + parser = escapeParser; + break; + } + case TokenType.NewLine: + case TokenType.Eof: + { + parser = newLineEofParser; + break; + } + default: + { + parser = textParser; + break; + } + } + + return parser; + } +} \ No newline at end of file diff --git a/cs/Markdown/ParserContext.cs b/cs/Markdown/ParserContext.cs new file mode 100644 index 000000000..fdc5ab851 --- /dev/null +++ b/cs/Markdown/ParserContext.cs @@ -0,0 +1,20 @@ +namespace Markdown; + +public class ParserContext +{ + public int Position { get; private set; } + public Token Current => Tokens[Position]; + public Token? Next => Tokens[Position + 1].Type == TokenType.Eof ? null : Tokens[Position + 1]; + public Token? Prev => Position - 1 < 0 ? null : Tokens[Position - 1]; + public readonly List Tokens; + + public ParserContext(List tokens) + { + Tokens = tokens; + } + + public void IncreasePosition() + { + Position++; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/EscapeParser.cs b/cs/Markdown/Parsers/EscapeParser.cs new file mode 100644 index 000000000..a10f34708 --- /dev/null +++ b/cs/Markdown/Parsers/EscapeParser.cs @@ -0,0 +1,40 @@ +using Markdown.Parsers.Interfaces; + +namespace Markdown.Parsers; + +public class EscapeParser : ITokenParser +{ + private readonly ParserContext context; + private readonly HashSet escapableTokens = + [ + TokenType.Underscore, + TokenType.DoubleUnderscore, + TokenType.WordUnderscore, + TokenType.WordDoubleUnderscore, + TokenType.LBracket, + TokenType.RBracket, + TokenType.LParenthesis, + TokenType.RParenthesis, + TokenType.Hash, + TokenType.Escape, + TokenType.Exclamation + ]; + + public EscapeParser(ParserContext context) + { + this.context = context; + } + + public void Parse() + { + if (context.Next != null 
&& escapableTokens.Contains(context.Next.Type)) + { + context.Next.Type = TokenType.Text; + context.Current.Type = TokenType.Text; + context.Current.Value = ""; + return; + } + + context.Current.Type = TokenType.Text; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/HeaderParser.cs b/cs/Markdown/Parsers/HeaderParser.cs new file mode 100644 index 000000000..11c426a64 --- /dev/null +++ b/cs/Markdown/Parsers/HeaderParser.cs @@ -0,0 +1,24 @@ +using Markdown.Parsers.Interfaces; + +namespace Markdown.Parsers; + +public class HeaderParser : ITokenParser +{ + private readonly ParserContext context; + + public HeaderParser(ParserContext context) + { + this.context = context; + } + + public void Parse() + { + if ((context.Prev == null || context.Prev.Type == TokenType.NewLine) + && context.Next != null && context.Next.Value.StartsWith(" ")) + { + context.Next.Value = ""; + return; + } + context.Current.Type = TokenType.Text; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/ImageParser.cs b/cs/Markdown/Parsers/ImageParser.cs new file mode 100644 index 000000000..08655574d --- /dev/null +++ b/cs/Markdown/Parsers/ImageParser.cs @@ -0,0 +1,158 @@ +using System.Text; +using Markdown.Parsers.Interfaces; + +namespace Markdown.Parsers; + +public class ImageParser : ITokenParser, ICompletableParse +{ + private readonly Stack markdownBrackets = new(); + private readonly Stack markdownParenthesis = new(); + private bool startImageParse; + private bool isAltEnd; + private bool isUrlEnd; + private readonly StringBuilder urlBuilder = new(); + private readonly ParserContext context; + private readonly Stack imageTokens = new(); + + public ImageParser(ParserContext context) + { + this.context = context; + } + + public void Parse() + { + imageTokens.Push(context.Current); + if (!startImageParse) + { + if (context.Next is not { Type: TokenType.LBracket }) + { + Finish(); + return; + } + startImageParse = true; + return; + + } + var currentType = 
context.Current.Type; + if (currentType is TokenType.LBracket or TokenType.RBracket) + ParseAlt(); + else + ParseUrl(); + } + + public void Finish() + { + startImageParse = false; + isAltEnd = false; + isUrlEnd = false; + while (imageTokens.Count > 0) + { + imageTokens.Pop().Type = TokenType.Text; + } + } + + private void ParseAlt() + { + if (isAltEnd) + { + Finish(); + return; + } + + TrackBracket(markdownBrackets, TokenType.LBracket, TokenType.RBracket, out isAltEnd); + } + + private void ParseUrl() + { + if (!isAltEnd || context.Current.Type != TokenType.LParenthesis) + { + Finish(); + return; + } + + while (context.Current.Type is not (TokenType.Eof or TokenType.NewLine or TokenType.Space)) + { + var current = context.Current; + if (current.Type is TokenType.LParenthesis or TokenType.RParenthesis) + { + imageTokens.Push(context.Current); + TrackBracket(markdownParenthesis, TokenType.LParenthesis, TokenType.RParenthesis, out isUrlEnd); + if (isUrlEnd) + { + var isValidUrl = IsValidUrl(urlBuilder.ToString()); + if (isValidUrl) + { + imageTokens.Clear(); + } + else + { + Finish(); + return; + } + } + context.IncreasePosition(); + } + else + { + urlBuilder.Append(current.Value); + current.Type = TokenType.Text; + context.IncreasePosition(); + } + + } + + Finish(); + } + + private void TrackBracket(Stack brackets, TokenType left, TokenType right, out bool parseEnd) + { + var current = context.Current; + + if (brackets.Count == 0 && current.Type == left) + { + brackets.Push(current); + parseEnd = false; + return; + } + + if (current.Type == left) + { + current.Type = TokenType.Text; + brackets.Push(current); + parseEnd = false; + return; + } + + if (current.Type == right) + { + if (brackets.Count > 1) + { + current.Type = TokenType.Text; + brackets.Pop(); + parseEnd = false; + return; + } + brackets.Pop(); + parseEnd = true; + return; + } + + parseEnd = false; + } + + private bool IsValidUrl(string text) + { + var allowed = new HashSet{ ".jpg", ".jpeg", ".png", 
".gif", ".svg", ".webp" }; + var canCreateUrl = Uri.TryCreate(text, UriKind.Absolute, out var url); + if (canCreateUrl && url != null) + { + var path = url.AbsolutePath; + var ext = Path.GetExtension(path).ToLowerInvariant(); + if (allowed.Contains(ext)) + return true; + } + + return false; + } + +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/Interfaces/ICompletableParse.cs b/cs/Markdown/Parsers/Interfaces/ICompletableParse.cs new file mode 100644 index 000000000..5624aeb88 --- /dev/null +++ b/cs/Markdown/Parsers/Interfaces/ICompletableParse.cs @@ -0,0 +1,6 @@ +namespace Markdown.Parsers.Interfaces; + +public interface ICompletableParse +{ + void Finish(); +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/Interfaces/ITokenParser.cs b/cs/Markdown/Parsers/Interfaces/ITokenParser.cs new file mode 100644 index 000000000..c51077eaa --- /dev/null +++ b/cs/Markdown/Parsers/Interfaces/ITokenParser.cs @@ -0,0 +1,6 @@ +namespace Markdown.Parsers.Interfaces; + +public interface ITokenParser +{ + void Parse(); +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/NewLineEofParser.cs b/cs/Markdown/Parsers/NewLineEofParser.cs new file mode 100644 index 000000000..680fd64a9 --- /dev/null +++ b/cs/Markdown/Parsers/NewLineEofParser.cs @@ -0,0 +1,21 @@ +using Markdown.Parsers.Interfaces; + +namespace Markdown.Parsers; + +public class NewLineEofParser : ITokenParser +{ + private readonly List completableParsers; + + public NewLineEofParser(List completableParsers) + { + this.completableParsers = completableParsers; + } + + public void Parse() + { + foreach (var parser in completableParsers) + { + parser.Finish(); + } + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/TextParser.cs b/cs/Markdown/Parsers/TextParser.cs new file mode 100644 index 000000000..054a46f1e --- /dev/null +++ b/cs/Markdown/Parsers/TextParser.cs @@ -0,0 +1,8 @@ +using Markdown.Parsers.Interfaces; + +namespace Markdown.Parsers; + +public class TextParser : 
ITokenParser +{ + public void Parse(){} +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/UnderscoresParser.cs b/cs/Markdown/Parsers/UnderscoresParser.cs new file mode 100644 index 000000000..2f98a83ec --- /dev/null +++ b/cs/Markdown/Parsers/UnderscoresParser.cs @@ -0,0 +1,256 @@ +using Markdown.Parsers.Interfaces; + +namespace Markdown.Parsers; + +public class UnderscoresParser : ITokenParser, ICompletableParse +{ + private readonly ParserContext context; + private readonly Stack markdownUnderscores = new(); + + public UnderscoresParser(ParserContext context) + { + this.context = context; + } + + public void Parse() + { + if (context.Current.Type == TokenType.Space) + { + ParseSpace(); + } + + if (context.Current.Type is TokenType.Underscore or TokenType.DoubleUnderscore) + { + ParseUnderscore(); + } + + if (context.Current.Type is TokenType.WordUnderscore or TokenType.WordDoubleUnderscore) + { + CanParseWordUnderscores(); + } + } + public void Finish() + { + if (markdownUnderscores.Count > 0) + { + ConvertStackToText(); + } + } + + private void ParseUnderscore() + { + HandleUnderscore(CanOpen(), CanClose(), IsValidOpenUnderscores, IsValidCloseUnderscores); + } + + private void CanParseWordUnderscores() + { + if (!IsValidWordUnderscores()) + { + context.Current.Type = TokenType.Text; + } + + HandleUnderscore(CanOpenWordUnderscore(), CanCloseWordUnderscore(), _ => true, _ => true); + } + + private void HandleUnderscore( + bool canOpen, + bool canClose, + Func isValidOpen, + Func isValidClose) + { + if (canOpen && canClose) + { + if (markdownUnderscores.Count > 0) + { + var peeked = markdownUnderscores.Peek(); + if (!IsIntersecting(peeked)) + markdownUnderscores.Pop(); + } + else + { + markdownUnderscores.Push(context.Current); + } + } + else if (canOpen) + { + HandleOpen(isValidOpen); + } + else if (canClose && markdownUnderscores.Count > 0) + { + HandleClose(isValidClose); + } + else + { + context.Current.Type = TokenType.Text; + } + } + + private 
void HandleOpen(Func isValidOpen) + { + if (markdownUnderscores.Count > 0) + { + var peeked = markdownUnderscores.Peek(); + if (!isValidOpen(peeked)) + { + context.Current.Type = TokenType.Text; + } + else + { + markdownUnderscores.Push(context.Current); + } + } + else + { + markdownUnderscores.Push(context.Current); + } + } + + private void HandleClose(Func isValidClose) + { + var peeked = markdownUnderscores.Peek(); + if (!isValidClose(peeked) && markdownUnderscores.Count == 1) + { + context.Current.Type = TokenType.Text; + } + else if (IsIntersecting(peeked)) + { + markdownUnderscores.Push(context.Current); + ConvertStackToText(); + } + else + { + markdownUnderscores.Pop(); + } + } + private void ParseSpace() + { + if (markdownUnderscores.Count <= 0) + { + return; + } + + var peeked = markdownUnderscores.Peek(); + if (peeked.Type is TokenType.WordUnderscore or TokenType.WordDoubleUnderscore) + { + peeked.Type = TokenType.Text; + markdownUnderscores.Pop(); + } + } + + private bool IsValidWordUnderscores() + { + if (char.IsDigit(context.Next!.Value[0]) && char.IsDigit(context.Prev!.Value[^1])) + return false; + + if (char.IsDigit(context.Next!.Value[0]) && char.IsLetter(context.Prev!.Value[^1])) + return false; + + if (char.IsDigit(context.Next!.Value[^1]) && char.IsLetter(context.Prev!.Value[0])) + return false; + + return true; + } + + private bool IsValidOpenUnderscores(Token peeked) + { + var curr = context.Current.Type; + var peekedType = peeked.Type; + + if (curr == peekedType) + return false; + + return curr != TokenType.DoubleUnderscore || peekedType != TokenType.Underscore; + } + + private bool IsValidCloseUnderscores(Token peeked) + { + var currentType = context.Current.Type; + var peekedType = peeked.Type; + + return currentType != TokenType.DoubleUnderscore || peekedType != TokenType.Underscore; + } + + private bool IsIntersecting(Token peeked) + { + switch (context.Current.Type) + { + case TokenType.Underscore: + case TokenType.WordUnderscore: + { + 
if (peeked.Type is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore) + return true; + break; + } + case TokenType.DoubleUnderscore: + case TokenType.WordDoubleUnderscore: + { + if (peeked.Type is TokenType.Underscore or TokenType.WordUnderscore) + return true; + break; + } + } + + return false; + } + + private void ConvertStackToText() + { + while (markdownUnderscores.Count > 0) + { + var token = markdownUnderscores.Pop(); + token.Type = TokenType.Text; + } + } + + private bool CanOpen() + { + var nextToken = context.Next; + + return nextToken != null && !nextToken.Value.StartsWith(" "); + } + + private bool CanClose() + { + if (context.Prev == null || context.Prev.Value.EndsWith(" ") || context.Prev.Type is TokenType.Underscore or TokenType.DoubleUnderscore) + return false; + + return context.Next == null || context.Next.Value.StartsWith(" ") || context.Next.Type is TokenType.Underscore or TokenType.DoubleUnderscore; + } + + private bool CanOpenWordUnderscore() + { + var current = context.Current; + if (markdownUnderscores.Count <= 0) + return true; + + var peeked = markdownUnderscores.Peek(); + return current.Type switch + { + TokenType.WordUnderscore => peeked.Type is not (TokenType.Underscore or TokenType.WordUnderscore), + TokenType.WordDoubleUnderscore => peeked.Type switch + { + TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore or TokenType.WordUnderscore + or TokenType.Underscore => false, + _ => true + }, + _ => true + }; + } + + private bool CanCloseWordUnderscore() + { + var current = context.Current; + if (markdownUnderscores.Count <= 0) + return false; + + var peeked = markdownUnderscores.Peek(); + + if (current.Type is TokenType.WordUnderscore) + { + return peeked.Type is TokenType.WordUnderscore or TokenType.Underscore; + } + + return peeked.Type is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore; + + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/MdTests.cs b/cs/Markdown/Tests/MdTests.cs new file 
mode 100644 index 000000000..39f82473c --- /dev/null +++ b/cs/Markdown/Tests/MdTests.cs @@ -0,0 +1,151 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class MdTests +{ + [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "\"Cat\"", TestName = "ValidUrlAndAlt")] + [TestCase("!\n[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "!\n[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "NewlineAfterExclamation")] + [TestCase("![Ca\nt](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![Ca\nt](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "AltWithNewline")] + [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6\na5b2c76c038ef0c8d2502fd2f6.jpg)", + "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6\na5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithNewline")] + [TestCase("![C[]at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "\"C[]at\"", TestName = "AltWithBrackets")] + [TestCase("![C[at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![C[at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UnclosedBracketInAlt")] + [TestCase("![C]at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![C]at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "ExtraClosingBracketInAlt")] + [TestCase("![Cat](https://i.pinimg.com/originals()/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "\"Cat\"", TestName = "UrlWithEmptyParentheses")] + [TestCase("![Cat](https://i.pinimg.com/originals(/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + 
"![Cat](https://i.pinimg.com/originals(/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithOneOpenParentheses")] + [TestCase("![Cat](https://i.pinimg.com/originals)/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![Cat](https://i.pinimg.com/originals)/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithOneCloseParentheses")] + [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c7 6c038ef0c8d2502fd2f6.jpg)", + "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c7 6c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithSpace")] + [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.qqq)", + "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.qqq)", TestName = "InvalidImageFormat")] + [TestCase("![](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "\"\"", TestName = "EmptyAltText")] + [TestCase("![Cat]()", "![Cat]()", TestName = "EmptyUrl")] + [TestCase("![Cat](Cat)", "![Cat](Cat)", TestName = "InvalidUrl")] + [TestCase("![__Cat__](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "\"<strongCat\">", TestName = "AltWithBold")] + [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a__5b2__c76c038ef0c8d2502fd2f6.jpg)", + "\"Cat\"", TestName = "UrlWithBold")] + [TestCase("![[][][]](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "\"[][][]\"", TestName = "AltWithMultipleBrackets")] + public void Render_Image_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase("# текст", "

текст

", TestName = "SimpleHeader")] + [TestCase("# Первый \n# Второй", "

Первый

\n

Второй

", TestName = "TwoHeadersInDifferentLines")] + [TestCase("# Первый \n просто текст \n# Второй", "

Первый

\n просто текст \n

Второй

", TestName = "HeadersWithTextBetween")] + [TestCase("#текст", "#текст", TestName = "WithNoSpace")] + [TestCase("# Заголовок с _курсивом_ и __жирным шрифтом__", "

Заголовок с курсивом и жирным шрифтом

", TestName = "WithInlineFormatting")] + [TestCase("# ![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "

\"Cat\"

", TestName = "WithImage")] + [TestCase("# __текст__", "

текст

", TestName = "WithBold")] + [TestCase(" # текст", " # текст", TestName = "LeadingSpace")] + [TestCase("# ", "

", TestName = "Empty")] + [TestCase("# текст # ", "

текст #

", TestName = "TrailingHashIgnored")] + public void Render_Header_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase("_текст_", "текст", TestName = "SimpleItalic")] + [TestCase("_те\nкст_", "_те\nкст_", TestName = "ItalicWithNewLineInside")] + [TestCase("__текст__", "текст", TestName = "SimpleBold")] + [TestCase("Внутри __двойного выделения _одинарное_ тоже__ работает", + "Внутри двойного выделения одинарное тоже работает", + TestName = "BoldWithItalicInside")] + [TestCase("_те_кст", "текст", TestName = "ItalicAtStartWord")] + [TestCase("т_екс_т", "текст", TestName = "ItalicInMiddleWord")] + [TestCase("текс_т_", "текст", TestName = "ItalicAtEndWord")] + [TestCase("__текст_", "__текст_", TestName = "UnpairedUnderscores")] + [TestCase("_ текст_","_ текст_", TestName = "LeadingSpacePreventsItalic")] + [TestCase("1_2_3","1_2_3", TestName = "DigitsPreventItalic")] + [TestCase("те_кст те_кст","те_кст те_кст", TestName = "SeparateWordsUnderscores")] + [TestCase("_текст _текст", "_текст _текст",TestName = "UnclosedItalicMultipleWords")] + [TestCase("__","__", TestName = "OnlyDoubleUnderscore")] + [TestCase("____","____", TestName = "OnlyFourUnderscores")] + [TestCase("__текст _текст__ текст_","__текст _текст__ текст_", TestName = "BoldWithNestedItalic")] + [TestCase("_текст __текст__ текст_","текст __текст__ текст", TestName = "ItalicWithBoldInside")] + [TestCase("_текст т__екс__т текст_","текст т__екс__т текст", TestName = "ItalicWithBoldInsideWord")] + [TestCase("__текст т_екст текст___","текст т_екст текст_", TestName = "BoldWithTrailingUnderscore")] + [TestCase("т__е_к_с__т","текст", TestName = "BoldInWordWithItalicInside")] + [TestCase("__те_к_ст__","текст", TestName = "BoldWithItalicInsideInWord")] + [TestCase("_те__к__ст_","те__к__ст", TestName = "ItalicWithBoldInsideInWord")] + [TestCase("_123_","123", TestName = 
"ItalicWithNumbers")] + [TestCase("__123__","123", TestName = "BoldWithNumbers")] + [TestCase("__++++__","++++", TestName = "BoldWithSymbols")] + [TestCase("__текст___текст_","тексттекст", TestName = "BoldThenItalic")] + [TestCase("__ текст __","__ текст __", TestName = "BoldWithSpacesPrevents")] + [TestCase("__текст __","__текст __", TestName = "BoldWithSpace")] + [TestCase("тек__с_т те_кс__т","тек__с_т те_кс__т", TestName = "UnclosedUnderscoresInDifferentWords")] + public void Render_Underscores_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase(@"\_текст_", "_текст_", TestName = "EscapedUnderscore")] + [TestCase(@"\\_текст\\_", @"\текст\", TestName = "DoubleEscapedUnderscoresAroundText")] + [TestCase(@"\# текст", "# текст", TestName = "EscapedHeader")] + [TestCase(@"\![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "EscapedExclamationInImage")] + [TestCase(@"!\[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "EscapedAltInImage")] + [TestCase(@"![Cat]\(https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", + "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "EscapedUrlInImage")] + [TestCase(@"__текст \_текст__ текст_","текст _текст текст_", TestName = "BoldWithEscapedUnderscoreInside")] + [TestCase(@"т\екс\т",@"т\екс\т", TestName = "TextWithMultipleEscapes")] + public void Render_Escape_CorrectHtmlText(string markdownText, string expectedResult) + { + var renderer = new Md(); + var html = renderer.Render(markdownText); + + html.Should().Be(expectedResult); + } + + [TestCase(1000, 
TestName = "Parse_1000_repeat")] + [TestCase(2000, TestName = "Parse_2000_repeats")] + [TestCase(4000, TestName = "Parse_4000_repeats")] + [TestCase(8000, TestName = "Parse_8000_repeats")] + [TestCase(16000, TestName = "Parse_16000_repeats")] + public void Render_Performance_WithAllTokens(int repeatCount) + { + var markdownText = "# Заголовок _курсив_ __жирный__ ![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg) текст \\экранирование_\n"; + var sb = new StringBuilder(); + for (int i = 0; i < repeatCount; i++) + { + sb.Append(markdownText); + } + + var input = sb.ToString(); + var stopwatch = Stopwatch.StartNew(); + var renderer = new Md(); + var result = renderer.Render(input); + stopwatch.Stop(); + + Console.WriteLine($"Время выполнения: {stopwatch.ElapsedMilliseconds} ms"); + } +} \ No newline at end of file diff --git a/cs/Markdown/TokenType.cs b/cs/Markdown/TokenType.cs index 212fcaae4..019bb5145 100644 --- a/cs/Markdown/TokenType.cs +++ b/cs/Markdown/TokenType.cs @@ -5,6 +5,8 @@ public enum TokenType Hash, Underscore, DoubleUnderscore, + WordUnderscore, + WordDoubleUnderscore, Text, NewLine, Exclamation, @@ -12,5 +14,7 @@ public enum TokenType RBracket, LParenthesis, RParenthesis, - Escape + Escape, + Space, + Eof, } \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..d2d3c4e69 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{737AD9F0-D941-4026-AA57-A9CB28028C28}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,9 @@ Global 
{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Debug|Any CPU.Build.0 = Debug|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Release|Any CPU.ActiveCfg = Release|Any CPU + {737AD9F0-D941-4026-AA57-A9CB28028C28}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal From 8d534ae9c4603aeb865ee8fc54a52d217f73e234 Mon Sep 17 00:00:00 2001 From: Nevisinn Date: Sat, 13 Dec 2025 13:50:19 +0500 Subject: [PATCH 4/4] =?UTF-8?q?=D0=9F=D0=B5=D1=80=D0=B5=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=B0=D0=BD=D0=B0=20=D0=B0=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA?= =?UTF-8?q?=D1=82=D1=83=D1=80=D0=B0=20=D0=B8=20=D0=BB=D0=BE=D0=B3=D0=B8?= =?UTF-8?q?=D0=BA=D0=B0=20=D0=BF=D0=B0=D1=80=D1=81=D0=B5=D1=80=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Markdown.csproj | 7 +- cs/Markdown/MarkdownParser.cs | 143 ------ cs/Markdown/MarkdownTokenizer.cs | 122 ----- cs/Markdown/Md.cs | 9 +- .../Nodes/Interfaces/InternalMarkdownNode.cs | 19 +- .../Nodes/Interfaces/LeafMarkdownNode.cs | 2 +- cs/Markdown/Nodes/Interfaces/MarkdownNode.cs | 20 +- cs/Markdown/Nodes/Internal/AltNode.cs | 6 +- cs/Markdown/Nodes/Internal/BoldNode.cs | 8 +- cs/Markdown/Nodes/Internal/HeaderNode.cs | 16 +- cs/Markdown/Nodes/Internal/ImageNode.cs | 8 +- cs/Markdown/Nodes/Internal/ItalicNode.cs | 7 +- .../Nodes/Internal/MarkdownDocumentNode.cs | 7 +- cs/Markdown/Nodes/Internal/UrlNode.cs | 22 - cs/Markdown/Nodes/Leaf/TextNode.cs | 6 +- cs/Markdown/Nodes/Leaf/UrlNode.cs | 15 + cs/Markdown/ParseSelector.cs | 72 --- cs/Markdown/ParserContext.cs | 20 - cs/Markdown/Parsers/EscapeParser.cs | 42 +- cs/Markdown/Parsers/HeaderParser.cs | 40 +- 
cs/Markdown/Parsers/ImageParser.cs | 200 +++----- .../Parsers/Interfaces/ICompletableParse.cs | 6 - cs/Markdown/Parsers/Interfaces/IParser.cs | 8 + .../Parsers/Interfaces/ITokenParser.cs | 6 - cs/Markdown/Parsers/MarkdownParser.cs | 109 +++++ cs/Markdown/Parsers/NewLineEofParser.cs | 21 - cs/Markdown/Parsers/ParseStatus.cs | 39 ++ cs/Markdown/Parsers/TextParser.cs | 8 - cs/Markdown/Parsers/UnderscoresParser.cs | 432 +++++++++++------ cs/Markdown/Tests/MdPerformanceTest.cs | 53 +++ cs/Markdown/Tests/MdSpec.txt | 86 ++++ cs/Markdown/Tests/MdSpecApprovalTest.cs | 23 + cs/Markdown/Tests/MdTests.cs | 137 ++---- .../Tests/ParserTests/ParseEscapeTests.cs | 102 ++++ .../Tests/ParserTests/ParseHeaderTests.cs | 147 ++++++ .../Tests/ParserTests/ParseImageTests.cs | 118 +++++ .../ParserTests/ParseUnderscoresTests.cs | 379 +++++++++++++++ ...nder_MdSpec_ReturnCorrectHtml.approved.txt | 86 ++++ .../Tests/TokenizerTests/TokenAssert.cs | 16 + .../TokenizerTests/TokenizeEscapeTests.cs | 122 +++++ .../TokenizerTests/TokenizeHeaderTests.cs | 85 ++++ .../TokenizerTests/TokenizeImageTests.cs | 118 +++++ .../TokenizeUnderscoresTests.cs | 440 ++++++++++++++++++ cs/Markdown/Tokenizer/MarkdownTokenizer.cs | 186 ++++++++ cs/Markdown/{ => Tokenizer}/Token.cs | 6 +- cs/Markdown/{ => Tokenizer}/TokenType.cs | 5 +- 46 files changed, 2639 insertions(+), 890 deletions(-) delete mode 100644 cs/Markdown/MarkdownParser.cs delete mode 100644 cs/Markdown/MarkdownTokenizer.cs delete mode 100644 cs/Markdown/Nodes/Internal/UrlNode.cs create mode 100644 cs/Markdown/Nodes/Leaf/UrlNode.cs delete mode 100644 cs/Markdown/ParseSelector.cs delete mode 100644 cs/Markdown/ParserContext.cs delete mode 100644 cs/Markdown/Parsers/Interfaces/ICompletableParse.cs create mode 100644 cs/Markdown/Parsers/Interfaces/IParser.cs delete mode 100644 cs/Markdown/Parsers/Interfaces/ITokenParser.cs create mode 100644 cs/Markdown/Parsers/MarkdownParser.cs delete mode 100644 cs/Markdown/Parsers/NewLineEofParser.cs create mode 
100644 cs/Markdown/Parsers/ParseStatus.cs delete mode 100644 cs/Markdown/Parsers/TextParser.cs create mode 100644 cs/Markdown/Tests/MdPerformanceTest.cs create mode 100644 cs/Markdown/Tests/MdSpec.txt create mode 100644 cs/Markdown/Tests/MdSpecApprovalTest.cs create mode 100644 cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs create mode 100644 cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs create mode 100644 cs/Markdown/Tests/ParserTests/ParseImageTests.cs create mode 100644 cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs create mode 100644 cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt create mode 100644 cs/Markdown/Tests/TokenizerTests/TokenAssert.cs create mode 100644 cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs create mode 100644 cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs create mode 100644 cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs create mode 100644 cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs create mode 100644 cs/Markdown/Tokenizer/MarkdownTokenizer.cs rename cs/Markdown/{ => Tokenizer}/Token.cs (55%) rename cs/Markdown/{ => Tokenizer}/TokenType.cs (82%) diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 1d2c23e4c..9d639a2d4 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -8,9 +8,10 @@ - - - + + + + diff --git a/cs/Markdown/MarkdownParser.cs b/cs/Markdown/MarkdownParser.cs deleted file mode 100644 index 67fb8562d..000000000 --- a/cs/Markdown/MarkdownParser.cs +++ /dev/null @@ -1,143 +0,0 @@ -using Markdown.Nodes.Interfaces; -using Markdown.Nodes.Internal; -using Markdown.Nodes.Leaf; - -namespace Markdown; - -public class MarkdownParser -{ - private readonly ParserContext context; - private readonly ParseSelector selector; - public MarkdownParser(ParserContext context, ParseSelector selector) - { - this.context = context; - this.selector = selector; - } - - public MarkdownDocumentNode ParseTokens() 
- { - while (context.Position < context.Tokens.Count) - { - var token = context.Tokens[context.Position]; - var parser = selector.GetParser(token.Type); - parser.Parse(); - context.IncreasePosition(); - } - - var markdownDocumentNode = BuildMarkdownDocument(); - - return markdownDocumentNode; - } - - private MarkdownDocumentNode BuildMarkdownDocument() - { - var markdownDocumentNode = new MarkdownDocumentNode(null, ""); - MarkdownNode currentNode = markdownDocumentNode; - var stack = new Stack(); - stack.Push(currentNode); - foreach (var token in context.Tokens) - { - switch (token.Type) - { - case TokenType.Text or TokenType.Escape or TokenType.NewLine or TokenType.Space: - { - if (token.Type is TokenType.NewLine) - { - var peeked = stack.Peek(); - if (peeked is not MarkdownDocumentNode) - { - stack.Pop(); - currentNode = peeked.Parent!; - } - } - - var textNode = new TextNode(currentNode, token.Value); - currentNode.AddChild(textNode); - break; - } - case TokenType.Underscore or TokenType.WordUnderscore: - { - var peeked = stack.Peek(); - if (peeked is ItalicNode) - { - stack.Pop(); - currentNode = peeked.Parent!; - } - else - { - var italicNode = new ItalicNode(currentNode, token.Value); - stack.Push(italicNode); - currentNode.AddChild(italicNode); - currentNode = italicNode; - } - - break; - } - case TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore: - { - var peeked = stack.Peek(); - if (peeked is BoldNode) - { - stack.Pop(); - currentNode = peeked.Parent!; - } - else - { - var boldNode = new BoldNode(currentNode, token.Value); - stack.Push(boldNode); - currentNode.AddChild(boldNode); - currentNode = boldNode; - } - - break; - } - case TokenType.Exclamation: - { - var imageNode = new ImageNode(currentNode, token.Value); - stack.Push(imageNode); - currentNode.AddChild(imageNode); - currentNode = imageNode; - break; - } - case TokenType.Hash: - { - var headerNode = new HeaderNode(currentNode, token.Value); - currentNode.AddChild(headerNode); - 
currentNode = headerNode; - stack.Push(headerNode); - break; - } - case TokenType.RParenthesis: - { - var imageNode = stack.Pop(); - currentNode = imageNode.Parent!; - break; - } - case TokenType.RBracket: - { - var altNode = stack.Pop(); - currentNode = altNode.Parent!; - break; - } - case TokenType.LBracket: - { - var altNode = new AltNode(currentNode, token.Value); - currentNode.AddChild(altNode); - stack.Push(altNode); - currentNode = altNode; - break; - } - case TokenType.LParenthesis: - { - var urlNode = new UrlNode(currentNode, token.Value); - currentNode.AddChild(urlNode); - stack.Push(urlNode); - currentNode = urlNode; - break; - } - } - } - - return markdownDocumentNode; - } -} diff --git a/cs/Markdown/MarkdownTokenizer.cs b/cs/Markdown/MarkdownTokenizer.cs deleted file mode 100644 index 9323ffdcf..000000000 --- a/cs/Markdown/MarkdownTokenizer.cs +++ /dev/null @@ -1,122 +0,0 @@ -using System.Text; - -namespace Markdown; - -public class MarkdownTokenizer -{ - private int position; - private readonly string markdownText; - private char Prev => position - 1 < 0 ? ' ' : markdownText[position - 1]; - private char Next => position + 1 == markdownText.Length ? ' ' : markdownText[position + 1]; - public MarkdownTokenizer(string markdownText) - { - this.markdownText = markdownText; - } - public List Tokenize() - { - var tokens = new List(); - while (position < markdownText.Length) - { - var symbol = markdownText[position]; - var token = symbol switch - { - '#' => TokenizeHeader(), - '_' => TokenizeUnderscore(), - '!' 
=> TokenizeImage(), - '(' => TokenizeImage(), - ')' => TokenizeImage(), - '[' => TokenizeImage(), - ']' => TokenizeImage(), - '\\' => TokenizeEscape(), - ' ' => TokenizeSpace(), - '\n' => TokenizeNewLine(), - _ => TokenizeText() - }; - - tokens.Add(token); - position++; - } - - var eof = TokenizeEndOfFile(); - tokens.Add(eof); - return tokens; - } - - private Token TokenizeEndOfFile() - { - return new Token("", TokenType.Eof); - } - private Token TokenizeNewLine() - { - return new Token("\n", TokenType.NewLine); - } - - private Token TokenizeSpace() - { - return new Token(" ", TokenType.Space); - } - private Token TokenizeEscape() - { - return new Token(@"\", TokenType.Escape); - } - - private Token TokenizeHeader() - { - return new Token("#", TokenType.Hash); - } - - private Token TokenizeUnderscore() - { - var prevSymbol = Prev; - var nextSymbol = Next; - if (nextSymbol != '_') - { - if (prevSymbol != ' ' && prevSymbol != '_' && nextSymbol != ' ') - { - return new Token("_", TokenType.WordUnderscore); - } - return new Token("_", TokenType.Underscore); - - } - - position++; - nextSymbol = Next; - if (prevSymbol != ' ' && nextSymbol != ' ' && nextSymbol != '_') - { - return new Token("__", TokenType.WordDoubleUnderscore); - } - return new Token("__", TokenType.DoubleUnderscore); - - } - - private Token TokenizeImage() - { - var symbol = markdownText[position]; - return symbol switch - { - '!' 
=> new Token("!", TokenType.Exclamation), - '(' => new Token("(", TokenType.LParenthesis), - ')' => new Token(")", TokenType.RParenthesis), - '[' => new Token("[", TokenType.LBracket), - ']' => new Token("]", TokenType.RBracket), - _ => throw new ArgumentOutOfRangeException(nameof(symbol)) - }; - } - - private Token TokenizeText() - { - var start = position; - while (position < markdownText.Length && !IsSpecialSymbol(markdownText[position])) - position++; - - var text = markdownText[start..position]; - position--; - - return new Token(text, TokenType.Text); - } - - private bool IsSpecialSymbol(char symbol) - => specialsSymbols.Contains(symbol); - - private readonly HashSet specialsSymbols = ['#', '_', '!', '[', ']', '(', ')', ' ', '\n', '\\']; -} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs index 0d9fcfc2b..e69c18a8d 100644 --- a/cs/Markdown/Md.cs +++ b/cs/Markdown/Md.cs @@ -1,3 +1,6 @@ +using Markdown.Parsers; +using Markdown.Tokenizer; + namespace Markdown; public class Md @@ -6,12 +9,10 @@ public string Render(string markdownText) { var tokenizer = new MarkdownTokenizer(markdownText); var tokens = tokenizer.Tokenize(); - var context = new ParserContext(tokens); - var parserSelector = new ParseSelector(context); - var parser = new MarkdownParser(context, parserSelector); + var parser = new MarkdownParser(tokens); var markdownDocument = parser.ParseTokens(); var htmlText = markdownDocument.ToHtml(); - + return htmlText; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs index 5d80c23b5..62995295d 100644 --- a/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/InternalMarkdownNode.cs @@ -1,17 +1,18 @@ -using System.Text; - namespace Markdown.Nodes.Interfaces; public abstract class InternalMarkdownNode : MarkdownNode { - protected readonly List children = []; - - protected 
InternalMarkdownNode(MarkdownNode? parent, string value) : base(parent, value) - { } - + protected InternalMarkdownNode(string value) : base(value) + { + } + public override void AddChild(MarkdownNode node) { - children.Add(node); + Children.Add(node); + } + + public override void AddChildren(List nodes) + { + Children.AddRange(nodes); } - } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs index 079210b7c..6e6e51977 100644 --- a/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/LeafMarkdownNode.cs @@ -2,7 +2,7 @@ namespace Markdown.Nodes.Interfaces; public abstract class LeafMarkdownNode : MarkdownNode { - protected LeafMarkdownNode(MarkdownNode? parent, string value) : base(parent, value) + protected LeafMarkdownNode(string value) : base(value) { } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs index 39b9ad0c0..f2555d170 100644 --- a/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs +++ b/cs/Markdown/Nodes/Interfaces/MarkdownNode.cs @@ -1,16 +1,22 @@ namespace Markdown.Nodes.Interfaces; public abstract class MarkdownNode -{ - public MarkdownNode? Parent; - protected string value; +{ + public readonly List Children = []; + public readonly string Value; - protected MarkdownNode(MarkdownNode? 
parent, string value) + protected MarkdownNode(string value) + { + Value = value; + } + + public virtual void AddChild(MarkdownNode node) + { + } + + public virtual void AddChildren(List nodes) { - Parent = parent; - this.value = value; } - public virtual void AddChild(MarkdownNode node) { } public abstract string ToHtml(); } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/AltNode.cs b/cs/Markdown/Nodes/Internal/AltNode.cs index a14f37ad8..418346b48 100644 --- a/cs/Markdown/Nodes/Internal/AltNode.cs +++ b/cs/Markdown/Nodes/Internal/AltNode.cs @@ -5,17 +5,15 @@ namespace Markdown.Nodes.Internal; public class AltNode : InternalMarkdownNode { - public AltNode(MarkdownNode? parent, string value) : base(parent, value) + public AltNode(string value) : base(value) { } public override string ToHtml() { var textBuilder = new StringBuilder(); - foreach (var child in children) - { + foreach (var child in Children) textBuilder.Append(child.ToHtml()); - } return textBuilder.ToString(); } diff --git a/cs/Markdown/Nodes/Internal/BoldNode.cs b/cs/Markdown/Nodes/Internal/BoldNode.cs index da8b10a92..d06ef57c8 100644 --- a/cs/Markdown/Nodes/Internal/BoldNode.cs +++ b/cs/Markdown/Nodes/Internal/BoldNode.cs @@ -5,16 +5,14 @@ namespace Markdown.Nodes.Internal; public class BoldNode : InternalMarkdownNode { - public BoldNode(MarkdownNode? 
parent, string value) : base(parent, value) + public BoldNode(string value) : base(value) { } + public override string ToHtml() { var textBuilder = new StringBuilder(); - foreach (var child in children) - { - textBuilder.Append(child.ToHtml()); - } + foreach (var child in Children) textBuilder.Append(child.ToHtml()); return $"{textBuilder}"; } diff --git a/cs/Markdown/Nodes/Internal/HeaderNode.cs b/cs/Markdown/Nodes/Internal/HeaderNode.cs index bb0116215..c9e5d0aad 100644 --- a/cs/Markdown/Nodes/Internal/HeaderNode.cs +++ b/cs/Markdown/Nodes/Internal/HeaderNode.cs @@ -5,18 +5,20 @@ namespace Markdown.Nodes.Internal; public class HeaderNode : InternalMarkdownNode { - public HeaderNode(MarkdownNode? parent, string value) : base(parent, value) + public HeaderNode(string value) : base(value) { } - + public override string ToHtml() { var textBuilder = new StringBuilder(); - foreach (var child in children) - { - textBuilder.Append(child.ToHtml()); - } + var controlCharacters = new StringBuilder(); + foreach (var child in Children) + if (child.Value is "\n" or "\r") + controlCharacters.Append(child.ToHtml()); + else + textBuilder.Append(child.ToHtml()); - return $"

{textBuilder}

"; + return $"

{textBuilder}

{controlCharacters}"; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/ImageNode.cs b/cs/Markdown/Nodes/Internal/ImageNode.cs index 003132dca..e962fdc28 100644 --- a/cs/Markdown/Nodes/Internal/ImageNode.cs +++ b/cs/Markdown/Nodes/Internal/ImageNode.cs @@ -4,14 +4,14 @@ namespace Markdown.Nodes.Internal; public class ImageNode : InternalMarkdownNode { - public ImageNode(MarkdownNode? parent, string value) : base(parent, value) + public ImageNode(string value) : base(value) { } public override string ToHtml() { - var alt = children[0].ToHtml(); - var url = children[1].ToHtml(); + var alt = Children[0].ToHtml(); + var url = Children[1].ToHtml(); return $"\"{alt}\""; - } + } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Internal/ItalicNode.cs b/cs/Markdown/Nodes/Internal/ItalicNode.cs index 292b9efe5..eca912e8e 100644 --- a/cs/Markdown/Nodes/Internal/ItalicNode.cs +++ b/cs/Markdown/Nodes/Internal/ItalicNode.cs @@ -5,17 +5,14 @@ namespace Markdown.Nodes.Internal; public class ItalicNode : InternalMarkdownNode { - public ItalicNode(MarkdownNode? parent, string value) : base(parent, value) + public ItalicNode(string value) : base(value) { } public override string ToHtml() { var textBuilder = new StringBuilder(); - foreach (var child in children) - { - textBuilder.Append(child.ToHtml()); - } + foreach (var child in Children) textBuilder.Append(child.ToHtml()); return $"{textBuilder}"; } diff --git a/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs index 7ff01151a..9445a859e 100644 --- a/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs +++ b/cs/Markdown/Nodes/Internal/MarkdownDocumentNode.cs @@ -5,18 +5,15 @@ namespace Markdown.Nodes.Internal; public class MarkdownDocumentNode : InternalMarkdownNode { - public MarkdownDocumentNode(MarkdownNode? 
parent, string value) : base(parent, value) + public MarkdownDocumentNode(string value) : base(value) { - } public override string ToHtml() { var textBuilder = new StringBuilder(); - foreach (var child in children) - { + foreach (var child in Children) textBuilder.Append(child.ToHtml()); - } return textBuilder.ToString(); } diff --git a/cs/Markdown/Nodes/Internal/UrlNode.cs b/cs/Markdown/Nodes/Internal/UrlNode.cs deleted file mode 100644 index f686326e2..000000000 --- a/cs/Markdown/Nodes/Internal/UrlNode.cs +++ /dev/null @@ -1,22 +0,0 @@ -using System.Text; -using Markdown.Nodes.Interfaces; - -namespace Markdown.Nodes.Internal; - -public class UrlNode : InternalMarkdownNode -{ - public UrlNode(MarkdownNode? parent, string value) : base(parent, value) - { - } - - public override string ToHtml() - { - var textBuilder = new StringBuilder(); - foreach (var child in children) - { - textBuilder.Append(child.ToHtml()); - } - - return textBuilder.ToString(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Nodes/Leaf/TextNode.cs b/cs/Markdown/Nodes/Leaf/TextNode.cs index 3e09933a3..5914a1056 100644 --- a/cs/Markdown/Nodes/Leaf/TextNode.cs +++ b/cs/Markdown/Nodes/Leaf/TextNode.cs @@ -4,12 +4,12 @@ namespace Markdown.Nodes.Leaf; public class TextNode : LeafMarkdownNode { - public override string ToHtml() + public TextNode(string value) : base(value) { - return value; } - public TextNode(MarkdownNode? 
parent, string value) : base(parent, value) + public override string ToHtml() { + return Value; } } \ No newline at end of file diff --git a/cs/Markdown/Nodes/Leaf/UrlNode.cs b/cs/Markdown/Nodes/Leaf/UrlNode.cs new file mode 100644 index 000000000..ff94c4ef4 --- /dev/null +++ b/cs/Markdown/Nodes/Leaf/UrlNode.cs @@ -0,0 +1,15 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Nodes.Leaf; + +public class UrlNode : LeafMarkdownNode +{ + public UrlNode(string value) : base(value) + { + } + + public override string ToHtml() + { + return Value; + } +} \ No newline at end of file diff --git a/cs/Markdown/ParseSelector.cs b/cs/Markdown/ParseSelector.cs deleted file mode 100644 index 58458d290..000000000 --- a/cs/Markdown/ParseSelector.cs +++ /dev/null @@ -1,72 +0,0 @@ -using Markdown.Parsers; -using Markdown.Parsers.Interfaces; - -namespace Markdown; - -public class ParseSelector -{ - private readonly ImageParser imageParser; - private readonly TextParser textParser; - private readonly UnderscoresParser underscoresParser; - private readonly HeaderParser headerParser; - private readonly EscapeParser escapeParser; - private readonly NewLineEofParser newLineEofParser; - public ParseSelector(ParserContext context) - { - imageParser = new ImageParser(context); - textParser = new TextParser(); - underscoresParser = new UnderscoresParser(context); - headerParser = new HeaderParser(context); - escapeParser = new EscapeParser(context); - newLineEofParser = new NewLineEofParser([imageParser, underscoresParser]); - } - - public ITokenParser GetParser(TokenType tokenType) - { - ITokenParser parser; - switch (tokenType) - { - case TokenType.LBracket: - case TokenType.LParenthesis: - case TokenType.RBracket: - case TokenType.RParenthesis: - case TokenType.Exclamation: - { - parser = imageParser; - break; - } - case TokenType.Space: - case TokenType.Underscore: - case TokenType.DoubleUnderscore: - case TokenType.WordUnderscore: - case TokenType.WordDoubleUnderscore: - { - parser 
= underscoresParser; - break; - } - case TokenType.Hash: - { - parser = headerParser; - break; - } - case TokenType.Escape: - { - parser = escapeParser; - break; - } - case TokenType.NewLine: - case TokenType.Eof: - { - parser = newLineEofParser; - break; - } - default: - { - parser = textParser; - break; - } - } - - return parser; - } -} \ No newline at end of file diff --git a/cs/Markdown/ParserContext.cs b/cs/Markdown/ParserContext.cs deleted file mode 100644 index fdc5ab851..000000000 --- a/cs/Markdown/ParserContext.cs +++ /dev/null @@ -1,20 +0,0 @@ -namespace Markdown; - -public class ParserContext -{ - public int Position { get; private set; } - public Token Current => Tokens[Position]; - public Token? Next => Tokens[Position + 1].Type == TokenType.Eof ? null : Tokens[Position + 1]; - public Token? Prev => Position - 1 < 0 ? null : Tokens[Position - 1]; - public readonly List Tokens; - - public ParserContext(List tokens) - { - Tokens = tokens; - } - - public void IncreasePosition() - { - Position++; - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsers/EscapeParser.cs b/cs/Markdown/Parsers/EscapeParser.cs index a10f34708..544738939 100644 --- a/cs/Markdown/Parsers/EscapeParser.cs +++ b/cs/Markdown/Parsers/EscapeParser.cs @@ -1,40 +1,28 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Leaf; using Markdown.Parsers.Interfaces; +using Markdown.Tokenizer; namespace Markdown.Parsers; -public class EscapeParser : ITokenParser +public class EscapeParser : IParser { - private readonly ParserContext context; - private readonly HashSet escapableTokens = - [ - TokenType.Underscore, - TokenType.DoubleUnderscore, - TokenType.WordUnderscore, - TokenType.WordDoubleUnderscore, - TokenType.LBracket, - TokenType.RBracket, - TokenType.LParenthesis, - TokenType.RParenthesis, - TokenType.Hash, - TokenType.Escape, - TokenType.Exclamation - ]; + private readonly MarkdownParser parser; - public EscapeParser(ParserContext context) + public 
EscapeParser(MarkdownParser parser) { - this.context = context; + this.parser = parser; } - public void Parse() + public ParseStatus TryParse(out MarkdownNode node) { - if (context.Next != null && escapableTokens.Contains(context.Next.Type)) - { - context.Next.Type = TokenType.Text; - context.Current.Type = TokenType.Text; - context.Current.Value = ""; - return; - } + node = new TextNode(@"\"); + if (parser.CurrentToken.Type != TokenType.Escape) + return ParseStatus.Fail(); - context.Current.Type = TokenType.Text; + parser.MoveNext(); + node = new TextNode($"{parser.CurrentToken.Value}"); + + return ParseStatus.Ok(); } } \ No newline at end of file diff --git a/cs/Markdown/Parsers/HeaderParser.cs b/cs/Markdown/Parsers/HeaderParser.cs index 11c426a64..ae9406e2f 100644 --- a/cs/Markdown/Parsers/HeaderParser.cs +++ b/cs/Markdown/Parsers/HeaderParser.cs @@ -1,24 +1,44 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; using Markdown.Parsers.Interfaces; +using Markdown.Tokenizer; namespace Markdown.Parsers; -public class HeaderParser : ITokenParser +public class HeaderParser : IParser { - private readonly ParserContext context; + private readonly MarkdownParser parser; + private readonly HashSet expectedStopSymbols = [TokenType.NewLine, TokenType.Carriage]; - public HeaderParser(ParserContext context) + public HeaderParser(MarkdownParser parser) { - this.context = context; + this.parser = parser; } - public void Parse() + public ParseStatus TryParse(out MarkdownNode node) { - if ((context.Prev == null || context.Prev.Type == TokenType.NewLine) - && context.Next != null && context.Next.Value.StartsWith(" ")) + if (parser.NextToken != null + && (parser.CurrentToken.Type != TokenType.Hash + || (parser.CurrentToken.Type == TokenType.Hash && parser.NextToken.Type != TokenType.Space))) { - context.Next.Value = ""; - return; + node = new TextNode("#"); + return ParseStatus.Fail(); } - context.Current.Type = TokenType.Text; + 
parser.MoveNext(); + + node = new HeaderNode("#"); + parser.ParentStack.Push(parser.CurrentParent); + parser.CurrentParent = node; + parser.MoveNext(); + parser.ParseTokens(null, expectedStopSymbols); + + if (parser.CurrentToken.Type != TokenType.Eof) + node.AddChild(new TextNode($"{parser.CurrentToken.Value}")); + + if (!expectedStopSymbols.Contains(parser.CurrentToken.Type)) + parser.CurrentParent = parser.ParentStack.Pop(); + + return ParseStatus.Ok(); } } \ No newline at end of file diff --git a/cs/Markdown/Parsers/ImageParser.cs b/cs/Markdown/Parsers/ImageParser.cs index 08655574d..06baecc2d 100644 --- a/cs/Markdown/Parsers/ImageParser.cs +++ b/cs/Markdown/Parsers/ImageParser.cs @@ -1,158 +1,94 @@ using System.Text; +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; using Markdown.Parsers.Interfaces; +using Markdown.Tokenizer; namespace Markdown.Parsers; -public class ImageParser : ITokenParser, ICompletableParse -{ - private readonly Stack markdownBrackets = new(); - private readonly Stack markdownParenthesis = new(); - private bool startImageParse; - private bool isAltEnd; - private bool isUrlEnd; - private readonly StringBuilder urlBuilder = new(); - private readonly ParserContext context; - private readonly Stack imageTokens = new(); - - public ImageParser(ParserContext context) - { - this.context = context; - } - - public void Parse() - { - imageTokens.Push(context.Current); - if (!startImageParse) - { - if (context.Next is not { Type: TokenType.LBracket }) - { - Finish(); - return; - } - startImageParse = true; - return; +public class ImageParser : IParser +{ + private readonly MarkdownParser parser; + private readonly HashSet altRollbackSymbols = [TokenType.NewLine, TokenType.Hash]; + private readonly HashSet altExpectedStopSymbols = [TokenType.RBracket]; - } - var currentType = context.Current.Type; - if (currentType is TokenType.LBracket or TokenType.RBracket) - ParseAlt(); - else - ParseUrl(); - } - - public 
void Finish() + public ImageParser(MarkdownParser parser) { - startImageParse = false; - isAltEnd = false; - isUrlEnd = false; - while (imageTokens.Count > 0) - { - imageTokens.Pop().Type = TokenType.Text; - } + this.parser = parser; } - - private void ParseAlt() + + public ParseStatus TryParse(out MarkdownNode node) { - if (isAltEnd) + node = new TextNode("!"); + if (parser.CurrentToken.Type != TokenType.Exclamation) + return ParseStatus.Fail(); + + parser.MoveNext(); + + parser.ParentStack.Push(parser.CurrentParent); + node = new ImageNode("!"); + parser.CurrentParent = node; + if (TryReadAlt(out var altNode)) { - Finish(); - return; + node.AddChild(altNode); + if (TryReadUrl(out var urlNode)) + { + node.AddChild(urlNode); + parser.CurrentParent = parser.ParentStack.Pop(); + return ParseStatus.Ok(); + } } + + var prevParent = parser.ParentStack.Peek(); + prevParent.AddChildren(MarkdownParser.Rollback(node)); + parser.CurrentParent = prevParent; - TrackBracket(markdownBrackets, TokenType.LBracket, TokenType.RBracket, out isAltEnd); + return ParseStatus.Fail(); } - private void ParseUrl() + private bool TryReadAlt(out MarkdownNode node) { - if (!isAltEnd || context.Current.Type != TokenType.LParenthesis) - { - Finish(); - return; - } + node = new TextNode("["); + if (parser.CurrentToken.Type != TokenType.LBracket) + return false; - while (context.Current.Type is not (TokenType.Eof or TokenType.NewLine or TokenType.Space)) - { - var current = context.Current; - if (current.Type is TokenType.LParenthesis or TokenType.RParenthesis) - { - imageTokens.Push(context.Current); - TrackBracket(markdownParenthesis, TokenType.LParenthesis, TokenType.RParenthesis, out isUrlEnd); - if (isUrlEnd) - { - var isValidUrl = IsValidUrl(urlBuilder.ToString()); - if (isValidUrl) - { - imageTokens.Clear(); - } - else - { - Finish(); - return; - } - } - context.IncreasePosition(); - } - else - { - urlBuilder.Append(current.Value); - current.Type = TokenType.Text; - 
context.IncreasePosition(); - } - - } + parser.ParentStack.Push(parser.CurrentParent); + node = new AltNode("["); + parser.CurrentParent = node; + parser.MoveNext(); + parser.ParseTokens(altRollbackSymbols, altExpectedStopSymbols); - Finish(); + return parser.CurrentToken.Type == TokenType.RBracket; } - - private void TrackBracket(Stack brackets, TokenType left, TokenType right, out bool parseEnd) - { - var current = context.Current; - - if (brackets.Count == 0 && current.Type == left) - { - brackets.Push(current); - parseEnd = false; - return; - } - if (current.Type == left) - { - current.Type = TokenType.Text; - brackets.Push(current); - parseEnd = false; - return; - } - - if (current.Type == right) + private bool TryReadUrl(out MarkdownNode node) + { + var urlBuilder = new StringBuilder(); + node = new TextNode(""); + parser.MoveNext(); + + if (parser.CurrentToken.Type != TokenType.LParenthesis) + return false; + + parser.MoveNext(); + while (parser.CurrentToken.Type != TokenType.RParenthesis + && parser.CurrentToken.Type != TokenType.NewLine + && parser.CurrentToken.Type != TokenType.Carriage + && parser.CurrentToken.Type != TokenType.Eof) { - if (brackets.Count > 1) - { - current.Type = TokenType.Text; - brackets.Pop(); - parseEnd = false; - return; - } - brackets.Pop(); - parseEnd = true; - return; + urlBuilder.Append(parser.CurrentToken.Value); + parser.MoveNext(); } - parseEnd = false; - } - - private bool IsValidUrl(string text) - { - var allowed = new HashSet{ ".jpg", ".jpeg", ".png", ".gif", ".svg", ".webp" }; - var canCreateUrl = Uri.TryCreate(text, UriKind.Absolute, out var url); - if (canCreateUrl && url != null) + if (parser.CurrentToken.Type != TokenType.RParenthesis) { - var path = url.AbsolutePath; - var ext = Path.GetExtension(path).ToLowerInvariant(); - if (allowed.Contains(ext)) - return true; + node = new TextNode(urlBuilder.ToString()); + return false; } - - return false; + parser.MoveNext(); + node = new UrlNode(urlBuilder.ToString()); + + 
return true; } - } \ No newline at end of file diff --git a/cs/Markdown/Parsers/Interfaces/ICompletableParse.cs b/cs/Markdown/Parsers/Interfaces/ICompletableParse.cs deleted file mode 100644 index 5624aeb88..000000000 --- a/cs/Markdown/Parsers/Interfaces/ICompletableParse.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Parsers.Interfaces; - -public interface ICompletableParse -{ - void Finish(); -} \ No newline at end of file diff --git a/cs/Markdown/Parsers/Interfaces/IParser.cs b/cs/Markdown/Parsers/Interfaces/IParser.cs new file mode 100644 index 000000000..5fa54cdc5 --- /dev/null +++ b/cs/Markdown/Parsers/Interfaces/IParser.cs @@ -0,0 +1,8 @@ +using Markdown.Nodes.Interfaces; + +namespace Markdown.Parsers.Interfaces; + +public interface IParser +{ + public ParseStatus TryParse(out MarkdownNode node); +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/Interfaces/ITokenParser.cs b/cs/Markdown/Parsers/Interfaces/ITokenParser.cs deleted file mode 100644 index c51077eaa..000000000 --- a/cs/Markdown/Parsers/Interfaces/ITokenParser.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Parsers.Interfaces; - -public interface ITokenParser -{ - void Parse(); -} \ No newline at end of file diff --git a/cs/Markdown/Parsers/MarkdownParser.cs b/cs/Markdown/Parsers/MarkdownParser.cs new file mode 100644 index 000000000..44ef183dd --- /dev/null +++ b/cs/Markdown/Parsers/MarkdownParser.cs @@ -0,0 +1,109 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers.Interfaces; +using Markdown.Tokenizer; + +namespace Markdown.Parsers; + +public class MarkdownParser +{ + + public readonly Stack ParentStack = new(); + public MarkdownNode CurrentParent; + public Token CurrentToken => tokens[position]; + public Token? NextToken => position + 1 == tokens.Count ? null : tokens[position + 1]; + public Token? PrevToken => position - 1 == -1 ? 
null : tokens[position - 1]; + + private readonly List parsers; + private readonly List tokens; + private int position; + public MarkdownParser(List tokens) + { + this.tokens = tokens; + parsers = new List + { + new HeaderParser(this), + new ImageParser(this), + new UnderscoresParser(this), + new EscapeParser(this) + }; + var root = new MarkdownDocumentNode(""); + CurrentParent = root; + } + + public MarkdownNode ParseTokens( + HashSet? rollbackStopSymbols = null, + HashSet? expectedStopSymbols = null) + { + while (CurrentToken.Type != TokenType.Eof) + { + var parseStatus = TryParseNode(out var node); + + if (parseStatus.Type is not ParseResultType.Success) + if (parseStatus.Type is not ParseResultType.WasRollback) + { + if (expectedStopSymbols != null && expectedStopSymbols.Contains(CurrentToken.Type)) + { + var currNode = CurrentParent; + if (ParentStack.Count > 0) + CurrentParent = ParentStack.Pop(); + return currNode; + } + + if (parseStatus.Type == ParseResultType.NeedRollback + || (rollbackStopSymbols != null && rollbackStopSymbols.Contains(CurrentToken.Type))) + { + var children = Rollback(CurrentParent); + var currNode = CurrentParent; + CurrentParent = ParentStack.Pop(); + CurrentParent.AddChildren(children); + + return CurrentToken.Type != TokenType.Eof ? 
currNode : CurrentParent; + } + } + + CurrentParent.AddChild(node); + if (CurrentToken.Type != TokenType.Eof) + MoveNext(); + } + + return CurrentParent; + } + + private ParseStatus TryParseNode(out MarkdownNode node) + { + foreach (var parser in parsers) + { + var status = parser.TryParse(out var parsedNode); + if (status.Type is ParseResultType.Fail) + continue; + node = parsedNode; + return status; + } + + node = new TextNode(CurrentToken.Value); + + return ParseStatus.Fail(); + } + + public static List Rollback(MarkdownNode node) + { + var result = new List(); + RollBackRecursive(node, result); + + return result; + } + + private static void RollBackRecursive(MarkdownNode node, List result) + { + result.Add(new TextNode(node.Value)); + foreach (var child in node.Children) + RollBackRecursive(child, result); + } + + public void MoveNext() + { + position++; + } +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/NewLineEofParser.cs b/cs/Markdown/Parsers/NewLineEofParser.cs deleted file mode 100644 index 680fd64a9..000000000 --- a/cs/Markdown/Parsers/NewLineEofParser.cs +++ /dev/null @@ -1,21 +0,0 @@ -using Markdown.Parsers.Interfaces; - -namespace Markdown.Parsers; - -public class NewLineEofParser : ITokenParser -{ - private readonly List completableParsers; - - public NewLineEofParser(List completableParsers) - { - this.completableParsers = completableParsers; - } - - public void Parse() - { - foreach (var parser in completableParsers) - { - parser.Finish(); - } - } -} \ No newline at end of file diff --git a/cs/Markdown/Parsers/ParseStatus.cs b/cs/Markdown/Parsers/ParseStatus.cs new file mode 100644 index 000000000..bf624be20 --- /dev/null +++ b/cs/Markdown/Parsers/ParseStatus.cs @@ -0,0 +1,39 @@ +namespace Markdown.Parsers; + +public readonly struct ParseStatus +{ + public ParseResultType Type { get; } + + private ParseStatus(ParseResultType type) + { + Type = type; + } + + public static ParseStatus Ok() + { + return new 
ParseStatus(ParseResultType.Success); + } + + public static ParseStatus Fail() + { + return new ParseStatus(ParseResultType.Fail); + } + + public static ParseStatus WasRollback() + { + return new ParseStatus(ParseResultType.WasRollback); + } + + public static ParseStatus NeedRollback() + { + return new ParseStatus(ParseResultType.NeedRollback); + } +} + +public enum ParseResultType +{ + Success, + Fail, + WasRollback, + NeedRollback +} \ No newline at end of file diff --git a/cs/Markdown/Parsers/TextParser.cs b/cs/Markdown/Parsers/TextParser.cs deleted file mode 100644 index 054a46f1e..000000000 --- a/cs/Markdown/Parsers/TextParser.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Parsers.Interfaces; - -namespace Markdown.Parsers; - -public class TextParser : ITokenParser -{ - public void Parse(){} -} \ No newline at end of file diff --git a/cs/Markdown/Parsers/UnderscoresParser.cs b/cs/Markdown/Parsers/UnderscoresParser.cs index 2f98a83ec..76570b2fb 100644 --- a/cs/Markdown/Parsers/UnderscoresParser.cs +++ b/cs/Markdown/Parsers/UnderscoresParser.cs @@ -1,178 +1,353 @@ +using Markdown.Nodes.Interfaces; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; using Markdown.Parsers.Interfaces; +using Markdown.Tokenizer; namespace Markdown.Parsers; -public class UnderscoresParser : ITokenParser, ICompletableParse +public class UnderscoresParser : IParser { - private readonly ParserContext context; - private readonly Stack markdownUnderscores = new(); + private readonly HashSet boldExpectedStopSymbols = + [ + TokenType.DoubleUnderscore, + TokenType.WordDoubleUnderscore + ]; - public UnderscoresParser(ParserContext context) + private readonly HashSet boldRollbackSymbols = [TokenType.NewLine, TokenType.Eof]; + private readonly HashSet boldWordRollbackSymbols = [TokenType.Space]; + + private readonly HashSet italicExpectedStopSymbols = + [ + TokenType.Underscore, + TokenType.WordUnderscore + ]; + + private readonly HashSet italicRollbackSymbols = + [ + 
TokenType.DoubleUnderscore, + TokenType.WordDoubleUnderscore, + TokenType.NewLine, + TokenType.Eof + ]; + + private readonly HashSet italicWordRollbackSymbols = + [ + TokenType.DoubleUnderscore, + TokenType.WordDoubleUnderscore, + TokenType.Space + ]; + + private readonly MarkdownParser parser; + private readonly Stack underscores = new(); + + public UnderscoresParser(MarkdownParser parser) { - this.context = context; + this.parser = parser; } - public void Parse() + public ParseStatus TryParse(out MarkdownNode node) { - if (context.Current.Type == TokenType.Space) - { - ParseSpace(); - } + node = new TextNode(parser.CurrentToken.Value); + var status = ParseStatus.Fail(); + if (parser.CurrentToken.Type is not + (TokenType.Underscore or TokenType.DoubleUnderscore + or TokenType.WordUnderscore or TokenType.WordDoubleUnderscore)) + return status; - if (context.Current.Type is TokenType.Underscore or TokenType.DoubleUnderscore) + if (parser.CurrentToken.Type is TokenType.Underscore or TokenType.DoubleUnderscore) { - ParseUnderscore(); + status = TryReadUnderscores(out var parsedUnderscoresNode); + node = parsedUnderscoresNode; + return status; } + + status = TryReadWordUnderscores(out var parsedWordUnderscoresNode); + node = parsedWordUnderscoresNode; + return status; + } + + private ParseStatus TryReadUnderscores(out MarkdownNode node) + { + node = new TextNode(parser.CurrentToken.Value); - if (context.Current.Type is TokenType.WordUnderscore or TokenType.WordDoubleUnderscore) - { - CanParseWordUnderscores(); - } + return TryReadUnderscoreBase( + CanOpen, + CanClose, + t => t == TokenType.Underscore + ? italicExpectedStopSymbols + : boldExpectedStopSymbols, + t => t is TokenType.Underscore or TokenType.WordUnderscore + ? 
italicRollbackSymbols + : boldRollbackSymbols, + () => + { + var rollback = new HashSet + { + TokenType.Underscore, + TokenType.WordUnderscore + }; + rollback.UnionWith(italicRollbackSymbols); + return rollback; + }, + supportNeedRollbackStatus: true, + out node); } - public void Finish() + + private ParseStatus TryReadWordUnderscores(out MarkdownNode node) { - if (markdownUnderscores.Count > 0) - { - ConvertStackToText(); - } + node = new TextNode(parser.CurrentToken.Value); + if (parser.CurrentToken.Type is not (TokenType.WordUnderscore or TokenType.WordDoubleUnderscore)) + return ParseStatus.Fail(); + + return TryReadUnderscoreBase( + canOpenFunc: () => !CanCloseWordUnderscore(), + canCloseFunc: CanCloseWordUnderscore, + getExpectedStopSymbols: t => + t == TokenType.WordUnderscore + ? italicExpectedStopSymbols + : boldExpectedStopSymbols, + getRollbackSymbols: t => t is TokenType.Underscore or TokenType.WordUnderscore + ? italicWordRollbackSymbols + : boldWordRollbackSymbols, + getFallbackRollback: () => + { + var rollback = new HashSet + { + TokenType.Underscore, + TokenType.WordUnderscore + }; + rollback.UnionWith(italicWordRollbackSymbols); + return rollback; + }, + supportNeedRollbackStatus: false, + out node); } - private void ParseUnderscore() + private ParseStatus TryReadUnderscoreBase( + Func canOpenFunc, + Func canCloseFunc, + Func> getExpectedStopSymbols, + Func> getRollbackSymbols, + Func> getFallbackRollback, + bool supportNeedRollbackStatus, + out MarkdownNode node) { - HandleUnderscore(CanOpen(), CanClose(), IsValidOpenUnderscores, IsValidCloseUnderscores); - } + node = new TextNode(parser.CurrentToken.Value); + var underscoreType = parser.CurrentToken.Type; - private void CanParseWordUnderscores() - { - if (!IsValidWordUnderscores()) + if (parser.NextToken?.Type == underscoreType) + return ParseStatus.Fail(); + + var canOpen = canOpenFunc(); + var canClose = canCloseFunc(); + + var handled = TryHandleUnderscore( + canOpen, + canClose, + 
IsValidOpenUnderscores, + IsValidCloseUnderscores, + out var handledNode, + underscoreType); + + if (handled && canOpen) + { + parser.ParentStack.Push(parser.CurrentParent); + parser.CurrentParent = handledNode; + node = handledNode; + + var expectedStopSymbols = getExpectedStopSymbols(underscoreType); + var rollbackSymbols = getRollbackSymbols(underscoreType); + + parser.MoveNext(); + parser.ParseTokens(rollbackSymbols, expectedStopSymbols); + + if (supportNeedRollbackStatus && + rollbackSymbols.Contains(parser.CurrentToken.Type)) + { + node = new TextNode($"{parser.CurrentToken.Value}"); + + if (parser.CurrentToken.Type is TokenType.Eof or TokenType.NewLine) + { + underscores.Clear(); + if (parser.CurrentToken.Type == TokenType.Eof) + return ParseStatus.NeedRollback(); + } + + return ParseStatus.WasRollback(); + } + + return expectedStopSymbols.Contains(parser.CurrentToken.Type) + ? ParseStatus.Ok() + : ParseStatus.Fail(); + } + + if (!handled && canOpen) { - context.Current.Type = TokenType.Text; + var rollback = getFallbackRollback(); + parser.ParentStack.Push(parser.CurrentParent); + parser.CurrentParent = GetNodeFromUnderscoreType(underscoreType); + parser.MoveNext(); + parser.ParseTokens(rollback); + node = new TextNode($"{parser.CurrentToken.Value}"); + + return ParseStatus.Ok(); } - HandleUnderscore(CanOpenWordUnderscore(), CanCloseWordUnderscore(), _ => true, _ => true); + if (supportNeedRollbackStatus && canClose) + return ParseStatus.NeedRollback(); + + return ParseStatus.Fail(); } - private void HandleUnderscore( + private bool TryHandleUnderscore( bool canOpen, bool canClose, Func isValidOpen, - Func isValidClose) + Func isValidClose, + out MarkdownNode node, + TokenType type) { if (canOpen && canClose) { - if (markdownUnderscores.Count > 0) + if (underscores.Count > 0) { - var peeked = markdownUnderscores.Peek(); + var peeked = underscores.Peek(); if (!IsIntersecting(peeked)) - markdownUnderscores.Pop(); + underscores.Pop(); } else { - 
markdownUnderscores.Push(context.Current); + underscores.Push(parser.CurrentToken); } } else if (canOpen) { - HandleOpen(isValidOpen); + if (TryHandleOpen(isValidOpen, type, out var node1)) + { + node = node1; + return true; + } } - else if (canClose && markdownUnderscores.Count > 0) + else if (canClose && underscores.Count > 0) { - HandleClose(isValidClose); - } - else - { - context.Current.Type = TokenType.Text; + if (TryHandleClose(isValidClose, out var node2)) + { + node = node2; + return true; + } } + + node = new TextNode($"{parser.CurrentToken.Value}"); + return false; } - private void HandleOpen(Func isValidOpen) + private bool TryHandleOpen(Func isValidOpen, TokenType type, out MarkdownNode node) { - if (markdownUnderscores.Count > 0) + var value = GetValueFromUnderscoreType(type); + if (underscores.Count > 0) { - var peeked = markdownUnderscores.Peek(); + var peeked = underscores.Peek(); if (!isValidOpen(peeked)) { - context.Current.Type = TokenType.Text; - } - else - { - markdownUnderscores.Push(context.Current); + underscores.Push(parser.CurrentToken); + node = new TextNode(value); + return false; } - } - else - { - markdownUnderscores.Push(context.Current); - } - } - private void HandleClose(Func isValidClose) - { - var peeked = markdownUnderscores.Peek(); - if (!isValidClose(peeked) && markdownUnderscores.Count == 1) - { - context.Current.Type = TokenType.Text; - } - else if (IsIntersecting(peeked)) - { - markdownUnderscores.Push(context.Current); - ConvertStackToText(); + underscores.Push(parser.CurrentToken); } else { - markdownUnderscores.Pop(); + underscores.Push(parser.CurrentToken); } + + node = GetNodeFromUnderscoreType(type); + return true; } - private void ParseSpace() + + private bool TryHandleClose(Func isValidClose, out MarkdownNode node) { - if (markdownUnderscores.Count <= 0) + var peeked = underscores.Peek(); + node = new TextNode(""); + if (!isValidClose(peeked) && underscores.Count == 1) { - return; - } - - var peeked = 
markdownUnderscores.Peek(); - if (peeked.Type is TokenType.WordUnderscore or TokenType.WordDoubleUnderscore) - { - peeked.Type = TokenType.Text; - markdownUnderscores.Pop(); + underscores.Pop(); + return false; } - } - private bool IsValidWordUnderscores() - { - if (char.IsDigit(context.Next!.Value[0]) && char.IsDigit(context.Prev!.Value[^1])) + if (IsIntersecting(peeked)) + { + underscores.Pop(); return false; + } - if (char.IsDigit(context.Next!.Value[0]) && char.IsLetter(context.Prev!.Value[^1])) - return false; - - if (char.IsDigit(context.Next!.Value[^1]) && char.IsLetter(context.Prev!.Value[0])) - return false; + underscores.Pop(); return true; } private bool IsValidOpenUnderscores(Token peeked) { - var curr = context.Current.Type; + var curr = parser.CurrentToken.Type; var peekedType = peeked.Type; - + if (curr == peekedType) return false; - - return curr != TokenType.DoubleUnderscore || peekedType != TokenType.Underscore; + + return curr switch + { + TokenType.Underscore => peekedType is TokenType.DoubleUnderscore, + TokenType.WordUnderscore => peekedType is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore, + TokenType.DoubleUnderscore => peekedType is not TokenType.Underscore, + TokenType.WordDoubleUnderscore => peekedType != TokenType.Underscore && + peekedType != TokenType.WordUnderscore, + _ => true + }; } - + private bool IsValidCloseUnderscores(Token peeked) { - var currentType = context.Current.Type; + var currentType = parser.CurrentToken.Type; var peekedType = peeked.Type; - - return currentType != TokenType.DoubleUnderscore || peekedType != TokenType.Underscore; + + if (currentType == TokenType.Underscore) return peekedType is TokenType.Underscore or TokenType.WordUnderscore; + + return peekedType is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore; + } + + private bool CanOpen() + { + return parser.NextToken is { Type: TokenType.Text } && + !parser.NextToken.Value.StartsWith(" "); + } + + private bool CanClose() + { + if 
(underscores.Count == 0 + || parser.PrevToken == null + || parser.PrevToken.Value.EndsWith(" ") + || parser.PrevToken.Type is TokenType.Underscore or TokenType.DoubleUnderscore) + return false; + + return parser.NextToken != null; + } + private bool CanCloseWordUnderscore() + { + if (underscores.Count <= 0) + return false; + + var peeked = underscores.Peek(); + + if (parser.CurrentToken.Type is TokenType.WordUnderscore) + return peeked.Type is TokenType.WordUnderscore or TokenType.Underscore; + + return peeked.Type is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore; } private bool IsIntersecting(Token peeked) { - switch (context.Current.Type) + switch (parser.CurrentToken.Type) { case TokenType.Underscore: case TokenType.WordUnderscore: @@ -193,64 +368,27 @@ private bool IsIntersecting(Token peeked) return false; } - private void ConvertStackToText() + private string GetValueFromUnderscoreType(TokenType type) { - while (markdownUnderscores.Count > 0) + return type switch { - var token = markdownUnderscores.Pop(); - token.Type = TokenType.Text; - } - } - - private bool CanOpen() - { - var nextToken = context.Next; - - return nextToken != null && !nextToken.Value.StartsWith(" "); - } - - private bool CanClose() - { - if (context.Prev == null || context.Prev.Value.EndsWith(" ") || context.Prev.Type is TokenType.Underscore or TokenType.DoubleUnderscore) - return false; - - return context.Next == null || context.Next.Value.StartsWith(" ") || context.Next.Type is TokenType.Underscore or TokenType.DoubleUnderscore; - } - - private bool CanOpenWordUnderscore() - { - var current = context.Current; - if (markdownUnderscores.Count <= 0) - return true; - - var peeked = markdownUnderscores.Peek(); - return current.Type switch - { - TokenType.WordUnderscore => peeked.Type is not (TokenType.Underscore or TokenType.WordUnderscore), - TokenType.WordDoubleUnderscore => peeked.Type switch - { - TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore or 
TokenType.WordUnderscore - or TokenType.Underscore => false, - _ => true - }, - _ => true + TokenType.Underscore or TokenType.WordUnderscore => "_", + TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore => "__", + _ => throw new ArgumentException("Token type must be Underscore or WordUnderscore or DoubleUnderscore or " + + "WordDoubleUnderscore") }; } - private bool CanCloseWordUnderscore() + private MarkdownNode GetNodeFromUnderscoreType(TokenType type) { - var current = context.Current; - if (markdownUnderscores.Count <= 0) - return false; - - var peeked = markdownUnderscores.Peek(); + var value = GetValueFromUnderscoreType(type); - if (current.Type is TokenType.WordUnderscore) + return type switch { - return peeked.Type is TokenType.WordUnderscore or TokenType.Underscore; - } - - return peeked.Type is TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore; - + TokenType.Underscore or TokenType.WordUnderscore => new ItalicNode(value), + TokenType.DoubleUnderscore or TokenType.WordDoubleUnderscore => new BoldNode(value), + _ => throw new ArgumentException("Token type must be Underscore or WordUnderscore or DoubleUnderscore or " + + "WordDoubleUnderscore") + }; } } \ No newline at end of file diff --git a/cs/Markdown/Tests/MdPerformanceTest.cs b/cs/Markdown/Tests/MdPerformanceTest.cs new file mode 100644 index 000000000..67591c212 --- /dev/null +++ b/cs/Markdown/Tests/MdPerformanceTest.cs @@ -0,0 +1,53 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using NUnit.Framework; + +namespace Markdown.Tests; + +[TestFixture] +public class MdPerformanceTest +{ + [Test] + public void Render_Performance_WithAllTokens() + { + var repeatCounts = new List + { + 1000, + 2000, + 4000, + 8000, + 16000 + }; + const string markdownText = "# Заголовок _курсив_ __жирный__ ![Cat](https://example.com/image.jpg) текст" + + " \\экранирование_\n"; + long? 
baseTimeMs = null; + var baseRepeatCount = repeatCounts[0]; + + foreach (var repeatCount in repeatCounts) + { + var sb = new StringBuilder(); + var md = new Md(); + for (var i = 0; i < repeatCount; i++) + sb.Append(markdownText); + var input = sb.ToString(); + + var stopwatch = Stopwatch.StartNew(); + md.Render(input); + stopwatch.Stop(); + + var elapsedMs = stopwatch.ElapsedMilliseconds; + if (baseTimeMs == null) + { + baseTimeMs = elapsedMs; + continue; + } + + var multiplier = repeatCount / baseRepeatCount; + var expectedTime = baseTimeMs.Value * multiplier; + const double tolerance = 0.20; + var max = (long)(expectedTime * (1 + tolerance)); + elapsedMs.Should().BeLessThanOrEqualTo(max); + } + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/MdSpec.txt b/cs/Markdown/Tests/MdSpec.txt new file mode 100644 index 000000000..b75aa2501 --- /dev/null +++ b/cs/Markdown/Tests/MdSpec.txt @@ -0,0 +1,86 @@ +# Спецификация языка разметки + +Посмотрите этот файл в сыром виде. Сравните с тем, что показывает github. +Все совпадения случайны ;) + + + +# Курсив + +Текст, _окруженный с двух сторон_ одинарными символами подчерка, +должен помещаться в HTML-тег \ вот так: + +Текст, \окруженный с двух сторон\ одинарными символами подчерка, +должен помещаться в HTML-тег \. + + + +# Полужирный + +__Выделенный двумя символами текст__ должен становиться полужирным с помощью тега \. + + + +# Экранирование + +Любой символ можно экранировать, чтобы он не считался частью разметки. +\_Вот это\_, не должно выделиться тегом \. + +Символ экранирования исчезает из результата, только если экранирует что-то. +Здесь сим\волы экранирования\ \должны остаться.\ + +Символ экранирования тоже можно экранировать: \\_вот это будет выделено тегом_ \ + + + +# Взаимодействие тегов + +Внутри __двойного выделения _одинарное_ тоже__ работает. + +Но не наоборот — внутри _одинарного __двойное__ не_ работает. 
+ +Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. + +Однако выделять часть слова они могут: и в _нач_але, и в сер_еди_не, и в кон_це._ + +В то же время выделение в ра_зных сл_овах не работает. + +__Непарные_ символы в рамках одного абзаца не считаются выделением. + +За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением +и остаются просто символами подчерка. + +Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки _не считаются_ окончанием выделения +и остаются просто символами подчерка. + +В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением. + +Если внутри подчерков пустая строка ____, то они остаются символами подчерка. + + + +# Заголовки + +Абзац, начинающийся с "# ", выделяется тегом \

в заголовок. +В тексте заголовка могут присутствовать все прочие символы разметки с указанными правилами. + +Таким образом + +\# Заголовок \__с \_разными_ символами__ + +превратится в: + +

Заголовок с разными символами

+ +# Изображения + +Изображения задаются с помощью специальной конструкции вида: \![alt](url), где: +alt — альтернативный текст изображения; +url — ссылка на изображение. + +Например, запись: + +\![Cat](https://example.com/image.jpg) + +превратится в: +![Cat](https://example.com/image.jpg) \ No newline at end of file diff --git a/cs/Markdown/Tests/MdSpecApprovalTest.cs b/cs/Markdown/Tests/MdSpecApprovalTest.cs new file mode 100644 index 000000000..51f5ec344 --- /dev/null +++ b/cs/Markdown/Tests/MdSpecApprovalTest.cs @@ -0,0 +1,23 @@ +using ApprovalTests; +using ApprovalTests.Namers; +using ApprovalTests.Reporters; +using NUnit.Framework; + +namespace Markdown.Tests; + +[UseApprovalSubdirectory("Results")] +[TestFixture] +public class MdSpecApprovalTest +{ + [Test] + [UseReporter(typeof(DiffReporter))] + public void Render_MdSpec_ReturnCorrectHtml() + { + var markdown = File.ReadAllText(@"..\..\..\Tests\MdSpec.txt"); + + var renderer = new Md(); + var html = renderer.Render(markdown); + + Approvals.Verify(html); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/MdTests.cs b/cs/Markdown/Tests/MdTests.cs index 39f82473c..ea2cfa335 100644 --- a/cs/Markdown/Tests/MdTests.cs +++ b/cs/Markdown/Tests/MdTests.cs @@ -7,41 +7,19 @@ namespace Markdown.Tests; [TestFixture] public class MdTests -{ - [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "\"Cat\"", TestName = "ValidUrlAndAlt")] - [TestCase("!\n[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "!\n[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "NewlineAfterExclamation")] - [TestCase("![Ca\nt](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Ca\nt](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "AltWithNewline")] - 
[TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6\na5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6\na5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithNewline")] - [TestCase("![C[]at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "\"C[]at\"", TestName = "AltWithBrackets")] - [TestCase("![C[at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![C[at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UnclosedBracketInAlt")] - [TestCase("![C]at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![C]at](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "ExtraClosingBracketInAlt")] - [TestCase("![Cat](https://i.pinimg.com/originals()/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "\"Cat\"", TestName = "UrlWithEmptyParentheses")] - [TestCase("![Cat](https://i.pinimg.com/originals(/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals(/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithOneOpenParentheses")] - [TestCase("![Cat](https://i.pinimg.com/originals)/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals)/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithOneCloseParentheses")] - [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c7 6c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c7 6c038ef0c8d2502fd2f6.jpg)", TestName = "UrlWithSpace")] - [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.qqq)", - "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.qqq)", TestName = "InvalidImageFormat")] - [TestCase("![](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - 
"\"\"", TestName = "EmptyAltText")] - [TestCase("![Cat]()", "![Cat]()", TestName = "EmptyUrl")] - [TestCase("![Cat](Cat)", "![Cat](Cat)", TestName = "InvalidUrl")] - [TestCase("![__Cat__](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "\"<strongCat
\">", TestName = "AltWithBold")] - [TestCase("![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a__5b2__c76c038ef0c8d2502fd2f6.jpg)", - "\"Cat\"", TestName = "UrlWithBold")] - [TestCase("![[][][]](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "\"[][][]\"", TestName = "AltWithMultipleBrackets")] +{ + [TestCase("![Cat](https://example.com/image.jpg)", + "\"Cat\"", TestName = "ValidUrlAndAlt")] + [TestCase("!\n[Cat](https://example.com/image.jpg)", + "!\n[Cat](https://example.com/image.jpg)", TestName = "NewlineAfterExclamation")] + [TestCase("![Ca\nt](https://example.com/image.jpg)", + "![Ca\nt](https://example.com/image.jpg)", TestName = "AltWithNewline")] + [TestCase("![](https://example.com/image.jpg)", + "\"\"", TestName = "EmptyAltText")] + [TestCase("![__Cat__](https://example.com/image.jpg)", + "\"<strongCat\">", TestName = "AltWithBold")] + [TestCase("![Cat](https://example.com/i__m__age.jpg)", + "\"Cat\"", TestName = "UrlWithBold")] public void Render_Image_CorrectHtmlText(string markdownText, string expectedResult) { var renderer = new Md(); @@ -49,18 +27,21 @@ public void Render_Image_CorrectHtmlText(string markdownText, string expectedRes html.Should().Be(expectedResult); } - + [TestCase("# текст", "

текст

", TestName = "SimpleHeader")] [TestCase("# Первый \n# Второй", "

Первый

\n

Второй

", TestName = "TwoHeadersInDifferentLines")] - [TestCase("# Первый \n просто текст \n# Второй", "

Первый

\n просто текст \n

Второй

", TestName = "HeadersWithTextBetween")] + [TestCase("# Первый \n просто текст \n# Второй", "

Первый

\n просто текст \n

Второй

", + TestName = "HeadersWithTextBetween")] [TestCase("#текст", "#текст", TestName = "WithNoSpace")] - [TestCase("# Заголовок с _курсивом_ и __жирным шрифтом__", "

Заголовок с курсивом и жирным шрифтом

", TestName = "WithInlineFormatting")] - [TestCase("# ![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "

\"Cat\"

", TestName = "WithImage")] + [TestCase("# Заголовок с _курсивом_ и __жирным шрифтом__", + "

Заголовок с курсивом и жирным шрифтом

", TestName = "WithInlineFormatting")] + [TestCase("# ![Cat](https://example.com/image.jpg)", + "

\"Cat\"

", TestName = "WithImage")] [TestCase("# __текст__", "

текст

", TestName = "WithBold")] [TestCase(" # текст", " # текст", TestName = "LeadingSpace")] [TestCase("# ", "

", TestName = "Empty")] [TestCase("# текст # ", "

текст #

", TestName = "TrailingHashIgnored")] + [TestCase("# текст", "

текст

", TestName = "ManySpaceAfterHash")] public void Render_Header_CorrectHtmlText(string markdownText, string expectedResult) { var renderer = new Md(); @@ -68,37 +49,33 @@ public void Render_Header_CorrectHtmlText(string markdownText, string expectedRe html.Should().Be(expectedResult); } - + [TestCase("_текст_", "текст", TestName = "SimpleItalic")] [TestCase("_те\nкст_", "_те\nкст_", TestName = "ItalicWithNewLineInside")] [TestCase("__текст__", "текст", TestName = "SimpleBold")] - [TestCase("Внутри __двойного выделения _одинарное_ тоже__ работает", + [TestCase("Внутри __двойного выделения _одинарное_ тоже__ работает", "Внутри двойного выделения одинарное тоже работает", TestName = "BoldWithItalicInside")] [TestCase("_те_кст", "текст", TestName = "ItalicAtStartWord")] [TestCase("т_екс_т", "текст", TestName = "ItalicInMiddleWord")] [TestCase("текс_т_", "текст", TestName = "ItalicAtEndWord")] [TestCase("__текст_", "__текст_", TestName = "UnpairedUnderscores")] - [TestCase("_ текст_","_ текст_", TestName = "LeadingSpacePreventsItalic")] - [TestCase("1_2_3","1_2_3", TestName = "DigitsPreventItalic")] - [TestCase("те_кст те_кст","те_кст те_кст", TestName = "SeparateWordsUnderscores")] - [TestCase("_текст _текст", "_текст _текст",TestName = "UnclosedItalicMultipleWords")] - [TestCase("__","__", TestName = "OnlyDoubleUnderscore")] - [TestCase("____","____", TestName = "OnlyFourUnderscores")] - [TestCase("__текст _текст__ текст_","__текст _текст__ текст_", TestName = "BoldWithNestedItalic")] - [TestCase("_текст __текст__ текст_","текст __текст__ текст", TestName = "ItalicWithBoldInside")] - [TestCase("_текст т__екс__т текст_","текст т__екс__т текст", TestName = "ItalicWithBoldInsideWord")] - [TestCase("__текст т_екст текст___","текст т_екст текст_", TestName = "BoldWithTrailingUnderscore")] - [TestCase("т__е_к_с__т","текст", TestName = "BoldInWordWithItalicInside")] - [TestCase("__те_к_ст__","текст", TestName = "BoldWithItalicInsideInWord")] - 
[TestCase("_те__к__ст_","те__к__ст", TestName = "ItalicWithBoldInsideInWord")] - [TestCase("_123_","123", TestName = "ItalicWithNumbers")] - [TestCase("__123__","123", TestName = "BoldWithNumbers")] - [TestCase("__++++__","++++", TestName = "BoldWithSymbols")] - [TestCase("__текст___текст_","тексттекст", TestName = "BoldThenItalic")] - [TestCase("__ текст __","__ текст __", TestName = "BoldWithSpacesPrevents")] - [TestCase("__текст __","__текст __", TestName = "BoldWithSpace")] - [TestCase("тек__с_т те_кс__т","тек__с_т те_кс__т", TestName = "UnclosedUnderscoresInDifferentWords")] + [TestCase("_ текст_", "_ текст_", TestName = "LeadingSpacePreventsItalic")] + [TestCase("1_2_3", "1_2_3", TestName = "DigitsPreventItalic")] + [TestCase("те_кст те_кст", "те_кст те_кст", TestName = "SeparateWordsUnderscores")] + [TestCase("__", "__", TestName = "OnlyDoubleUnderscore")] + [TestCase("____", "____", TestName = "OnlyFourUnderscores")] + [TestCase("__текст _текст__ текст_", "__текст _текст__ текст_", TestName = "BoldWithNestedItalic")] + [TestCase("_текст __текст__ текст_", "текст __текст__ текст", TestName = "ItalicWithBoldInside")] + [TestCase("_текст т__екс__т текст_", "текст т__екс__т текст", TestName = "ItalicWithBoldInsideWord")] + [TestCase("т__е_к_с__т", "текст", TestName = "BoldInWordWithItalicInside")] + [TestCase("__те_к_ст__", "текст", TestName = "BoldWithItalicInsideInWord")] + [TestCase("_те__к__ст_", "те__к__ст", TestName = "ItalicWithBoldInsideInWord")] + [TestCase("_123_", "123", TestName = "ItalicWithNumbers")] + [TestCase("__123__", "123", TestName = "BoldWithNumbers")] + [TestCase("__++++__", "++++", TestName = "BoldWithSymbols")] + [TestCase("__ текст __", "__ текст __", TestName = "BoldWithSpacesPrevents")] + [TestCase("__текст __", "__текст __", TestName = "BoldWithSpace")] public void Render_Underscores_CorrectHtmlText(string markdownText, string expectedResult) { var renderer = new Md(); @@ -106,18 +83,15 @@ public void 
Render_Underscores_CorrectHtmlText(string markdownText, string expec html.Should().Be(expectedResult); } - + [TestCase(@"\_текст_", "_текст_", TestName = "EscapedUnderscore")] [TestCase(@"\\_текст\\_", @"\текст\", TestName = "DoubleEscapedUnderscoresAroundText")] [TestCase(@"\# текст", "# текст", TestName = "EscapedHeader")] - [TestCase(@"\![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "EscapedExclamationInImage")] - [TestCase(@"!\[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "EscapedAltInImage")] - [TestCase(@"![Cat]\(https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", - "![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)", TestName = "EscapedUrlInImage")] - [TestCase(@"__текст \_текст__ текст_","текст _текст текст_", TestName = "BoldWithEscapedUnderscoreInside")] - [TestCase(@"т\екс\т",@"т\екс\т", TestName = "TextWithMultipleEscapes")] + [TestCase(@"\![Cat](https://example.com/image.jpg)", + "![Cat](https://example.com/image.jpg)", TestName = "EscapedExclamationInImage")] + [TestCase(@"__текст \_текст__ текст_", "текст _текст текст_", + TestName = "BoldWithEscapedUnderscoreInside")] + [TestCase(@"т\екс\т", @"т\екс\т", TestName = "TextWithMultipleEscapes")] public void Render_Escape_CorrectHtmlText(string markdownText, string expectedResult) { var renderer = new Md(); @@ -125,27 +99,6 @@ public void Render_Escape_CorrectHtmlText(string markdownText, string expectedRe html.Should().Be(expectedResult); } - - [TestCase(1000, TestName = "Parse_1000_repeat")] - [TestCase(2000, TestName = "Parse_2000_repeats")] - [TestCase(4000, TestName = "Parse_4000_repeats")] - [TestCase(8000, TestName = "Parse_8000_repeats")] - [TestCase(16000, 
TestName = "Parse_16000_repeats")] - public void Render_Performance_WithAllTokens(int repeatCount) - { - var markdownText = "# Заголовок _курсив_ __жирный__ ![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg) текст \\экранирование_\n"; - var sb = new StringBuilder(); - for (int i = 0; i < repeatCount; i++) - { - sb.Append(markdownText); - } - var input = sb.ToString(); - var stopwatch = Stopwatch.StartNew(); - var renderer = new Md(); - var result = renderer.Render(input); - stopwatch.Stop(); - Console.WriteLine($"Время выполнения: {stopwatch.ElapsedMilliseconds} ms"); - } } \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs b/cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs new file mode 100644 index 000000000..580b8881a --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseEscapeTests.cs @@ -0,0 +1,102 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseEscapeTests +{ + [Test] + public void ParseTokens_EscapeHeader_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("\\", TokenType.Escape), + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("#")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("текст")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_EscapeUnderscores_ReturnsMarkdownDocument() + { + var tokens = new List + { + new ("\\", TokenType.Escape), + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("__", 
TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("__")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_EscapeWordUnderscores_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("те", TokenType.Text), + new("\\", TokenType.Escape), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("кст")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_EscapeImage_ReturnsMarkdownDocument() + { + var tokens = new List + { + new ("\\", TokenType.Escape), + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("__")); + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs b/cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs new file mode 100644 index 000000000..88f3bbe26 --- /dev/null +++ 
b/cs/Markdown/Tests/ParserTests/ParseHeaderTests.cs @@ -0,0 +1,147 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseHeaderTests +{ + [Test] + public void ParseTokens_SimpleHeader_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocumentDocument = new MarkdownDocumentNode(""); + var headerNode = new HeaderNode("#"); + var textNode = new TextNode("текст"); + expectedDocumentDocument.AddChild(headerNode); + headerNode.AddChild(textNode); + + + var actualNode = parser.ParseTokens([]); + + actualNode.Should().BeEquivalentTo(expectedDocumentDocument); + } + + [Test] + public void ParseTokens_TwoHeadersInDifferentLines_ReturnsCorrectTree() + { + var tokens = new List + { + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Первый", TokenType.Text), + new(" ", TokenType.Space), + new("\n", TokenType.NewLine), + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Второй", TokenType.Text), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var headerNode1 = new HeaderNode("#"); + var headerNode2 = new HeaderNode("#"); + expectedDocument.AddChild(headerNode1); + headerNode1.AddChild(new TextNode("Первый")); + headerNode1.AddChild(new TextNode(" ")); + headerNode1.AddChild(new TextNode("\n")); + expectedDocument.AddChild(headerNode2); + headerNode2.AddChild(new TextNode("Второй")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_HeaderWithInlineTextBetween_ReturnsCorrectTree() + { + var tokens = new List + { + new("#", 
TokenType.Hash), + new(" ", TokenType.Space), + new("Первый", TokenType.Text), + new(" ", TokenType.Space), + new("\n", TokenType.NewLine), + new("просто", TokenType.Text), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("\n", TokenType.NewLine), + new("#", TokenType.Hash), + new(" ", TokenType.Space), + new("Второй", TokenType.Text), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var h1 = new HeaderNode("#"); + h1.AddChild(new TextNode("Первый")); + h1.AddChild(new TextNode(" ")); + h1.AddChild(new TextNode("\n")); + expectedDocument.AddChild(h1); + expectedDocument.AddChild(new TextNode("просто")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("\n")); + var h2 = new HeaderNode("#"); + h2.AddChild(new TextNode("Второй")); + expectedDocument.AddChild(h2); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_HeaderWithoutSpace_TreatedAsPlainText() + { + var tokens = new List + { + new("#текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("#текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_HeaderWithLeadingSpace_NotAHeader() + { + var tokens = new List + { + new(" ", TokenType.Space), + new("#", TokenType.Text), + new(" ", TokenType.Space), + new("текст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("#")); + expectedDocument.AddChild(new TextNode(" ")); + 
expectedDocument.AddChild(new TextNode("текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/ParserTests/ParseImageTests.cs b/cs/Markdown/Tests/ParserTests/ParseImageTests.cs new file mode 100644 index 000000000..a4d6d3b82 --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseImageTests.cs @@ -0,0 +1,118 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseImageTests +{ + [Test] + public void ParseTokens_SimpleImage_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("!", TokenType.Exclamation), + new("[", TokenType.LBracket), + new("Cat", TokenType.Text), + new("]", TokenType.RBracket), + new("(", TokenType.LParenthesis), + new("https://example.com/image.jpg", TokenType.Text), + new(")", TokenType.RParenthesis), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var imageNode = new ImageNode("!"); + var altNode = new AltNode("["); + var altText = new TextNode("Cat"); + var urlNode = new UrlNode("https://example.com/image.jpg"); + expectedDocument.AddChild(imageNode); + imageNode.AddChild(altNode); + altNode.AddChild(altText); + imageNode.AddChild(urlNode); + + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_NewlineAfterExclamation_ReturnsPlainText() + { + var tokens = new List + { + new("!", TokenType.Text), + new("\n", TokenType.NewLine), + new("[", TokenType.LBracket), + new("Cat", TokenType.Text), + new("]", TokenType.RBracket), + new("(", TokenType.LParenthesis), + new("https://example.com/image.jpg", TokenType.Text), + new(")", TokenType.RParenthesis), + new("", TokenType.Eof) + }; + 
+ var parser = new MarkdownParser(tokens); + + var expectedDocument = new MarkdownDocumentNode(""); + var exclamationText = new TextNode("!"); + var newLineText = new TextNode("\n"); + var lBracket = new TextNode("["); + var text = new TextNode("Cat"); + var rBracket = new TextNode("]"); + var lParenthesis = new TextNode("("); + var urlText = new TextNode("https://example.com/image.jpg"); + var rParenthesis = new TextNode(")"); + expectedDocument.AddChild(exclamationText); + expectedDocument.AddChild(newLineText); + expectedDocument.AddChild(lBracket); + expectedDocument.AddChild(text); + expectedDocument.AddChild(rBracket); + expectedDocument.AddChild(lParenthesis); + expectedDocument.AddChild(urlText); + expectedDocument.AddChild(rParenthesis); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_AltWithNewline_ReturnsPlainText() + { + var tokens = new List + { + new("!", TokenType.Exclamation), + new("[", TokenType.LBracket), + new("Ca", TokenType.Text), + new("\n", TokenType.NewLine), + new("t", TokenType.Text), + new("]", TokenType.RBracket), + new("(", TokenType.LParenthesis), + new("https://example.com/image.jpg", TokenType.Text), + new(")", TokenType.RParenthesis), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("!")); + expectedDocument.AddChild(new TextNode("[")); + expectedDocument.AddChild(new TextNode("Ca")); + expectedDocument.AddChild(new TextNode("\n")); + expectedDocument.AddChild(new TextNode("t")); + expectedDocument.AddChild(new TextNode("]")); + expectedDocument.AddChild(new TextNode("(")); + expectedDocument.AddChild(new TextNode("https://example.com/image.jpg")); + expectedDocument.AddChild(new TextNode(")")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file 
diff --git a/cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs b/cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs new file mode 100644 index 000000000..f4b64922b --- /dev/null +++ b/cs/Markdown/Tests/ParserTests/ParseUnderscoresTests.cs @@ -0,0 +1,379 @@ +using FluentAssertions; +using Markdown.Nodes.Internal; +using Markdown.Nodes.Leaf; +using Markdown.Parsers; +using Markdown.Tokenizer; +using NUnit.Framework; + +namespace Markdown.Tests.ParserTests; + +[TestFixture] +public class ParseUnderscoresTests +{ + [Test] + public void ParseTokens_SimpleBold_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var boldNode = new BoldNode("__"); + expectedDocument.AddChild(boldNode); + boldNode.AddChild(new TextNode("текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_SimpleItalic_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("текст", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italicNode = new ItalicNode("_"); + expectedDocument.AddChild(italicNode); + italicNode.AddChild(new TextNode("текст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_SingleInsideDouble_ReturnsMarkdownDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("двойное", TokenType.Text), + new(" ", TokenType.Space), + new("_", TokenType.Underscore), + new("одинарное", TokenType.Text), + new("_", TokenType.Underscore), + new(" ", 
TokenType.Space), + new("тоже", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var bold = new BoldNode("__"); + expectedDocument.AddChild(bold); + bold.AddChild(new TextNode("двойное")); + bold.AddChild(new TextNode(" ")); + var italic = new ItalicNode("_"); + italic.AddChild(new TextNode("одинарное")); + bold.AddChild(italic); + bold.AddChild(new TextNode(" ")); + bold.AddChild(new TextNode("тоже")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicWithBoldInside_ReturnsDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("начало", TokenType.Text), + new(" ", TokenType.Space), + new("__", TokenType.DoubleUnderscore), + new("внутри", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new(" ", TokenType.Space), + new("конец", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italic = new ItalicNode("_"); + expectedDocument.AddChild(italic); + italic.AddChild(new TextNode("начало")); + italic.AddChild(new TextNode(" ")); + italic.AddChild(new TextNode("__")); + italic.AddChild(new TextNode("внутри")); + italic.AddChild(new TextNode("__")); + italic.AddChild(new TextNode(" ")); + italic.AddChild(new TextNode("конец")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_UnpairedUnderscores_ReturnsDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("текст", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + 
expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("текст")); + expectedDocument.AddChild(new TextNode("_")); + + var actual = parser.ParseTokens(); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicAtStartWord_ReturnsDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("те", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + var italicNode = new ItalicNode("_"); + expectedDocument.AddChild(italicNode); + italicNode.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("кст")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicInMiddleWord_ReturnsDocument() + { + var tokens = new List + { + new("т", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("екс", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("т", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("т")); + var italic = new ItalicNode("_"); + italic.AddChild(new TextNode("екс")); + expectedDocument.AddChild(italic); + expectedDocument.AddChild(new TextNode("т")); + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_ItalicInEndWord_ReturnsDocument() + { + var tokens = new List + { + new("тек", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("ст.", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("тек")); + 
var italic = new ItalicNode("_"); + italic.AddChild(new TextNode("ст.")); + expectedDocument.AddChild(italic); + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_SeparateWordsUnderscores_ReturnsDocument() + { + var tokens = new List + { + new("те", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new(" ", TokenType.Space), + new("те", TokenType.Text), + new("_", TokenType.WordUnderscore), + new("кст", TokenType.Text), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("кст")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("те")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("кст")); + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_OnlyFourUnderscores_ReturnsDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("__", TokenType.DoubleUnderscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("__")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X_ReturnsDocument() + { + var tokens = new List + { + new("\n", TokenType.NewLine), + new("_", TokenType.Underscore), + new("текст", TokenType.Text), + new("_", TokenType.Underscore), + new("\n", TokenType.NewLine), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new 
MarkdownDocumentNode(""); + var italic = new ItalicNode("_"); + expectedDocument.AddChild(new TextNode("\n")); + italic.AddChild(new TextNode("текст")); + expectedDocument.AddChild(italic); + expectedDocument.AddChild(new TextNode("\n")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X1_ReturnsDocument() + { + var tokens = new List + { + new("__", TokenType.DoubleUnderscore), + new("пересечения", TokenType.Text), + new(" ", TokenType.Space), + new("_", TokenType.Underscore), + new("двойных", TokenType.Text), + new("__", TokenType.DoubleUnderscore), + new(" ", TokenType.Space), + new("и", TokenType.Text), + new(" ", TokenType.Space), + new("одинарных", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode("пересечения")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("двойных")); + expectedDocument.AddChild(new TextNode("__")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("и")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("одинарных")); + expectedDocument.AddChild(new TextNode("_")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X2_ReturnsDocument() + { + var tokens = new List + { + new("_", TokenType.Underscore), + new("подчерки", TokenType.Text), + new(" ", TokenType.Space), + new("_", TokenType.Underscore), + new("не", TokenType.Text), + new(" ", TokenType.Space), + new("считаются", TokenType.Text), + new("_", TokenType.Underscore), + new("", TokenType.Eof) + }; + var parser = new 
MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("подчерки")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("не")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("считаются")); + expectedDocument.AddChild(new TextNode("_")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } + + [Test] + public void ParseTokens_X3_ReturnsDocument() + { + var tokens = new List + { + new("эти", TokenType.Text), + new("_", TokenType.Underscore), + new(" ", TokenType.Space), + new("подчерки", TokenType.Text), + new("_", TokenType.Underscore), + new("\n", TokenType.NewLine), + new("", TokenType.Eof) + }; + var parser = new MarkdownParser(tokens); + var expectedDocument = new MarkdownDocumentNode(""); + expectedDocument.AddChild(new TextNode("эти")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode(" ")); + expectedDocument.AddChild(new TextNode("подчерки")); + expectedDocument.AddChild(new TextNode("_")); + expectedDocument.AddChild(new TextNode("\n")); + + var actual = parser.ParseTokens([]); + + actual.Should().BeEquivalentTo(expectedDocument); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt b/cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt new file mode 100644 index 000000000..d963fff5d --- /dev/null +++ b/cs/Markdown/Tests/Results/MdSpecApprovalTest.Render_MdSpec_ReturnCorrectHtml.approved.txt @@ -0,0 +1,86 @@ +

Спецификация языка разметки

+ +Посмотрите этот файл в сыром виде. Сравните с тем, что показывает github. +Все совпадения случайны ;) + + + +

Курсив

+ +Текст, окруженный с двух сторон одинарными символами подчерка, +должен помещаться в HTML-тег \ вот так: + +Текст, \окруженный с двух сторон\ одинарными символами подчерка, +должен помещаться в HTML-тег \. + + + +

Полужирный

+ +Выделенный двумя символами текст должен становиться полужирным с помощью тега \. + + + +

Экранирование

+ +Любой символ можно экранировать, чтобы он не считался частью разметки. +_Вот это_, не должно выделиться тегом \. + +Символ экранирования исчезает из результата, только если экранирует что-то. +Здесь сим\волы экранирования\ \должны остаться.\ + +Символ экранирования тоже можно экранировать: \вот это будет выделено тегом \ + + + +

Взаимодействие тегов

+ +Внутри двойного выделения одинарное тоже работает. + +Но не наоборот — внутри одинарного __двойное__ не работает. + +Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. + +Однако выделять часть слова они могут: и в начале, и в середине, и в конце. + +В то же время выделение в ра_зных сл_овах не работает. + +__Непарные_ символы в рамках одного абзаца не считаются выделением. + +За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением +и остаются просто символами подчерка. + +Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки _не считаются_ окончанием выделения +и остаются просто символами подчерка. + +В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением. + +Если внутри подчерков пустая строка ____, то они остаются символами подчерка. + + + +

Заголовки

+ +Абзац, начинающийся с "# ", выделяется тегом \

в заголовок. +В тексте заголовка могут присутствовать все прочие символы разметки с указанными правилами. + +Таким образом + +# Заголовок __с _разными_ символами__ + +превратится в: + +

Заголовок с разными символами

+ +

Изображения

+ +Изображения задаются с помощью специальной конструкции вида: ![alt](url), где: +alt — альтернативный текст изображения; +url — ссылка на изображение. + +Например, запись: + +![Cat](https://example.com/image.jpg) + +превратится в: +Cat
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenAssert.cs b/cs/Markdown/Tests/TokenizerTests/TokenAssert.cs
new file mode 100644
index 000000000..a32360700
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenAssert.cs
@@ -0,0 +1,26 @@
+using FluentAssertions;
+using Markdown.Tokenizer;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Shared assertion helper for the tokenizer test fixtures.
+/// </summary>
+public static class TokenAssert
+{
+    /// <summary>
+    /// Tokenizes <paramref name="markdownText"/> and asserts that the produced tokens
+    /// match <paramref name="expectedTokens"/> member-wise and in the same order.
+    /// </summary>
+    /// <param name="markdownText">Markdown input handed to the tokenizer.</param>
+    /// <param name="expectedTokens">Expected tokens, in the order they must be emitted.</param>
+    public static void AssertToken(string markdownText, List<Token> expectedTokens)
+    {
+        var tokenizer = new MarkdownTokenizer(markdownText);
+
+        var tokens = tokenizer.Tokenize();
+
+        // BeEquivalentTo ignores item order by default; for a token stream the order matters.
+        tokens.Should().BeEquivalentTo(expectedTokens, options => options.WithStrictOrdering());
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs
new file mode 100644
index 000000000..6c64ddd55
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeEscapeTests.cs
@@ -0,0 +1,127 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Tokenizer tests for the backslash escape character.
+/// </summary>
+[TestFixture]
+public class TokenizeEscapeTests
+{
+    [TestCase(@"\_текст_")]
+    public void Tokenize_EscapedUnderscore(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"\\_текст\\_")]
+    public void Tokenize_DoubleEscapedUnderscoresAroundText(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new(@"\", TokenType.Escape),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(@"\", TokenType.Escape),
+            new(@"\", TokenType.Escape),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    // '#' directly after a backslash is not at the start of a line, so it is plain text.
+    [TestCase(@"\# текст")]
+    public void Tokenize_EscapedHeader(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new("#", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"\![Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)")]
+    public void Tokenize_EscapedExclamationInImage(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new(@"\", TokenType.Escape),
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"!\[Cat](https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)")]
+    public void Tokenize_EscapedAltInImage(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Text),
+            new(@"\", TokenType.Escape),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase(@"![Cat]\(https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg)")]
+    public void Tokenize_EscapedUrlInImage(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new(@"\", TokenType.Escape),
+            new("(", TokenType.LParenthesis),
+            new("https://i.pinimg.com/originals/f5/ef/a6/f5efa6a5b2c76c038ef0c8d2502fd2f6.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    // A backslash in front of a non-escapable character stays plain text.
+    [TestCase(@"т\екс\т")]
+    public void Tokenize_TextWithMultipleEscapes(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("т", TokenType.Text),
+            new(@"\", TokenType.Text),
+            new("екс", TokenType.Text),
+            new(@"\", TokenType.Text),
+            new("т", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs
new file mode 100644
index 000000000..5a53012d6
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeHeaderTests.cs
@@ -0,0 +1,90 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Tokenizer tests for the '#' header marker.
+/// </summary>
+[TestFixture]
+public class TokenizeHeaderTests
+{
+    [TestCase("# текст")]
+    public void Tokenize_SimpleHeader_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    [TestCase("#текст")]
+    public void Tokenize_NoSpaceAfterHash_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Text),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    [TestCase("# текст \n# текст")]
+    public void Tokenize_TwoHeadersInDifferentLines_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("\n", TokenType.NewLine),
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    // A leading space means '#' is not the first character of the line, so it is plain text.
+    [TestCase(" # текст")]
+    public void Tokenize_X_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new(" ", TokenType.Space),
+            new("#", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+
+    // Three spaces after '#': each one must come back as its own Space token.
+    [TestCase("#   текст")]
+    public void Tokenize_ManySpacesAfterHash_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedTokens = new List<Token>
+        {
+            new("#", TokenType.Hash),
+            new(" ", TokenType.Space),
+            new(" ", TokenType.Space),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedTokens);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs
new file mode 100644
index 000000000..c239eadb1
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeImageTests.cs
@@ -0,0 +1,122 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Tokenizer tests for the image construction <c>![alt](url)</c>.
+/// </summary>
+[TestFixture]
+public class TokenizeImageTests
+{
+    [TestCase("![Cat](https://example.com/image.jpg)")]
+    public void Tokenize_ValidUrlAndAlt(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    // '!' that is not directly followed by '[' is plain text.
+    [TestCase("!\n[Cat](https://example.com/image.jpg)")]
+    public void Tokenize_NewlineAfterExclamation(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Text),
+            new("\n", TokenType.NewLine),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![C[]at](https://example.com/image.jpg)")]
+    public void Tokenize_AltWithBrackets(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("C", TokenType.Text),
+            new("[", TokenType.LBracket),
+            new("]", TokenType.RBracket),
+            new("at", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![Cat](https://example.com()/image.jpg)")]
+    public void Tokenize_UrlWithParenthesis(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("Cat", TokenType.Text),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com", TokenType.Text),
+            new("(", TokenType.LParenthesis),
+            new(")", TokenType.RParenthesis),
+            new("/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![](https://example.com/image.jpg)")]
+    public void Tokenize_EmptyAltText(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("![__Cat__](https://example.com/image.jpg)")]
+    public void Tokenize_AltWithBold(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("!", TokenType.Exclamation),
+            new("[", TokenType.LBracket),
+            new("__", TokenType.DoubleUnderscore),
+            new("Cat", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("]", TokenType.RBracket),
+            new("(", TokenType.LParenthesis),
+            new("https://example.com/image.jpg", TokenType.Text),
+            new(")", TokenType.RParenthesis),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs b/cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs
new file mode 100644
index 000000000..76516fc13
--- /dev/null
+++ b/cs/Markdown/Tests/TokenizerTests/TokenizeUnderscoresTests.cs
@@ -0,0 +1,445 @@
+using Markdown.Tokenizer;
+using NUnit.Framework;
+
+namespace Markdown.Tests.TokenizerTests;
+
+/// <summary>
+/// Tokenizer tests for single and double underscore markers.
+/// </summary>
+[TestFixture]
+public class TokenizeUnderscoresTests
+{
+    [TestCase("_текст_")]
+    public void Tokenize_SimpleItalic_ReturnsCorrectTokens(string markdownText)
+    {
+        var expectedToken = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expectedToken);
+    }
+
+    [TestCase("__текст__")]
+    public void Tokenize_SimpleBold_ReturnsCorrectTokens(string markdownText)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expected);
+    }
+
+    [TestCase("__начало _внутри_ конец__")]
+    public void Tokenize_BoldWithItalicInside(string markdownText)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("начало", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("_", TokenType.Underscore),
+            new("внутри", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new(" ", TokenType.Space),
+            new("конец", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+
+        TokenAssert.AssertToken(markdownText, expected);
+    }
+
+    [TestCase("_те\nкст_")]
+    public void Tokenize_ItalicWithNewLineInside(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("те", TokenType.Text),
+            new("\n", TokenType.NewLine),
+            new("кст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    // Same input and expectation as Tokenize_SimpleBold_ReturnsCorrectTokens above.
+    [TestCase("__текст__")]
+    public void Tokenize_SimpleBold(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_те_кст")]
+    public void Tokenize_ItalicAtStartWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("кст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("т_екс_т")]
+    public void Tokenize_ItalicInMiddleWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("т", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("екс", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("т", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("текс_т._")]
+    public void Tokenize_ItalicAtEndWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("текс", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("т.", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст_")]
+    public void Tokenize_UnpairedUnderscores(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_ текст_")]
+    public void Tokenize_LeadingSpacePreventsItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("1_2_3")]
+    public void Tokenize_DigitsPreventItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("1", TokenType.Text),
+            new("_", TokenType.Text),
+            new("2", TokenType.Text),
+            new("_", TokenType.Text),
+            new("3", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("те_кст те_кст")]
+    public void Tokenize_SeparateWordsUnderscores(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("кст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("кст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_текст _текст")]
+    public void Tokenize_UnclosedItalicMultipleWords(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__")]
+    public void Tokenize_OnlyDoubleUnderscore(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("____")]
+    public void Tokenize_OnlyFourUnderscores(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    // Despite the name, the bold and italic runs overlap here; the tokenizer still emits plain markers.
+    [TestCase("__текст _текст__ текст_")]
+    public void Tokenize_BoldWithNestedItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_текст __текст__ текст_")]
+    public void Tokenize_ItalicWithBoldInside(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_текст т__екс__т текст_")]
+    public void Tokenize_ItalicWithBoldInsideWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("т", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("екс", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("т", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст т_екст текст___")]
+    public void Tokenize_BoldWithTrailingUnderscore(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("т", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("екст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("т__е_к_с__т")]
+    public void Tokenize_BoldInWordWithItalicInside(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("т", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("е", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("к", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("с", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("т", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__те_к_ст__")]
+    public void Tokenize_BoldWithItalicInsideInWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("те", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("к", TokenType.Text),
+            new("_", TokenType.WordUnderscore),
+            new("ст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_те__к__ст_")]
+    public void Tokenize_ItalicWithBoldInsideInWord(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("те", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("к", TokenType.Text),
+            new("__", TokenType.WordDoubleUnderscore),
+            new("ст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("_123_")]
+    public void Tokenize_ItalicWithNumbers(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("_", TokenType.Underscore),
+            new("123", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__123__")]
+    public void Tokenize_BoldWithNumbers(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("123", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__++++__")]
+    public void Tokenize_BoldWithSymbols(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("++++", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст___текст_")]
+    public void Tokenize_BoldThenItalic(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new("__", TokenType.DoubleUnderscore),
+            new("_", TokenType.Underscore),
+            new("текст", TokenType.Text),
+            new("_", TokenType.Underscore),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__ текст __")]
+    public void Tokenize_BoldWithSpacesPrevents(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("__", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+
+    [TestCase("__текст __")]
+    public void Tokenize_BoldAfterSpacesPrevents(string markdown)
+    {
+        var expected = new List<Token>
+        {
+            new("__", TokenType.DoubleUnderscore),
+            new("текст", TokenType.Text),
+            new(" ", TokenType.Space),
+            new("__", TokenType.Text),
+            new("", TokenType.Eof)
+        };
+        TokenAssert.AssertToken(markdown, expected);
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs
b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs
new file mode 100644
index 000000000..5981f31a8
--- /dev/null
+++ b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs
@@ -0,0 +1,190 @@
+namespace Markdown.Tokenizer;
+
+/// <summary>
+/// Splits markdown text into a flat list of <see cref="Token"/>s that ends with a
+/// <see cref="TokenType.Eof"/> token.
+/// </summary>
+/// <remarks>
+/// Markup characters are classified from their immediate neighbours only; a marker that
+/// cannot act as markup where it stands is emitted as <see cref="TokenType.Text"/>.
+/// </remarks>
+public class MarkdownTokenizer
+{
+    // Characters that end a run of plain text.
+    private static readonly HashSet<char> SpecialSymbols =
+        ['#', '_', '!', '[', ']', '(', ')', ' ', '\n', '\\', '\r'];
+
+    // Characters that a preceding backslash can escape.
+    private static readonly HashSet<char> EscapableSymbols = ['#', '_', '!', '[', ']', '(', ')', '\\'];
+
+    private readonly string markdownText;
+
+    /// <summary>Creates a tokenizer over <paramref name="markdownText"/>.</summary>
+    /// <param name="markdownText">Markdown source to tokenize.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="markdownText"/> is null.</exception>
+    public MarkdownTokenizer(string markdownText)
+    {
+        ArgumentNullException.ThrowIfNull(markdownText);
+        this.markdownText = markdownText;
+    }
+
+    // Index of the character currently being tokenized.
+    private int Position { get; set; }
+
+    // Character before the cursor, or null at the start of the text.
+    private char? PrevSymbol => Position > 0 ? markdownText[Position - 1] : null;
+
+    // Character after the cursor, or null when the cursor is on (or past) the last character.
+    private char? NextSymbol => Position + 1 < markdownText.Length ? markdownText[Position + 1] : null;
+
+    /// <summary>
+    /// Tokenizes the whole text.
+    /// </summary>
+    /// <returns>The tokens in source order, always terminated by an Eof token.</returns>
+    public List<Token> Tokenize()
+    {
+        // Start over on every call; otherwise a second call would find the cursor at the
+        // end of the text and return only the Eof token.
+        Position = 0;
+        var tokens = new List<Token>();
+        while (Position < markdownText.Length)
+        {
+            var token = markdownText[Position] switch
+            {
+                '#' => TokenizeHeader(),
+                '_' => TokenizeUnderscore(),
+                '!' => TokenizeExclamation(),
+                '(' => TokenizeLParenthesis(),
+                ')' => TokenizeRParenthesis(),
+                '[' => TokenizeLBracket(),
+                ']' => TokenizeRBracket(),
+                '\\' => TokenizeEscape(),
+                ' ' => TokenizeSpace(),
+                '\n' => TokenizeNewLine(),
+                '\r' => TokenizeCarriage(),
+                _ => TokenizeText()
+            };
+            tokens.Add(token);
+            // Each helper leaves Position on the last character it consumed.
+            Position++;
+        }
+
+        tokens.Add(TokenizeEndOfFile());
+        return tokens;
+    }
+
+    private static Token TokenizeCarriage() => new("\r", TokenType.Carriage);
+
+    private static Token TokenizeEndOfFile() => new("", TokenType.Eof);
+
+    private static Token TokenizeNewLine() => new("\n", TokenType.NewLine);
+
+    private static Token TokenizeSpace() => new(" ", TokenType.Space);
+
+    private static Token TokenizeLParenthesis() => new("(", TokenType.LParenthesis);
+
+    private static Token TokenizeRParenthesis() => new(")", TokenType.RParenthesis);
+
+    private static Token TokenizeLBracket() => new("[", TokenType.LBracket);
+
+    private static Token TokenizeRBracket() => new("]", TokenType.RBracket);
+
+    // '#' is a header marker only at the start of the text or a line and when followed by a space.
+    private Token TokenizeHeader()
+    {
+        var startsLine = PrevSymbol is null or '\n';
+        return startsLine && NextSymbol is ' '
+            ? new Token("#", TokenType.Hash)
+            : new Token("#", TokenType.Text);
+    }
+
+    // A backslash is an escape only when the next character is escapable.
+    private Token TokenizeEscape()
+    {
+        return NextSymbol is { } next && IsEscapableSymbol(next)
+            ? new Token(@"\", TokenType.Escape)
+            : new Token(@"\", TokenType.Text);
+    }
+
+    // '!' is an image marker only when directly followed by '['.
+    private Token TokenizeExclamation()
+    {
+        return NextSymbol == '['
+            ? new Token("!", TokenType.Exclamation)
+            : new Token("!", TokenType.Text);
+    }
+
+    /// <summary>
+    /// Classifies the single or double underscore that starts at the cursor.
+    /// </summary>
+    /// <remarks>
+    /// For a double underscore the cursor is moved onto the second character, so the
+    /// right-hand neighbour is the character after the pair.
+    /// </remarks>
+    private Token TokenizeUnderscore()
+    {
+        var left = PrevSymbol;
+        var isDouble = NextSymbol == '_';
+        if (isDouble)
+            Position++;
+
+        var right = NextSymbol;
+        var marker = isDouble ? "__" : "_";
+
+        if (!IsValidUnderscores(left, right))
+            return new Token(marker, TokenType.Text);
+
+        if (!IsInsideWord(left, right))
+            return new Token(marker, isDouble ? TokenType.DoubleUnderscore : TokenType.Underscore);
+
+        // IsInsideWord guarantees that both neighbours are present.
+        return IsValidWordUnderscores(left!.Value, right!.Value)
+            ? new Token(marker, isDouble ? TokenType.WordDoubleUnderscore : TokenType.WordUnderscore)
+            : new Token(marker, TokenType.Text);
+    }
+
+    // Only reached for letter-or-digit neighbours: a digit on either side disables the marker.
+    private static bool IsValidWordUnderscores(char left, char right)
+    {
+        if (char.IsDigit(right) && (char.IsDigit(left) || char.IsLetter(left)))
+            return false;
+
+        return !(char.IsDigit(left) && char.IsLetter(right));
+    }
+
+    // True when the marker sits between two letters or digits.
+    private static bool IsInsideWord(char? left, char? right)
+    {
+        return left is { } l && right is { } r
+            && char.IsLetterOrDigit(l)
+            && char.IsLetterOrDigit(r);
+    }
+
+    // A marker needs at least one neighbour that exists and is not a space.
+    // NOTE(review): only ' ' counts as blank here, not '\n' or '\t' - confirm this is intended.
+    private static bool IsValidUnderscores(char? left, char? right)
+    {
+        var leftValid = left.HasValue && left != ' ';
+        var rightValid = right.HasValue && right != ' ';
+
+        return leftValid || rightValid;
+    }
+
+    // Consumes characters up to the next special symbol and returns them as one text token.
+    private Token TokenizeText()
+    {
+        var start = Position;
+        while (Position < markdownText.Length && !IsSpecialSymbol(markdownText[Position]))
+            Position++;
+
+        var text = markdownText[start..Position];
+        // Step back onto the last consumed character; Tokenize() advances once more.
+        Position--;
+
+        return new Token(text, TokenType.Text);
+    }
+
+    private static bool IsSpecialSymbol(char c) => SpecialSymbols.Contains(c);
+
+    private static bool IsEscapableSymbol(char c) => EscapableSymbols.Contains(c);
+}
\ No newline at end of file diff --git a/cs/Markdown/Token.cs b/cs/Markdown/Tokenizer/Token.cs similarity index 55% rename from cs/Markdown/Token.cs rename to cs/Markdown/Tokenizer/Token.cs index 7d91e08be..ab584cff6 100644 --- a/cs/Markdown/Token.cs +++ b/cs/Markdown/Tokenizer/Token.cs @@ -1,9 +1,9 @@ -namespace Markdown; +namespace Markdown.Tokenizer; public class Token { - public TokenType Type; - public string Value; + public readonly TokenType Type; + public readonly string Value; public Token(string value, TokenType type) { diff --git a/cs/Markdown/TokenType.cs b/cs/Markdown/Tokenizer/TokenType.cs similarity index 82% rename from cs/Markdown/TokenType.cs rename to cs/Markdown/Tokenizer/TokenType.cs index 019bb5145..b4c243d9b 100644 --- a/cs/Markdown/TokenType.cs +++ b/cs/Markdown/Tokenizer/TokenType.cs @@ -1,4 +1,4 @@ -namespace Markdown; +namespace Markdown.Tokenizer; public enum TokenType { @@ -16,5 +16,6 @@ public enum TokenType RParenthesis, Escape, Space, - Eof, + Carriage, + Eof } \ No newline at end of file