Skip to content

Commit 332aae1

Browse files
authored
Merge pull request #40 from AlexRadch/Tests_Fails_On_Empty_Lines
Tests fail on empty lines
2 parents 31fa019 + 0232a31 commit 332aae1

File tree

12 files changed: +50 additions, -31 deletions

src/TurnerSoftware.RobotsExclusionTools/Tokenization/TokenDefinition.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public TokenMatch Match(string input, int offset = 0)
3131
}
3232
else
3333
{
34-
return new TokenMatch { IsMatch = false };
34+
return TokenMatch.NoMatch;
3535
}
3636
}
3737
}

src/TurnerSoftware.RobotsExclusionTools/Tokenization/TokenMatch.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@ public class TokenMatch
1010
public TokenType TokenType { get; set; }
1111
public string Value { get; set; }
1212
public int MatchLength { get; set; }
13+
14+
public static readonly TokenMatch NoMatch = new TokenMatch { IsMatch = false };
1315
}
1416
}

src/TurnerSoftware.RobotsExclusionTools/Tokenization/TokenType.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public enum TokenType
1212
Comment,
1313
FieldValueDelimiter,
1414
NewLine,
15-
ValueDelimiter
15+
ValueDelimiter,
16+
Blank
1617
}
1718
}

src/TurnerSoftware.RobotsExclusionTools/Tokenization/TokenizerBase.cs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,24 @@ private void Tokenize(string text, ICollection<Token> tokenCollection)
5252
var offset = 0;
5353
var numberOfChars = text.Length;
5454

55+
var lastTokenEnd = 0;
56+
5557
while (offset < numberOfChars)
5658
{
5759
var match = FindMatch(text, offset);
5860
if (match.IsMatch)
5961
{
62+
if (lastTokenEnd != offset)
63+
{
64+
tokenCollection.Add(new Token(
65+
TokenType.NotDefined,
66+
text.Substring(lastTokenEnd, offset - lastTokenEnd)
67+
));
68+
}
69+
6070
tokenCollection.Add(new Token(match.TokenType, match.Value));
6171
offset += match.MatchLength;
72+
lastTokenEnd = offset;
6273
}
6374
else
6475
{
@@ -78,7 +89,7 @@ private TokenMatch FindMatch(string text, int offset)
7889
}
7990
}
8091

81-
return new TokenMatch { IsMatch = false };
92+
return TokenMatch.NoMatch;
8293
}
8394
}
8495
}

src/TurnerSoftware.RobotsExclusionTools/Tokenization/Tokenizers/RobotsFileTokenizer.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ public class RobotsFileTokenizer : TokenizerBase
3636
new TokenDefinition(TokenType.Comment, @"\G#[^\x0A\x0D]*"),
3737
new TokenDefinition(TokenType.Field, @"\G[\x21\x23-\x27\x2a\x2b\x2d\x2e\x41-\x5a\x5e-\x7a\x7c\x7e]+(?=:[ ])"),
3838
new TokenDefinition(TokenType.FieldValueDelimiter, @"\G:[ ]"),
39-
new TokenDefinition(TokenType.Value, @"\G[^\x0A\x0D#]+"),
40-
new TokenDefinition(TokenType.NewLine, @"\G\x0D?\x0A")
39+
new TokenDefinition(TokenType.Value, @"\G(?<=:[ ])[^\x0A\x0D#]+"),
40+
new TokenDefinition(TokenType.NewLine, @"\G\x0D?\x0A"),
41+
new TokenDefinition(TokenType.Blank, @"\G[ \t]+")
4142
};
4243

4344
protected override IEnumerable<TokenDefinition> GetTokenDefinitions()

tests/TurnerSoftware.RobotsExclusionTools.Tests/Resources/RobotsFile/Comprehensive-Example.txt

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,41 @@
11
User-agent: *
22
Crawl-delay: 60
33
Disallow:
4-
4+
55
Sitemap: http://www.example.org/sitemap.xml
6-
6+
77
User-agent: SinglePathAllowed
88
Allow: /orgs/plan.html
99
Disallow: /
10-
10+
1111
User-agent: ImplicitWildcardAllowed
1212
Allow: /orgs
1313
Disallow: /
14-
14+
1515
User-agent: ExplicitWildcardSuffix
1616
Allow: /org*
1717
Disallow: /
18-
18+
1919
User-agent: ExplicitWildcardPrefix
2020
Allow: *.html
2121
Disallow: /
22-
22+
2323
User-agent: ContainedWildcard
2424
Allow: /org*/plan.html
2525
Disallow: /
2626

2727
User-agent: PathWithAnyQueryString
2828
Disallow: /org/plan.html?
29-
29+
3030
User-agent: PathWithPartQueryString
3131
Disallow: /org/plan.html?foo=bar*
32-
32+
3333
User-agent: PathMustStartWith
3434
Disallow: /org/plan.html
3535

3636
User-agent: PathMustEndWith
3737
Disallow: /org/plan.html$
3838
Disallow: /org/planb.html*$
39+
40+
Sitemap: http://www.example.org/sitemap2.xml
3941

40-
Sitemap: http://www.example.org/sitemap2.xml
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
User-agent: A
22
Disallow: /RFCCompliant
3-
3+
44
User-Agent: B
55
Disallow: /DifferentCaseUserAgent
6-
6+
77
User-agent: C
8-
DisAllow: /DifferentCaseRule
8+
DisAllow: /DifferentCaseRule
9+
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
# /robots.txt for http://www.fict.org/
22
# comments to webmaster@fict.org
3-
43
User-agent: unhipbot
54
Disallow: /
6-
5+
76
User-agent: webcrawler
87
User-agent: excite
98
Disallow:
10-
9+
1110
User-agent: *
1211
Disallow: /org/plans.html
1312
Allow: /org/
1413
Allow: /serv
1514
Allow: /~mak
16-
Disallow: /
15+
Disallow: /
16+
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
Disallow: /
1+

2+
Disallow: /
3+

tests/TurnerSoftware.RobotsExclusionTools.Tests/RobotsFile/RobotsFileTokenPatternValidatorTests.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,8 @@ public void MalformedFieldPatterns()
3838
Assert.IsFalse(result.IsValid);
3939

4040
var firstErrorExpectedTokens = result.Errors.First().Expected;
41-
Assert.AreEqual(TokenType.FieldValueDelimiter, firstErrorExpectedTokens.ElementAt(0));
42-
Assert.AreEqual(TokenType.Field, firstErrorExpectedTokens.ElementAt(1));
43-
Assert.AreEqual(2, firstErrorExpectedTokens.Count());
41+
Assert.AreEqual(TokenType.NewLine, firstErrorExpectedTokens.ElementAt(0));
42+
Assert.AreEqual(1, firstErrorExpectedTokens.Count());
4443
Assert.AreEqual(19, result.Errors.Count());
4544
}
4645
}

0 commit comments

Comments
 (0)