diff --git a/README.md b/README.md index f9741e0b..b80bbbad 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Send JSON log events from a file or `STDIN`. Example: ``` -seqcli ingest -i events.clef --filter="@Level <> 'Debug'" -p Environment=Test +seqcli ingest -i events.clef --json --filter="@Level <> 'Debug'" -p Environment=Test ``` | Option | Description | @@ -63,6 +63,8 @@ seqcli ingest -i events.clef --filter="@Level <> 'Debug'" -p Environment=Test | `-i`, `--input=VALUE` | CLEF file to ingest; if not specified, `STDIN` will be used | | `--invalid-data=VALUE` | Specify how invalid data is handled: fail (default) or ignore | | `-p`, `--property=VALUE1=VALUE2` | Specify event properties, e.g. `-p Customer=C123 -p Environment=Production` | +| `-x`, `--extract=VALUE` | An extraction pattern to apply to plain-text logs (ignored when `--json` is specified) | +| `--json` | Read the events as JSON (the default assumes plain text) | | `-f`, `--filter=VALUE` | Filter expression to select a subset of events | | `-s`, `--server=VALUE` | The URL of the Seq server; by default the `connection.serverUrl` value will be used | | `-a`, `--apikey=VALUE` | The API key to use when connecting to the server; by default `config.apiKey` value will be used | @@ -147,3 +149,72 @@ Stream log events matching a filter. ### `version` Print the current executable version. + +## Extraction Patterns + +The `seqcli ingest` command can be used for parsing plain text logs into structured log events. 
+ +```shell +seqcli ingest -x "{@t:timestamp} [{@l:ident}] {@m:*}{:n}{@x:*}" +``` + +The `-x` argument above is an _extraction pattern_ that will parse events like: + +``` +2018-02-21 13:29:00.123 +10:00 [ERR] The operation failed +System.DivideByZeroException: Attempt to divide by zero + at SomeClass.SomeMethod() +``` + +### Syntax + +Extraction patterns have a simple high-level syntax: + + * Text that appears in the pattern is matched literally - so a pattern like `Hello, world!` will match logging statements that are made up of this greeting only, + * Text between `{curly braces}` is a _match expression_ that identifies a part of the event to be extracted, and + * Literal curly braces are escaped by doubling, so `{{` will match the literal text `{`, and `}}` matches `}`. + +Match expressions have the form: + +``` +{name:matcher} +``` + +Both the name and matcher are optional, but at least one of them must be specified. Hence `{@t:timestamp}` specifies a name of `@t` and a matcher of `timestamp`, `{IPAddress}` specifies a name only, and `{:n}` a matcher only (in this case the built-in newline matcher). + +The _name_ is the property name to be extracted; there are four built-in property names that get special handling: + + * `@t` - the event's timestamp + * `@m` - the textual message associated with the event + * `@l` - the event's level + * `@x` - the exception or backtrace associated with the event + +Other property names are attached to the event payload, so `{Elapsed:dec}` will extract a property called `Elapsed`, using the `dec` decimal matcher. + +Match expressions with no name are consumed from the input, but are not added to the event payload. + +### Matchers + +Matchers identify chunks of the input event. + +Different matchers are needed so that a piece of text like `200OK` can be separated into separate properties, i.e. `{StatusCode:nat}{Status:alpha}`. 
Here, the `nat` (natural number) matcher also coerces the result into a numeric value, so that it is attached to the event payload numerically as `200` instead of as the text `"200"`. + +There are three kinds of matchers: + + * Matchers like `alpha` and `nat` are built-in _named_ matchers; these are referred to by name. + * The special matchers `*`, `**` and so on, are _non-greedy content_ matchers; these will match any text up until the next pattern element matches (`*`), the next two elements match (`**`), and so on. We saw this in action with the `{@m:*}{:n}` elements in the example - the message is all of the text up until the next newline. + * More complex _compound_ matchers are described using a sub-expression. These are prefixed with an equals sign `=`, like `{Phone:={:nat}-{:nat}-{:nat}}`. This will extract chunks of text like `123-456-7890` into the `Phone` property. + +### Processing + +Extraction patterns are processed from left to right. When the first non-matching pattern is encountered, extraction stops; any remaining text that couldn't be matched will be attached to the resulting event in an `@unmatched` property. + +Multi-line events are handled by looking for lines that start with the first element of the extraction pattern to be used. This works well if the first line of each event begins with something unambiguous like an `iso8601dt` timestamp; if the lines begin with less specific syntax, the first few elements of the extraction pattern might be grouped to identify the start of events more accurately: + +``` +{:=[{@t} {@l}]} {@m:*} +``` + +Here the literal text `[`, a timestamp token, adjacent space ` `, level and closing `]` are all grouped so that they constitute a single logical pattern element to identify the start of events. + +When logs are streamed into `seqcli ingest` in real time, a 10 ms deadline is applied, within which any trailing lines that make up the event must be received. 
diff --git a/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs b/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs index 16834b93..7883d668 100644 --- a/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs +++ b/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs @@ -9,10 +9,10 @@ static class ExtractionPatternInterpreter { public static NameValueExtractor MultilineMessageExtractor { get; } = new NameValueExtractor(new[] { - new PatternElement(Matchers.MultiLineMessage, ReifiedProperties.Message) + new SimplePatternElement(Matchers.MultiLineMessage, ReifiedProperties.Message) }); - public static NameValueExtractor CreateNameValueExtractor(ExtractionPattern pattern) + static PatternElement[] CreatePatternElements(ExtractionPattern pattern) { if (pattern == null) throw new ArgumentNullException(nameof(pattern)); @@ -22,39 +22,39 @@ public static NameValueExtractor CreateNameValueExtractor(ExtractionPattern patt var element = pattern.Elements[i]; switch (element) { - case LiteralTextPatternExpression text: - patternElements[i] = new PatternElement(Matchers.LiteralText(text.Text)); - break; - case CapturePatternExpression capture - when capture.Content is NonGreedyContentExpression ngc: - patternElements[i] = new PatternElement( - Matchers.NonGreedyContent(patternElements.Skip(i + 1).Take(ngc.Lookahead).ToArray()), - capture.Name); - break; - case CapturePatternExpression capture - when capture.Content is MatchTypeContentExpression mtc: - patternElements[i] = new PatternElement( - mtc.Type == null ? 
Matchers.Token : Matchers.GetByType(mtc.Type), - capture.Name); - break; - default: - throw new InvalidOperationException($"Element `{element}` not recognized."); + case LiteralTextPatternExpression text: + patternElements[i] = new SimplePatternElement(Matchers.LiteralText(text.Text)); + break; + case CapturePatternExpression capture + when capture.Content is NonGreedyContentExpression ngc: + patternElements[i] = new SimplePatternElement( + Matchers.NonGreedyContent(patternElements.Skip(i + 1).Take(ngc.Lookahead).ToArray()), + capture.Name); + break; + case CapturePatternExpression capture + when capture.Content is MatchTypeContentExpression mtc: + patternElements[i] = new SimplePatternElement( + mtc.Type == null ? Matchers.Token : Matchers.GetByType(mtc.Type), + capture.Name); + break; + case CapturePatternExpression capture + when capture.Content is GroupedContentExpression gc: + patternElements[i] = new GroupedPatternElement( + CreatePatternElements(gc.ExtractionPattern), + capture.Name); + break; + default: + throw new InvalidOperationException($"Element `{element}` not recognized."); } } - - return new NameValueExtractor(patternElements); + + return patternElements; } - // What we need to do here is: - // - for each parsed token - // - if it's literal text, map it an anonymous PatternElement with - // BuiltInPatterns.LiteralText() - // - otherwise, if it specifies no format, it's a named element with - // the BuiltInPatterns.Token parser - // - if it does specify a format, look up the parser based on the name, except - // - if the format is `$` it is BuiltInPatterns.SingleLineContent - // - if the format is `$$` it is BuiltInPatterns.MultiLineContent - // - if it's `*`, it's BuiltInPatterns.NonGreedyContent() passing the - // parser that follows it + public static NameValueExtractor CreateNameValueExtractor(ExtractionPattern pattern) + { + var patternElements = CreatePatternElements(pattern); + return new NameValueExtractor(patternElements); + } } } \ No 
newline at end of file diff --git a/src/SeqCli/PlainText/Extraction/GroupedPatternElement.cs b/src/SeqCli/PlainText/Extraction/GroupedPatternElement.cs new file mode 100644 index 00000000..d269cfca --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/GroupedPatternElement.cs @@ -0,0 +1,51 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Superpower; +using Superpower.Model; + +namespace SeqCli.PlainText.Extraction +{ + class GroupedPatternElement : PatternElement + { + readonly PatternElement[] _content; + + public GroupedPatternElement(IEnumerable content, string name = null) + : base(name) + { + _content = content?.ToArray() ?? throw new ArgumentNullException(nameof(content)); + if (_content.Length == 0) throw new ArgumentException("A grouped pattern must include at least one element."); + + Match = _content.Select(c => c.Match).Aggregate((a, b) => a.IgnoreThen(b)); + } + + public override TextParser Match { get; } + + public override bool TryExtract( + TextSpan input, + Dictionary result, + out TextSpan remainder) + { + var temp = new Dictionary(); + + var rem = input; + foreach (var element in _content) + { + if (!element.TryExtract(rem, temp, out rem)) + { + remainder = input; + return false; + } + } + + foreach (var pair in temp) + result.Add(pair.Key, pair.Value); + + var value = input.Until(rem); + remainder = rem; + CollectResult(result, value); + + return true; + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Extraction/Matchers.cs b/src/SeqCli/PlainText/Extraction/Matchers.cs index ec2725bd..1eb482a6 100644 --- a/src/SeqCli/PlainText/Extraction/Matchers.cs +++ b/src/SeqCli/PlainText/Extraction/Matchers.cs @@ -112,10 +112,10 @@ public static TextParser NonGreedyContent(params PatternElement[] follow return SpanEx.MatchedBy(Character.AnyChar.Many()) .Select(span => span.Length > 0 ? 
(object) span : null); - var rest = following[0].Parser; + var rest = following[0].Match; for (var i = 1; i < following.Length; ++i) { - rest = rest.IgnoreThen(following[i].Parser); + rest = rest.IgnoreThen(following[i].Match); } return i => diff --git a/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs b/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs index 243fe6a3..c1933c98 100644 --- a/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs +++ b/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs @@ -18,7 +18,7 @@ public NameValueExtractor(IEnumerable elements) throw new ArgumentException("An extraction pattern must contain at least one element."); } - public TextParser StartMarker => _elements[0].Parser; + public TextParser StartMarker => _elements[0].Match; public (IDictionary, string) ExtractValues(string plainText) { @@ -28,21 +28,13 @@ public NameValueExtractor(IEnumerable elements) var remainder = input; foreach (var element in _elements) { - var match = element.Parser(remainder); - if (!match.HasValue) + if (!element.TryExtract(remainder, result, out remainder)) { if (remainder.IsAtEnd || Span.WhiteSpace.IsMatch(remainder)) return (result, null); return (result, remainder.ToStringValue()); } - - remainder = match.Remainder; - - if (!element.IsIgnored) - { - result.Add(element.Name, match.Value); - } } return (result, null); diff --git a/src/SeqCli/PlainText/Extraction/PatternElement.cs b/src/SeqCli/PlainText/Extraction/PatternElement.cs index 29670dd9..42412afd 100644 --- a/src/SeqCli/PlainText/Extraction/PatternElement.cs +++ b/src/SeqCli/PlainText/Extraction/PatternElement.cs @@ -1,18 +1,31 @@ -using System; +using System.Collections.Generic; using Superpower; +using Superpower.Model; namespace SeqCli.PlainText.Extraction { - class PatternElement + abstract class PatternElement { - public PatternElement(TextParser parser, string name = null) + readonly string _name; + + bool IsIgnored => _name == null; + + protected PatternElement(string 
name) { - Parser = parser ?? throw new ArgumentNullException(nameof(parser)); - Name = name; + _name = name; } - public TextParser Parser { get; } - public string Name { get; } - public bool IsIgnored => Name == null; + public abstract TextParser Match { get; } + + public abstract bool TryExtract( + TextSpan input, + Dictionary result, + out TextSpan remainder); + + protected void CollectResult(Dictionary result, object value) + { + if (!IsIgnored) + result.Add(_name, value); + } } -} \ No newline at end of file +} diff --git a/src/SeqCli/PlainText/Extraction/SimplePatternElement.cs b/src/SeqCli/PlainText/Extraction/SimplePatternElement.cs new file mode 100644 index 00000000..46ef94be --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/SimplePatternElement.cs @@ -0,0 +1,39 @@ +using System; +using System.Collections.Generic; +using Superpower; +using Superpower.Model; + +namespace SeqCli.PlainText.Extraction +{ + class SimplePatternElement : PatternElement + { + readonly TextParser _parser; + + public override TextParser Match { get; } + + public SimplePatternElement(TextParser parser, string name = null) + : base(name) + { + _parser = parser ?? 
throw new ArgumentNullException(nameof(parser)); + Match = _parser.Select(s => Unit.Value); + } + + public override bool TryExtract( + TextSpan input, + Dictionary result, + out TextSpan remainder) + { + var match = _parser(input); + if (!match.HasValue) + { + remainder = input; + return false; + } + + CollectResult(result, match.Value); + remainder = match.Remainder; + + return true; + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs b/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs index 493edb1d..ff3a2145 100644 --- a/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs +++ b/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs @@ -20,11 +20,24 @@ static class ExtractionPatternParser .IgnoreThen(Character.LetterOrDigit.Or(Character.EqualTo('_')).Many())) .Select(s => s.ToStringValue()); + static readonly TextParser NonGreedyContent = + Character.EqualTo('*').AtLeastOnce() + .Select(chs => (CaptureContentExpression) new NonGreedyContentExpression(chs.Length)); + + static readonly TextParser MatchTypeContent = + SpanEx.MatchedBy(Character.Letter.Or(Character.EqualTo('_')) + .IgnoreThen(Character.LetterOrDigit.Or(Character.EqualTo('_')).Many())) + .Select(s => (CaptureContentExpression) new MatchTypeContentExpression(s.ToStringValue())); + + static readonly TextParser GroupedContent = + Span.EqualTo("=") + .IgnoreThen(Superpower.Parse.Ref(() => Elements)) + .Select(els => (CaptureContentExpression) new GroupedContentExpression(new ExtractionPattern(els))); + static readonly TextParser CaptureContent = - Character.EqualTo('*').AtLeastOnce().Select(chs => (CaptureContentExpression)new NonGreedyContentExpression(chs.Length)) - .Or(SpanEx.MatchedBy(Character.Letter.Or(Character.EqualTo('_')) - .IgnoreThen(Character.LetterOrDigit.Or(Character.EqualTo('_')).Many())) - .Select(s => (CaptureContentExpression)new MatchTypeContentExpression(s.ToStringValue()))); + NonGreedyContent + 
.Or(MatchTypeContent) + .Or(GroupedContent); static readonly TextParser Capture = from _ in Character.EqualTo('{') @@ -40,8 +53,11 @@ from __ in Character.EqualTo('}') LiteralText.Cast() .Or(Capture.Cast()); + static readonly TextParser Elements = + Element.AtLeastOnce(); + static readonly TextParser Pattern = - Element.AtLeastOnce().AtEnd().Select(e => new ExtractionPattern(e)); + Elements.AtEnd().Select(e => new ExtractionPattern(e)); public static ExtractionPattern Parse(string extractionPattern) { diff --git a/src/SeqCli/PlainText/Patterns/GroupedContentExpression.cs b/src/SeqCli/PlainText/Patterns/GroupedContentExpression.cs new file mode 100644 index 00000000..23cf2f33 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/GroupedContentExpression.cs @@ -0,0 +1,14 @@ +using System; + +namespace SeqCli.PlainText.Patterns +{ + class GroupedContentExpression : CaptureContentExpression + { + public ExtractionPattern ExtractionPattern { get; } + + public GroupedContentExpression(ExtractionPattern extractionPattern) + { + ExtractionPattern = extractionPattern ?? 
throw new ArgumentNullException(nameof(extractionPattern)); + } + } +} \ No newline at end of file diff --git a/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs b/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs index 85fd84fc..000f64ce 100644 --- a/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs +++ b/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs @@ -53,6 +53,9 @@ public void CaptureNameAndTypeAreParsed() [InlineData("}", false)] [InlineData("{a} b{c} ", true)] [InlineData("d {a}b {c}", true)] + [InlineData("{:={@m}}", true)] + [InlineData("Loaded {SignalId:=signal-{:nat}}", true)] + [InlineData("{:={Year:num}-{Month:num}}", true)] public void OnlyValidPatternsAreAccepted(string attempt, bool isValid) { if (isValid) diff --git a/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs b/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs index 785ede83..38fc85ac 100644 --- a/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs +++ b/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs @@ -30,9 +30,9 @@ public void TheDefaultPatternDoesNotMatchLinesStartingWithWhitespace() static NameValueExtractor ClassMethodPattern { get; } = new NameValueExtractor(new[] { - new PatternElement(Matchers.Identifier, "class"), - new PatternElement(Matchers.LiteralText(".")), - new PatternElement(Matchers.Identifier, "method") + new SimplePatternElement(Matchers.Identifier, "class"), + new SimplePatternElement(Matchers.LiteralText(".")), + new SimplePatternElement(Matchers.Identifier, "method") }); [Fact] @@ -50,7 +50,7 @@ public void PatternsExtractElements() [Fact] public void TheFirstPatternElementIsExposed() { - Assert.Same(Matchers.Identifier, ClassMethodPattern.StartMarker); + Assert.NotNull(ClassMethodPattern.StartMarker); } [Fact] @@ -58,12 +58,12 @@ public void SingleLineContentMatchesUntilEol() { var pattern = new NameValueExtractor(new[] { - new PatternElement(Matchers.Identifier, "first"), - new 
PatternElement(Matchers.LiteralText(" ")), - new PatternElement(Matchers.SingleLineContent, "content"), - new PatternElement(Matchers.LiteralText(" (")), - new PatternElement(Matchers.Identifier, "last"), - new PatternElement(Matchers.LiteralText(")")) + new SimplePatternElement(Matchers.Identifier, "first"), + new SimplePatternElement(Matchers.LiteralText(" ")), + new SimplePatternElement(Matchers.SingleLineContent, "content"), + new SimplePatternElement(Matchers.LiteralText(" (")), + new SimplePatternElement(Matchers.Identifier, "last"), + new SimplePatternElement(Matchers.LiteralText(")")) }); var frame = "abc def ghi (jkl)"; @@ -80,16 +80,16 @@ public void NonGreedyContentStopsMatchingWhenFollowingTokensMatch() // the "following" list, since they effectively become "mandatory" var following = new[] { - new PatternElement(Matchers.LiteralText(" (")), - new PatternElement(Matchers.Identifier, "last"), - new PatternElement(Matchers.LiteralText(")")) + new SimplePatternElement(Matchers.LiteralText(" (")), + new SimplePatternElement(Matchers.Identifier, "last"), + new SimplePatternElement(Matchers.LiteralText(")")) }; var pattern = new NameValueExtractor(new[] { - new PatternElement(Matchers.Identifier, "first"), - new PatternElement(Matchers.LiteralText(" ")), - new PatternElement(Matchers.NonGreedyContent(following), "content"), + new SimplePatternElement(Matchers.Identifier, "first"), + new SimplePatternElement(Matchers.LiteralText(" ")), + new SimplePatternElement(Matchers.NonGreedyContent(following), "content"), }.Concat(following)); var frame = "abc def ghi (jkl)";