diff --git a/README.md b/README.md index 170f7955..00b62546 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Send JSON log events from a file or `STDIN`. Example: ``` -seqcli ingest -i events.clef --filter="@Level <> 'Debug'" -p Environment=Test +seqcli ingest -i events.clef --json --filter="@Level <> 'Debug'" -p Environment=Test ``` | Option | Description | @@ -63,6 +63,8 @@ seqcli ingest -i events.clef --filter="@Level <> 'Debug'" -p Environment=Test | `-i`, `--input=VALUE` | CLEF file to ingest; if not specified, `STDIN` will be used | | `--invalid-data=VALUE` | Specify how invalid data is handled: fail (default) or ignore | | `-p`, `--property=VALUE1=VALUE2` | Specify event properties, e.g. `-p Customer=C123 -p Environment=Production` | +| `-x`, `--extract=VALUE` | An extraction pattern to apply to plain-text logs (ignored when `--json` is specified) | +| `--json` | Read the events as JSON (the default assumes plain text) | | `-f`, `--filter=VALUE` | Filter expression to select a subset of events | | `-s`, `--server=VALUE` | The URL of the Seq server; by default the `connection.serverUrl` value will be used | | `-a`, `--apikey=VALUE` | The API key to use when connecting to the server; by default `config.apiKey` value will be used | @@ -147,3 +149,72 @@ Stream log events matching a filter. ### `version` Print the current executable version. + +## Extraction Patterns + +The `seqcli ingest` command can be used for parsing plain text logs into structured log events. 
+ +```shell +seqcli ingest -x "{@t:timestamp} [{@l:ident}] {@m:*}{:n}{@x:*}" +``` + +The `-x` argument above is an _extraction pattern_ that will parse events like: + +``` +2018-02-21 13:29:00.123 +10:00 [ERR] The operation failed +System.DivideByZeroException: Attempt to divide by zero + at SomeClass.SomeMethod() +``` + +### Syntax + +Extraction patterns have a simple high-level syntax: + + * Text that appears in the pattern is matched literally - so a pattern like `Hello, world!` will match logging statements that are made up of this greeting only, + * Text between `{curly braces}` is a _match expression_ that identifies a part of the event to be extracted, and + * Literal curly braces are escaped by doubling, so `{{` will match the literal text `{`, and `}}` matches `}`. + +Match expressions have the form: + +``` +{name:matcher} +``` + +Both the name and the matcher are optional, but at least one of them must be specified. Hence `{@t:timestamp}` specifies a name of `@t` and a matcher of `timestamp`, `{IPAddress}` specifies a name only, and `{:n}` a matcher only (in this case the built-in newline matcher). + +The _name_ is the property name to be extracted; there are four built-in property names that get special handling: + + * `@t` - the event's timestamp + * `@m` - the textual message associated with the event + * `@l` - the event's level + * `@x` - the exception or backtrace associated with the event + +Other property names are attached to the event payload, so `{Elapsed:dec}` will extract a property called `Elapsed`, using the `dec` decimal matcher. + +Match expressions with no name are consumed from the input, but are not added to the event payload. + +### Matchers + +Matchers identify chunks of the input event. + +Different matchers are needed so that a piece of text like `200OK` can be split into separate properties, for example with `{StatusCode:nat}{Status:alpha}`. 
Here, the `nat` (natural number) matcher also coerces the result into a numeric value, so that it is attached to the event payload numerically as `200` instead of as the text `"200"`. + +There are three kinds of matchers: + + * Matchers like `alpha` and `nat` are built-in _named_ matchers. + * The special matchers `*`, `**`, and so on are _non-greedy content_ matchers; these will match any text up until the next pattern element matches (`*`), the next two elements match (`**`), and so on. We saw this in action with the `{@m:*}{:n}` elements in the example - the message is all of the text up until the next newline. + * More complex _compound_ matchers are described using a sub-expression. These are prefixed with an equals sign `=`, like `{Phone:={:nat}-{:nat}-{:nat}}`. This will extract chunks of text like `123-456-7890` into the `Phone` property. + +### Processing + +Extraction patterns are processed from left to right. When the first non-matching pattern element is encountered, extraction stops; any remaining text that couldn't be matched will be attached to the resulting event in an `@unmatched` property. + +Multi-line events are handled by looking for lines that start with the first element of the extraction pattern to be used. This works well if the first line of each event begins with something unambiguous like an `iso8601dt` timestamp; if the lines begin with less specific syntax, the first few elements of the extraction pattern might be grouped to identify the start of events more accurately: + +``` +{:=[{@t} {@l}]} {@m:*} +``` + +Here the literal text `[`, a timestamp token, adjacent space ` `, level and closing `]` are all grouped so that they constitute a single logical pattern element to identify the start of events. + +When logs are streamed into `seqcli ingest` in real time, a 10 ms deadline is applied, within which any trailing lines that make up the event must be received. 
diff --git a/src/SeqCli/Cli/Commands/IngestCommand.cs b/src/SeqCli/Cli/Commands/IngestCommand.cs index c69162e3..900cec5a 100644 --- a/src/SeqCli/Cli/Commands/IngestCommand.cs +++ b/src/SeqCli/Cli/Commands/IngestCommand.cs @@ -19,6 +19,7 @@ using SeqCli.Cli.Features; using SeqCli.Connection; using SeqCli.Ingestion; +using SeqCli.PlainText; using Serilog; using Serilog.Core; using Serilog.Events; @@ -28,7 +29,7 @@ namespace SeqCli.Cli.Commands { [Command("ingest", "Send JSON log events from a file or `STDIN`", - Example = "seqcli ingest -i events.clef --filter=\"@Level <> 'Debug'\" -p Environment=Test")] + Example = "seqcli ingest -i events.clef --json --filter=\"@Level <> 'Debug'\" -p Environment=Test")] class IngestCommand : Command { readonly SeqConnectionFactory _connectionFactory; @@ -36,7 +37,8 @@ class IngestCommand : Command readonly FileInputFeature _fileInputFeature; readonly PropertiesFeature _properties; readonly ConnectionFeature _connection; - string _filter; + string _filter, _pattern; + bool _json; public IngestCommand(SeqConnectionFactory connectionFactory) { @@ -45,10 +47,18 @@ public IngestCommand(SeqConnectionFactory connectionFactory) _invalidDataHandlingFeature = Enable(); _properties = Enable(); + Options.Add("x=|extract=", + "An extraction pattern to apply to plain-text logs (ignored when `--json` is specified)", + v => _pattern = string.IsNullOrWhiteSpace(v) ? null : v.Trim()); + + Options.Add("json", + "Read the events as JSON (the default assumes plain text)", + v => _json = true); + Options.Add("f=|filter=", "Filter expression to select a subset of events", v => _filter = string.IsNullOrWhiteSpace(v) ? null : v.Trim()); - + _connection = Enable(); } @@ -71,14 +81,22 @@ protected override async Task Run() ? new StreamReader(File.Open(_fileInputFeature.InputFilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) : null) - using (var reader = new LogEventReader(inputFile ?? 
Console.In)) { - return await LogShipper.ShipEvents( - _connectionFactory.Connect(_connection), - reader, - enrichers, - _invalidDataHandlingFeature.InvalidDataHandling, - filter); + var input = inputFile ?? Console.In; + + var reader = _json ? + (ILogEventReader)new ClefLogEventReader(input) : + new PlainTextLogEventReader(input, _pattern); + + using (reader as IDisposable) + { + return await LogShipper.ShipEvents( + _connectionFactory.Connect(_connection), + reader, + enrichers, + _invalidDataHandlingFeature.InvalidDataHandling, + filter); + } } } catch (Exception ex) diff --git a/src/SeqCli/Csv/CsvTokenizer.cs b/src/SeqCli/Csv/CsvTokenizer.cs index e7ec7411..656d1b74 100644 --- a/src/SeqCli/Csv/CsvTokenizer.cs +++ b/src/SeqCli/Csv/CsvTokenizer.cs @@ -8,7 +8,7 @@ namespace SeqCli.Csv { class CsvTokenizer : Tokenizer { - static readonly TextParser Content = Span.While(ch => ch != '"'); + static readonly TextParser Content = Span.WithoutAny(ch => ch == '"'); protected override IEnumerable> Tokenize(TextSpan span) { @@ -26,9 +26,9 @@ protected override IEnumerable> Tokenize(TextSpan span) if (!next.HasValue) yield break; var text = Content(next.Location); - while (text.HasValue) + while (text.HasValue || !text.Remainder.IsAtEnd) { - if (text.Value.Length > 0) + if (text.HasValue) { if (TryMatchSpecialContent(text.Value, out var specialTokenType) && !IsEscapedDoubleQuote(text.Remainder)) diff --git a/src/SeqCli/Ingestion/ClefLogEventReader.cs b/src/SeqCli/Ingestion/ClefLogEventReader.cs new file mode 100644 index 00000000..d29c351e --- /dev/null +++ b/src/SeqCli/Ingestion/ClefLogEventReader.cs @@ -0,0 +1,45 @@ +// Copyright 2018 Datalust Pty Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.IO;
+using System.Threading.Tasks;
+using Serilog.Events;
+using Serilog.Formatting.Compact.Reader;
+
+namespace SeqCli.Ingestion
+{
+    /// <summary>
+    /// Adapts the synchronous <see cref="LogEventReader"/> to the
+    /// <see cref="ILogEventReader"/> interface.
+    /// </summary>
+    class ClefLogEventReader : ILogEventReader, IDisposable
+    {
+        readonly LogEventReader _reader;
+
+        /// <param name="input">Text to read events from.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
+        public ClefLogEventReader(TextReader input)
+        {
+            _reader = new LogEventReader(input ?? throw new ArgumentNullException(nameof(input)));
+        }
+
+        /// <summary>
+        /// Reads the next event. The task result is <c>null</c> when the wrapped reader's
+        /// <c>TryRead</c> returns false.
+        /// </summary>
+        public Task<LogEvent> TryReadAsync()
+        {
+            // The read itself is synchronous; the result is wrapped in an already-completed task.
+            if (_reader.TryRead(out var evt))
+                return Task.FromResult(evt);
+
+            // The type argument is required here: it cannot be inferred from a bare `null`.
+            return Task.FromResult<LogEvent>(null);
+        }
+
+        /// <summary>Disposes the wrapped reader.</summary>
+        public void Dispose()
+        {
+            _reader.Dispose();
+        }
+    }
+}
diff --git a/src/SeqCli/Ingestion/ILogEventReader.cs b/src/SeqCli/Ingestion/ILogEventReader.cs new file mode 100644 index 00000000..a7fd95fa --- /dev/null +++ b/src/SeqCli/Ingestion/ILogEventReader.cs @@ -0,0 +1,10 @@
+using System.Threading.Tasks;
+using Serilog.Events;
+
+namespace SeqCli.Ingestion
+{
+    /// <summary>A source of log events that is read one event at a time.</summary>
+    interface ILogEventReader
+    {
+        /// <summary>
+        /// Reads the next event. The batch-reading loop in LogShipper stops reading when the
+        /// result is <c>null</c>.
+        /// </summary>
+        Task<LogEvent> TryReadAsync();
+    }
+}
diff --git a/src/SeqCli/Ingestion/LogShipper.cs b/src/SeqCli/Ingestion/LogShipper.cs index 6c3f9a18..969d955c 100644 --- a/src/SeqCli/Ingestion/LogShipper.cs +++ b/src/SeqCli/Ingestion/LogShipper.cs @@ -25,7 +25,6 @@ using Serilog.Core; using Serilog.Events; using Serilog.Formatting.Compact; -using Serilog.Formatting.Compact.Reader; namespace SeqCli.Ingestion { @@ -38,7 +37,7 @@ static class LogShipper public static async Task ShipEvents( SeqConnection connection, - LogEventReader reader, + ILogEventReader reader, List enrichers, InvalidDataHandling invalidDataHandling, Func filter = null) @@ 
-47,7 +46,7 @@ public static async Task ShipEvents( if (reader == null) throw new ArgumentNullException(nameof(reader)); if (enrichers == null) throw new ArgumentNullException(nameof(enrichers)); - var batch = ReadBatch(reader, filter, BatchSize, invalidDataHandling); + var batch = await ReadBatchAsync(reader, filter, BatchSize, invalidDataHandling); while (batch.Length > 0) { StringContent content; @@ -67,7 +66,7 @@ public static async Task ShipEvents( if (result.IsSuccessStatusCode) { - batch = ReadBatch(reader, filter, BatchSize, invalidDataHandling); + batch = await ReadBatchAsync(reader, filter, BatchSize, invalidDataHandling); continue; } @@ -80,7 +79,7 @@ public static async Task ShipEvents( Log.Error("Failed with status code {StatusCode}: {ErrorMessage}", result.StatusCode, - (string)error.ErrorMessage); + (string)error.Error); } catch { @@ -95,16 +94,23 @@ public static async Task ShipEvents( return 0; } - static LogEvent[] ReadBatch(LogEventReader reader, Func filter, - int count, InvalidDataHandling invalidDataHandling) + static async Task ReadBatchAsync( + ILogEventReader reader, + Func filter, + int count, + InvalidDataHandling invalidDataHandling) { var batch = new List(); do { try { - while (batch.Count < count && reader.TryRead(out var evt)) + while (batch.Count < count) { + var evt = await reader.TryReadAsync(); + if (evt == null) + break; + if (filter == null || filter(evt)) { batch.Add(evt);
diff --git a/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs b/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs new file mode 100644 index 00000000..7883d668 --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/ExtractionPatternInterpreter.cs @@ -0,0 +1,60 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using SeqCli.PlainText.Patterns;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>
+    /// Converts a parsed extraction pattern into the pattern elements that perform extraction.
+    /// </summary>
+    static class ExtractionPatternInterpreter
+    {
+        /// <summary>
+        /// An extractor with a single element that captures <see cref="Matchers.MultiLineMessage"/>
+        /// under the message property name.
+        /// </summary>
+        public static NameValueExtractor MultilineMessageExtractor { get; } = new NameValueExtractor(new[]
+        {
+            new SimplePatternElement(Matchers.MultiLineMessage, ReifiedProperties.Message)
+        });
+
+        static PatternElement[] CreatePatternElements(ExtractionPattern pattern)
+        {
+            if (pattern == null) throw new ArgumentNullException(nameof(pattern));
+
+            var patternElements = new PatternElement[pattern.Elements.Count];
+
+            // Built from last to first: a non-greedy content element takes the already-built
+            // elements that follow it as its lookahead.
+            for (var i = pattern.Elements.Count - 1; i >= 0; --i)
+            {
+                var element = pattern.Elements[i];
+                switch (element)
+                {
+                    case LiteralTextPatternExpression text:
+                        patternElements[i] = new SimplePatternElement(Matchers.LiteralText(text.Text));
+                        break;
+                    case CapturePatternExpression capture
+                        when capture.Content is NonGreedyContentExpression ngc:
+                        patternElements[i] = new SimplePatternElement(
+                            Matchers.NonGreedyContent(patternElements.Skip(i + 1).Take(ngc.Lookahead).ToArray()),
+                            capture.Name);
+                        break;
+                    case CapturePatternExpression capture
+                        when capture.Content is MatchTypeContentExpression mtc:
+                        // A capture with no matcher type falls back to the `token` matcher.
+                        patternElements[i] = new SimplePatternElement(
+                            mtc.Type == null ? Matchers.Token : Matchers.GetByType(mtc.Type),
+                            capture.Name);
+                        break;
+                    case CapturePatternExpression capture
+                        when capture.Content is GroupedContentExpression gc:
+                        patternElements[i] = new GroupedPatternElement(
+                            CreatePatternElements(gc.ExtractionPattern),
+                            capture.Name);
+                        break;
+                    default:
+                        throw new InvalidOperationException($"Element `{element}` not recognized.");
+                }
+            }
+
+            return patternElements;
+        }
+
+        /// <summary>Builds the extractor for <paramref name="pattern"/>.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
+        public static NameValueExtractor CreateNameValueExtractor(ExtractionPattern pattern)
+        {
+            var patternElements = CreatePatternElements(pattern);
+            return new NameValueExtractor(patternElements);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/Extraction/GroupedPatternElement.cs b/src/SeqCli/PlainText/Extraction/GroupedPatternElement.cs new file mode 100644 index 00000000..d269cfca --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/GroupedPatternElement.cs @@ -0,0 +1,51 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Superpower;
+using Superpower.Model;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>
+    /// A pattern element made of a sequence of child elements that must all match, in order.
+    /// </summary>
+    class GroupedPatternElement : PatternElement
+    {
+        readonly PatternElement[] _content;
+
+        public GroupedPatternElement(IEnumerable<PatternElement> content, string name = null)
+            : base(name)
+        {
+            _content = content?.ToArray() ?? throw new ArgumentNullException(nameof(content));
+            if (_content.Length == 0) throw new ArgumentException("A grouped pattern must include at least one element.");
+
+            Match = _content.Select(c => c.Match).Aggregate((a, b) => a.IgnoreThen(b));
+        }
+
+        public override TextParser<Unit> Match { get; }
+
+        /// <summary>
+        /// Runs every child element in turn. On success the children's captures, plus the whole
+        /// matched span under this element's own name (if any), are added to
+        /// <paramref name="result"/>. On failure <paramref name="result"/> is left untouched and
+        /// <paramref name="remainder"/> is reset to <paramref name="input"/>.
+        /// </summary>
+        public override bool TryExtract(
+            TextSpan input,
+            Dictionary<string, object> result,
+            out TextSpan remainder)
+        {
+            // Child captures are staged so that a partial match contributes nothing.
+            var temp = new Dictionary<string, object>();
+
+            var rem = input;
+            foreach (var element in _content)
+            {
+                if (!element.TryExtract(rem, temp, out rem))
+                {
+                    remainder = input;
+                    return false;
+                }
+            }
+
+            foreach (var pair in temp)
+                result.Add(pair.Key, pair.Value);
+
+            var value = input.Until(rem);
+            remainder = rem;
+            CollectResult(result, value);
+
+            return true;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/Extraction/MatcherAttribute.cs b/src/SeqCli/PlainText/Extraction/MatcherAttribute.cs new file mode 100644 index 00000000..903c00cd --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/MatcherAttribute.cs @@ -0,0 +1,15 @@
+using System;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>
+    /// Gives a matcher property the name under which it is registered for lookup by
+    /// <c>Matchers.GetByType</c>.
+    /// </summary>
+    [AttributeUsage(AttributeTargets.Property)]
+    class MatcherAttribute : Attribute
+    {
+        public string Name { get; }
+
+        public MatcherAttribute(string name)
+        {
+            Name = name ?? throw new ArgumentNullException(nameof(name));
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/Extraction/Matchers.cs b/src/SeqCli/PlainText/Extraction/Matchers.cs new file mode 100644 index 00000000..1eb482a6 --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/Matchers.cs @@ -0,0 +1,136 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Reflection;
+using SeqCli.PlainText.Parsers;
+using Superpower;
+using Superpower.Model;
+using Superpower.Parsers;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>
+    /// The built-in matchers. Properties carrying a <see cref="MatcherAttribute"/> are collected
+    /// by reflection into a name-to-parser table used by <see cref="GetByType"/>.
+    /// </summary>
+    // ReSharper disable UnusedMember.Global
+    static class Matchers
+    {
+        [Matcher("ident")]
+        public static TextParser<object> Identifier { get; } =
+            IdentifierEx.CStyle
+                .Select(span => (object) span);
+
+        [Matcher("nat")]
+        public static TextParser<object> Natural { get; } =
+            Numerics.NaturalUInt64
+                .Select(span => (object) span);
+
+        [Matcher("int")]
+        public static TextParser<object> Integer { get; } =
+            Numerics.IntegerInt64
+                .Select(span => (object) span);
+
+        [Matcher("dec")]
+        public static TextParser<object> Decimal { get; } =
+            NumericsEx.Decimal
+                .Select(span => (object) span);
+
+        [Matcher("alpha")]
+        public static TextParser<object> Alphabetical { get; } =
+            Span.WithAll(char.IsLetter)
+                .Select(span => (object) span);
+
+        [Matcher("alphanum")]
+        public static TextParser<object> Alphanumeric { get; } =
+            Span.WithAll(char.IsLetterOrDigit)
+                .Select(span => (object) span);
+
+        [Matcher("token")]
+        public static TextParser<object> Token { get; } =
+            SpanEx.NonWhiteSpace.Select(span => (object)span);
+
+        [Matcher("iso8601dt")]
+        // A date and time are required by this pattern, though not necessarily by the spec.
+        public static TextParser<object> Iso8601DateTime { get; } =
+            DateTimesEx.Iso8601DateTime
+                .Select(span => (object) span);
+
+        // The separating space sits inside the optional offset group, so that a timestamp with no
+        // offset does not swallow the space that follows it.
+        static readonly TextParser<TextSpan> SerilogFileTimestampText =
+            Span.Regex("\\d{4}-\\d\\d-\\d\\d \\d\\d:\\d\\d:\\d\\d(\\.\\d+)?( [+-]\\d\\d:\\d\\d)?");
+
+        /// <summary>
+        /// Matches timestamps such as <c>2018-02-21 13:29:00.123 +10:00</c> and yields a
+        /// <see cref="DateTimeOffset"/>. The fractional seconds and the offset are optional; when
+        /// the offset is absent the local offset is assumed. Text that has the right shape but is
+        /// not a valid date fails to match instead of throwing.
+        /// </summary>
+        public static TextParser<object> SerilogFileTimestamp { get; } = input =>
+        {
+            var text = SerilogFileTimestampText(input);
+            if (text.HasValue &&
+                DateTimeOffset.TryParse(
+                    text.Value.ToStringValue(),
+                    CultureInfo.InvariantCulture,
+                    DateTimeStyles.AssumeLocal,
+                    out var timestamp))
+            {
+                return Result.Value((object) timestamp, input, text.Remainder);
+            }
+
+            return Result.Empty<object>(input);
+        };
+
+        [Matcher("timestamp")]
+        public static TextParser<object> Timestamp { get; } =
+            Iso8601DateTime.Try().Or(SerilogFileTimestamp);
+
+        // Unclear whether we need to name this
+        public static TextParser<object> MultiLineMessage { get; } =
+            SpanEx.MatchedBy(
+                    Character.Matching(ch => !char.IsWhiteSpace(ch), "non whitespace character")
+                        .IgnoreThen(Character.AnyChar.Many()))
+                .Select(span => (object)span);
+
+        // Equivalent to :* at end-of-pattern
+        public static TextParser<object> MultiLineContent { get; } =
+            Span.WithAll(ch => true)
+                .Select(span => (object)span);
+
+        // `NewLine` is declared further down; it is only read inside the query's lambda, i.e. when
+        // the parser runs, not while this initializer executes.
+        [Matcher("line")]
+        public static TextParser<object> SingleLineContent { get; } =
+            from content in Span.WithoutAny(ch => ch == '\r' || ch == '\n')
+            from _ in NewLine.OptionalOrDefault()
+            select (object) content;
+
+        [Matcher("n")]
+        public static TextParser<object> NewLine { get; } =
+            Span.EqualTo("\r\n").Or(Span.EqualTo("\n"))
+                .Select(span => (object)span);
+
+        [Matcher("t")]
+        public static TextParser<object> Tab { get; } =
+            Span.EqualTo("\t")
+                .Select(span => (object)span);
+
+        // Must stay below the matcher properties: it reads their already-initialized values.
+        static readonly Dictionary<string, TextParser<object>> ByType = new Dictionary<string, TextParser<object>>(
+            from pi in typeof(Matchers).GetTypeInfo().DeclaredProperties
+            let attr = pi.GetCustomAttribute<MatcherAttribute>()
+            where attr != null
+            select KeyValuePair.Create(attr.Name, (TextParser<object>) pi.GetValue(null)));
+
+        /// <summary>Looks up a named matcher such as <c>nat</c> or <c>timestamp</c>.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
+        /// <exception cref="KeyNotFoundException">No matcher is registered under that name.</exception>
+        public static TextParser<object> GetByType(string type)
+        {
+            if (type == null) throw new ArgumentNullException(nameof(type));
+
+            if (!ByType.TryGetValue(type, out var matcher))
+                throw new KeyNotFoundException($"The matcher `{type}` is not recognized.");
+
+            return matcher;
+        }
+
+        public static TextParser<object> LiteralText(string literalText)
+        {
+            return Span.EqualTo(literalText).Select(span => (object) span);
+        }
+
+        /// <summary>
+        /// Matches text up to the point where the <paramref name="following"/> elements match in
+        /// sequence. With no following elements, all remaining input is matched. The value is the
+        /// matched span, or <c>null</c> when the span is empty.
+        /// </summary>
+        public static TextParser<object> NonGreedyContent(params PatternElement[] following)
+        {
+            if (following == null) throw new ArgumentNullException(nameof(following));
+
+            if (following.Length == 0)
+                return SpanEx.MatchedBy(Character.AnyChar.Many())
+                    .Select(span => span.Length > 0 ? (object) span : null);
+
+            var rest = following[0].Match;
+            for (var i = 1; i < following.Length; ++i)
+            {
+                rest = rest.IgnoreThen(following[i].Match);
+            }
+
+            return i =>
+            {
+                var remainder = i;
+                var attempt = rest(remainder);
+                while (!attempt.HasValue || attempt.Remainder == remainder) // A zero-length match doesn't tell us anything
+                {
+                    // Nothing left to consume: stop here rather than spin forever. The content
+                    // runs to the end of input, and the following element reports the mismatch.
+                    if (remainder.IsAtEnd)
+                        break;
+
+                    remainder = remainder.ConsumeChar().Remainder;
+                    attempt = rest(remainder);
+                }
+
+                var span = i.Until(remainder);
+                return Result.Value(span.Length > 0 ? (object) span : null, i, remainder);
+            };
+        }
+    }
+}
diff --git a/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs b/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs new file mode 100644 index 00000000..c1933c98 --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/NameValueExtractor.cs @@ -0,0 +1,43 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Superpower;
+using Superpower.Model;
+using Superpower.Parsers;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>
+    /// Applies a sequence of pattern elements to a piece of text and collects the named values.
+    /// </summary>
+    class NameValueExtractor
+    {
+        readonly PatternElement[] _elements;
+
+        public NameValueExtractor(IEnumerable<PatternElement> elements)
+        {
+            _elements = elements?.ToArray() ?? throw new ArgumentNullException(nameof(elements));
+            if (_elements.Length == 0)
+                throw new ArgumentException("An extraction pattern must contain at least one element.");
+        }
+
+        /// <summary>The matcher of the first pattern element.</summary>
+        public TextParser<Unit> StartMarker => _elements[0].Match;
+
+        /// <summary>
+        /// Runs the elements from left to right, stopping at the first one that fails.
+        /// </summary>
+        /// <returns>
+        /// The values captured so far, together with the text left over at the point of failure.
+        /// The second item is <c>null</c> when every element matched, or when the leftover is
+        /// empty or accepted by <c>Span.WhiteSpace.IsMatch</c>.
+        /// </returns>
+        public (IDictionary<string, object>, string) ExtractValues(string plainText)
+        {
+            var input = new TextSpan(plainText);
+            var result = new Dictionary<string, object>();
+
+            var remainder = input;
+            foreach (var element in _elements)
+            {
+                if (!element.TryExtract(remainder, result, out remainder))
+                {
+                    if (remainder.IsAtEnd || Span.WhiteSpace.IsMatch(remainder))
+                        return (result, null);
+
+                    return (result, remainder.ToStringValue());
+                }
+            }
+
+            return (result, null);
+        }
+    }
+}
diff --git a/src/SeqCli/PlainText/Extraction/PatternElement.cs b/src/SeqCli/PlainText/Extraction/PatternElement.cs new file mode 100644 index 00000000..42412afd --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/PatternElement.cs @@ -0,0 +1,31 @@
+using System.Collections.Generic;
+using Superpower;
+using Superpower.Model;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>
+    /// One element of an extraction pattern. An element with a null name still consumes input
+    /// but contributes no value.
+    /// </summary>
+    abstract class PatternElement
+    {
+        readonly string _name;
+
+        bool IsIgnored => _name == null;
+
+        protected PatternElement(string name)
+        {
+            _name = name;
+        }
+
+        /// <summary>A parser that recognizes this element without producing a value.</summary>
+        public abstract TextParser<Unit> Match { get; }
+
+        /// <summary>
+        /// Attempts to match at the start of <paramref name="input"/>, adding any captured value to
+        /// <paramref name="result"/> and reporting the unconsumed text in <paramref name="remainder"/>.
+        /// </summary>
+        public abstract bool TryExtract(
+            TextSpan input,
+            Dictionary<string, object> result,
+            out TextSpan remainder);
+
+        /// <summary>
+        /// Stores <paramref name="value"/> under this element's name, unless the element is unnamed.
+        /// Uses <c>Dictionary.Add</c>, so a name that is already present causes an exception.
+        /// </summary>
+        protected void CollectResult(Dictionary<string, object> result, object value)
+        {
+            if (!IsIgnored)
+                result.Add(_name, value);
+        }
+    }
+}
diff --git a/src/SeqCli/PlainText/Extraction/SimplePatternElement.cs b/src/SeqCli/PlainText/Extraction/SimplePatternElement.cs new file mode 100644 index 00000000..46ef94be --- /dev/null +++ b/src/SeqCli/PlainText/Extraction/SimplePatternElement.cs @@ -0,0 +1,39 @@
+using System;
+using System.Collections.Generic;
+using Superpower;
+using Superpower.Model;
+
+namespace SeqCli.PlainText.Extraction
+{
+    /// <summary>A pattern element backed by a single parser.</summary>
+    class SimplePatternElement : PatternElement
+    {
+        readonly TextParser<object> _parser;
+
+        public override TextParser<Unit> Match { get; }
+
+        public SimplePatternElement(TextParser<object> parser, string name = null)
+            : base(name)
+        {
+            _parser = parser ?? throw new ArgumentNullException(nameof(parser));
+            Match = _parser.Select(s => Unit.Value);
+        }
+
+        /// <summary>
+        /// Runs the parser on <paramref name="input"/>. On failure <paramref name="remainder"/> is
+        /// set to <paramref name="input"/> and nothing is collected.
+        /// </summary>
+        public override bool TryExtract(
+            TextSpan input,
+            Dictionary<string, object> result,
+            out TextSpan remainder)
+        {
+            var match = _parser(input);
+            if (!match.HasValue)
+            {
+                remainder = input;
+                return false;
+            }
+
+            CollectResult(result, match.Value);
+            remainder = match.Remainder;
+
+            return true;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/Framing/Frame.cs b/src/SeqCli/PlainText/Framing/Frame.cs new file mode 100644 index 00000000..8458dbc0 --- /dev/null +++ b/src/SeqCli/PlainText/Framing/Frame.cs @@ -0,0 +1,23 @@
+// Copyright 2018 Datalust Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace SeqCli.PlainText
+{
+    /// <summary>
+    /// The result of one FrameReader read. The default value (<c>HasValue == false</c>) is what
+    /// FrameReader returns when the source has no more lines.
+    /// </summary>
+    struct Frame
+    {
+        /// <summary>True when <see cref="Value"/> holds text.</summary>
+        public bool HasValue { get; set; }
+
+        /// <summary>Set by FrameReader for a line that does not begin with the frame-start marker and has no preceding frame.</summary>
+        public bool IsOrphan { get; set; }
+
+        /// <summary>The frame's text.</summary>
+        public string Value { get; set; }
+    }
+}
diff --git a/src/SeqCli/PlainText/Framing/FrameReader.cs b/src/SeqCli/PlainText/Framing/FrameReader.cs new file mode 100644 index 00000000..143025cd --- /dev/null +++ b/src/SeqCli/PlainText/Framing/FrameReader.cs @@ -0,0 +1,131 @@
+// Copyright 2018 Datalust Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.IO;
+using System.Text;
+using System.Threading.Tasks;
+using Superpower;
+using Superpower.Model;
+
+namespace SeqCli.PlainText
+{
+    /// <summary>
+    /// Groups the lines of a text source into frames. A frame is a line matching the frame-start
+    /// parser, followed by the non-matching lines that come after it.
+    /// </summary>
+    class FrameReader : IDisposable
+    {
+        readonly TextReader _source;
+        readonly TimeSpan _trailingLineArrivalDeadline;
+        readonly TextParser<TextSpan> _frameStart;
+
+        // A frame-start line that was read while finishing the previous frame.
+        string _unconsumedFirstLine;
+
+        // A read that was still pending when the deadline expired; its line belongs to a later call.
+        Task<string> _unawaitedNextLine;
+
+        /// <param name="source">The reader that lines are pulled from.</param>
+        /// <param name="frameStart">Parser that recognizes the beginning of a frame.</param>
+        /// <param name="trailingLineArrivalDeadline">
+        /// How long to wait for a further line before returning the frame collected so far.
+        /// </param>
+        public FrameReader(TextReader source, TextParser<TextSpan> frameStart, TimeSpan trailingLineArrivalDeadline)
+        {
+            _source = source ?? throw new ArgumentNullException(nameof(source));
+            _frameStart = frameStart ?? throw new ArgumentNullException(nameof(frameStart));
+            _trailingLineArrivalDeadline = trailingLineArrivalDeadline;
+        }
+
+        /// <summary>
+        /// Reads the next frame. Returns a frame with <c>HasValue == false</c> at end of input, and
+        /// one with <c>IsOrphan == true</c> for a line that neither starts a frame nor follows one.
+        /// </summary>
+        public async Task<Frame> TryReadAsync()
+        {
+            var valueBuilder = new StringBuilder();
+            var hasValue = false;
+
+            if (_unconsumedFirstLine != null)
+            {
+                valueBuilder.AppendLine(_unconsumedFirstLine);
+                _unconsumedFirstLine = null;
+                hasValue = true;
+            }
+            else if (_unawaitedNextLine != null)
+            {
+                var line = await _unawaitedNextLine;
+                _unawaitedNextLine = null;
+                if (line == null)
+                    return new Frame();
+
+                valueBuilder.AppendLine(line);
+                hasValue = true;
+
+                if (!IsFrameStart(line))
+                    return new Frame {HasValue = true, IsOrphan = true, Value = valueBuilder.ToString()};
+            }
+
+            Task<string> readLine = null;
+            while (true)
+            {
+                // NOTE(review): presumably wrapped in Task.Run because some readers complete
+                // ReadLineAsync synchronously - confirm before removing.
+                readLine = readLine ?? Task.Run(_source.ReadLineAsync);
+
+                // Wait for the line or the deadline without blocking a thread.
+                var completed = await Task.WhenAny(readLine, Task.Delay(_trailingLineArrivalDeadline));
+                if (completed != readLine) // Timeout
+                {
+                    if (hasValue)
+                    {
+                        _unawaitedNextLine = readLine;
+                        return new Frame {HasValue = true, Value = valueBuilder.ToString()};
+                    }
+
+                    // else, around we go!
+                    continue;
+                }
+
+                var line = await readLine;
+                readLine = null;
+                if (line == null)
+                {
+                    if (hasValue)
+                    {
+                        return new Frame {HasValue = true, Value = valueBuilder.ToString()};
+                    }
+
+                    return new Frame();
+                }
+
+                if (IsFrameStart(line))
+                {
+                    if (hasValue)
+                    {
+                        // This line opens the next frame; keep it for the next call.
+                        _unconsumedFirstLine = line;
+                        return new Frame {HasValue = true, Value = valueBuilder.ToString()};
+                    }
+
+                    valueBuilder.AppendLine(line);
+                    hasValue = true;
+                }
+                else
+                {
+                    if (!hasValue)
+                    {
+                        valueBuilder.AppendLine(line);
+                        return new Frame {HasValue = true, Value = valueBuilder.ToString(), IsOrphan = true};
+                    }
+
+                    valueBuilder.AppendLine(line);
+                }
+            }
+        }
+
+        // True when the frame-start parser matches a non-empty span at the start of the line.
+        bool IsFrameStart(string line)
+        {
+            if (line == null) throw new ArgumentNullException(nameof(line));
+            var result = _frameStart(new TextSpan(line));
+            return result.HasValue && result.Value.Length > 0;
+        }
+
+        /// <summary>Releases the parked read, if there is one and it has finished.</summary>
+        public void Dispose()
+        {
+            // Task.Dispose() throws InvalidOperationException for a task that has not completed,
+            // and a read parked by a deadline timeout is normally still running.
+            var pending = _unawaitedNextLine;
+            _unawaitedNextLine = null;
+            if (pending != null && pending.IsCompleted)
+                pending.Dispose();
+        }
+    }
+}
diff --git a/src/SeqCli/PlainText/LogEvents/LogEventBuilder.cs b/src/SeqCli/PlainText/LogEvents/LogEventBuilder.cs new file mode 100644 index 00000000..786b27cd --- /dev/null +++ b/src/SeqCli/PlainText/LogEvents/LogEventBuilder.cs @@ -0,0 +1,154 @@
+// Copyright 2018 Datalust Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using Serilog.Events;
+using Serilog.Parsing;
+using Superpower.Model;
+
+namespace SeqCli.PlainText
+{
+    /// <summary>
+    /// Builds a <see cref="LogEvent"/> from the name/value pairs produced by an extraction pattern.
+    /// </summary>
+    static class LogEventBuilder
+    {
+        /// <summary>
+        /// Creates an event from extracted properties. The reified timestamp, level, message and
+        /// exception entries become the corresponding parts of the event; everything else becomes
+        /// an event property. A non-null <paramref name="remainder"/> is attached as
+        /// <c>@unmatched</c>.
+        /// </summary>
+        public static LogEvent FromProperties(IDictionary<string, object> properties, string remainder)
+        {
+            var timestamp = GetTimestamp(properties);
+            var level = GetLevel(properties);
+            var exception = TryGetException(properties);
+            var messageTemplate = GetMessageTemplate(properties);
+            var props = GetLogEventProperties(properties, remainder);
+
+            return new LogEvent(
+                timestamp,
+                level,
+                exception,
+                messageTemplate,
+                props);
+        }
+
+        static readonly MessageTemplate NoMessage = new MessageTemplateParser().Parse("");
+
+        static MessageTemplate GetMessageTemplate(IDictionary<string, object> properties)
+        {
+            if (properties.TryGetValue(ReifiedProperties.Message, out var m) &&
+                m is TextSpan ts)
+            {
+                // A single text token: the message is not run through the template parser.
+                var text = ts.ToStringValue();
+                return new MessageTemplate(new MessageTemplateToken[] {new TextToken(text) });
+            }
+
+            return NoMessage;
+        }
+
+        // Unrecognized or missing level names fall back to Information.
+        static LogEventLevel GetLevel(IDictionary<string, object> properties)
+        {
+            if (properties.TryGetValue(ReifiedProperties.Level, out var l) &&
+                l is TextSpan ts &&
+                LevelsByName.TryGetValue(ts.ToStringValue(), out var level))
+                return level;
+            return LogEventLevel.Information;
+        }
+
+        static Exception TryGetException(IDictionary<string, object> properties)
+        {
+            if (properties.TryGetValue(ReifiedProperties.Exception, out var x) &&
+                x is TextSpan ts)
+                return new TextOnlyException(ts.ToStringValue());
+            return null;
+        }
+
+        static IEnumerable<LogEventProperty> GetLogEventProperties(IDictionary<string, object> properties, string remainder)
+        {
+            var payload = properties
+                .Where(p => !ReifiedProperties.IsReifiedProperty(p.Key))
+                .Select(p => new LogEventProperty(p.Key, new ScalarValue(p.Value)));
+
+            if (remainder != null)
+                payload = payload.Concat(new[]
+                {
+                    new LogEventProperty("@unmatched", new ScalarValue(remainder))
+                });
+            return payload;
+        }
+
+        // Accepts either raw timestamp text or an already-parsed DateTimeOffset; falls back to
+        // the current time when neither is usable.
+        static DateTimeOffset GetTimestamp(IDictionary<string, object> properties)
+        {
+            if (properties.TryGetValue(ReifiedProperties.Timestamp, out var t))
+            {
+                if (t is TextSpan span && DateTimeOffset.TryParse(span.ToStringValue(),
+                        CultureInfo.InvariantCulture, DateTimeStyles.AssumeLocal, out var ts))
+                    return ts;
+
+                if (t is DateTimeOffset dto)
+                    return dto;
+            }
+
+            return DateTimeOffset.Now;
+        }
+
+        // Case-insensitive level names and abbreviations.
+        static readonly Dictionary<string, LogEventLevel> LevelsByName = new Dictionary<string, LogEventLevel>(StringComparer.OrdinalIgnoreCase)
+        {
+            ["t"] = LogEventLevel.Verbose,
+            ["tr"] = LogEventLevel.Verbose,
+            ["trc"] = LogEventLevel.Verbose,
+            ["trce"] = LogEventLevel.Verbose,
+            ["trace"] = LogEventLevel.Verbose,
+            ["v"] = LogEventLevel.Verbose,
+            ["ver"] = LogEventLevel.Verbose,
+            ["vrb"] = LogEventLevel.Verbose,
+            ["verb"] = LogEventLevel.Verbose,
+            ["verbose"] = LogEventLevel.Verbose,
+            ["d"] = LogEventLevel.Debug,
+            ["de"] = LogEventLevel.Debug,
+            ["dbg"] = LogEventLevel.Debug,
+            ["deb"] = LogEventLevel.Debug,
+            ["dbug"] = LogEventLevel.Debug,
+            ["debu"] = LogEventLevel.Debug,
+            ["debug"] = LogEventLevel.Debug,
+            ["i"] = LogEventLevel.Information,
+            ["in"] = LogEventLevel.Information,
+            ["inf"] = LogEventLevel.Information,
+            ["info"] = LogEventLevel.Information,
+            ["information"] = LogEventLevel.Information,
+            ["w"] = LogEventLevel.Warning,
+            ["wa"] = LogEventLevel.Warning,
+            ["war"] = LogEventLevel.Warning,
+            ["wrn"] = LogEventLevel.Warning,
+            ["warn"] = LogEventLevel.Warning,
+            ["warning"] = LogEventLevel.Warning,
+            ["e"] = LogEventLevel.Error,
+            ["er"] = LogEventLevel.Error,
+            ["err"] = LogEventLevel.Error,
+            ["erro"] = LogEventLevel.Error,
+            ["eror"] = LogEventLevel.Error,
+            ["error"] = LogEventLevel.Error,
+            ["f"] = LogEventLevel.Fatal,
+            ["fa"] = LogEventLevel.Fatal,
+            ["ftl"] = LogEventLevel.Fatal,
+            ["fat"] = LogEventLevel.Fatal,
+            ["fatl"] = LogEventLevel.Fatal,
+            ["fatal"] = LogEventLevel.Fatal,
+            ["c"] = LogEventLevel.Fatal,
+            ["cr"] = LogEventLevel.Fatal,
+            ["crt"] = LogEventLevel.Fatal,
+            ["cri"] = LogEventLevel.Fatal,
+            ["crit"] = LogEventLevel.Fatal,
+            ["critical"] = LogEventLevel.Fatal
+        };
+
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/LogEvents/TextOnlyException.cs b/src/SeqCli/PlainText/LogEvents/TextOnlyException.cs new file mode 100644 index 00000000..d0f67b60 --- /dev/null +++ b/src/SeqCli/PlainText/LogEvents/TextOnlyException.cs @@ -0,0 +1,33 @@
+// Copyright 2018 Datalust Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+
+namespace SeqCli.PlainText
+{
+    /// <summary>
+    /// An exception whose <see cref="ToString"/> returns the text it was constructed with, used
+    /// to carry exception text extracted from a plain-text log.
+    /// </summary>
+    class TextOnlyException : Exception
+    {
+        readonly string _toStringValue;
+
+        public TextOnlyException(string toStringValue)
+        {
+            _toStringValue = toStringValue ?? throw new ArgumentNullException(nameof(toStringValue));
+        }
+
+        public override string ToString()
+        {
+            return _toStringValue;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/Parsers/DateTimesEx.cs b/src/SeqCli/PlainText/Parsers/DateTimesEx.cs new file mode 100644 index 00000000..7653505a --- /dev/null +++ b/src/SeqCli/PlainText/Parsers/DateTimesEx.cs @@ -0,0 +1,12 @@
+using Superpower;
+using Superpower.Model;
+using Superpower.Parsers;
+
+namespace SeqCli.PlainText.Parsers
+{
+    static class DateTimesEx
+    {
+        // Date and time separated by `T`, with optional fractional seconds and an optional
+        // offset or `Z` suffix.
+        public static TextParser<TextSpan> Iso8601DateTime { get; } =
+            Span.Regex("\\d{4}-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?(([+-]\\d\\d:\\d\\d)|Z)?");
+    }
+}
diff --git a/src/SeqCli/PlainText/Parsers/IdentifierEx.cs b/src/SeqCli/PlainText/Parsers/IdentifierEx.cs new file mode 100644 index 00000000..c7248029 --- /dev/null +++ b/src/SeqCli/PlainText/Parsers/IdentifierEx.cs @@ -0,0 +1,14 @@
+using Superpower;
+using Superpower.Model;
+using Superpower.Parsers;
+
+namespace SeqCli.PlainText.Parsers
+{
+    static class IdentifierEx
+    {
+        // A letter or underscore, followed by any number of letters, digits or underscores.
+        public static TextParser<TextSpan> CStyle { get; } =
+            SpanEx.MatchedBy(
+                Character.Letter.Or(Character.EqualTo('_'))
+                    .IgnoreThen(Character.LetterOrDigit.Or(Character.EqualTo('_')).Many()));
+    }
+}
\ No newline at end of file
diff --git a/src/SeqCli/PlainText/Parsers/NumericsEx.cs b/src/SeqCli/PlainText/Parsers/NumericsEx.cs new file mode 100644 index 00000000..1810372b --- /dev/null +++ b/src/SeqCli/PlainText/Parsers/NumericsEx.cs @@ -0,0 +1,21 @@ +using Superpower; +using Superpower.Model; +using Superpower.Parsers; + +namespace SeqCli.PlainText.Parsers +{ + static class NumericsEx + { + public static TextParser Decimal { get; } = + Numerics.Integer + .Then(n => Character.EqualTo('.').IgnoreThen(Numerics.Integer).OptionalOrDefault() + .Select(f => f == TextSpan.None ? 
n : new TextSpan(n.Source, n.Position, n.Length + f.Length + 1))); + + public static TextParser HexNatural { get; } = + SpanEx.MatchedBy(Span.EqualTo("0x") + .IgnoreThen(Character.Digit + .Or(Character.Matching(ch => ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F', "a-f")) + .Named("hex digit") + .AtLeastOnce())); + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Parsers/SpanEx.cs b/src/SeqCli/PlainText/Parsers/SpanEx.cs new file mode 100644 index 00000000..0564a735 --- /dev/null +++ b/src/SeqCli/PlainText/Parsers/SpanEx.cs @@ -0,0 +1,28 @@ +using Superpower; +using Superpower.Model; +using Superpower.Parsers; + +namespace SeqCli.PlainText.Parsers +{ + static class SpanEx + { + public static TextParser MatchedBy(TextParser parser) + { + return i => + { + var result = parser(i); + + if (!result.HasValue) + return Result.CastEmpty(result); + + return Result.Value( + i.Until(result.Remainder), + i, + result.Remainder); + }; + } + + public static TextParser NonWhiteSpace { get; } = + Span.WithoutAny(char.IsWhiteSpace); + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Parsers/StringsEx.cs b/src/SeqCli/PlainText/Parsers/StringsEx.cs new file mode 100644 index 00000000..a39ffff9 --- /dev/null +++ b/src/SeqCli/PlainText/Parsers/StringsEx.cs @@ -0,0 +1,16 @@ +using Superpower; +using Superpower.Parsers; + +namespace SeqCli.PlainText.Parsers +{ + static class StringsEx + { + static readonly TextParser SqlStringContentChar = + Span.EqualTo("''").Value('\'').Try().Or(Character.ExceptIn('\'', '\r', '\n')); + + public static TextParser SqlStyle { get; } = + Character.EqualTo('\'') + .IgnoreThen(SqlStringContentChar.Many()) + .Then(s => Character.EqualTo('\'').Value(new string(s))); + } +} diff --git a/src/SeqCli/PlainText/Parsers/TextParserExtensions.cs b/src/SeqCli/PlainText/Parsers/TextParserExtensions.cs new file mode 100644 index 00000000..52d4aacb --- /dev/null +++ b/src/SeqCli/PlainText/Parsers/TextParserExtensions.cs @@ -0,0 +1,13 
@@ +using Superpower; + +namespace SeqCli.PlainText.Parsers +{ + public static class TextParserExtensions + { + public static TextParser Cast(this TextParser parser) + where T : U + { + return parser.Select(t => (U) t); + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/CaptureContentExpression.cs b/src/SeqCli/PlainText/Patterns/CaptureContentExpression.cs new file mode 100644 index 00000000..ef4d8ec1 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/CaptureContentExpression.cs @@ -0,0 +1,6 @@ +namespace SeqCli.PlainText.Patterns +{ + abstract class CaptureContentExpression + { + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/CapturePatternExpression.cs b/src/SeqCli/PlainText/Patterns/CapturePatternExpression.cs new file mode 100644 index 00000000..4e76bca5 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/CapturePatternExpression.cs @@ -0,0 +1,14 @@ +namespace SeqCli.PlainText.Patterns +{ + class CapturePatternExpression : ExtractionPatternExpression + { + public string Name { get; } + public CaptureContentExpression Content { get; } + + public CapturePatternExpression(string name, CaptureContentExpression content) + { + Name = name; + Content = content; + } + } +} diff --git a/src/SeqCli/PlainText/Patterns/ExtractionPattern.cs b/src/SeqCli/PlainText/Patterns/ExtractionPattern.cs new file mode 100644 index 00000000..4b810ed9 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/ExtractionPattern.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace SeqCli.PlainText.Patterns +{ + class ExtractionPattern + { + public IReadOnlyList Elements { get; } + + public ExtractionPattern(IEnumerable items) + { + if (items == null) throw new ArgumentNullException(nameof(items)); + Elements = items.ToArray(); + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/ExtractionPatternExpression.cs 
b/src/SeqCli/PlainText/Patterns/ExtractionPatternExpression.cs new file mode 100644 index 00000000..fca4ec75 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/ExtractionPatternExpression.cs @@ -0,0 +1,6 @@ +namespace SeqCli.PlainText.Patterns +{ + abstract class ExtractionPatternExpression + { + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs b/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs new file mode 100644 index 00000000..ff3a2145 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/ExtractionPatternParser.cs @@ -0,0 +1,69 @@ +using System; +using SeqCli.PlainText.Parsers; +using Superpower; +using Superpower.Parsers; + +namespace SeqCli.PlainText.Patterns +{ + static class ExtractionPatternParser + { + static readonly TextParser LiteralText = + Span.EqualTo("{{").Value('{').Try() + .Or(Span.EqualTo("}}").Value('}').Try()) + .Or(Character.ExceptIn('{', '}')) + .AtLeastOnce() + .Select(ch => new LiteralTextPatternExpression(new string(ch))); + + static readonly TextParser CaptureName = + SpanEx.MatchedBy( + Character.Letter.Or(Character.In('@', '_')) + .IgnoreThen(Character.LetterOrDigit.Or(Character.EqualTo('_')).Many())) + .Select(s => s.ToStringValue()); + + static readonly TextParser NonGreedyContent = + Character.EqualTo('*').AtLeastOnce() + .Select(chs => (CaptureContentExpression) new NonGreedyContentExpression(chs.Length)); + + static readonly TextParser MatchTypeContent = + SpanEx.MatchedBy(Character.Letter.Or(Character.EqualTo('_')) + .IgnoreThen(Character.LetterOrDigit.Or(Character.EqualTo('_')).Many())) + .Select(s => (CaptureContentExpression) new MatchTypeContentExpression(s.ToStringValue())); + + static readonly TextParser GroupedContent = + Span.EqualTo("=") + .IgnoreThen(Superpower.Parse.Ref(() => Elements)) + .Select(els => (CaptureContentExpression) new GroupedContentExpression(new ExtractionPattern(els))); + + static readonly TextParser CaptureContent = + NonGreedyContent + 
.Or(MatchTypeContent) + .Or(GroupedContent); + + static readonly TextParser Capture = + from _ in Character.EqualTo('{') + from name in CaptureName.OptionalOrDefault() + from content in Character.EqualTo(':') + .IgnoreThen(CaptureContent) + .OptionalOrDefault() + where name != null || content != null + from __ in Character.EqualTo('}') + select new CapturePatternExpression(name, content); + + static readonly TextParser Element = + LiteralText.Cast() + .Or(Capture.Cast()); + + static readonly TextParser Elements = + Element.AtLeastOnce(); + + static readonly TextParser Pattern = + Elements.AtEnd().Select(e => new ExtractionPattern(e)); + + public static ExtractionPattern Parse(string extractionPattern) + { + if (extractionPattern == null) throw new ArgumentNullException(nameof(extractionPattern)); + if (extractionPattern == "") throw new ParseException("Zero-length extraction patterns are not allowed."); + return Pattern.Parse(extractionPattern); + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/GroupedContentExpression.cs b/src/SeqCli/PlainText/Patterns/GroupedContentExpression.cs new file mode 100644 index 00000000..23cf2f33 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/GroupedContentExpression.cs @@ -0,0 +1,14 @@ +using System; + +namespace SeqCli.PlainText.Patterns +{ + class GroupedContentExpression : CaptureContentExpression + { + public ExtractionPattern ExtractionPattern { get; } + + public GroupedContentExpression(ExtractionPattern extractionPattern) + { + ExtractionPattern = extractionPattern ?? 
throw new ArgumentNullException(nameof(extractionPattern)); + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/LiteralTextPatternExpression.cs b/src/SeqCli/PlainText/Patterns/LiteralTextPatternExpression.cs new file mode 100644 index 00000000..a8399e81 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/LiteralTextPatternExpression.cs @@ -0,0 +1,14 @@ +using System; + +namespace SeqCli.PlainText.Patterns +{ + class LiteralTextPatternExpression : ExtractionPatternExpression + { + public string Text { get; } + + public LiteralTextPatternExpression(string text) + { + Text = text ?? throw new ArgumentNullException(nameof(text)); + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/MatchTypeContentExpression.cs b/src/SeqCli/PlainText/Patterns/MatchTypeContentExpression.cs new file mode 100644 index 00000000..1a0f9bf9 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/MatchTypeContentExpression.cs @@ -0,0 +1,12 @@ +namespace SeqCli.PlainText.Patterns +{ + class MatchTypeContentExpression : CaptureContentExpression + { + public string Type { get; } + + public MatchTypeContentExpression(string type) + { + Type = type; + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/Patterns/NonGreedyContentExpression.cs b/src/SeqCli/PlainText/Patterns/NonGreedyContentExpression.cs new file mode 100644 index 00000000..bb4a2969 --- /dev/null +++ b/src/SeqCli/PlainText/Patterns/NonGreedyContentExpression.cs @@ -0,0 +1,12 @@ +namespace SeqCli.PlainText.Patterns +{ + class NonGreedyContentExpression : CaptureContentExpression + { + public int Lookahead { get; } + + public NonGreedyContentExpression(int lookahead) + { + Lookahead = lookahead; + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/PlainTextLogEventReader.cs b/src/SeqCli/PlainText/PlainTextLogEventReader.cs new file mode 100644 index 00000000..af1cac3a --- /dev/null +++ b/src/SeqCli/PlainText/PlainTextLogEventReader.cs @@ -0,0 +1,46 
@@ +using System; +using System.IO; +using System.Threading.Tasks; +using SeqCli.Ingestion; +using SeqCli.PlainText.Extraction; +using SeqCli.PlainText.Parsers; +using SeqCli.PlainText.Patterns; +using Serilog.Events; + +namespace SeqCli.PlainText +{ + class PlainTextLogEventReader : ILogEventReader, IDisposable + { + static readonly TimeSpan TrailingLineArrivalDeadline = TimeSpan.FromMilliseconds(10); + + readonly NameValueExtractor _nameValueExtractor; + readonly FrameReader _reader; + + public PlainTextLogEventReader(TextReader input, string extractionPattern) + { + _nameValueExtractor = string.IsNullOrEmpty(extractionPattern) ? + ExtractionPatternInterpreter.MultilineMessageExtractor : + ExtractionPatternInterpreter.CreateNameValueExtractor(ExtractionPatternParser.Parse(extractionPattern)); + + _reader = new FrameReader(input, SpanEx.MatchedBy(_nameValueExtractor.StartMarker), TrailingLineArrivalDeadline); + } + + public async Task TryReadAsync() + { + var frame = await _reader.TryReadAsync(); + if (!frame.HasValue) + return null; + + if (frame.IsOrphan) + throw new InvalidDataException($"A line arrived late or could not be parsed: `{frame.Value.Trim()}`."); + + var (properties, remainder) = _nameValueExtractor.ExtractValues(frame.Value); + return LogEventBuilder.FromProperties(properties, remainder); + } + + public void Dispose() + { + _reader.Dispose(); + } + } +} \ No newline at end of file diff --git a/src/SeqCli/PlainText/ReifiedProperties.cs b/src/SeqCli/PlainText/ReifiedProperties.cs new file mode 100644 index 00000000..aa9950be --- /dev/null +++ b/src/SeqCli/PlainText/ReifiedProperties.cs @@ -0,0 +1,23 @@ +using System.Collections.Generic; + +namespace SeqCli.PlainText +{ + static class ReifiedProperties + { + public const string + Message = "@m", + Timestamp = "@t", + Level = "@l", + Exception = "@x"; + + static readonly HashSet All = new HashSet + { + Message, Timestamp, Level, Exception + }; + + public static bool IsReifiedProperty(string name) + { + 
return All.Contains(name); + } + } +} diff --git a/src/SeqCli/SeqCli.csproj b/src/SeqCli/SeqCli.csproj index e0447852..0292c67e 100644 --- a/src/SeqCli/SeqCli.csproj +++ b/src/SeqCli/SeqCli.csproj @@ -25,7 +25,7 @@ - + diff --git a/test/SeqCli.Tests/PlainText/ExtractionPatternInterpreterTests.cs b/test/SeqCli.Tests/PlainText/ExtractionPatternInterpreterTests.cs new file mode 100644 index 00000000..f14b1b77 --- /dev/null +++ b/test/SeqCli.Tests/PlainText/ExtractionPatternInterpreterTests.cs @@ -0,0 +1,93 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using SeqCli.PlainText; +using SeqCli.PlainText.Extraction; +using SeqCli.PlainText.Patterns; +using Xunit; + +namespace SeqCli.Tests.PlainText +{ + public class ExtractionPatternInterpreterTests + { + static (IDictionary, string) ExtractValues(string pattern, string candidate) + { + var parsed = ExtractionPatternParser.Parse(pattern); + var extractor = ExtractionPatternInterpreter.CreateNameValueExtractor(parsed); + return extractor.ExtractValues(candidate); + } + + [Fact] + public void NonGreedyMatchCanLookaheadMultipleTokens() + { + var (properties, remainder) = ExtractValues("[{test:**}]!", "[0]abc[1]!"); + Assert.Null(remainder); + Assert.Equal("0]abc[1", properties["test"].ToString()); + } + + [Fact] + public void TheMatchingPatternCanExtractDefaultSerilogFileOutput() + { + // This is the default format: "{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} [{Level:u3}] {Message:lj}{NewLine}{Exception}" + // See: https://github.com/serilog/serilog-sinks-file#controlling-event-formatting + + // {@l:ident} is required so that the default "token" pattern doesn't greedily eat up the `]`. + // "timestamp" is intended to be an aggregate timestamp parser that tries ISO 8601, RFC 2822, and various other + // popular timestamp formats. 
+ + var pattern = "{@t:timestamp} [{@l:ident}] {@m:*}{:n}{@x:*}"; + + var candidate = +@"2018-02-21 13:29:00.123 +10:00 [ERR] The operation failed +System.DivideByZeroException: Attempt to divide by zero + at SomeClass.SomeMethod() +"; + + var (properties, remainder) = ExtractValues(pattern, candidate); + + Assert.Equal( + DateTimeOffset.ParseExact("2018-02-21 13:29:00.123 +10:00", "yyyy-MM-dd HH:mm:ss.fff zzz", CultureInfo.InvariantCulture), + properties["@t"]); + Assert.Equal("ERR", properties["@l"].ToString()); + Assert.Equal("The operation failed", properties["@m"].ToString()); + Assert.Equal(@"System.DivideByZeroException: Attempt to divide by zero + at SomeClass.SomeMethod() +", properties["@x"].ToString()); + Assert.Null(remainder); + } + + // Work-in-progress... + + // [Fact] + public void TheMatchingPatternCanExtractDefaultSerilogConsoleOutput() + { + // This is the default format: "[{Timestamp:HH:mm:ss} {Level:u3}] {Message:lj}{NewLine}{Exception}" + // See: https://github.com/serilog/serilog-sinks-console#output-templates + + // "localtime" will add the closest non-future date to the time component that is matched + // by the pattern + + // The pattern language needs to be extended here so that the brackets, timestamp, spacing and + // level are all used as the start-frame marker. The strawman syntax proposes that to the + // right of `:` will always be either an alphanumeric matcher name, or a subexpression. This + // does have the issue that `{?:foo}` would be ambiguous (optional 'foo' matcher or optional 'foo' + // literal, so some escaping would be necessary - e.g. `{?:\foo}` to indicate a literal 'foo' and + // `{?:\*}` for an optional literal asterisk, `{?:\\}` for an optional literal backslash. 
+ +#pragma warning disable 219 + var pattern = "{:[{@t:localtime} {@l:ident}] }{@m:*}{:n}{@x:*}"; +#pragma warning restore 219 + } + + // [Fact] + public void OptionalSourceContextCanBeExtracted() + { + // The {?: optional grouping is just an anonymous optional property, e.g. if the formatting was + // not dynamic, it might be written {SourceContext?:*}; using the grouping means the surrounding + // whitespace and parens are required only if the optional group is matched. +#pragma warning disable 219 + var pattern = "{:[{@t} {@l:ident}] }{?:({SourceContext:*}) }{@m:*}{:n}{@x:*}"; +#pragma warning restore 219 + } + } +} \ No newline at end of file diff --git a/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs b/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs new file mode 100644 index 00000000..000f64ce --- /dev/null +++ b/test/SeqCli.Tests/PlainText/ExtractionPatternParserTests.cs @@ -0,0 +1,67 @@ +using System; +using System.Linq; +using SeqCli.PlainText.Patterns; +using Superpower; +using Xunit; + +namespace SeqCli.Tests.PlainText +{ + public class ExtractionPatternParserTests + { + [Fact] + public void ARegularStringIsASingleTextLiteral() + { + var pattern = ExtractionPatternParser.Parse("Hello!"); + Assert.Single(pattern.Elements); + var tt = Assert.IsType(pattern.Elements.Single()); + Assert.Equal("Hello!", tt.Text); + } + + [Fact] + public void CaptureNameAndTypeAreParsed() + { + var pattern = ExtractionPatternParser.Parse("{abc:def}"); + Assert.Single(pattern.Elements); + var ct = Assert.IsType(pattern.Elements.Single()); + Assert.Equal("abc", ct.Name); + Assert.Equal("def", ((MatchTypeContentExpression)ct.Content).Type); + } + + [Theory] + [InlineData("", false)] + [InlineData("{}", false)] + [InlineData("{a", false)] + [InlineData("a", true)] + [InlineData("{a}", true)] + [InlineData("{@m}", true)] + [InlineData("{@@m}", false)] + [InlineData("{m@}", false)] + [InlineData("{@m:n}", true)] + [InlineData("{@m:*}", true)] + 
[InlineData("{@m:***}", true)] + [InlineData("{:*}", true)] + [InlineData("{a:}", false)] + [InlineData("{@m:n}", true)] + [InlineData("{m_N}", true)] + [InlineData("{_9}", true)] + [InlineData("{:n}", true)] + [InlineData("{:}", false)] + [InlineData("{{@m}}", true)] + [InlineData("{{a", true)] + [InlineData("a}}", true)] + [InlineData("{", false)] + [InlineData("}", false)] + [InlineData("{a} b{c} ", true)] + [InlineData("d {a}b {c}", true)] + [InlineData("{:={@m}}", true)] + [InlineData("Loaded {SignalId:=signal-{:nat}}", true)] + [InlineData("{:={Year:num}-{Month:num}}", true)] + public void OnlyValidPatternsAreAccepted(string attempt, bool isValid) + { + if (isValid) + ExtractionPatternParser.Parse(attempt); + else + Assert.Throws(() => ExtractionPatternParser.Parse(attempt)); + } + } +} \ No newline at end of file diff --git a/test/SeqCli.Tests/PlainText/FrameReaderTests.cs b/test/SeqCli.Tests/PlainText/FrameReaderTests.cs new file mode 100644 index 00000000..d1f97058 --- /dev/null +++ b/test/SeqCli.Tests/PlainText/FrameReaderTests.cs @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using SeqCli.PlainText; +using SeqCli.PlainText.Parsers; +using Superpower; +using Superpower.Model; +using Superpower.Parsers; +using Xunit; + +namespace SeqCli.Tests.PlainText +{ + public class FrameReaderTests + { + [Fact] + public async Task SplitsLinesIntoFrames() + { + var source = new StringBuilder(); + source.AppendLine("first"); + source.AppendLine("second"); + + var reader = new FrameReader( + new StringReader(source.ToString()), + SpanEx.MatchedBy(Character.Letter), + TimeSpan.FromMilliseconds(1)); + + var first = await reader.TryReadAsync(); + Assert.True(first.HasValue); + Assert.Equal("first" + Environment.NewLine, first.Value); + + var second = await reader.TryReadAsync(); + Assert.True(second.HasValue); + Assert.Equal("second" + Environment.NewLine, second.Value); + + var empty 
= await reader.TryReadAsync(); + Assert.False(empty.HasValue); + } + + [Fact] + public async Task TerminatesWhenNoLinesArePresent() + { + var reader = new FrameReader( + new StringReader(""), + SpanEx.MatchedBy(Character.Letter), + TimeSpan.FromMilliseconds(1)); + + var none = await reader.TryReadAsync(); + Assert.False(none.HasValue); + } + + [Fact] + public async Task CollectsTrailingLines() + { + var source = new StringBuilder(); + source.AppendLine("first"); + source.AppendLine(" some more"); + source.AppendLine(" and more"); + source.AppendLine("second"); + source.AppendLine("third"); + source.AppendLine(" and yet more"); + + var frames = await ReadAllFrames(source.ToString(), SpanEx.MatchedBy(Character.Letter)); + Assert.Equal(3, frames.Length); + Assert.StartsWith("first", frames[0].Value); + Assert.EndsWith("and more" + Environment.NewLine, frames[0].Value); + } + + static async Task ReadAllFrames(string source, TextParser frameStart) + { + var reader = new FrameReader( + new StringReader(source), + frameStart, + TimeSpan.FromMilliseconds(1)); + + var result = new List(); + + var frame = await reader.TryReadAsync(); + while (frame.HasValue) + { + result.Add(frame); + frame = await reader.TryReadAsync(); + } + + return result.ToArray(); + } + } +} \ No newline at end of file diff --git a/test/SeqCli.Tests/PlainText/LogEventBuilderTests.cs b/test/SeqCli.Tests/PlainText/LogEventBuilderTests.cs new file mode 100644 index 00000000..7cf38310 --- /dev/null +++ b/test/SeqCli.Tests/PlainText/LogEventBuilderTests.cs @@ -0,0 +1,57 @@ +using System; +using System.Collections.Generic; +using SeqCli.PlainText; +using Serilog.Events; +using Superpower.Model; +using Xunit; + +namespace SeqCli.Tests.PlainText +{ + public class LogEventBuilderTests + { + [Fact] + public void SuppliedValuesAreUsed() + { + var properties = new Dictionary + { + ["@t"] = new TextSpan("2018-02-01T13:00:00.123Z"), + ["@l"] = new TextSpan("WRN"), + ["@m"] = new TextSpan("Hello, world"), + ["@x"] = 
new TextSpan("EverythingFailedException"), + ["MachineName"] = new TextSpan("TP"), + ["Count"] = 42 + }; + + var remainder = "rem"; + var evt = LogEventBuilder.FromProperties(properties, remainder); + + Assert.Equal("2018-02-01T13:00:00.1230000+00:00", evt.Timestamp.ToString("o")); + Assert.Equal("Hello, world", evt.RenderMessage()); + Assert.Equal(LogEventLevel.Warning, evt.Level); + Assert.Equal("EverythingFailedException", evt.Exception.ToString()); + Assert.Equal(42, ((ScalarValue)evt.Properties["Count"]).Value); + Assert.Equal("TP", ((ScalarValue)evt.Properties["MachineName"]).Value.ToString()); + Assert.Equal("rem", ((ScalarValue)evt.Properties["@unmatched"]).Value.ToString()); + } + + [Fact] + public void MissingValuesAreDefaulted() + { + var evt = LogEventBuilder.FromProperties(new Dictionary(), null); + + Assert.True(evt.Timestamp > DateTimeOffset.Now.AddSeconds(-5)); + Assert.Equal("", evt.RenderMessage()); + Assert.Equal(LogEventLevel.Information, evt.Level); + Assert.Null(evt.Exception); + Assert.Empty(evt.Properties); + } + + [Fact] + public void DateTimeOffsetTimestampsAreAccepted() + { + var then = DateTimeOffset.Now.AddDays(-5); + var evt = LogEventBuilder.FromProperties(new Dictionary{["@t"] = then}, null); + Assert.Equal(then, evt.Timestamp); + } + } +} \ No newline at end of file diff --git a/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs b/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs new file mode 100644 index 00000000..38fc85ac --- /dev/null +++ b/test/SeqCli.Tests/PlainText/NameValueExtractorTests.cs @@ -0,0 +1,103 @@ +using System; +using System.Linq; +using SeqCli.PlainText; +using SeqCli.PlainText.Extraction; +using Superpower.Model; +using Xunit; + +namespace SeqCli.Tests.PlainText +{ + public class NameValueExtractorTests + { + [Fact] + public void TheDefaultPatternMatchesMultilineMessages() + { + var frame = $"Hello,{Environment.NewLine} world!"; + var (properties, remainder) = 
ExtractionPatternInterpreter.MultilineMessageExtractor.ExtractValues(frame); + Assert.Null(remainder); + Assert.Single(properties, p => p.Key == ReifiedProperties.Message && + ((TextSpan)p.Value).ToStringValue() == frame); + } + + [Fact] + public void TheDefaultPatternDoesNotMatchLinesStartingWithWhitespace() + { + var frame = " world"; + var (properties, remainder) = ExtractionPatternInterpreter.MultilineMessageExtractor.ExtractValues(frame); + Assert.Empty(properties); + Assert.Equal(frame, remainder); + } + + static NameValueExtractor ClassMethodPattern { get; } = new NameValueExtractor(new[] + { + new SimplePatternElement(Matchers.Identifier, "class"), + new SimplePatternElement(Matchers.LiteralText(".")), + new SimplePatternElement(Matchers.Identifier, "method") + }); + + [Fact] + public void PatternsExtractElements() + { + var pattern = ClassMethodPattern; + + var frame = "this.that"; + var (properties, remainder) = pattern.ExtractValues(frame); + Assert.Null(remainder); + Assert.Equal("this", properties["class"].ToString()); + Assert.Equal("that", properties["method"].ToString()); + } + + [Fact] + public void TheFirstPatternElementIsExposed() + { + Assert.NotNull(ClassMethodPattern.StartMarker); + } + + [Fact] + public void SingleLineContentMatchesUntilEol() + { + var pattern = new NameValueExtractor(new[] + { + new SimplePatternElement(Matchers.Identifier, "first"), + new SimplePatternElement(Matchers.LiteralText(" ")), + new SimplePatternElement(Matchers.SingleLineContent, "content"), + new SimplePatternElement(Matchers.LiteralText(" (")), + new SimplePatternElement(Matchers.Identifier, "last"), + new SimplePatternElement(Matchers.LiteralText(")")) + }); + + var frame = "abc def ghi (jkl)"; + var (properties, remainder) = pattern.ExtractValues(frame); + Assert.Null(remainder); + Assert.Equal("abc", properties["first"].ToString()); + Assert.Equal("def ghi (jkl)", properties["content"].ToString()); + } + + [Fact] + public void 
NonGreedyContentStopsMatchingWhenFollowingTokensMatch() + { + // It's likely we'll only be able to get one or two tokens into + // the "following" list, since they effectively become "mandatory" + var following = new[] + { + new SimplePatternElement(Matchers.LiteralText(" (")), + new SimplePatternElement(Matchers.Identifier, "last"), + new SimplePatternElement(Matchers.LiteralText(")")) + }; + + var pattern = new NameValueExtractor(new[] + { + new SimplePatternElement(Matchers.Identifier, "first"), + new SimplePatternElement(Matchers.LiteralText(" ")), + new SimplePatternElement(Matchers.NonGreedyContent(following), "content"), + }.Concat(following)); + + var frame = "abc def ghi (jkl)"; + var (properties, remainder) = pattern.ExtractValues(frame); + Assert.Null(remainder); + Assert.Equal("abc", properties["first"].ToString()); + Assert.Equal("def ghi", properties["content"].ToString()); + Assert.Equal("jkl", properties["last"].ToString()); + } + } +} \ No newline at end of file