From 33bb1a019c18d2b66f370e9bbbe0369d8428581d Mon Sep 17 00:00:00 2001
From: Dom Ule <dom@ule.de>
Date: Wed, 13 Mar 2019 16:14:32 +0000
Subject: [PATCH 1/4] - Can now be run on Windows. - Now adds a copy of the
 [@metadata] field to the output called __[@metadata]. This allows testing
 metadata fields. - Now also produces an output file in Awesome Print format
 using the rubydebug codec. This is just another format in addition to the
 JSON file that's already produced and it's not actually used by
 logstash-filter-test, but it can be useful for debugging as it contains the
 [@metadata] field and is formatted in a more human-readable way.

---
 logstash_filter_run.py | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/logstash_filter_run.py b/logstash_filter_run.py
index 867019a..21c2869 100644
--- a/logstash_filter_run.py
+++ b/logstash_filter_run.py
@@ -17,6 +17,14 @@
   pipeline.workers: 1
 """
 
+# Filter config which copies the otherwise hidden @metadata field and its sub-fields to a separate field
+# that will be accessible in the output. This allows writing test cases that can assert metadata values.
+POST_PROCESSOR_FILTER_CONF = """\
+filter {
+  mutate { copy => { "[@metadata]" => "__@metadata" } }
+}
+"""
+
 INPUT_OUTPUT_CONF = """\
 input {
   stdin {
@@ -27,10 +35,13 @@
   file {
     path => "%s"
   }
+  file {
+    path => "%s"
+    codec => rubydebug { metadata => true }
+  }
 }
 """
 
-
 def logstash_filter_run(inputs, filter_def, logstash_bin=None, remove_tempdir=True):
     """
     Run a bunch of json through logstash given the filter definition
@@ -59,13 +70,25 @@ def logstash_filter_run(inputs, filter_def, logstash_bin=None, remove_tempdir=Tr
     os.mkdir(config_dir)
     os.mkdir(pipeline_dir)
     open(join(config_dir, 'logstash.yml'), 'w').close()
+
     with open(join(config_dir, 'pipelines.yml'), 'w') as f:
-        f.write(PIPELINES_YML.format(pipeline_dir))
-    output_fn = join(workdir, 'output')
+        if os.name == 'nt':
+            # Somehow, on Windows the path has to be prefixed with a slash (in front of the drive letter)
+            # and the path separator has to be the forward slash.
+            formatted_pipeline_dir = '/' + pipeline_dir.replace('\\', '/')
+        else:
+            formatted_pipeline_dir = pipeline_dir
+        f.write(PIPELINES_YML.format(formatted_pipeline_dir))
+
+    output_json_fn = join(workdir, 'output-json')
+    output_ap_fn = join(workdir, 'output-ap')
     with open(join(pipeline_dir, 'io.conf'), 'w') as f:
-        f.write(INPUT_OUTPUT_CONF % output_fn)
-    with open(join(pipeline_dir, 'filter.conf'), 'w') as f:
+        f.write(INPUT_OUTPUT_CONF % (output_json_fn, output_ap_fn))
+    with open(join(pipeline_dir, 'filter_1_candidate.conf'), 'w') as f:
         f.write(filter_def)
+    with open(join(pipeline_dir, 'filter_2_post_processor.conf'), 'w') as f:
+        f.write(POST_PROCESSOR_FILTER_CONF)
+        
     inputs_s = ''.join(s+'\n' for s in input_jsons)
     args = [logstash_bin, '--log.level=warn',
             '--path.settings', config_dir, '--path.data', data_dir]
@@ -76,7 +99,7 @@ def logstash_filter_run(inputs, filter_def, logstash_bin=None, remove_tempdir=Tr
     if rc != 0:
         raise RuntimeError("logstash returned non-zero return code {}"
                            .format(rc))
-    output_lines = list(open(output_fn))
+    output_lines = list(open(output_json_fn))
     if len(output_lines) != len(inputs):
         raise RuntimeError("Received {} outputs, expecting {}"
                            .format(len(output_lines), len(inputs)))

From 2f893ee39517af0e0c87ad39026135ce01eff85e Mon Sep 17 00:00:00 2001
From: Dom Ule <dom@ule.de>
Date: Wed, 13 Mar 2019 16:17:06 +0000
Subject: [PATCH 2/4] - Now accepts the new argument --remove_tempdir with
 possible values of "yes" and "no". - Now accepts the new argument --logstash
 to set the location of the Logstash executable.

---
 logstash_filter_test.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/logstash_filter_test.py b/logstash_filter_test.py
index 1ae6207..928844e 100755
--- a/logstash_filter_test.py
+++ b/logstash_filter_test.py
@@ -5,6 +5,7 @@
 import json
 
 from logstash_filter_run import logstash_filter_run
+from logstash_filter_run import LOGSTASH_BIN_ALTERNATIVES
 
 
 # This is copied from https://github.com/linjackson78/jstyleson
@@ -110,7 +111,6 @@ def _remove_last_comma(str_list, before_index):
     if str_list[i] == ',':
         str_list[i] = ''
 
-
 def print_results(testcases, outputs):
     expecteds = [expected for _inp, expected in testcases]
     n_errs = 0
@@ -138,11 +138,11 @@ def json_dumps(x):
     return n_errs
 
 
-def logstash_filter_test(filter_fn='filter.conf', testcases_fn='testcases.js'):
+def logstash_filter_test(filter_fn='filter.conf', testcases_fn='testcases.js', logstash_bin_fn=None, remove_tempdir="yes"):
     testcases = json.loads(dispose(open(testcases_fn).read()))
     inputs = [inp for inp, _expected in testcases]
     filter_def = open(filter_fn).read()
-    outputs = logstash_filter_run(inputs, filter_def)
+    outputs = logstash_filter_run(inputs, filter_def, logstash_bin_fn, remove_tempdir == "yes")
     n_errs = print_results(testcases, outputs)
     return outputs, n_errs
 
@@ -154,9 +154,14 @@ def main():
                         help="File with logstash filter definition to test. default: filter.conf")
     parser.add_argument("--testcases", default="testcases.js",
                         help="File with testcases. default: testcases.js")
+    parser.add_argument("--logstash", default=None,
+                        help="Path to Logstash executable. default: " + ",".join(LOGSTASH_BIN_ALTERNATIVES))
+    parser.add_argument("--remove_tempdir", default="yes",
+                        help="Whether to remove the temp dir (yes/no). default: yes")
+
     args = parser.parse_args()
 
-    _outputs, n_errs = logstash_filter_test(args.filters, args.testcases)
+    _outputs, n_errs = logstash_filter_test(args.filters, args.testcases, args.logstash, args.remove_tempdir)
 
     return 0 if n_errs == 0 else 1
 

From 873edaab0c1882ed483b57a2d7fcc80052af6c4e Mon Sep 17 00:00:00 2001
From: Dom Ule <dom@ule.de>
Date: Wed, 13 Mar 2019 16:36:45 +0000
Subject: [PATCH 3/4] Added section describing command line arguments

---
 README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/README.md b/README.md
index ed75d34..2554ecf 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,21 @@ fields that are defined must be equal to the fields in the output document.
 The output document may include other fields. To test that an output field
 doesn't exist, use `"field": null`.
 
+## Command line arguments
+
+| Argument | Description | Default |
+| -------- | ----------- | ------- |
+| `--filters` | File with Logstash filter definition to test. | `filter.conf` |
+| `--testcases` | File with test cases. | `testcases.js` |
+| `--remove_tempdir` | Whether to remove the temp dir that is created during execution (yes/no). | `yes` (any other value will be equivalent to `no`) |
+| `--logstash` | Path to the Logstash executable. | \[ `/opt/logstash/bin/logstash`, `/usr/share/logstash/bin/logstash` \] |
+
+Example on Windows:
+
+```
+logstash_filter_test.py --remove_tempdir=yes --logstash C:\path\to\logstash-6.2.3\bin\logstash.bat --filters C:\path\to\logstash\indexer\config\filter.conf --testcases C:\path\to\logstash\indexer\test\testcases.js
+```
+
 ## Testing from Python
 
 If you don't like the testcase file format, it's easy to test by yourself:

From dc8a8b27d1c0c792e71a6171b2a8e7159b78306d Mon Sep 17 00:00:00 2001
From: Dom Ule <dom@ule.de>
Date: Wed, 13 Mar 2019 17:03:20 +0000
Subject: [PATCH 4/4] Added section on testing [@metadata]

---
 README.md | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/README.md b/README.md
index 2554ecf..44ee051 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,36 @@ fields that are defined must be equal to the fields in the output document.
 The output document may include other fields. To test that an output field
 doesn't exist, use `"field": null`.
 
+## Testing `[@metadata]`
+
+Logstash allows using a transient field called `[@metadata]`, which is not emitted by any output plugin. This is useful if you want to influence the flow of the filter (and/or the output) section without using a field that would be present in the output. The metadata field acts like a bucket for internal variables (it is a nested field on which you can set sub-fields). For example, in the filter section you could set `[@metadata][target_index]` to a desired value for an Elasticsearch index and then in the output section in the `elasticsearch` plugin you could use `%{[@metadata][target_index]}-%{+yyyy.MM.dd}` as your index name pattern.
+
+However, since `[@metadata]` is by definition not produced by the output section, it is consequently not possible to write test cases that verify that the filter section set the correct data in the `[@metadata]` field (or that it correctly didn't set metadata). Once your filter and output sections start relying on metadata it becomes critical that you are also able to write test cases that cover metadata. But, rejoice, as with logstash-filter-test you can still test metadata! The whole `[@metadata]` field will be copied into another field called `[__@metadata]`, allowing you to do something like this:
+
+```
+    [
+        {
+            "message" : "my sample log line",
+            "path" : "/path/to/source/file.log",
+            "host" : "my-host-name"
+        },
+        {
+            "__@metadata" : 
+            {
+                "target_index" : "kittycat"
+            },
+           "@timestamp" : "2019-02-10T00:19:02.106Z",
+           "hostname" : "my-host-name",
+           "level" : "INFO",
+           "source.file.path.raw" : "/path/to/source/file.log",
+           "tags" : null
+        }
+    ]
+```
+
+In addition, if you choose to not remove the temp directory that logstash-filter-test creates during execution (see corresponding command line argument), then you can have a look at the file `pipeline.d/output-ap`. Running your test cases through your filters will generate this output in the Ruby Awesome Print format, which is what Logstash's rubydebug codec uses. The AP format is just another format in addition to the JSON file `output-json` that logstash-filter-test also produces. The AP file is not actually used by logstash-filter-test, but it can be useful for debugging your test cases and filters, because the file contains the `[@metadata]` field exactly as set and modified by your filters (no renaming to `[__@metadata]`). It is also formatted in a more human-readable way than the JSON output file.
+
+
 ## Command line arguments
 
 | Argument | Description | Default |