diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 06f3f80ee..a1eaf69cd 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -17,6 +17,14 @@ endif::[] This document provides a high-level view of the changes to the macOS Security Compliance Project. +== [Unreleased] + +=== Scripts +* Add `--markdown-tree` flag to `mscp guidance` - generates a paginated Markdown directory tree (one page per rule, `NN-` ordered filenames, `index.md` per section) ready to use with Docusaurus, Starlight, MkDocs, VitePress, or any CommonMark-based static site generator. Drop the output directory into a docs folder - no post-processing required. Also included in `--all`. + +=== Bug Fixes +* Fix crash in `adoc/rule.adoc.jinja` when `rule.references.hhs` is absent - rules predating the HICP framework do not carry this key. + == [mSCP 2.x] - 2025-02-28 IMPORTANT: This release is a major update and includes breaking changes. Please review the documentation before upgrading. diff --git a/pyproject.toml b/pyproject.toml index b8042af04..6aaa312a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,3 +74,5 @@ packages = ["src/mscp"] [tool.pytest.ini_options] testpaths = ["tests"] +pythonpath = ["src"] +addopts = "--import-mode=importlib" diff --git a/src/mscp/classes/macsecurityrule.py b/src/mscp/classes/macsecurityrule.py index d2b8924c3..b39696e42 100644 --- a/src/mscp/classes/macsecurityrule.py +++ b/src/mscp/classes/macsecurityrule.py @@ -19,7 +19,7 @@ # Additional python modules from lxml import etree -from pydantic import BaseModel, ConfigDict, Field +from pydantic import ConfigDict, Field # Local python modules from ._base import BaseModelWithAccessors diff --git a/src/mscp/cli.py b/src/mscp/cli.py index 7758638a3..77f12878a 100644 --- a/src/mscp/cli.py +++ b/src/mscp/cli.py @@ -396,6 +396,13 @@ def parse_cli() -> None: help="generate documentation in markdown format", action="store_true", ) + guidance_parser.add_argument( + "--markdown-tree", + help="""R|generate documentation as a 
paginated Markdown directory tree +(one page per rule, NN- ordered filenames, index.md per section) +ready to use with Docusaurus, Starlight, MkDocs, or VitePress""", + action="store_true", + ) guidance_parser.add_argument( "-p", "--profiles", diff --git a/src/mscp/common_utils/__init__.py b/src/mscp/common_utils/__init__.py index 9fc43119e..8074c9c8d 100644 --- a/src/mscp/common_utils/__init__.py +++ b/src/mscp/common_utils/__init__.py @@ -13,7 +13,13 @@ (`conditional_inject_spinner`). """ -from .config import config, set_custom_dir, ensure_custom_dirs, search_paths +from .config import ( + config, + CONFIG_PATH, + set_custom_dir, + ensure_custom_dirs, + search_paths, +) from .constants import SCHEMA_PATH, APPLE_OS, NIX_OS, PLATFORM_MAP from .customization import collect_overrides from .file_handling import ( @@ -77,7 +83,8 @@ "SCHEMA_PATH", "APPLE_OS", "NIX_OS", - "PLATFORM_MAPvalidate_yaml_file", + "PLATFORM_MAP", + "validate_yaml_file", "logger", "get_supported_languages", "collect_overrides", diff --git a/src/mscp/data/templates/documents/markdown/rule.md.jinja b/src/mscp/data/templates/documents/markdown/rule.md.jinja index c5af35958..04b7cd492 100644 --- a/src/mscp/data/templates/documents/markdown/rule.md.jinja +++ b/src/mscp/data/templates/documents/markdown/rule.md.jinja @@ -2,9 +2,11 @@ {% set additional_info = rule | get_nested(["platforms", rule.os_type, "enforcement_info", "fix", "additional_info"]) %} {% set check_shell = rule | get_nested(["platforms", rule.os_type, "enforcement_info", "check", "shell"]) %} {% set fix_shell = rule | get_nested(["platforms", rule.os_type, "enforcement_info", "fix", "shell"]) %} +{% if not markdown_tree | default(false) %} ### {{ rule.title }} +{% endif %} {% if rule.tags is defined or rule.tags is not none and "supplemental" in rule.tags %} {{ rule.discussion | include_replace | asciidoc_to_markdown }} {% else %} @@ -18,6 +20,10 @@ {% if not rule.tags | select('in', check_tags) | list %} {% if rule.tags is not 
defined or rule.tags is none or "supplemental" not in rule.tags %} {% if rule.os_type.lower() in NIX_OS %} +{% if markdown_tree | default(false) %} +## {% trans %}Check{% endtrans %} + +{% endif %} {% trans %}To check the state of the system, run the following command(s){% endtrans %}: ```bash @@ -35,6 +41,87 @@ {% trans %}If the result is not{% endtrans %} _{{ rule.result_value }}_, {% trans %}this is a finding{% endtrans %}. {% endif %} +{% if markdown_tree | default(false) %} +## {% trans %}Remediation Description{% endtrans %} + +{% if rule.fix is not none %} +{% trans %}Perform the following to configure the system to meet the requirements{% endtrans %}: +{% endif %} + +{% if rule.mobileconfig_info is none and rule.fix is not none or fix_shell %} +{% trans %}Run the following command(s){% endtrans %}: + +```bash +{{ rule.fix | asciidoc_to_markdown if rule.fix is not none else fix_shell }} +``` + +{% elif additional_info is not none %} +{{ additional_info | asciidoc_to_markdown }} +{% elif rule.mobileconfig_info is not none and fix_shell %} +{% if fix_shell %} +```bash +{{ fix_shell -}} +``` +{% endif %} + +{% if rule.mobileconfig_info %} +```xml +{{ rule.mobileconfig_info | mobileconfig_payloads_to_xml -}} +``` +{% endif %} +{% else %} +{% if rule.os_name == "macos" %} +{{ rule.fix | asciidoc_to_markdown }} +{% else %} +{% trans %}Deploy a configuration profile containing the following payload.{% endtrans %} + +```xml +{{ rule.mobileconfig_info | mobileconfig_payloads_to_xml -}} +``` +{% endif %} +{% endif %} + +## {% trans %}References{% endtrans %} + +| | | +|---|---| +| **ID** | `{{ rule.rule_id }}` | +{% if rule.severity is not none %} +| **Severity** | {{ rule.severity }} | +{% endif %} +| **800-53r5** | {{ rule.references.nist.nist_800_53r5 | group_ulify if rule.references.nist.nist_800_53r5 is not none }} | +{% if "800-171" in baseline.title | upper or show_all_tags %} +| **800-171r3** | {{ rule.references.nist.nist_800_171r3 | render_rules if 
rule.references.nist.nist_800_171r3 is not none }} | +{% endif %} +{% if "STIG" in baseline.title | upper or show_all_tags %} +| **DISA STIG(s)** | {{ rule.references.disa.disa_stig | render_rules if rule.references.disa.disa_stig is not none }} | +{% if rule.references.disa.sfr is not none %} +| **SFR** | {{ rule.references.disa.sfr | render_rules if rule.references.disa.sfr is not none }} | +{% endif %} +{% endif %} +{% if "CIS" in baseline.title | upper or show_all_tags %} +| **CIS Benchmark** | {{ rule.references.cis.benchmark | render_rules if rule.references.cis.benchmark is not none }} | +| **CIS Controls V8** | {{ rule.references.cis.controls_v8 | render_rules if rule.references.cis.controls_v8 is not none }} | +{% endif %} +{% if "INDIGO" in baseline.title | upper or show_all_tags %} +| **indigo** | {{ rule.references.bsi.indigo | render_rules if rule.references.bsi.indigo is not none }} | +{% endif %} +{% if "CMMC" in baseline.title | upper or show_all_tags %} +| **CMMC** | {{ rule.references.disa.cmmc | render_rules if rule.references.disa.cmmc is not none }} | +{% endif %} +{% if "HICP_LP" in baseline.title | upper or show_all_tags %} +| **HICP** | {{ rule.references.hhs.hicp | render_rules if rule.references.hhs.hicp is not none }} | +{% endif %} +{% if rule.references.nist.cce is not none %} +| **CCE** | {{ rule.references.nist.cce | render_rules }} | +{% endif %} +{% if "references" in rule.customized %} +| **Custom References** | {{ rule.references.custom_refs.references | render_references if rule.references.custom_refs is not none }} | +{% endif %} +{% if show_all_tags %} +| **TAGS** | {{ rule.tags | render_rules }} | +{% endif %} +{% else %}
@@ -84,6 +171,7 @@
{% endif %} +{% if not markdown_tree | default(false) %} @@ -170,3 +258,5 @@
ID
{% endif %} +{% endif %} +{% endif %} diff --git a/src/mscp/data/templates/documents/markdown_tree/index.md.jinja b/src/mscp/data/templates/documents/markdown_tree/index.md.jinja new file mode 100644 index 000000000..93cf34d7a --- /dev/null +++ b/src/mscp/data/templates/documents/markdown_tree/index.md.jinja @@ -0,0 +1,7 @@ +{% include "foreword.jinja" %} + +{% include "scope.jinja" %} + +{% include "authors.jinja" %} + +{% include "acronyms.jinja" %} diff --git a/src/mscp/generate/guidance.py b/src/mscp/generate/guidance.py index 7ef0bdf06..3cf6b6254 100644 --- a/src/mscp/generate/guidance.py +++ b/src/mscp/generate/guidance.py @@ -37,6 +37,7 @@ from ..generate.guidance_support import ( generate_ddm, generate_documents, + generate_markdown_tree, generate_excel, generate_profiles, generate_script, @@ -155,8 +156,9 @@ def generate_guidance(sp: Yaspin, args: argparse.Namespace) -> None: args (argparse.Namespace): Parsed CLI arguments. Expected attributes: ``baseline``, ``os_name``, ``language``, ``dark``, ``hash``, ``reference``, ``logo``, ``audit_name``, ``profiles``, ``ddm``, - ``script``, ``xlsx``, ``gary``, ``markdown``, ``manifest``, - ``all``, ``consolidated_profile``, ``granular_profiles``. + ``script``, ``xlsx``, ``gary``, ``markdown``, ``markdown_tree``, + ``manifest``, ``all``, ``consolidated_profile``, + ``granular_profiles``. """ # Transparently migrate legacy (pre-2.0) baselines before deriving any # paths. 
Updating args.baseline here means all subsequent path derivations @@ -329,6 +331,18 @@ def generate_guidance(sp: Yaspin, args: argparse.Namespace) -> None: language=args.language, ) + if args.markdown_tree: + logger.info("Generating paginated Markdown tree") + sp.text = "Generating Markdown tree" + time.sleep(1) + generate_markdown_tree( + build_path, + baseline, + current_version_data, + show_all_tags, + language=args.language, + ) + if args.manifest: logger.info("Generating JSON manifest") sp.text = "Generating JSON manifest" @@ -380,23 +394,36 @@ def generate_guidance(sp: Yaspin, args: argparse.Namespace) -> None: time.sleep(1) generate_excel(spreadsheet_output_file, baseline) - logger.info("Generating markdown documents") - sp.text = "Generating markdown" - time.sleep(1) - generate_documents( - sp, - md_output_file, - baseline, - b64logo, - pdf_theme, - html_css, - logo_path, - baseline.platform["os"], - current_version_data, - show_all_tags, - output_format="markdown", - language=args.language, - ) + if not args.markdown: + logger.info("Generating markdown documents") + sp.text = "Generating markdown" + time.sleep(1) + generate_documents( + sp, + md_output_file, + baseline, + b64logo, + pdf_theme, + html_css, + logo_path, + baseline.platform["os"], + current_version_data, + show_all_tags, + output_format="markdown", + language=args.language, + ) + + if not args.markdown_tree: + logger.info("Generating paginated Markdown tree") + sp.text = "Generating Markdown tree" + time.sleep(1) + generate_markdown_tree( + build_path, + baseline, + current_version_data, + show_all_tags, + language=args.language, + ) logger.info("Generating JSON manifest") sp.text = "Generating JSON manifest" diff --git a/src/mscp/generate/guidance_support/__init__.py b/src/mscp/generate/guidance_support/__init__.py index bae8fe5ca..ec870b017 100644 --- a/src/mscp/generate/guidance_support/__init__.py +++ b/src/mscp/generate/guidance_support/__init__.py @@ -2,7 +2,8 @@ """Guidance artifact 
sub-generators used by `generate_guidance`. Re-exports: `generate_ddm` (DDM JSON/ZIP artifacts), `generate_documents` -(AsciiDoc / PDF / HTML / Markdown), `generate_excel` (Excel workbook), +(AsciiDoc / PDF / HTML / Markdown), `generate_markdown_tree` (paginated +Markdown tree for static site generators), `generate_excel` (Excel workbook), `generate_profiles` (configuration profiles), `generate_script` and `generate_restore_script` (compliance shell scripts), and `generate_manifest` (JSON manifest). @@ -11,6 +12,7 @@ __all__ = [ "generate_ddm", "generate_documents", + "generate_markdown_tree", "generate_excel", "generate_profiles", "generate_script", @@ -21,6 +23,7 @@ from .ddm import generate_ddm from .documents import generate_documents +from .markdown_tree import generate_markdown_tree from .excel import generate_excel from .profiles import generate_profiles from .script import generate_script, generate_restore_script diff --git a/src/mscp/generate/guidance_support/markdown_tree.py b/src/mscp/generate/guidance_support/markdown_tree.py new file mode 100644 index 000000000..9ec1bfc67 --- /dev/null +++ b/src/mscp/generate/guidance_support/markdown_tree.py @@ -0,0 +1,410 @@ +# mscp/generate/guidance_support/markdown_tree.py +"""Paginated Markdown tree output for mSCP guidance documents. + +Provides `generate_markdown_tree`, which renders a baseline as a directory tree +suitable for any CommonMark-based static site generator (Docusaurus, Starlight, +MkDocs, VitePress, etc.): + + //markdown_tree/ + index.md # overview: foreword, scope, authors, acronyms + 02-/ + index.md # section description (category landing page) + 01-.md # one page per rule + 03-/ + ... + +Ordering uses ``NN-`` numeric prefixes on directory and file names so all +generators that sort alphabetically by filename display sections and rules in the +correct order without any extra configuration. 
The ``index.md`` naming +convention is honoured by Docusaurus (category-index doc), Starlight +(auto-generated sidebar root), MkDocs (default nav), and is also the natural +GitHub browsing entrypoint. + +Frontmatter is kept minimal - ``title`` only - so it works across all +generators without modification. Docusaurus-specific sidebar metadata +(``sidebar_position``, +``description``, ``_category_.json`` sidecars) is intentionally omitted; the +``NN-`` prefix scheme provides the same ordering guarantee without requiring any +framework-specific files. + +Rendering reuses the existing shared Jinja includes and Markdown filters +(``asciidoc_to_markdown``, ``group_ulify_md``, ``render_rules_md``) plus the +shared ``markdown_tree/rule.md.jinja`` template (which extends +``markdown/rule.md.jinja`` with per-page structural changes). A whole-page +MDX-safety pass is applied once per rendered page rather than per-field in the +template, giving complete coverage and a single point of maintenance. + +MDX-safety notes (Docusaurus / Starlight compile ``.md`` through MDX by +default): +- ``{``/``}`` are entity-encoded outside fenced blocks (JSX expression syntax). +- Bare ``<`` not opening a known HTML tag is entity-encoded (JSX element syntax). +- HTML void tags (``
<br>``, ``<hr>``, ``<img>``) are normalised to self-closing + form (``<br />
``) so MDX does not flag unclosed elements. + +Known limitation: bare ```` autolinks in rule prose are +entity-encoded to visible text (AsciiDoc ``link:`` macros convert correctly). + +No dependencies beyond what the project already uses (Jinja2 + stdlib). +""" + +# Standard python modules +import gettext +import re +from pathlib import Path +from typing import Any + +# Additional python modules +from jinja2 import Environment, FileSystemLoader + +# Local python modules +from ...classes import Baseline, Macsecurityrule +from ...common_utils import ( + NIX_OS, + config, + logger, + make_dir, + mscp_data, + open_file, + search_paths, +) +from .documents import ( + asciidoc_to_markdown, + get_nested, + group_ulify_md, + render_rules_md, + replace_include_with_file_content, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def create_slug(text: str) -> str: + """Convert *text* to a URL/filename-safe slug. + + Produces a lowercase, hyphen-separated identifier from arbitrary display + text. Used for section directory names and rule filenames so they are + stable, ASCII-safe, and readable in the filesystem. + + Args: + text (str): Arbitrary display text (section or rule title). + + Returns: + str: Slug such as ``"system-settings"``. + """ + slug = text.lower() + slug = re.sub(r"[^\w\s-]", "", slug) + slug = re.sub(r"[\s_]+", "-", slug) + slug = re.sub(r"-{2,}", "-", slug) + return slug.strip("-") + + +def mdx_escape(value: str) -> str: + """Entity-encode MDX expression delimiters outside fenced code blocks. + + MDX parses ``{…}`` as a JSX expression and a bare ``<`` that does not open + a valid element as a parse error, so prose containing shell snippets or + plist fragments breaks the page. Fenced code blocks are already literal in + MDX; inline code spans are also left untouched. 
Everything else gets ``{``, + ``}`` and any ``<`` not opening a plausible HTML/JSX tag entity-encoded. + + This function is applied once to the fully-rendered page body (not per-field + in templates) so coverage is complete. + + Args: + value (str): Markdown text (may contain fenced code blocks). + + Returns: + str: MDX- and CommonMark-safe Markdown. + """ + if not value: + return value + + # ``<`` is left alone only when it opens/closes a KNOWN HTML tag the + # markdown templates emit. Anything else - including plist fragments such + # as ```` or ```` - is entity-encoded, since MDX would otherwise + # parse it as an (unclosed) JSX element. + html_tags = ( + "a|br|hr|table|thead|tbody|tr|td|th|p|div|span|strong|em|b|i|u|code|pre" + "|ul|ol|li|img|sub|sup|details|summary|h[1-6]" + ) + tag_like = re.compile(rf"<(/?(?:{html_tags}))(\s[^<>]*)?/?>", re.IGNORECASE) + # HTML void elements must be self-closing in MDX/JSX. Applied to stashed + # tags only (inside _escape_prose), so code blocks stay untouched. + # The trailing ``/?`` makes the rewrite idempotent for already-self-closing. + void_tag = re.compile(r"<(br|hr|img)((?:\s[^<>]*?)?)\s*/?\s*>", re.IGNORECASE) + + def _escape_prose(text: str) -> str: + placeholders: list[str] = [] + + def _stash(m: re.Match) -> str: + tag = m.group(0) + # Normalise allowlisted void tags to self-closing JSX form here - + # never on raw input, which would mutate fenced/inline code. + tag = void_tag.sub( + lambda v: f"<{v.group(1)}{(v.group(2) or '').rstrip()} />", tag + ) + placeholders.append(tag) + return f"\x00TAG{len(placeholders) - 1}\x00" + + # Protect inline code spans (single-backtick) before escaping. + # NOTE: double-backtick spans (``code``) are matched as two empty spans; + # their interior is entity-encoded - semantically identical output for + # braces, but documented here as a known limitation. 
+ spans: list[str] = [] + + def _stash_span(m: re.Match) -> str: + spans.append(m.group(0)) + return f"\x00SPAN{len(spans) - 1}\x00" + + text = re.sub(r"`[^`\n]*`", _stash_span, text) + text = tag_like.sub(_stash, text) + text = text.replace("{", "{").replace("}", "}").replace("<", "<") + for i, tag in enumerate(placeholders): + text = text.replace(f"\x00TAG{i}\x00", tag) + for i, span in enumerate(spans): + text = text.replace(f"\x00SPAN{i}\x00", span) + return text + + # Split on fenced code blocks; escape only the prose segments. + parts = re.split(r"(```.*?```)", value, flags=re.DOTALL) + return "".join( + part if part.startswith("```") else _escape_prose(part) for part in parts + ) + + +def render_references_md(reference_set) -> str: + """Render custom-reference dicts as a single GFM-table-safe cell string. + + The shared ``render_references`` emits AsciiDoc cell rows (newlines + ``!`` + markers), which terminate a GFM pipe-table row. This variant flattens each + dict to ``**key**: value`` pairs joined with ``
<br>`` and escapes ``|``. + + Args: + reference_set: Sequence of dicts (same contract as ``render_references``). + + Returns: + str: ``<br>
``-joined cell content, or ``""`` when empty. + """ + if not reference_set: + return "" + parts: list[str] = [] + for d in reference_set: + for key, val in d.items(): + if isinstance(val, (list, tuple)): + rendered = ", ".join(str(v) for v in val) + else: + rendered = str(val) + parts.append(f"**{key}**: {rendered}".replace("|", r"\|")) + return "
".join(parts) + + +def _frontmatter(fields: dict[str, Any]) -> str: + """Render a minimal YAML frontmatter block. + + Strings are single-quoted with embedded single-quotes doubled (YAML + single-quote escaping); other scalars are emitted bare. + + Args: + fields (dict[str, Any]): Frontmatter key/value pairs, in order. + + Returns: + str: The complete ``---``-delimited frontmatter block. + """ + lines = ["---"] + for key, value in fields.items(): + if isinstance(value, str): + sanitised = value.replace("\r", "").replace("\n", " ") + quoted = sanitised.replace("'", "''") + lines.append(f"{key}: '{quoted}'") + else: + lines.append(f"{key}: {value}") + lines.append("---") + return "\n".join(lines) + + +def _translations(language: str) -> gettext.NullTranslations: + """Load gettext translations for *language* (with English fallback). + + Args: + language (str): BCP-47 language code. + + Returns: + gettext.NullTranslations: Translation catalogue for the language. + """ + return gettext.translation( + domain="messages", + localedir=config["locales_dir"], + languages=[language], + fallback=True, + ) + + +def _build_env(template_dirs: list[str], language: str) -> Environment: + """Construct the Jinja environment for markdown-tree rendering. + + Identical filter set to the existing Markdown rendering environment plus + the GFM-table-safe ``render_references`` variant. The ``mdx_escape`` + filter is registered but intentionally NOT called from templates - escaping + is applied once per page in Python after the full body is rendered. + + Args: + template_dirs (list[str]): Ordered template search paths (custom + directories shadow bundled ones). + language (str): BCP-47 language code for gettext lookup. + + Returns: + Environment: Configured Jinja environment. 
+ """ + translations = _translations(language) + env = Environment( + loader=FileSystemLoader(template_dirs), + trim_blocks=True, + lstrip_blocks=True, + autoescape=False, + extensions=["jinja2.ext.i18n"], + keep_trailing_newline=True, + ) + env.filters["group_ulify"] = group_ulify_md + env.filters["include_replace"] = replace_include_with_file_content + env.filters["render_rules"] = render_rules_md + env.filters["render_references"] = render_references_md + env.filters["get_nested"] = get_nested + env.filters["mobileconfig_payloads_to_xml"] = ( + Macsecurityrule.mobileconfig_info_to_xml + ) + env.filters["asciidoc_to_markdown"] = asciidoc_to_markdown + # Registered for template compatibility; whole-page escaping is done in + # Python rather than per-field to ensure complete coverage. + env.filters["mdx_escape"] = mdx_escape + env.install_gettext_translations(translations) + return env + + +# --------------------------------------------------------------------------- +# Public entry point +# --------------------------------------------------------------------------- + + +def generate_markdown_tree( + build_path: Path, + baseline: Baseline, + version_info: dict[str, Any], + show_all_tags: bool = False, + language: str = "en", +) -> None: + """Render *baseline* as a paginated Markdown directory tree. + + Writes ``markdown_tree/`` under *build_path*: an ``index.md`` overview + (foreword, scope, authors, acronyms) and one subdirectory per section, each + containing a section ``index.md`` and one ``.md`` file per rule. + + Directory and file names carry ``NN-`` numeric prefixes so generators that + sort alphabetically by filename display content in the correct order without + extra configuration. The ``index.md`` naming follows the category-index + convention recognised by Docusaurus, Starlight, and MkDocs. + + Frontmatter is minimal - ``title`` only - and is safe to use in any + CommonMark-based static site generator. 
The rendered text is MDX- + and CommonMark-safe by construction: ``{``/``}`` and bare ``<`` outside + fenced code blocks are entity-encoded, and HTML void tags are normalised to + self-closing form. + + Drop the entire ``markdown_tree/`` directory (or its contents) into the + docs source folder of any CommonMark-based SSG - no post-processing + required. + + Args: + build_path (Path): Baseline build directory (e.g. ``build/``). + baseline (Baseline): Baseline data model. + version_info (dict[str, Any]): OS/compliance version metadata. + show_all_tags (bool): Render all reference tags regardless of + benchmark. Defaults to ``False``. + language (str): BCP-47 language code. Defaults to ``"en"``. + """ + output_root = build_path / "markdown_tree" + make_dir(output_root) + + template_dirs = search_paths("documents_templates_dir") + env = _build_env(template_dirs, language) + + baseline_dict = baseline.model_dump() + benchmark = baseline.title.split()[-1] + benchmarks = mscp_data.get("benchmarks", "") + baseline_dict["tailored"] = "Tailored from" in baseline.title + baseline_dict["benchmark_description"] = next( + (d["description"] for d in benchmarks if d.get("keyword") == benchmark), + benchmark, + ) + if any(author.is_additional for author in baseline.authors): + baseline_dict["additional_authors"] = True + + acronyms_file = Path(config["includes_dir"], "acronyms.yaml").absolute() + acronyms_data: dict[str, Any] = open_file(acronyms_file, language) + + context: dict[str, Any] = { + "baseline": baseline_dict, + "show_all_tags": show_all_tags, + "os_name": baseline.platform["os"].strip().lower(), + "os_version": str(version_info.get("os_version", None)), + "version": version_info.get("compliance_version", None), + "release_date": version_info.get("date", None), + "format": "markdown", + "markdown_tree": True, + "acronyms": acronyms_data.get("acronyms", []), + "terminology": acronyms_data.get("terminology", []), + "NIX_OS": NIX_OS, + } + + # Overview page: foreword / 
scope / authors / acronyms. + # Rendered using the same shared includes as the single-file markdown mode. + overview_template = env.get_template("markdown_tree/index.md.jinja") + index_body = overview_template.render(**context) + index_page = ( + _frontmatter({"title": baseline.title}) + + "\n\n" + + mdx_escape(index_body) + ) + (output_root / "index.md").write_text(index_page, encoding="utf-8") + + rule_template = env.get_template("markdown/rule.md.jinja") + + # Sections: position 2+ (overview is position 1 / no-prefix index.md). + # The section directory carries a NN- prefix matching section_position so + # the filesystem sort order matches the document order. + for section_position, profile in enumerate(baseline.profile, start=2): + section_slug = create_slug(profile.section) + section_dir = output_root / f"{section_position:02d}-{section_slug}" + make_dir(section_dir) + + section_description = asciidoc_to_markdown(profile.description).strip() + section_index_body = section_description + "\n" if section_description else "" + section_index = ( + _frontmatter({"title": profile.section}) + + "\n\n" + + mdx_escape(section_index_body) + ) + (section_dir / "index.md").write_text(section_index, encoding="utf-8") + + for rule_position, rule in enumerate(profile.rules, start=1): + rule_dict = rule.model_dump() + body = rule_template.render(rule=rule_dict, **context) + page = ( + _frontmatter({"title": rule.title}) + + "\n\n" + + mdx_escape(body) + ) + rule_file = ( + section_dir / f"{rule_position:02d}-{create_slug(rule.title)}.md" + ) + rule_file.write_text(page, encoding="utf-8") + + logger.debug( + "Markdown tree: wrote {} rules for section '{}'", + len(profile.rules), + profile.section, + ) + + logger.success(f"Markdown tree output written to {output_root}") diff --git a/src/mscp/generate/guidance_support/script.py b/src/mscp/generate/guidance_support/script.py index a56be8516..6f6352c8e 100644 --- a/src/mscp/generate/guidance_support/script.py +++ 
b/src/mscp/generate/guidance_support/script.py @@ -17,7 +17,7 @@ # Local python modules from ...classes import Baseline, Macsecurityrule -from ...common_utils import config, create_file, logger, make_dir, mscp_data, search_paths, NIX_OS +from ...common_utils import create_file, logger, make_dir, mscp_data, search_paths, NIX_OS def group_ulify(elements: list[str]) -> str: diff --git a/src/mscp/generate/scap.py b/src/mscp/generate/scap.py index 1f9b50e1a..79a1a4395 100644 --- a/src/mscp/generate/scap.py +++ b/src/mscp/generate/scap.py @@ -504,7 +504,6 @@ def replace_vars(text: str) -> str: if rule.result_value == 0: check_existence = "none_exist" - xccdfrules = ( xccdfrules + """{3}{4} @@ -736,7 +735,7 @@ def replace_vars(text: str) -> str: sp.text = "Writing output files" time.sleep(1) - + create_file(output_file, totaloutput) sp.text = f"Generated new SCAP file: {output_file}" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_markdown_tree.py b/tests/test_markdown_tree.py new file mode 100644 index 000000000..5d72cbc9f --- /dev/null +++ b/tests/test_markdown_tree.py @@ -0,0 +1,382 @@ +"""Tests for the --markdown-tree output mode. + +Covers: +- `create_slug`: slug normalisation +- `mdx_escape`: entity-encoding outside fenced blocks / inline code spans +- `render_references_md`: GFM-table-safe cell rendering +- `_frontmatter`: YAML single-quote escaping +- Integration: generate_markdown_tree writes a valid, SSG-safe tree from a + real bundled baseline (cis_lvl1_macos_26.0). 
+""" + +from __future__ import annotations + +import json +import re +import textwrap +from pathlib import Path + +import pytest + +from mscp.generate.guidance_support.markdown_tree import ( + _frontmatter, + create_slug, + mdx_escape, + render_references_md, +) + + +# --------------------------------------------------------------------------- +# create_slug +# --------------------------------------------------------------------------- + + +class TestCreateSlug: + def test_spaces_to_hyphens(self): + assert create_slug("System Settings") == "system-settings" + + def test_lowercase(self): + assert create_slug("Auditing") == "auditing" + + def test_strips_punctuation(self): + assert create_slug("Password/Policy") == "passwordpolicy" + + def test_collapses_multiple_hyphens(self): + assert create_slug("foo -- bar") == "foo-bar" + + def test_strips_leading_trailing_hyphens(self): + assert create_slug("-foo-") == "foo" + + def test_underscore_becomes_hyphen(self): + assert create_slug("os_ssh_fips") == "os-ssh-fips" + + def test_empty_string(self): + assert create_slug("") == "" + + +# --------------------------------------------------------------------------- +# mdx_escape +# --------------------------------------------------------------------------- + + +class TestMdxEscape: + def test_braces_encoded_in_prose(self): + result = mdx_escape("value is {foo}") + assert "{" in result + assert "}" in result + assert "{" not in result + + def test_braces_untouched_inside_fence(self): + src = "```bash\necho {hello}\n```" + assert mdx_escape(src) == src + + def test_braces_untouched_in_inline_code(self): + src = "use `{key}` here" + result = mdx_escape(src) + assert "`{key}`" in result + + def test_bare_lt_encoded_in_prose(self): + result = mdx_escape("plist has and ") + assert "<dict>" in result or "<" in result + # '<' in prose must be entity-encoded + assert re.search(r"<(?!br|hr|img|/)", result) is None + + def test_known_html_tags_preserved(self): + result = mdx_escape("line
next") + assert "
" in result + + def test_br_normalised_to_self_closing(self): + result = mdx_escape("line
next") + assert "
" in result + assert "
" not in result + + def test_br_idempotent(self): + result = mdx_escape("line
next") + assert result.count("
") == 1 + + def test_fence_with_plist(self): + src = "```xml\nfoo\n```" + assert mdx_escape(src) == src + + def test_empty_string(self): + assert mdx_escape("") == "" + + def test_none_passthrough(self): + # mdx_escape returns value unchanged when falsy + assert mdx_escape(None) is None # type: ignore[arg-type] + + def test_multiple_fences(self): + src = "intro {x}\n```bash\nif {a}; then\n```\noutro {y}" + result = mdx_escape(src) + assert "{x}" in result # prose encoded + assert "```bash\nif {a}; then\n```" in result # fence unchanged + assert "{y}" in result + + +# --------------------------------------------------------------------------- +# render_references_md +# --------------------------------------------------------------------------- + + +class TestRenderReferencesMd: + def test_empty_returns_empty_string(self): + assert render_references_md([]) == "" + assert render_references_md(None) == "" + + def test_single_dict(self): + result = render_references_md([{"SRG": "SRG-OS-000001"}]) + assert "**SRG**" in result + assert "SRG-OS-000001" in result + + def test_pipe_escaped(self): + result = render_references_md([{"Key": "a|b"}]) + assert r"\|" in result + + def test_list_value_joined(self): + result = render_references_md([{"IDs": ["ID-1", "ID-2"]}]) + assert "ID-1" in result + assert "ID-2" in result + + def test_multiple_dicts_joined_with_br(self): + refs = [{"A": "1"}, {"B": "2"}] + result = render_references_md(refs) + assert "
" in result + + +# --------------------------------------------------------------------------- +# _frontmatter +# --------------------------------------------------------------------------- + + +class TestFrontmatter: + def test_basic(self): + fm = _frontmatter({"title": "Hello"}) + assert fm.startswith("---") + assert fm.endswith("---") + assert "title: 'Hello'" in fm + + def test_single_quote_escaped(self): + fm = _frontmatter({"title": "It's a title"}) + assert "title: 'It''s a title'" in fm + + def test_non_string_bare(self): + fm = _frontmatter({"position": 3}) + assert "position: 3" in fm + + def test_slug_field(self): + fm = _frontmatter({"title": "A Rule", "slug": "a-rule"}) + assert "slug: 'a-rule'" in fm + + +# --------------------------------------------------------------------------- +# Integration: generate_markdown_tree +# --------------------------------------------------------------------------- + + +def _get_baseline_path() -> Path: + """Return a bundled baseline YAML for testing.""" + root = Path(__file__).parent.parent + candidates = [ + root / "src/mscp/data/baselines/macos/cis_lvl1_macos_26.0.yaml", + ] + for p in candidates: + if p.exists(): + return p + pytest.fail( + f"Bundled baseline not found; checked: {[str(p) for p in candidates]}" + ) + + +@pytest.fixture(scope="module") +def markdown_tree_output(tmp_path_factory): + """Generate the markdown tree for cis_lvl1 into a temp dir.""" + from mscp.classes import Baseline + from mscp.common_utils import get_version_data, mscp_data + from mscp.generate.guidance_support.markdown_tree import generate_markdown_tree + + baseline_path = _get_baseline_path() + tmp = tmp_path_factory.mktemp("tree_output") + + baseline = Baseline.from_yaml(baseline_path) + version_data = get_version_data( + baseline.platform["os"], + float(baseline.platform["version"]), + mscp_data, + ) + + generate_markdown_tree( + build_path=tmp, + baseline=baseline, + version_info=version_data, + show_all_tags=False, + 
language="en", + ) + return tmp / "markdown_tree" + + +class TestMarkdownTreeIntegration: + def test_output_dir_exists(self, markdown_tree_output): + assert markdown_tree_output.is_dir() + + def test_root_index_exists(self, markdown_tree_output): + assert (markdown_tree_output / "index.md").is_file() + + def test_root_index_has_frontmatter(self, markdown_tree_output): + content = (markdown_tree_output / "index.md").read_text() + assert content.startswith("---\n") + assert "title:" in content + + def test_section_dirs_exist(self, markdown_tree_output): + section_dirs = [ + d for d in markdown_tree_output.iterdir() if d.is_dir() + ] + assert len(section_dirs) > 0 + + def test_section_dirs_have_nn_prefix(self, markdown_tree_output): + section_dirs = [ + d for d in markdown_tree_output.iterdir() if d.is_dir() + ] + for d in section_dirs: + assert re.match(r"^\d{2}-", d.name), ( + f"Section dir '{d.name}' missing NN- prefix" + ) + + def test_section_dirs_sorted_correctly(self, markdown_tree_output): + section_dirs = sorted( + d for d in markdown_tree_output.iterdir() if d.is_dir() + ) + prefixes = [int(d.name[:2]) for d in section_dirs] + assert prefixes == sorted(prefixes) + + def test_each_section_has_index(self, markdown_tree_output): + for section_dir in markdown_tree_output.iterdir(): + if section_dir.is_dir(): + assert (section_dir / "index.md").is_file(), ( + f"Missing index.md in {section_dir.name}" + ) + + def test_rule_files_have_nn_prefix(self, markdown_tree_output): + for section_dir in markdown_tree_output.iterdir(): + if not section_dir.is_dir(): + continue + rule_files = [ + f for f in section_dir.iterdir() + if f.is_file() and f.name != "index.md" + ] + for rf in rule_files: + assert re.match(r"^\d{2}-", rf.name), ( + f"Rule file '{rf.name}' missing NN- prefix" + ) + + def test_rule_files_are_md_not_mdx(self, markdown_tree_output): + for section_dir in markdown_tree_output.iterdir(): + if not section_dir.is_dir(): + continue + for f in 
section_dir.iterdir(): + assert f.suffix == ".md", ( + f"Expected .md extension, got '{f.name}'" + ) + + def test_rule_frontmatter_parses(self, markdown_tree_output): + """All rule files must have valid YAML frontmatter with title only.""" + for section_dir in markdown_tree_output.iterdir(): + if not section_dir.is_dir(): + continue + for rf in section_dir.iterdir(): + if rf.name == "index.md": + continue + content = rf.read_text() + assert content.startswith("---\n"), ( + f"{rf.name}: missing frontmatter" + ) + # Extract frontmatter block + end = content.index("---\n", 4) + fm_block = content[4:end] + assert "title:" in fm_block, f"{rf.name}: missing title" + + + def test_no_raw_braces_outside_fences(self, markdown_tree_output): + """No unescaped { or } should appear outside fenced blocks.""" + for md_file in markdown_tree_output.rglob("*.md"): + content = md_file.read_text() + # Strip fenced code blocks + stripped = re.sub(r"```.*?```", "", content, flags=re.DOTALL) + # Strip frontmatter + if stripped.startswith("---\n"): + end = stripped.index("---\n", 4) + stripped = stripped[end + 4:] + assert "{" not in stripped, ( + f"{md_file.name}: raw '{{' found outside fenced block" + ) + assert "}" not in stripped, ( + f"{md_file.name}: raw '}}' found outside fenced block" + ) + + def test_balanced_fences(self, markdown_tree_output): + """Fenced code blocks must be properly opened and closed.""" + for md_file in markdown_tree_output.rglob("*.md"): + content = md_file.read_text() + fences = re.findall(r"^```", content, flags=re.MULTILINE) + assert len(fences) % 2 == 0, ( + f"{md_file.name}: unbalanced fences ({len(fences)} markers)" + ) + + def test_no_category_json_files(self, markdown_tree_output): + """No Docusaurus-specific _category_.json files should be present.""" + category_files = list(markdown_tree_output.rglob("_category_.json")) + assert category_files == [], ( + f"Found _category_.json files: {category_files}" + ) + + def 
test_single_file_markdown_unchanged(self, tmp_path): + """The single-file --markdown output must not be affected by the tree changes. + + Verifies that rendering without markdown_tree=True in context still + produces the HTML-table remediation block (not the heading-based one). + """ + # This is a template-level smoke test: render the rule template + # directly without markdown_tree context and confirm HTML table + # structure is present. + from jinja2 import Environment, DictLoader + + # Minimal template that exercises only the remediation branch + template_src = textwrap.dedent("""\ + {% set check_tags = ["permanent", "inherent", "n_a", "not_applicable"] %} + {% set additional_info = none %} + {% set check_shell = none %} + {% set fix_shell = none %} + {% if not markdown_tree | default(false) %} + ### {{ rule.title }} + {% endif %} + {{ rule.discussion }} + {% if not rule.tags | select('in', check_tags) | list %} + {% if markdown_tree | default(false) %} + ## Remediation Description + {% else %} + remediation
+ {% endif %} + {% endif %} + """) + + env = Environment(loader=DictLoader({"rule.md.jinja": template_src})) + env.filters["default"] = lambda v, d=None: v if v else d + + rule = { + "title": "Test Rule", + "discussion": "Some text.", + "tags": ["other"], + "mechanism": "automated", + "fix": "sudo defaults write ...", + } + + # Without markdown_tree - should use HTML table remediation + result_std = env.get_template("rule.md.jinja").render(rule=rule) + assert "