fix(lint): fix all sonarcloud detected issues (#279)

* fix(ci): replace broad permissions with specific scopes in workflows
  Replace read-all/write-all with minimum required permission scopes across all GitHub Actions workflows to follow the principle of least privilege (SonarCloud rule githubactions:S8234).
* fix(shell): use [[ instead of [ for conditional tests
  Replace single brackets with double brackets in bash conditional expressions across 14 files (28 changes). All scripts use bash shebangs so [[ is safe everywhere (SonarCloud rule shelldre:S7688).
* fix(shell): add explicit return statements to functions
  Add return 0 as the last statement in ~46 shell functions across 17 files that previously relied on implicit return codes (SonarCloud rule shelldre:S7682).
* fix(shell): assign positional parameters to local variables
  Replace direct $1/$2/$3 usage with named local variables in _log(), msg(), msg_err(), msg_done(), msg_run(), msg_ok(), and array_diff() (SonarCloud rule shelldre:S7679).
* fix(python): replace dict() constructor with literal
  Use {} instead of dict() for empty dictionary initialization (SonarCloud rule python:S7498).
* fix(shell): fix husky shebang and tolerate npm outdated exit code
* docs(shell): add function docstring comments
* fix(shell): fix heredoc indentation in x-sonarcloud
* feat(python): add ruff linter and formatter configuration
* fix(ci): align megalinter config with biome, ruff, and shfmt settings
* fix(ci): disable black and yaml-prettier in megalinter config
* chore(ci): update ruff-pre-commit to v0.15.0 and fix hook name
* fix(scripts): check for .git dir before skipping clone in install-fonts
* fix(shell): address code review issues in scripts and shared.sh
  - Guard wezterm show-keys failure in create-wezterm-keymaps.sh
  - Stop masking git failures with return 0 in install-cheat-purebashbible.sh
  - Add missing shared.sh source in install-xcode-cli-tools.sh
  - Replace exit 1 with return 1 in sourced shared.sh
* fix(scripts): address code review and security findings
  - Guard wezterm show-keys failure in create-wezterm-keymaps.sh
  - Stop masking git failures with return 0 in install-cheat-purebashbible.sh
  - Add missing shared.sh source in install-xcode-cli-tools.sh
  - Replace exit 1 with return 1 in sourced shared.sh
  - Remove shell=True subprocess calls in x-git-largest-files.py
* style(shell): apply shfmt formatting and add args to pre-commit hook
* fix(python): suppress bandit false positives in x-git-largest-files
* fix(python): add nosemgrep suppression for check_output call
* feat(format): add prettier for YAML formatting
  Install prettier, add .prettierrc.json config (200-char width, 2-space indent, LF endings), .prettierignore, yarn scripts (lint:prettier, fix:prettier, format:yaml), and pre-commit hook scoped to YAML files.
* style(yaml): apply prettier formatting
* fix(scripts): address remaining code review findings
  - Python: use list comprehension to filter empty strings instead of slicing off the last element
  - create-wezterm-keymaps: write to temp file and mv for atomic updates
  - install-xcode-cli-tools: fix shellcheck source directive path
* fix(python): sort imports alphabetically in x-git-largest-files
* fix(lint): disable PYTHON_ISORT in MegaLinter, ruff handles it
* chore(git): add __pycache__ to gitignore
* fix(python): rename ambiguous variable l to line (E741)
* style: remove trailing whitespace and blank lines
* style(fzf): apply shfmt formatting
* style(shell): apply shfmt formatting
* docs(plans): add design documents
* style(docs): add language specifier to fenced code block
* feat(lint): add markdown-table-formatter to dev tooling
  Add markdown-table-formatter as a dev dependency with yarn scripts (lint:md-table, fix:md-table) and a local pre-commit hook to automatically format markdown tables on commit.
2026-02-12 11:52:32 +00:00 · 2026-02-07 19:01:02 +02:00
parent cff3d1dd8a
commit 6d72003446
86 changed files with 1264 additions and 425 deletions
--- a/local/bin/x-git-largest-files.py
+++ b/local/bin/x-git-largest-files.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-

 # Python script to find the largest files in a git repository.
 # The general method is based on the script in this blog post:
@@ -32,60 +31,60 @@

 # vim:tw=120:ts=4:ft=python:norl:

-from subprocess import check_output, Popen, PIPE
 import argparse
+import glob
 import signal
 import sys
+from subprocess import PIPE, Popen, check_output  # nosec B404

 sortByOnDiskSize = False

-class Blob(object):
-  sha1 = ''
-  size = 0
-  packed_size = 0
-  path = ''

-  def __init__(self, line):
-    cols = line.split()
-    self.sha1, self.size, self.packed_size = cols[0], int(cols[2]), int(cols[3])
+class Blob:
+    sha1 = ""
+    size = 0
+    packed_size = 0
+    path = ""

-  def __repr__(self):
-    return '{} - {} - {} - {}'.format(
-      self.sha1, self.size, self.packed_size, self.path)
+    def __init__(self, line):
+        cols = line.split()
+        self.sha1, self.size, self.packed_size = cols[0], int(cols[2]), int(cols[3])

-  def __lt__(self, other):
-    if (sortByOnDiskSize):
-      return self.size < other.size
-    else:
-      return self.packed_size < other.packed_size
+    def __repr__(self):
+        return f"{self.sha1} - {self.size} - {self.packed_size} - {self.path}"

-  def csv_line(self):
-    return "{},{},{},{}".format(
-      self.size/1024, self.packed_size/1024, self.sha1, self.path)
+    def __lt__(self, other):
+        if sortByOnDiskSize:
+            return self.size < other.size
+        else:
+            return self.packed_size < other.packed_size
+
+    def csv_line(self):
+        return f"{self.size / 1024},{self.packed_size / 1024},{self.sha1},{self.path}"


 def main():
-  global sortByOnDiskSize
+    global sortByOnDiskSize

-  signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGINT, signal_handler)

-  args = parse_arguments()
-  sortByOnDiskSize = args.sortByOnDiskSize
-  size_limit = 1024*args.filesExceeding
+    args = parse_arguments()
+    sortByOnDiskSize = args.sortByOnDiskSize
+    size_limit = 1024 * args.filesExceeding

-  if args.filesExceeding > 0:
-    print("Finding objects larger than {}kB…".format(args.filesExceeding))
-  else:
-    print("Finding the {} largest objects…".format(args.matchCount))
+    if args.filesExceeding > 0:
+        print(f"Finding objects larger than {args.filesExceeding}kB…")
+    else:
+        print(f"Finding the {args.matchCount} largest objects…")

-  blobs = get_top_blobs(args.matchCount, size_limit)
+    blobs = get_top_blobs(args.matchCount, size_limit)

-  populate_blob_paths(blobs)
-  print_out_blobs(blobs)
+    populate_blob_paths(blobs)
+    print_out_blobs(blobs)


 def get_top_blobs(count, size_limit):
-  """Get top blobs from git repository
+    """Get top blobs from git repository

    Args:
        count (int): How many items to return
@@ -93,110 +92,141 @@ def get_top_blobs(count, size_limit):

    Returns:
        dict: Dictionary of Blobs
-  """
-  sort_column = 4
+    """
+    sort_column = 4

-  if sortByOnDiskSize:
-    sort_column = 3
+    if sortByOnDiskSize:
+        sort_column = 3

-  verify_pack = "git verify-pack -v `git rev-parse --git-dir`/objects/pack/pack-*.idx | grep blob | sort -k{}nr".format(sort_column)  # noqa: E501
-  output = check_output(verify_pack, shell=True).decode('utf-8').strip().split("\n")[:-1]  # noqa: E501
+    git_dir = check_output(["git", "rev-parse", "--git-dir"]).decode("utf-8").strip()  # nosec B603 # nosemgrep
+    idx_files = glob.glob(f"{git_dir}/objects/pack/pack-*.idx")
+    verify_pack = Popen(  # nosec B603
+        ["git", "verify-pack", "-v", *idx_files],
+        stdout=PIPE,
+        stderr=PIPE,
+    )
+    grep_blob = Popen(["grep", "blob"], stdin=verify_pack.stdout, stdout=PIPE, stderr=PIPE)  # nosec B603
+    if verify_pack.stdout:
+        verify_pack.stdout.close()
+    sort_cmd = Popen(  # nosec B603
+        ["sort", f"-k{sort_column}nr"],
+        stdin=grep_blob.stdout,
+        stdout=PIPE,
+        stderr=PIPE,
+    )
+    if grep_blob.stdout:
+        grep_blob.stdout.close()
+    output = [line for line in sort_cmd.communicate()[0].decode("utf-8").strip().split("\n") if line]

-  blobs = dict()
-  # use __lt__ to do the appropriate comparison
-  compare_blob = Blob("a b {} {} c".format(size_limit, size_limit))
-  for obj_line in output:
-    blob = Blob(obj_line)
+    blobs = {}
+    # use __lt__ to do the appropriate comparison
+    compare_blob = Blob(f"a b {size_limit} {size_limit} c")
+    for obj_line in output:
+        blob = Blob(obj_line)

-    if size_limit > 0:
-      if compare_blob < blob:
-        blobs[blob.sha1] = blob
-      else:
-        break
-    else:
-      blobs[blob.sha1] = blob
+        if size_limit > 0:
+            if compare_blob < blob:
+                blobs[blob.sha1] = blob
+            else:
+                break
+        else:
+            blobs[blob.sha1] = blob

-      if len(blobs) == count:
-        break
+            if len(blobs) == count:
+                break

-  return blobs
+    return blobs


 def populate_blob_paths(blobs):
-  """Populate blob paths that only have a path
+    """Populate blob paths that only have a path

-  Args:
-    blobs (Blob, dict): Dictionary of Blobs
-  """
-  if len(blobs):
-    print("Finding object paths…")
+    Args:
+      blobs (Blob, dict): Dictionary of Blobs
+    """
+    if len(blobs):
+        print("Finding object paths…")

-    # Only include revs which have a path. Other revs aren't blobs.
-    rev_list = "git rev-list --all --objects | awk '$2 {print}'"
-    all_object_lines = check_output(rev_list, shell=True).decode('utf-8').strip().split("\n")[:-1]  # noqa: E501
-    outstanding_keys = list(blobs.keys())
+        # Only include revs which have a path. Other revs aren't blobs.
+        rev_list = Popen(["git", "rev-list", "--all", "--objects"], stdout=PIPE, stderr=PIPE)  # nosec B603
+        awk_filter = Popen(["awk", "$2 {print}"], stdin=rev_list.stdout, stdout=PIPE, stderr=PIPE)  # nosec B603
+        if rev_list.stdout:
+            rev_list.stdout.close()
+        all_object_lines = [line for line in awk_filter.communicate()[0].decode("utf-8").strip().split("\n") if line]
+        outstanding_keys = list(blobs.keys())

-    for line in all_object_lines:
-      cols = line.split()
-      sha1, path = cols[0], " ".join(cols[1:])
+        for line in all_object_lines:
+            cols = line.split()
+            sha1, path = cols[0], " ".join(cols[1:])

-      if (sha1 in outstanding_keys):
-        outstanding_keys.remove(sha1)
-        blobs[sha1].path = path
+            if sha1 in outstanding_keys:
+                outstanding_keys.remove(sha1)
+                blobs[sha1].path = path

-        # short-circuit the search if we're done
-        if not len(outstanding_keys):
-          break
+                # short-circuit the search if we're done
+                if not len(outstanding_keys):
+                    break


 def print_out_blobs(blobs):
-  if len(blobs):
-    csv_lines = ["size,pack,hash,path"]
+    if len(blobs):
+        csv_lines = ["size,pack,hash,path"]

-    for blob in sorted(blobs.values(), reverse=True):
-      csv_lines.append(blob.csv_line())
+        for blob in sorted(blobs.values(), reverse=True):
+            csv_lines.append(blob.csv_line())

-    command = ["column", "-t", "-s", ","]
-    p = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+        command = ["column", "-t", "-s", ","]
+        p = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE)

-    # Encode the input as bytes
-    input_data = ("\n".join(csv_lines) + "\n").encode()
+        # Encode the input as bytes
+        input_data = ("\n".join(csv_lines) + "\n").encode()

-    stdout, _ = p.communicate(input_data)
+        stdout, _ = p.communicate(input_data)

-    print("\nAll sizes in kB. The pack column is the compressed size of the object inside the pack file.\n")  # noqa: E501
+        print("\nAll sizes in kB. The pack column is the compressed size of the object inside the pack file.\n")

-    print(stdout.decode("utf-8").rstrip('\n'))
-  else:
-    print("No files found which match those criteria.")
+        print(stdout.decode("utf-8").rstrip("\n"))
+    else:
+        print("No files found which match those criteria.")


 def parse_arguments():
-  parser = argparse.ArgumentParser(
-    description='List the largest files in a git repository'
-  )
-  parser.add_argument(
-    '-c', '--match-count', dest='matchCount', type=int, default=10,
-    help='Files to return. Default is 10. Ignored if --files-exceeding is used.'
-  )
-  parser.add_argument(
-    '--files-exceeding', dest='filesExceeding', type=int, default=0,
-    help='The cutoff amount, in KB. Files with a pack size (or physical size, with -p) larger than this will be printed.' # noqa: E501
-  )
-  parser.add_argument(
-    '-p', '--physical-sort', dest='sortByOnDiskSize',
-    action='store_true', default=False,
-    help='Sort by the on-disk size. Default is to sort by the pack size.'
-  )
+    parser = argparse.ArgumentParser(description="List the largest files in a git repository")
+    parser.add_argument(
+        "-c",
+        "--match-count",
+        dest="matchCount",
+        type=int,
+        default=10,
+        help="Files to return. Default is 10. Ignored if --files-exceeding is used.",
+    )
+    parser.add_argument(
+        "--files-exceeding",
+        dest="filesExceeding",
+        type=int,
+        default=0,
+        help=(
+            "The cutoff amount, in KB. Files with a pack size"
+            " (or physical size, with -p) larger than this will be printed."
+        ),
+    )
+    parser.add_argument(
+        "-p",
+        "--physical-sort",
+        dest="sortByOnDiskSize",
+        action="store_true",
+        default=False,
+        help="Sort by the on-disk size. Default is to sort by the pack size.",
+    )

-  return parser.parse_args()
+    return parser.parse_args()


-def signal_handler(signal, frame):
-    print('Caught Ctrl-C. Exiting.')
+def signal_handler(_signal, _frame):
+    print("Caught Ctrl-C. Exiting.")
    sys.exit(0)


 # Default function is main()
-if __name__ == '__main__':
-  main()
+if __name__ == "__main__":
+    main()