Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 5 additions & 23 deletions kernel_crawler/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,11 @@
import sqlite3
import tempfile
import re
import zstandard as zstd
import io

from . import repo
from kernel_crawler.utils.download import get_url

try:
import lzma
except ImportError:
from backports import lzma


class RpmRepository(repo.Repository):
def __init__(self, base_url):
self.base_url = base_url
Expand Down Expand Up @@ -258,25 +251,17 @@ def build_kernel_devel_noarch_url(self, kernel_release):
'''
return f'{self.base_url}noarch/kernel-devel-{kernel_release}.rpm'.replace(self.arch, 'noarch')

def open_repo(self, repo_path, isZstd):
def open_repo(self, repo_path):
package_match = f'{self.arch}/{self._kernel_devel_pattern}'
# regex searching through a file is more memory efficient
# than parsing the xml into an object structure with lxml etree
open_mode = 'r'
if isZstd:
open_mode = 'rb'
with open(repo_path, mode=open_mode) as f:
if isZstd:
dctx = zstd.ZstdDecompressor(max_window_size=2147483648)
stream_reader = dctx.stream_reader(f)
text = io.TextIOWrapper(stream_reader, encoding='utf-8').read()
else:
text = str(f.read())

with open(repo_path, mode='r') as f:
text = str(f.read())
search = re.search(f'.*href="({package_match}.*rpm)', text)
if search:
return search.group(1)
return None
return None

def get_package_tree(self, filter=''):
'''
Expand All @@ -300,10 +285,7 @@ def get_package_tree(self, filter=''):
with tempfile.NamedTemporaryFile() as tf:
tf.write(repodb)
tf.flush()
try:
kernel_default_devel_pkg_url = self.open_repo(tf.name, False)
except UnicodeDecodeError:
kernel_default_devel_pkg_url = self.open_repo(tf.name, True)
kernel_default_devel_pkg_url = self.open_repo(tf.name)
tf.close() # delete the tempfile to free up memory

# check to ensure a kernel_devel_pkg was found
Expand Down
7 changes: 6 additions & 1 deletion kernel_crawler/utils/download.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import bz2
import zlib
import zstandard
import requests
import io

try:
import lzma
Expand All @@ -21,13 +23,16 @@ def get_url(url):
else: # if any other error, raise the error - might be a bug in crawler
resp.raise_for_status()

# if no error, return the contents
# if no error, return the (possibly decompressed) contents
if url.endswith('.gz'):
return zlib.decompress(resp.content, 47)
elif url.endswith('.xz'):
return lzma.decompress(resp.content)
elif url.endswith('.bz2'):
return bz2.decompress(resp.content)
elif url.endswith('.zst'):
with zstandard.ZstdDecompressor().stream_reader(io.BytesIO(resp.content)) as rr:
return rr.read()
else:
return resp.content

Expand Down
Loading