2 changes: 1 addition & 1 deletion action.yml
@@ -35,7 +35,7 @@ runs:
shell: bash
working-directory: ${{ github.action_path }}
run: |
kernel-crawler crawl --distro=${{ inputs.distro }} --arch=${{ inputs.arch }} > ${{ runner.temp }}/kernels_${{ inputs.arch }}.json
kernel-crawler crawl --distro=${{ inputs.distro }} --arch=${{ inputs.arch }} --output ${{ runner.temp }}/kernels_${{ inputs.arch }}.json

- name: Validate json
shell: bash
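The action step above now relies on the crawler's own --output flag instead of shell redirection. A rough local equivalent of that step, assuming kernel-crawler is installed on PATH and that 'Ubuntu' is a valid --distro value (both illustrative, not taken from the PR), might look like:

import os
import subprocess
import tempfile

# Sketch only: mirrors the updated action step outside of GitHub Actions.
out_path = os.path.join(tempfile.gettempdir(), "kernels_x86_64.json")
subprocess.run(
    ["kernel-crawler", "crawl", "--distro", "Ubuntu", "--arch", "x86_64", "--output", out_path],
    check=True,
)
print(f"kernel list written to {out_path}")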
2 changes: 1 addition & 1 deletion kernel_crawler/bottlerocket.py
@@ -32,7 +32,7 @@ def fetch_base_config(self, kverspec):
if source is None:
return None

alkernel = requests.get(source)
alkernel = requests.get(source, timeout = 15)
alkernel.raise_for_status()
with open('/tmp/alkernel.rpm', 'wb') as f:
f.write(alkernel.content)
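For reference, requests applies a scalar timeout to the connect and read phases separately; a (connect, read) tuple can be passed when the two need different bounds. A minimal sketch with an illustrative URL:

import requests

# timeout=15 bounds each of the connect and read phases at 15 seconds;
# the tuple form splits them, e.g. 5s to connect and 15s to read.
resp = requests.get("https://example.org/kernel.rpm", timeout=(5, 15))
resp.raise_for_status()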
51 changes: 32 additions & 19 deletions kernel_crawler/crawler.py
@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from requests.exceptions import ConnectTimeout, ReadTimeout, Timeout, RequestException, ConnectionError
from . import repo
from .minikube import MinikubeMirror
from .aliyunlinux import AliyunLinuxMirror
@@ -84,25 +85,37 @@ def crawl_kernels(distro, version, arch, images):

for distname, dist in DISTROS.items():
if distname == distro or distro == "*":
# If the distro requires an image (Redhat only so far), we need to amalgamate
# the kernel versions from the supplied images before choosing the output.
if issubclass(dist, repo.ContainerDistro):
if images:
kv = {}
for image in images:
d = dist(image)
if len(kv) == 0:
kv = d.get_kernel_versions()
else:
kv.update(d.get_kernel_versions())
# We should now have a list of all kernel versions for the supplied images
res = kv
try:
# If the distro requires an image (Redhat only so far), we need to amalgamate
# the kernel versions from the supplied images before choosing the output.
if issubclass(dist, repo.ContainerDistro):
if images:
kv = {}
for image in images:
d = dist(image)
if len(kv) == 0:
kv = d.get_kernel_versions()
else:
kv.update(d.get_kernel_versions())
# We should now have a list of all kernel versions for the supplied images
res = kv
else:
d = None
else:
d = None
else:
d = dist(arch)
res = d.get_package_tree(version)
d = dist(arch)
res = d.get_package_tree(version)

if d and res:
ret[distname] = to_driverkit_config(d, res)

except (ConnectTimeout, ReadTimeout, Timeout):
print(f"[ERROR] Timeout while fetching data for distro '{distname}'")
except ConnectionError:
print(f"[ERROR] Network unreachable or host down for distro '{distname}'")
except RequestException as e:
print(f"[ERROR] Request failed for distro '{distname}': {e}")
except Exception as e:
# Catch-all for unexpected issues
print(f"[ERROR] Unexpected error in distro '{distname}': {e}")

if d and res:
ret[distname] = to_driverkit_config(d, res)
return ret
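The except clauses are ordered from most to least specific: in requests, ConnectTimeout inherits from both ConnectionError and Timeout, so listing the timeout classes first makes a stalled connection surface as a timeout rather than a generic connection error. A small sketch of why that ordering matters:

from requests.exceptions import ConnectTimeout, ConnectionError, RequestException, Timeout

try:
    # Simulate a connect-phase timeout such as the crawler might hit.
    raise ConnectTimeout("connect phase exceeded the limit")
except (ConnectTimeout, Timeout):
    print("[ERROR] Timeout")            # this branch runs
except ConnectionError:
    print("[ERROR] Connection problem") # would match instead if listed above the timeouts
except RequestException as e:
    print(f"[ERROR] Request failed: {e}")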
2 changes: 1 addition & 1 deletion kernel_crawler/deb.py
@@ -276,7 +276,7 @@ def scan_repo(self, dist):

def list_repos(self):
dists_url = self.base_url + 'dists/'
dists = requests.get(dists_url)
dists = requests.get(dists_url, timeout = 15)
dists.raise_for_status()
dists = dists.content
doc = html.fromstring(dists, dists_url)
2 changes: 1 addition & 1 deletion kernel_crawler/flatcar.py
@@ -47,7 +47,7 @@ def __init__(self, arch):

def scan_repo(self, base_url):
try:
dists = requests.get(base_url)
dists = requests.get(base_url, timeout = 15)
dists.raise_for_status()
except requests.exceptions.RequestException:
return {}
12 changes: 9 additions & 3 deletions kernel_crawler/main.py
@@ -54,14 +54,20 @@ def handle_parse_result(self, ctx, opts, args):
return super(DistroImageValidation, self).handle_parse_result(ctx, opts, args)

@click.command()
@click.option('--distro', type=click.Choice(sorted(list(DISTROS.keys())) + ['*'], case_sensitive=True))
@click.option('--distro', type=click.Choice(sorted(list(DISTROS.keys())) + ['*'], case_sensitive=True), required=True)
@click.option('--version', required=False, default='')
@click.option('--arch', required=False, type=click.Choice(['x86_64', 'aarch64'], case_sensitive=True), default='x86_64')
@click.option('--image', cls=DistroImageValidation, required_if_distro=["Redhat"], multiple=True)
def crawl(distro, version='', arch='', image=''):
@click.option('--output', type=click.Path(dir_okay=False, writable=True), help="Optional file path to write JSON output")
def crawl(distro, version='', arch='', image='', output=None):
res = crawl_kernels(distro, version, arch, image)
json_object = json.dumps(res, indent=2, default=vars)
print(json_object)
if output:
with open(output, 'w', encoding='utf-8') as f:
f.write(json_object)
click.echo(f"[INFO] JSON output written to {output}")
else:
click.echo(json_object)

cli.add_command(crawl, 'crawl')
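The new --output option can be exercised without installing the package by driving the command through click's test runner; the distro value and output path below are illustrative assumptions:

from click.testing import CliRunner
from kernel_crawler.main import crawl

runner = CliRunner()
result = runner.invoke(
    crawl,
    ["--distro", "Ubuntu", "--arch", "x86_64", "--output", "/tmp/kernels.json"],
)
print(result.exit_code)  # 0 on success
print(result.output)     # expected to contain "[INFO] JSON output written to /tmp/kernels.json"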

9 changes: 6 additions & 3 deletions kernel_crawler/rpm.py
@@ -133,7 +133,8 @@ def dist_exists(self, dist):
self.dist_url(dist),
headers={ # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
'user-agent': 'dummy'
}
},
timeout = 15
)
r.raise_for_status()
except requests.exceptions.RequestException:
@@ -145,7 +146,8 @@ def list_repos(self):
self.base_url,
headers={ # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
'user-agent': 'dummy'
}
},
timeout = 15
)
dists.raise_for_status()
dists = dists.content
@@ -185,7 +187,8 @@ def list_repos(self):
self.base_url,
headers={ # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
'user-agent': 'dummy'
}
},
timeout = 15
)
dists.raise_for_status()
dists = dists.content
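The fabricated user-agent (added to dodge the HTTP 406 some mirrors return) and the new timeout now travel together on every metadata request. One way the repeated header could be centralized is a shared Session; note that requests has no session-wide timeout, so the timeout still has to be passed per call. Illustrative sketch of that alternative, not what the PR does:

import requests

session = requests.Session()
session.headers.update({'user-agent': 'dummy'})  # avoids the HTTP 406 some mirrors return

resp = session.get("https://mirrors.example.org/repodata/repomd.xml", timeout=15)
resp.raise_for_status()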
71 changes: 46 additions & 25 deletions kernel_crawler/utils/download.py
@@ -9,32 +9,53 @@
except ImportError:
from backports import lzma

from requests.exceptions import (
ConnectTimeout,
ReadTimeout,
Timeout,
ConnectionError,
RequestException,
)


def get_url(url):
resp = requests.get(
url,
headers={ # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
'user-agent': 'dummy'
}
)

# if 404, silently fail
if resp.status_code == 404:
return None
else: # if any other error, raise the error - might be a bug in crawler
resp.raise_for_status()

# if no error, return the (eventually decompressed) contents
if url.endswith('.gz'):
return zlib.decompress(resp.content, 47)
elif url.endswith('.xz'):
return lzma.decompress(resp.content)
elif url.endswith('.bz2'):
return bz2.decompress(resp.content)
elif url.endswith('.zst'):
with zstandard.ZstdDecompressor().stream_reader(io.BytesIO(resp.content)) as rr:
return rr.read()
else:
return resp.content
try:
resp = requests.get(
url,
headers={ # some URLs require a user-agent, otherwise they return HTTP 406 - this one is fabricated
'user-agent': 'dummy'
},
timeout=15,
)

# if 404, silently fail
if resp.status_code == 404:
return None
else: # if any other error, raise the error - might be a bug in crawler
resp.raise_for_status()

# if no error, return the (eventually decompressed) contents
if url.endswith('.gz'):
return zlib.decompress(resp.content, 47)
elif url.endswith('.xz'):
return lzma.decompress(resp.content)
elif url.endswith('.bz2'):
return bz2.decompress(resp.content)
elif url.endswith('.zst'):
with zstandard.ZstdDecompressor().stream_reader(io.BytesIO(resp.content)) as rr:
return rr.read()
else:
return resp.content

except (ConnectTimeout, ReadTimeout, Timeout):
print(f"[ERROR] Timeout fetching {url}")
except ConnectionError:
print(f"[ERROR] Network unreachable or host down: {url}")
except RequestException as e:
print(f"[ERROR] Request failed for {url}: {e}")
except Exception as e:
print(f"[ERROR] Unexpected error fetching {url}: {e}")
return None
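Since get_url now returns None for timeouts and connection failures as well as for 404s, callers should treat None as "repository unavailable" rather than assuming bytes come back. A short usage sketch with an illustrative URL:

content = get_url("https://mirrors.example.org/repodata/primary.xml.gz")
if content is None:
    print("[WARN] repository metadata unavailable, skipping this repo")
else:
    print(f"fetched {len(content)} bytes of metadata")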


def get_first_of(urls):