Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
04266f3
Add tests __init__.py
martinburchell Apr 29, 2025
6e9f343
Test document_to_text exceptions
martinburchell Apr 29, 2025
9d78d2c
Test document_to_text CSV extraction
martinburchell Apr 29, 2025
5a8d542
Test doc extraction
martinburchell Apr 29, 2025
a8f8cb5
Test dot file extraction
martinburchell Apr 29, 2025
2cb2866
Update docs
martinburchell Apr 29, 2025
699645d
Test DOCX conversion
martinburchell Apr 30, 2025
78873eb
Test HTML conversion
martinburchell Apr 30, 2025
9219cab
Test log file conversion
martinburchell Apr 30, 2025
04b0c37
Test ODT file conversion
martinburchell Apr 30, 2025
21e2b81
Test PDF file conversion
martinburchell Apr 30, 2025
82737b1
Test RTF file conversion
martinburchell Apr 30, 2025
1427e82
Install Faker when building docs and running tests
martinburchell Apr 30, 2025
d1f8977
Test TXT file conversion
martinburchell Apr 30, 2025
10b1ac0
Test XML and anything else converted to text
martinburchell Apr 30, 2025
37d3257
Fix name clashes with python built-ins
martinburchell May 6, 2025
4ba8610
Ignore shadowing of python built-ins
martinburchell May 6, 2025
b0520cb
Remove check for conflicting email import
martinburchell May 6, 2025
d1b00b0
Update docs
martinburchell May 6, 2025
dc511c7
Fixups following module renaming
martinburchell May 6, 2025
be15403
extract_text.py type hints
martinburchell May 7, 2025
c9a06ce
Use html.parser for BeautifulSoup
martinburchell May 7, 2025
761e404
Support .eml text extraction
martinburchell May 9, 2025
75b9ce6
Replace deprecated BeautifulStoneSoup as advised
martinburchell May 9, 2025
e58d8fd
Default to UTF-8 when no charset in emails
martinburchell May 9, 2025
4a11b49
Default to UTF-8 when no content type header in emails
martinburchell May 10, 2025
5fb204f
Allow docx files to include document files with document[nn].xml form
martinburchell May 12, 2025
de72344
Allow blobs to be empty when extracting text
martinburchell May 12, 2025
87f7754
Fix docx filename generation to yield string, not bytes
martinburchell May 12, 2025
e17023e
Fix missing return value
martinburchell May 12, 2025
bdc9983
Workaround BeautifulSoup not handling empty byte array correctly
martinburchell May 12, 2025
499f994
Note BS4 bug report
martinburchell May 12, 2025
dc92a17
Replace illegal multibyte sequences when encoding emails
martinburchell May 13, 2025
51e9295
Handle invalid surrogate characters in HTML conversion
martinburchell May 13, 2025
fdccb76
Better names for test methods
martinburchell May 13, 2025
dba72a9
Fix test comment
martinburchell May 13, 2025
97b5a0a
Update changelog
martinburchell May 14, 2025
32cfc58
Align version of faker-file used in docs to that used in tests
martinburchell May 14, 2025
b00e82e
Revert empty filename check when extracting text
martinburchell May 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/install_test_python_packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ ${PYTHON} -m pip install xlrd
${PYTHON} -m pip install dogpile.cache==0.9.2 # Later versions incompatible
${PYTHON} -m pip install pytest
${PYTHON} -m pip install xhtml2pdf weasyprint pdfkit # For PDF tests
${PYTHON} -m pip install faker==13.3.1 faker-file'[common]'==0.18.3
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
additional_dependencies:
- flake8-builtins==2.5.0
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
hooks:
- id: yesqa
additional_dependencies:
- flake8-builtins==2.5.0
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.9.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion cardinal_pythonlib/bulk_email/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
Recipient,
SendAttempt,
)
from cardinal_pythonlib.email.sendmail import (
from cardinal_pythonlib.email_utils.sendmail import (
CONTENT_TYPE_HTML,
CONTENT_TYPE_TEXT,
is_email_valid,
Expand Down
2 changes: 1 addition & 1 deletion cardinal_pythonlib/bulk_email/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
USERNAME_MAX_LENGTH,
)
from cardinal_pythonlib.colander_utils import EMAIL_ADDRESS_MAX_LEN
from cardinal_pythonlib.email.sendmail import (
from cardinal_pythonlib.email_utils.sendmail import (
ASCII,
CONTENT_TYPE_TEXT,
is_email_valid,
Expand Down
2 changes: 1 addition & 1 deletion cardinal_pythonlib/django/fields/jsonclassfield.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def my_decoder_hook(d: Dict) -> Any:
# noinspection PyUnresolvedReferences
from django.db.models import TextField

from cardinal_pythonlib.json.serialize import json_decode, json_encode
from cardinal_pythonlib.json_utils.serialize import json_decode, json_encode


# =============================================================================
Expand Down
2 changes: 1 addition & 1 deletion cardinal_pythonlib/django/function_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from django.core.cache import cache # default cache

from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
from cardinal_pythonlib.json.serialize import json_encode
from cardinal_pythonlib.json_utils.serialize import json_encode

log = get_brace_style_log_with_null_handler(__name__)

Expand Down
10 changes: 5 additions & 5 deletions cardinal_pythonlib/django/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import logging
import os
from re import compile
import re
import sys
from typing import Optional

Expand Down Expand Up @@ -107,9 +107,9 @@ def process_exception(
Modified according to: https://djangosnippets.org/snippets/2845/
"""

# EXEMPT_URLS = [compile(settings.LOGIN_URL.lstrip('/'))]
# EXEMPT_URLS = [re.compile(settings.LOGIN_URL.lstrip('/'))]
# if hasattr(settings, 'LOGIN_EXEMPT_URLS'):
# EXEMPT_URLS += [compile(expr) for expr in settings.LOGIN_EXEMPT_URLS]
# EXEMPT_URLS += [re.compile(expr) for expr in settings.LOGIN_EXEMPT_URLS]
#
#
# class LoginRequiredMiddleware:
Expand Down Expand Up @@ -166,10 +166,10 @@ def process_exception(
# 3. RNC; composite of those patterns.
# -----------------------------------------------------------------------------

EXEMPT_URLS = [compile(settings.LOGIN_URL.lstrip("/"))]
EXEMPT_URLS = [re.compile(settings.LOGIN_URL.lstrip("/"))]
if hasattr(settings, "LOGIN_EXEMPT_URLS"):
EXEMPT_URLS += [
compile(expr.lstrip("/")) for expr in settings.LOGIN_EXEMPT_URLS
re.compile(expr.lstrip("/")) for expr in settings.LOGIN_EXEMPT_URLS
]


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# cardinal_pythonlib/email/__init__.py
# cardinal_pythonlib/email_utils/__init__.py

"""
===============================================================================
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# cardinal_pythonlib/email/mailboxpurge.py
# cardinal_pythonlib/email_utils/mailboxpurge.py

"""
Remove all binary attachments from email messages
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# cardinal_pythonlib/email/sendmail.py
# cardinal_pythonlib/email_utils/sendmail.py

"""
===============================================================================
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# cardinal_pythonlib/email/tests/sendmail_tests.py
# cardinal_pythonlib/email_utils/tests/sendmail_tests.py

"""
===============================================================================
Expand Down Expand Up @@ -28,7 +28,7 @@

import unittest

from cardinal_pythonlib.email.sendmail import is_email_valid
from cardinal_pythonlib.email_utils.sendmail import is_email_valid


class TestIsEmailValid(unittest.TestCase):
Expand Down
40 changes: 0 additions & 40 deletions cardinal_pythonlib/ensure_test_executed_correctly.py

This file was deleted.

Loading