Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

import plain_spec
from plain2code_console import console
from plain2code_exceptions import UnsupportedResourceType
from plain2code_exceptions import UnsupportedBase64Content, UnsupportedResourceType
from plain2code_nodes import Plain2CodeIncludeTag, Plain2CodeLoaderMixin
from plain2code_utils import find_large_base64_blob
from plain_modules import CODEPLAIN_MEMORY_SUBFOLDER, CODEPLAIN_METADATA_FOLDER

BINARY_FILE_EXTENSIONS = [".pyc"]
Expand Down Expand Up @@ -206,6 +207,16 @@ def load_linked_resources(template_dirs: list[str], resources_list, module_name:
Please ensure that the resource exists in one of these locations, or specify the correct --template-dir if using custom templates.
""")

blob = find_large_base64_blob(content)
if blob is not None:
raise UnsupportedBase64Content(
f"Referenced resource '{file_name}' in module '{module_name}' contains a large "
f"base64-encoded blob ({len(blob)} characters), such as an embedded image. Inline "
"base64 data is not supported. Remove the data from the resource. "
"If the data should be used by the end software, "
"save the data to a separate file and include the file path in the specification without it being a reference file."
)

linked_resources[file_name] = content

return linked_resources
Expand Down
2 changes: 2 additions & 0 deletions plain2code.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
PlainSyntaxError,
RenderCancelledError,
RenderingCreditBalanceTooLow,
UnsupportedBase64Content,
UnsupportedResourceType,
)
from plain2code_logger import (
Expand Down Expand Up @@ -74,6 +75,7 @@
NetworkConnectionError,
ModuleDoesNotExistError,
UnsupportedResourceType,
UnsupportedBase64Content,
GitNotInstalledError,
SystemExit,
)
Expand Down
5 changes: 5 additions & 0 deletions plain2code_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ class PlainSyntaxError(Exception):
pass


class UnsupportedBase64Content(Exception):
    """Raised when a specification or a referenced resource contains a large inline base64 blob."""


class InternalClientError(Exception):
pass

Expand Down
18 changes: 18 additions & 0 deletions plain2code_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
import re
from typing import Optional

# Threshold, in characters: a contiguous run of base64 characters of at least this length
# is reported as a blob by find_large_base64_blob().
MAX_BASE64_BLOB_LENGTH = 8192

# Matches a long contiguous base64 / base64url run, optionally preceded by a data: URI header.
# The negative lookbehind pins the run to its first character. Without it, a search is
# retried from every offset inside a run that is shorter than the threshold, rescanning the
# remainder of the run each time (quadratic in the run length). The leftmost match is
# unchanged, because a match can only ever begin at a data: header or at the start of a run.
_BASE64_BLOB_PATTERN = re.compile(
    r"(?:data:[\w.+-]+/[\w.+-]+;base64,)?"
    r"(?<![A-Za-z0-9+/_-])[A-Za-z0-9+/_-]{%d,}={0,2}" % MAX_BASE64_BLOB_LENGTH
)


def find_large_base64_blob(text: str) -> Optional[str]:
    """Return the first contiguous base64 blob at or above the threshold, or None.

    Args:
        text: The text to scan.

    Returns:
        The matched substring, including a leading ``data:<type>/<subtype>;base64,`` header
        and up to two trailing ``=`` padding characters when they are present, or ``None``
        when no run of at least ``MAX_BASE64_BLOB_LENGTH`` base64 characters exists.
    """
    match = _BASE64_BLOB_PATTERN.search(text)
    return match.group(0) if match else None


def format_duration_hms(total_seconds: int) -> str:
"""Format a duration in seconds as hours, minutes, and seconds (e.g. ``1h 2m 3.45s``, ``45.67s``)."""
if total_seconds < 0:
Expand Down
13 changes: 12 additions & 1 deletion plain_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
import concept_utils
import file_utils
import plain_spec
from plain2code_exceptions import ModuleDoesNotExistError, PlainSyntaxError
from plain2code_exceptions import ModuleDoesNotExistError, PlainSyntaxError, UnsupportedBase64Content
from plain2code_nodes import Plain2CodeIncludeTag, Plain2CodeLoaderMixin
from plain2code_utils import find_large_base64_blob

RESOURCE_MARKER = "[resource]"

Expand Down Expand Up @@ -536,6 +537,16 @@ def read_module_plain_source(module_name: str, template_dirs: list[str]) -> str:
plain_source_text = file_utils.open_from(template_dirs, module_name + PLAIN_SOURCE_FILE_EXTENSION)
if plain_source_text is None:
raise ModuleDoesNotExistError(f"Module does not exist ({module_name}).")

# Reject specifications that inline a large base64 blob before they are processed further.
blob = find_large_base64_blob(plain_source_text)
if blob is not None:
    # Adjacent string literals are joined verbatim, so every fragment carries its own
    # trailing space to keep the sentences separated in the rendered message.
    raise UnsupportedBase64Content(
        f"Module '{module_name}' contains a base64-encoded blob ({len(blob)} characters) "
        "inlined in the specification. This is not supported. "
        "Remove the base64 data from the .plain file or if necessary, "
        "include the binary file path in the specification."
    )

return plain_source_text


Expand Down
1 change: 1 addition & 0 deletions tests/data/sample_base64_image.txt

Large diffs are not rendered by default.

42 changes: 41 additions & 1 deletion tests/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import pytest

from file_utils import load_linked_resources, store_response_files
from plain2code_exceptions import UnsupportedResourceType
from plain2code_exceptions import UnsupportedBase64Content, UnsupportedResourceType
from plain2code_utils import MAX_BASE64_BLOB_LENGTH


@pytest.fixture
Expand Down Expand Up @@ -41,6 +42,45 @@ def test_load_linked_resources_missing_file_raises_file_not_found(template_dir):
load_linked_resources([template_dir], [{"text": "Missing", "target": "missing.md"}], "my_thing")


def test_load_linked_resources_base64_blob_raises(template_dir):
    """A resource with a run longer than the threshold is rejected, naming file and module."""
    oversized_run = "A" * (MAX_BASE64_BLOB_LENGTH + 100)
    resource_path = os.path.join(template_dir, "request.txt")
    with open(resource_path, "w") as handle:
        handle.write("curl -d 'selfie_image=" + oversized_run + "'")

    resources = [{"text": "Request", "target": "request.txt"}]
    with pytest.raises(UnsupportedBase64Content) as raised:
        load_linked_resources([template_dir], resources, "my_thing")

    message = str(raised.value)
    assert "request.txt" in message
    assert "my_thing" in message


def test_load_linked_resources_small_base64_allowed(template_dir):
    """A run shorter than the threshold is accepted and the resource is loaded."""
    short_run = "A" * (MAX_BASE64_BLOB_LENGTH - 100)
    resource_path = os.path.join(template_dir, "token.txt")
    with open(resource_path, "w") as handle:
        handle.write("token=" + short_run)

    resources = [{"text": "Token", "target": "token.txt"}]
    loaded = load_linked_resources([template_dir], resources, "my_thing")

    assert "token.txt" in loaded


def test_load_linked_resources_real_base64_image_raises(template_dir):
    """A real inlined base64 image is rejected and the error reports the blob length."""
    # Real base64-encoded JPEG that made Gemini 3 fail, inlined into a curl example resource.
    sample_path = os.path.join(os.path.dirname(__file__), "data", "sample_base64_image.txt")
    with open(sample_path, encoding="utf-8") as f:
        # Strip surrounding whitespace (e.g. a trailing newline added by an editor) so that
        # len(blob) is the length of the base64 run itself, which is what the error reports.
        blob = f.read().strip()

    file_path = os.path.join(template_dir, "face_match_request.txt")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(f"curl -d 'selfie_image={blob}' https://example.com/face-match")

    with pytest.raises(UnsupportedBase64Content) as exc_info:
        load_linked_resources([template_dir], [{"text": "Request", "target": "face_match_request.txt"}], "face_match")

    message = str(exc_info.value)
    assert "face_match_request.txt" in message
    assert str(len(blob)) in message


def test_store_response_files_writes_unicode_as_utf8(template_dir):
# Content with a non-cp1252 character (📍 U+1F4CD) must be written as UTF-8
# regardless of the platform's default text encoding (e.g. cp1252 on Windows).
Expand Down
Loading