Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Add proper typing @overload to `zimscraperlib.image.optimize_xxx` methods (#273)
- Backport wabac.js change around rewriting: detect when JS is in 'strict' mode and avoid using 'arguments' (#286)

### Changed

- Upgrade to wombat 3.10.3 (#286)

## [5.3.0] - 2025-11-14

Expand Down
2 changes: 1 addition & 1 deletion openzim.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ execute_after=[

[files.assets.actions."wombat.js"]
action="get_file"
source="https://cdn.jsdelivr.net/npm/@webrecorder/wombat@3.9.1/dist/wombat.js"
source="https://cdn.jsdelivr.net/npm/@webrecorder/wombat@3.10.3/dist/wombat.js"
target_file="wombat.js"
63 changes: 57 additions & 6 deletions src/zimscraperlib/rewriting/js.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
ZIM at `_zim_static/__wb_module_decl.js`

This code is based on https://github.com/webrecorder/wabac.js/blob/main/src/rewrite/jsrewriter.ts
Last backport of upstream changes is from Oct 12, 2025
Commit 1849552c3dbcbc065c05afac2dd80061db37b64d
Last backport of upstream changes is from wabac.js commit:
Feb 20, 2026 - 25061cb53ff113d5cff28f2f1354819f6c41034b
"""

import re
Expand Down Expand Up @@ -67,17 +67,36 @@
this_rw = "_____WB$wombat$check$this$function_____(this)"


def remove_args_if_strict(
target: str, opts: dict[str, Any] | None, offset: int, full_string: str
) -> str:
"""
Replace 'arguments' with '[]' if the code is in strict mode.
In strict mode, the arguments keyword is not allowed.
"""
opts = opts or {}

# Detect strict mode if not already set by checking for class declaration
if "isStrict" not in opts:
opts["isStrict"] = full_string[:offset].find("class ") >= 0
if opts.get("isStrict"):
return target.replace("arguments", "[]")
return target


def add_suffix_non_prop(suffix: str) -> TransformationAction:
"""
Create a rewrite_function which adds a `suffix` to the match str.
The suffix is added only if the match is not preceded by `.` or `$`.
Applies strict mode transformation to handle 'arguments' keyword.
"""

def f(m_object: re.Match[str], _opts: dict[str, Any] | None) -> str:
def f(m_object: re.Match[str], opts: dict[str, Any] | None) -> str:
offset = m_object.start()
if offset > 0 and m_object.string[offset - 1] in ".$":
full_string = m_object.string
if offset > 0 and full_string[offset - 1] in ".$":
return m_object[0]
return m_object[0] + suffix
return m_object[0] + remove_args_if_strict(suffix, opts, offset, full_string)

return f

Expand Down Expand Up @@ -145,7 +164,7 @@ def create_js_rules() -> list[TransformationRule]:
# be set.
check_loc = (
"((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || "
"{}).href = "
"{}).maybeHref = "
)

# This will replace `eval(...)`.
Expand Down Expand Up @@ -258,6 +277,30 @@ def _get_module_decl(self, local_decls: Iterable[str]) -> str:
f"""import {{ {", ".join(local_decls)} }} from "{wb_module_decl_url}";\n"""
)

def _detect_strict_mode(self, text: str) -> bool:
    """
    Tell whether the JavaScript in `text` should be treated as strict mode.

    This is a purely textual heuristic. It returns True as soon as one of
    these is found anywhere in `text`:
    - a "use strict"; directive (double- or single-quoted, with semicolon)
    - an import or export statement
    - a class declaration

    Otherwise it returns False.
    """
    # Literal directive forms, checked first with plain substring search
    directives = ('"use strict";', "'use strict';")
    if any(directive in text for directive in directives):
        return True

    # Same order as before: import/export keyword first, then class keyword
    strict_patterns = (
        r"(?:^|\s)(?:im|ex)port\s+",
        r"\bclass\s+",
    )
    return any(re.search(pattern, text) is not None for pattern in strict_patterns)

def rewrite(self, text: str | bytes, opts: dict[str, Any] | None = None) -> str:
"""
Rewrite the js code in `text`.
Expand All @@ -269,6 +312,14 @@ def rewrite(self, text: str | bytes, opts: dict[str, Any] | None = None) -> str:

is_module = opts.get("isModule", False)

# Detect and set strict mode
# Modules are always strict mode
if is_module:
opts["isStrict"] = True
elif "isStrict" not in opts:
# Detect strict mode from the code itself
opts["isStrict"] = self._detect_strict_mode(text)

rules = REWRITE_JS_RULES[:]

if is_module:
Expand Down
12 changes: 3 additions & 9 deletions tests/rewriting/test_js_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def wrap_script(text: str) -> str:
input_="location = http://example.com/",
expected="location = ((self.__WB_check_loc && "
"self.__WB_check_loc(location, argument"
"s)) || {}).href = http://example.com/",
"s)) || {}).maybeHref = http://example.com/",
),
WrappedTestContent(
input_='location => "http://example.com/"',
Expand All @@ -193,16 +193,10 @@ def wrap_script(text: str) -> str:
WrappedTestContent(
input_=" location = http://example.com/2",
expected=" location = ((self.__WB_check_loc && "
"self.__WB_check_loc(location, arguments)) || {}).href = "
"self.__WB_check_loc(location, arguments)) || {}).maybeHref = "
"http://example.com/2",
),
WrappedTestContent(input_="func(location = 0)", expected="func(location = 0)"),
WrappedTestContent(
input_=" location = http://example.com/2",
expected=" location = ((self.__WB_check_loc && "
"self.__WB_check_loc(location, arguments)) || {}).href = "
"http://example.com/2",
),
WrappedTestContent(input_="window.eval(a)", expected="window.eval(a)"),
WrappedTestContent(
input_="x = window.eval; x(a);", expected="x = window.eval; x(a);"
Expand Down Expand Up @@ -239,7 +233,7 @@ class A {}
var D = 3;

location = ((self.__WB_check_loc && """
"self.__WB_check_loc(location, arguments)) || {}).href "
"self.__WB_check_loc(location, [])) || {}).maybeHref "
"""= "http://example.com/2" """,
),
WrappedTestContent(input_=" var self ", expected=" let self "),
Expand Down
Loading