From 650666356466ba877d0d53ddf7ed3f12ce41e538 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Thu, 12 Mar 2026 22:31:28 +0100 Subject: [PATCH 1/7] Breaking: Adapting the project structure to the coreutils structure. This allows more usage of the coreutils macros and makes it easier for developers working in both projects. Each utility can now be opened standalone, which also makes it easier to test and benchmark. Since it is a restructure, the commit touches a lot of files. All files were kept as intact as possible; mostly the import locations were changed. Changes: * a workspace was introduced * main.rs is now /bin/diffutils.rs * uumain is now used as start of utility * all utilities have a separate folder under /uu * Exit codes are now i32 and push to * coreutils only has License files for MIT * args are now read by using uucore::args_os() * context_diff and unified_diff now use TempDir * benches are separated for each utility and have centralized support functions Issues: * The uucore::bin! macro needs to be implemented for all main functions. Currently missing internationalization. * Tests must be run with "cargo test --workspace". * Unclear if features can be defined globally. 
--- Cargo.lock | 370 ++++++++++++++- Cargo.toml | 174 +++++-- LICENSE | 18 + LICENSE-MIT | 26 -- benches/bench-diffutils.rs | 377 --------------- fuzz/Cargo.lock | 553 ++++++++++++++++++++++- fuzz/Cargo.toml | 2 + fuzz/fuzz_targets/fuzz_cmp.rs | 10 +- fuzz/fuzz_targets/fuzz_cmp_args.rs | 3 +- fuzz/fuzz_targets/fuzz_ed.rs | 5 +- fuzz/fuzz_targets/fuzz_normal.rs | 4 +- fuzz/fuzz_targets/fuzz_patch.rs | 6 +- fuzz/fuzz_targets/fuzz_side.rs | 6 +- src/{main.rs => bin/diffutils.rs} | 29 +- src/lib.rs | 22 +- src/uu/cmp/Cargo.toml | 48 ++ src/uu/cmp/LICENSE | 1 + src/uu/cmp/benches/cmp_bench.rs | 141 ++++++ src/{ => uu/cmp/src}/cmp.rs | 62 ++- src/uu/cmp/src/main.rs | 19 + src/uu/diff/Cargo.toml | 43 ++ src/uu/diff/LICENSE | 1 + src/uu/diff/benches/diff_bench.rs | 119 +++++ src/{ => uu/diff/src}/context_diff.rs | 55 ++- src/{ => uu/diff/src}/diff.rs | 70 ++- src/{ => uu/diff/src}/ed_diff.rs | 16 +- src/uu/diff/src/main.rs | 19 + src/{ => uu/diff/src}/normal_diff.rs | 18 +- src/{ => uu/diff/src}/params.rs | 198 ++++---- src/{ => uu/diff/src}/side_diff.rs | 6 +- src/{ => uu/diff/src}/unified_diff.rs | 94 ++-- src/uudiff/Cargo.toml | 42 ++ src/uudiff/LICENSE | 1 + src/uudiff/src/lib/features.rs | 2 + src/uudiff/src/lib/features/benchmark.rs | 230 ++++++++++ src/uudiff/src/lib/lib.rs | 10 + src/{ => uudiff/src/lib}/macros.rs | 0 src/uudiff/src/lib/mods.rs | 2 + src/{ => uudiff/src/lib/mods}/utils.rs | 26 +- tests/integration.rs | 4 +- 40 files changed, 2110 insertions(+), 722 deletions(-) create mode 100644 LICENSE delete mode 100644 LICENSE-MIT delete mode 100644 benches/bench-diffutils.rs rename src/{main.rs => bin/diffutils.rs} (79%) create mode 100644 src/uu/cmp/Cargo.toml create mode 120000 src/uu/cmp/LICENSE create mode 100644 src/uu/cmp/benches/cmp_bench.rs rename src/{ => uu/cmp/src}/cmp.rs (96%) create mode 100644 src/uu/cmp/src/main.rs create mode 100644 src/uu/diff/Cargo.toml create mode 120000 src/uu/diff/LICENSE create mode 100644 
src/uu/diff/benches/diff_bench.rs rename src/{ => uu/diff/src}/context_diff.rs (94%) rename src/{ => uu/diff/src}/diff.rs (60%) rename src/{ => uu/diff/src}/ed_diff.rs (98%) create mode 100644 src/uu/diff/src/main.rs rename src/{ => uu/diff/src}/normal_diff.rs (98%) rename src/{ => uu/diff/src}/params.rs (88%) rename src/{ => uu/diff/src}/side_diff.rs (99%) rename src/{ => uu/diff/src}/unified_diff.rs (92%) create mode 100644 src/uudiff/Cargo.toml create mode 120000 src/uudiff/LICENSE create mode 100644 src/uudiff/src/lib/features.rs create mode 100644 src/uudiff/src/lib/features/benchmark.rs create mode 100644 src/uudiff/src/lib/lib.rs rename src/{ => uudiff/src/lib}/macros.rs (100%) create mode 100644 src/uudiff/src/lib/mods.rs rename src/{ => uudiff/src/lib/mods}/utils.rs (90%) diff --git a/Cargo.lock b/Cargo.lock index c28f80b..20ffc7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,12 +20,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.1", +] + +[[package]] +name = 
"anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.1", +] + [[package]] name = "anyhow" version = "1.0.102" @@ -43,9 +87,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514" +checksum = "9a686bbee5efb88a82df0621b236e74d925f470e5445d3220a5648b892ec99c9" dependencies = [ "anstyle", "bstr", @@ -142,8 +186,10 @@ version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", "terminal_size", ] @@ -213,6 +259,12 @@ dependencies = [ "regex-lite", ] +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "colored" version = "2.2.0" @@ -258,20 +310,27 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "diffutils" -version = "0.5.0" +version = "0.5.1" dependencies = [ "assert_cmd", - "chrono", - "codspeed-divan-compat", - "diff", - "itoa", "predicates", - "pretty_assertions", - "rand", "regex", - "same-file", "tempfile", - "unicode-width", + "uu_cmp", + "uu_diff", + "uucore", + "uudiff", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -322,6 +381,51 @@ dependencies = [ "num-traits", ] +[[package]] 
+name = "fluent" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8137a6d5a2c50d6b0ebfcb9aaa91a28154e0a70605f112d30cb0cd4a78670477" +dependencies = [ + "fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01203cb8918f5711e73891b347816d932046f95f54207710bda99beaeb423bf4" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eebbe59450baee8282d71676f3bfed5689aeab00b27545e83e5f14b1195e8b0" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0d287c53ffd184d04d8677f590f4ac5379785529e5e08b1c8083acdd5c198" +dependencies = [ + "memchr", + "thiserror", +] + [[package]] name = "foldhash" version = "0.1.5" @@ -421,6 +525,31 @@ dependencies = [ "serde_core", ] +[[package]] +name = "intl-memoizer" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310da2e345f5eb861e7a07ee182262e94975051db9e4223e909ba90f392f163f" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078ea7b7c29a2b4df841a7f6ac8775ff6074020c6776d48491ce2268e068f972" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -520,6 +649,21 @@ version = 
"1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" +dependencies = [ + "unicode-width", +] + [[package]] name = "predicates" version = "3.1.4" @@ -590,9 +734,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -655,6 +799,12 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustix" version = "0.38.44" @@ -690,6 +840,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + [[package]] name = "semver" version = "1.0.27" @@ -739,6 +895,12 @@ dependencies = [ "zmij", ] +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + 
[[package]] name = "statrs" version = "0.18.0" @@ -749,6 +911,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.117" @@ -762,9 +930,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.26.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", "getrandom 0.4.2", @@ -789,6 +957,37 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] + [[package]] name = "toml_datetime" version = "1.0.0+spec-1.1.0" @@ -819,6 +1018,33 @@ dependencies = [ "winnow", ] +[[package]] +name = "type-map" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" +dependencies = [ 
+ "rustc-hash", +] + +[[package]] +name = "unic-langid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ba52c9b05311f4f6e62d5d9d46f094bd6e84cb8df7b3ef952748d752a7d05" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce1bf08044d4b7a94028c93786f8566047edc11110595914de93362559bc658" +dependencies = [ + "tinystr", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -837,6 +1063,86 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uu_cmp" +version = "0.5.1" +dependencies = [ + "codspeed-divan-compat", + "diff", + "itoa", + "pretty_assertions", + "regex", + "same-file", + "tempfile", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_diff" +version = "0.5.1" +dependencies = [ + "codspeed-divan-compat", + "diff", + "pretty_assertions", + "rand", + "regex", + "same-file", + "tempfile", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uucore" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8038531f506a34ab4612b93f97d5f40759768cd34a83fd2af041b84fcbde474" +dependencies = [ + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix", + "os_display", + "rustc-hash", + "thiserror", + "unic-langid", + "uucore_procs", + "wild", +] + +[[package]] +name = "uucore_procs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f63e2d5083ff0983193a33e2d57fd271c7e3e3e7df8e46e8f471865647b2cbc" +dependencies = [ + 
"proc-macro2", + "quote", +] + +[[package]] +name = "uudiff" +version = "0.5.1" +dependencies = [ + "chrono", + "pretty_assertions", + "rand", + "same-file", + "tempfile", + "unicode-width", + "uucore", +] + [[package]] name = "wait-timeout" version = "0.2.0" @@ -958,6 +1264,15 @@ dependencies = [ "semver", ] +[[package]] +name = "wild" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" +dependencies = [ + "glob", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1022,6 +1337,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -1246,6 +1570,22 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "serde", + "zerofrom", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 1673839..a50b2bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,52 +1,166 @@ +# diffutils (uutils) +# * see the repository LICENSE, README, and CONTRIBUTING files for more information + +# spell-checker:ignore (libs) ahash bigdecimal datetime serde gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl unnested 
logind cfgs interner + [package] name = "diffutils" -version = "0.5.0" -edition = "2021" -description = "A CLI app for generating diff files" -license = "MIT OR Apache-2.0" +description = "diffutils ~ GNU diffutils (updated); implemented as universal (cross-platform) utils, written in Rust" +default-run = "diffutils" repository = "https://github.com/uutils/diffutils" - -[lib] -name = "diffutilslib" -path = "src/lib.rs" +edition.workspace = true +rust-version.workspace = true +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true [[bin]] name = "diffutils" -path = "src/main.rs" +path = "src/bin/diffutils.rs" -[dependencies] +[features] +# default = ["feat_common_core"] +## OS feature shortcodes +# macos = ["feat_os_macos"] +# unix = ["feat_os_unix"] +# windows = ["feat_os_windows"] +# +## (primary platforms) feature sets +# "feat_os_macos" == set of utilities which can be built/run on the MacOS platform +feat_os_macos = [ + "feat_os_unix", ## == a modern/usual *nix platform + # +# "feat_require_unix_hostid", +] +# "feat_os_unix" == set of utilities which can be built/run on modern/usual *nix platforms. +feat_os_unix = [ +# "feat_Tier1", +# # +# "feat_require_unix", +# "feat_require_unix_hostid", +# "feat_require_unix_utmpx", +] +# "feat_os_windows" == set of utilities which can be built/run on modern/usual windows platforms +feat_os_windows = [ +# "feat_Tier1", ## == "feat_os_windows_legacy" + "hostname" +] +# +# TODO How are features centralized in this workspace file? +# # instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +# feat_allow_case_insensitive_number_units = [] +# # Enables a check on options defined in NOT_YET_IMPLEMENTED. +# # If on the parser will return an error message in these cases. +# # This is preferable when running the util as unsupported options +# # are pointed out to the user, but can make tests fail. 
+# feat_check_not_yet_implemented = [] + +[workspace] +resolver = "3" +members = [ + ".", + "src/uu/*", +# "src/uu/stdbuf/src/libstdbuf", + "src/uudiff", +# "src/uucore_procs", +# "tests/uutests", + # "fuzz", # TODO +] + +[workspace.package] +authors = ["uutils developers"] +categories = ["command-line-utilities"] +edition = "2021" +rust-version = "1.88.0" +homepage = "https://github.com/uutils/diffutils" +description = "A CLI app for generating diff files" +keywords = ["diffutils", "uutils", "cross-platform", "cli", "utility"] +license = "MIT" +# license = "MIT OR Apache-2.0" +readme = "README.package.md" +version = "0.5.1" + + +[workspace.dependencies] +assert_cmd = "2.2.0" +const_format = "0.2.35" chrono = "0.4.38" -diff = "0.1.13" +diff_crate = { package = "diff", version = "0.1.13" } +divan = { version = "4.3.0", package = "codspeed-divan-compat" } itoa = "1.0.11" +predicates = "3.1.0" +pretty_assertions = "1.4.0" +rand = "0.10.0" regex = "1.10.4" same-file = "1.0.6" +tempfile = "3.27.0" unicode-width = "0.2.0" +uucore = "0.7.0" +uudiff = { package = "uudiff", path = "src/uudiff" } +diff = { package = "uu_diff", path = "src/uu/diff" } +# sdiff = {package = "uu_sdiff", path = "src/uu/sdiff" } + + +[dependencies] +diff.workspace = true +# sdiff.workspace = true +uucore.workspace = true +uudiff.workspace = true + +# * uutils +# uu_test = { optional = true, package = "uu_test", path = "src/uu/test" } +# +cmp = { package = "uu_cmp", path = "src/uu/cmp" } +# diff3 = { package = "uu_diff3", path = "src/uu/diff3" } +# sdiff = {optional = true, package = "uu_sdiff", path = "src/uu/sdiff" } [dev-dependencies] -assert_cmd = "2.0.14" -divan = { version = "4.3.0", package = "codspeed-divan-compat" } -pretty_assertions = "1.4.0" -predicates = "3.1.0" -rand = "0.10.0" -tempfile = "3.26.0" +assert_cmd.workspace = true +predicates.workspace = true +regex.workspace = true +tempfile.workspace = true [profile.release] -lto = "thin" +lto = true +panic = "abort" codegen-units = 1 + 
+# A release-like profile that is as small as possible. +[profile.release-small] +inherits = "release" +opt-level = "z" +strip = true + +[profile.release-fast] +inherits = "release" panic = "abort" -# alias profile for 'dist' -[profile.dist] +# A release-like profile with debug info for profiling. +# See https://github.com/mstange/samply . +[profile.profiling] inherits = "release" +panic = "unwind" +debug = true -[[bench]] -name = "bench_diffutils" -path = "benches/bench-diffutils.rs" -harness = false +# +# # The profile that 'dist' will build with +# [profile.dist] +# inherits = "release" +# lto = "thin" + +# [lints] +# workspace = true +# +# # This is the linting configuration for all crates. +# # In order to use these, all crates have `[lints] workspace = true` section. +# [workspace.lints.rust] +# # Allow "fuzzing" as a "cfg" condition name and "cygwin" as a value for "target_os" +# # https://doc.rust-lang.org/nightly/rustc/check-cfg/cargo-specifics.html +# unexpected_cfgs = { level = "warn", check-cfg = [ +# 'cfg(fuzzing)', +# 'cfg(target_os, values("cygwin"))', +# ] } +# unused_qualifications = "warn" -[features] -# default = ["feat_bench_not_diff"] -# Turn bench for diffutils cmp off -feat_bench_not_cmp = [] -# Turn bench for diffutils diff off -feat_bench_not_diff = [] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..21bd444 --- /dev/null +++ b/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) uutils developers + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial 
portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/LICENSE-MIT b/LICENSE-MIT deleted file mode 100644 index ba40932..0000000 --- a/LICENSE-MIT +++ /dev/null @@ -1,26 +0,0 @@ -Copyright (c) Michael Howell -Copyright (c) uutils developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/benches/bench-diffutils.rs b/benches/bench-diffutils.rs deleted file mode 100644 index e506b3f..0000000 --- a/benches/bench-diffutils.rs +++ /dev/null @@ -1,377 +0,0 @@ -// This file is part of the uutils diffutils package. 
-// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. - -//! Benches for all utils in diffutils. -//! -//! There is a file generator included to create files of different sizes for comparison. \ -//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ -//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. - -/// Generate test files with these sizes in KB. -const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; -// const FILE_SIZE_KILO_BYTES: [u64; 3] = [100, 1 * MB, 5 * MB]; -// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files -const TEMP_DIR: &str = ""; -const NUM_DIFF: u64 = 4; -// just for FILE_SIZE_KILO_BYTES -const MB: u64 = 1_000; -const CHANGE_CHAR: u8 = b'#'; - -#[cfg(not(feature = "feat_bench_not_cmp"))] -mod diffutils_cmp { - use std::hint::black_box; - - use diffutilslib::cmp; - use divan::Bencher; - - use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmp_compare_files_equal(bencher: Bencher, kb: u64) { - let (from, to) = get_context().get_test_files_equal(kb); - let cmd = format!("cmp {from} {to}"); - let opts = str_to_options(&cmd).into_iter().peekable(); - let params = cmp::parse_params(opts).unwrap(); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| params.clone()) - .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); - } - - // bench the actual compare; cmp exits on first difference - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmp_compare_files_different(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_different(bytes); - let cmd = format!("cmp {from} {to} -s"); - let opts = str_to_options(&cmd).into_iter().peekable(); - let params = cmp::parse_params(opts).unwrap(); 
- - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| params.clone()) - .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); - } - - // bench original GNU cmp - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_cmp_gnu_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("{from} {to}"); - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("cmp", cmd_args)); - } - - // bench the compiled release version - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_cmp_release_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("cmp {from} {to}"); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); - } -} - -#[cfg(not(feature = "feat_bench_not_diff"))] -mod diffutils_diff { - // use std::hint::black_box; - - use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - // use diffutilslib::params; - use divan::Bencher; - - // bench the actual compare - // TODO diff does not have a diff function - // #[divan::bench(args = [100_000,10_000])] - // fn diff_compare_files(bencher: Bencher, bytes: u64) { - // let (from, to) = gen_testfiles(lines, 0, "id"); - // let cmd = format!("cmp {from} {to}"); - // let opts = str_to_options(&cmd).into_iter().peekable(); - // let params = params::parse_params(opts).unwrap(); - // - // bencher - // // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - // .with_inputs(|| params.clone()) - // .bench_refs(|params| diff::diff(¶ms).unwrap()); - // } - - // bench original GNU diff - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_diff_gnu_equal(bencher: Bencher, bytes: u64) { - let 
(from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("{from} {to}"); - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("diff", cmd_args)); - } - - // bench the compiled release version - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_diff_release_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("diff {from} {to}"); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); - } -} - -mod parser { - use std::hint::black_box; - - use diffutilslib::{cmp, params}; - use divan::Bencher; - - use crate::prepare::str_to_options; - - // bench the time it takes to parse the command line arguments - #[divan::bench] - fn cmp_parser(bencher: Bencher) { - let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - let args = str_to_options(&cmd).into_iter().peekable(); - bencher - .with_inputs(|| args.clone()) - .bench_values(|data| black_box(cmp::parse_params(data))); - } - - // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) - // #[divan::bench] - // fn cmp_parser_no_prepare() { - // let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - // let args = str_to_options(&cmd).into_iter().peekable(); - // let _ = cmp::parse_params(args); - // } - - // bench the time it takes to parse the command line arguments - #[divan::bench] - fn diff_parser(bencher: Bencher) { - let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; - let args = str_to_options(&cmd).into_iter().peekable(); - bencher - .with_inputs(|| args.clone()) - .bench_values(|data| black_box(params::parse_params(data))); - } -} - -mod prepare { - use std::{ - 
ffi::OsString, - fs::{self, File}, - io::{BufWriter, Write}, - path::Path, - sync::OnceLock, - }; - - use rand::RngExt; - use tempfile::TempDir; - - use crate::{CHANGE_CHAR, FILE_SIZE_KILO_BYTES, NUM_DIFF, TEMP_DIR}; - - // file lines and .txt will be added - const FROM_FILE: &str = "from_file"; - const TO_FILE: &str = "to_file"; - const LINE_LENGTH: usize = 60; - - /// Contains test data (file names) which only needs to be created once. - #[derive(Debug, Default)] - pub struct BenchContext { - pub tmp_dir: Option, - pub dir: String, - pub files_equal: Vec<(String, String)>, - pub files_different: Vec<(String, String)>, - } - - impl BenchContext { - pub fn get_path(&self) -> &Path { - match &self.tmp_dir { - Some(tmp) => tmp.path(), - None => Path::new(&self.dir), - } - } - - pub fn get_test_files_equal(&self, kb: u64) -> &(String, String) { - let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); - &self.files_equal[p] - } - - #[allow(unused)] - pub fn get_test_files_different(&self, kb: u64) -> &(String, String) { - let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); - &self.files_different[p] - } - } - - // Since each bench function is separate in Divan it is more difficult to dynamically create test data. - // This keeps the TempDir alive until the program exits and generates the files only once. - static SHARED_CONTEXT: OnceLock = OnceLock::new(); - /// Creates the test files once and provides them to all tests. 
- pub fn get_context() -> &'static BenchContext { - SHARED_CONTEXT.get_or_init(|| { - let mut ctx = BenchContext::default(); - if TEMP_DIR.is_empty() { - let tmp_dir = TempDir::new().expect("Failed to create temp dir"); - ctx.tmp_dir = Some(tmp_dir); - } else { - // uses current directory, the generated files are kept - let path = Path::new(TEMP_DIR); - if !path.exists() { - fs::create_dir_all(path).expect("Path {path} could not be created"); - } - ctx.dir = TEMP_DIR.to_string(); - }; - - // generate test bytes - for kb in FILE_SIZE_KILO_BYTES { - let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") - .expect("generate_test_files failed"); - ctx.files_equal.push(f); - let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") - .expect("generate_test_files failed"); - ctx.files_different.push(f); - } - - ctx - }) - } - - pub fn str_to_options(opt: &str) -> Vec { - let s: Vec = opt - .split(" ") - .into_iter() - .filter(|s| !s.is_empty()) - .map(|s| OsString::from(s)) - .collect(); - - s - } - - /// Generates two test files for comparison with size. - /// - /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. - /// If num_differences is set, '#' will be inserted between the first two words of a line, - /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. 
- fn generate_test_files_bytes( - dir: &Path, - bytes: u64, - num_differences: u64, - id: &str, - ) -> std::io::Result<(String, String)> { - let id = if id.is_empty() { - "".to_string() - } else { - format!("{id}_") - }; - let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); - let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); - let from_path = dir.join(f1); - let to_path = dir.join(f2); - - generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; - - Ok(( - from_path.to_string_lossy().to_string(), - to_path.to_string_lossy().to_string(), - )) - } - - fn generate_file_bytes( - from_name: &Path, - to_name: &Path, - bytes: u64, - num_differences: u64, - ) -> std::io::Result<()> { - let file_from = File::create(from_name)?; - let file_to = File::create(to_name)?; - // for int division, lines will be smaller than requested bytes - let n_lines = bytes / LINE_LENGTH as u64; - let change_every_n_lines = if num_differences == 0 { - 0 - } else { - let c = n_lines / num_differences; - if c == 0 { - 1 - } else { - c - } - }; - // Use a larger 128KB buffer for massive files - let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); - let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); - let mut rng = rand::rng(); - - // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes - let mut line_buffer = [b' '; 60]; - line_buffer[59] = b'\n'; // Set the newline once at the end - - for i in (0..n_lines).rev() { - // Fill only the letter positions, skipping spaces and the newline - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - - // Write the raw bytes directly to both files - writer_from.write_all(&line_buffer)?; - // make changes in the file - if num_differences == 0 { - writer_to.write_all(&line_buffer)?; - } else { - if i % change_every_n_lines == 0 && n_lines - i > 2 { - line_buffer[5] = CHANGE_CHAR; - } - 
writer_to.write_all(&line_buffer)?; - line_buffer[5] = b' '; - } - } - - // create last line - let missing = (bytes - n_lines as u64 * LINE_LENGTH as u64) as usize; - if missing > 0 { - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - line_buffer[missing - 1] = b'\n'; - writer_from.write_all(&line_buffer[0..missing])?; - writer_to.write_all(&line_buffer[0..missing])?; - } - - writer_from.flush()?; - writer_to.flush()?; - - Ok(()) - } -} - -mod binary { - use std::process::Command; - - use crate::prepare::str_to_options; - - pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { - let args = str_to_options(cmd_args); - Command::new(program) - .args(args) - .status() - .expect("Failed to execute binary") - } -} - -fn main() { - // Run registered benchmarks. - divan::main(); -} diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 545c6ec..3da1875 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -20,6 +20,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "arbitrary" version = "1.4.2" @@ -32,6 +82,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + [[package]] name = "bumpalo" version = "3.19.1" @@ -56,6 +112,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.42" @@ -70,25 +132,39 @@ dependencies = [ ] [[package]] -name = "const_format" -version = "0.2.35" +name = "clap" +version = "4.5.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +checksum = "52fa72306bb30daf11bc97773431628e5b4916e97aaa74b7d3f625d4d495da02" dependencies = [ - "const_format_proc_macros", + "clap_builder", ] [[package]] -name = "const_format_proc_macros" -version = "0.2.34" +name = "clap_builder" +version = "4.5.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +checksum = "2071365c5c56eae7d77414029dde2f4f4ba151cf68d5a3261c9a40de428ace93" dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", + "anstream", + "anstyle", + "clap_lex", + "strsim", + "terminal_size", ] +[[package]] +name = "clap_lex" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e78417baa3b3114dc0e95e7357389a249c4da97c3c2b540700079db6171bfd7" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -103,15 +179,33 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "diffutils" -version = "0.5.0" +version = "0.5.1" dependencies = [ - "chrono", - "const_format", - "diff", - "itoa", - "regex", - "same-file", - "unicode-width", + "uu_cmp", + "uu_diff", + "uucore", + "uudiff", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", ] [[package]] @@ -120,6 +214,51 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +[[package]] +name = "fluent" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8137a6d5a2c50d6b0ebfcb9aaa91a28154e0a70605f112d30cb0cd4a78670477" +dependencies = [ + 
"fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01203cb8918f5711e73891b347816d932046f95f54207710bda99beaeb423bf4" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eebbe59450baee8282d71676f3bfed5689aeab00b27545e83e5f14b1195e8b0" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0d287c53ffd184d04d8677f590f4ac5379785529e5e08b1c8083acdd5c198" +dependencies = [ + "memchr", + "thiserror", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -132,6 +271,12 @@ dependencies = [ "wasip2", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -156,6 +301,31 @@ dependencies = [ "cc", ] +[[package]] +name = "intl-memoizer" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310da2e345f5eb861e7a07ee182262e94975051db9e4223e909ba90f392f163f" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078ea7b7c29a2b4df841a7f6ac8775ff6074020c6776d48491ce2268e068f972" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itoa" version = "1.0.17" @@ -198,6 +368,12 @@ dependencies = [ "cc", ] +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "log" version = "0.4.29" @@ -210,6 +386,18 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -225,6 +413,21 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" +dependencies = [ + "unicode-width", +] + [[package]] name = "proc-macro2" version = "1.0.104" @@ -278,6 +481,25 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = 
"rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -293,12 +515,60 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.112" @@ -310,6 +580,74 @@ dependencies 
= [ "unicode-ident", ] +[[package]] +name = "terminal_size" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +dependencies = [ + "rustix", + "windows-sys 0.60.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] + +[[package]] +name = "type-map" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" +dependencies = [ + "rustc-hash", +] + +[[package]] +name = "unic-langid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ba52c9b05311f4f6e62d5d9d46f094bd6e84cb8df7b3ef952748d752a7d05" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce1bf08044d4b7a94028c93786f8566047edc11110595914de93362559bc658" +dependencies = [ + "tinystr", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -322,18 +660,84 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - [[package]] name = "unified-diff-fuzz" version = "0.0.0" dependencies = [ "diffutils", "libfuzzer-sys", + "uu_cmp", + "uu_diff", +] + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uu_cmp" +version = "0.5.1" +dependencies = [ + "diff", + "itoa", + "regex", + "same-file", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_diff" +version = "0.5.1" +dependencies = [ + "diff", + "regex", + "same-file", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uucore" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8038531f506a34ab4612b93f97d5f40759768cd34a83fd2af041b84fcbde474" +dependencies = [ + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix", + "os_display", + "rustc-hash", + "thiserror", + "unic-langid", + "uucore_procs", + "wild", +] + +[[package]] +name = "uucore_procs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f63e2d5083ff0983193a33e2d57fd271c7e3e3e7df8e46e8f471865647b2cbc" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "uudiff" +version = "0.5.1" +dependencies = [ + "chrono", + "same-file", + "unicode-width", + "uucore", ] [[package]] @@ -390,13 +794,22 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wild" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" +dependencies = [ + "glob", +] + [[package]] name = "winapi-util" 
version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -458,6 +871,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -467,8 +889,89 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "serde", + "zerofrom", +] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 39efd70..be9b9d2 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,6 +11,8 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4.7" diffutils = { path = "../" } +uu_cmp = { path = "../src/uu/cmp" } +uu_diff = { path = "../src/uu/diff" } # Prevent this from interfering with workspaces [workspace] diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs index e9d0e4c..1da7366 100644 --- a/fuzz/fuzz_targets/fuzz_cmp.rs +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -1,12 +1,14 @@ #![no_main] #[macro_use] 
extern crate libfuzzer_sys; -use diffutilslib::cmp::{self, Cmp}; +// use diffutilslib::cmp::{self, Cmp}; use std::ffi::OsString; use std::fs::File; use std::io::Write; +use uu_cmp::Cmp; + fn os(s: &str) -> OsString { OsString::from(s) } @@ -14,7 +16,7 @@ fn os(s: &str) -> OsString { fuzz_target!(|x: (Vec<u8>, Vec<u8>)| { let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"] .into_iter() - .map(|s| os(s)) + .map(os) .peekable(); let (from, to) = x; @@ -30,8 +32,8 @@ fuzz_target!(|x: (Vec<u8>, Vec<u8>)| { .unwrap(); let params = - cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); - let ret = cmp::cmp(&params); + uu_cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = uu_cmp::cmp_compare(&params); if from == to && !matches!(ret, Ok(Cmp::Equal)) { panic!( "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs index 579cf34..5522705 100644 --- a/fuzz/fuzz_targets/fuzz_cmp_args.rs +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -1,7 +1,6 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::cmp; use libfuzzer_sys::Corpus; use std::ffi::OsString; @@ -18,6 +17,6 @@ fuzz_target!(|x: Vec<OsString>| -> Corpus { return Corpus::Reject; } } - let _ = cmp::parse_params(x.into_iter().peekable()); + let _ = uu_cmp::parse_params(x.into_iter().peekable()); Corpus::Keep }); diff --git a/fuzz/fuzz_targets/fuzz_ed.rs b/fuzz/fuzz_targets/fuzz_ed.rs index 7c38fda..18359a1 100644 --- a/fuzz/fuzz_targets/fuzz_ed.rs +++ b/fuzz/fuzz_targets/fuzz_ed.rs @@ -1,12 +1,11 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::ed_diff; -use diffutilslib::ed_diff::DiffError; -use diffutilslib::params::Params; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::ed_diff::{self, DiffError}; +use uu_diff::params::Params; fn diff_w(expected: &[u8], actual: 
&[u8], filename: &str) -> Result<Vec<u8>, DiffError> { let mut output = ed_diff::diff(expected, actual, &Params::default())?; diff --git a/fuzz/fuzz_targets/fuzz_normal.rs b/fuzz/fuzz_targets/fuzz_normal.rs index 6b1e6b9..34e0512 100644 --- a/fuzz/fuzz_targets/fuzz_normal.rs +++ b/fuzz/fuzz_targets/fuzz_normal.rs @@ -1,8 +1,8 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::normal_diff; -use diffutilslib::params::Params; +use uu_diff::normal_diff; +use uu_diff::params::Params; use std::fs::{self, File}; use std::io::Write; diff --git a/fuzz/fuzz_targets/fuzz_patch.rs b/fuzz/fuzz_targets/fuzz_patch.rs index 4dea4b5..5f256d6 100644 --- a/fuzz/fuzz_targets/fuzz_patch.rs +++ b/fuzz/fuzz_targets/fuzz_patch.rs @@ -1,11 +1,11 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::params::Params; -use diffutilslib::unified_diff; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::params::Params; +use uu_diff::unified_diff; fuzz_target!(|x: (Vec<u8>, Vec<u8>, u8)| { let (from, to, context) = x; @@ -29,7 +29,7 @@ fuzz_target!(|x: (Vec<u8>, Vec<u8>, u8)| { to: "target/fuzz.file".into(), context_count: context as usize, ..Default::default() - } + }, ); File::create("target/fuzz.file.original") .unwrap() diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs index 8a69c07..6de5420 100644 --- a/fuzz/fuzz_targets/fuzz_side.rs +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -2,11 +2,11 @@ #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::side_diff; +use uu_diff::side_diff; use std::fs::File; use std::io::Write; -use diffutilslib::params::Params; +use uu_diff::params::Params; fuzz_target!(|x: (Vec<u8>, Vec<u8>, /* usize, usize */ bool)| { let (original, new, /* width, tabsize, */ expand) = x; @@ -39,4 +39,4 @@ fuzz_target!(|x: (Vec<u8>, Vec<u8>, /* usize, usize */ bool)| { .unwrap() .write_all(&output_buf) .unwrap(); -}); \ No newline at end of file +}); diff --git a/src/main.rs b/src/bin/diffutils.rs similarity 
index 79% rename from src/main.rs rename to src/bin/diffutils.rs index b7c2712..114222a 100644 --- a/src/main.rs +++ b/src/bin/diffutils.rs @@ -4,27 +4,15 @@ // files that was distributed with this source code. use std::{ - env::ArgsOs, ffi::{OsStr, OsString}, iter::Peekable, path::{Path, PathBuf}, process::ExitCode, }; -mod cmp; -mod context_diff; -mod diff; -mod ed_diff; -mod macros; -mod normal_diff; -mod params; -mod side_diff; -mod unified_diff; -mod utils; - /// # Panics /// Panics if the binary path cannot be determined -fn binary_path(args: &mut Peekable<ArgsOs>) -> PathBuf { +fn binary_path<I: Iterator<Item = OsString>>(args: &mut Peekable<I>) -> PathBuf { match args.peek() { Some(ref s) if !s.is_empty() => PathBuf::from(s), _ => std::env::current_exe().unwrap(), @@ -53,7 +41,7 @@ fn second_arg_error(name: &OsStr) -> ! { } fn main() -> ExitCode { - let mut args = std::env::args_os().peekable(); + let mut args = uucore::args_os().peekable(); let exe_path = binary_path(&mut args); let exe_name = name(&exe_path); @@ -69,13 +57,16 @@ fn main() -> ExitCode { OsString::from(exe_name) }; - match util_name.to_str() { - Some("diff") => diff::main(args), - Some("cmp") => cmp::main(args), + let code = match util_name.to_str() { + Some("cmp") => cmp::uumain(args), + Some("diff") => diff::uumain(args), Some(name) => { eprintln!("{name}: utility not supported"); - ExitCode::from(2) + // ExitCode::from(2) + 2 } None => second_arg_error(exe_name), - } + }; + + ExitCode::from(code as u8) } diff --git a/src/lib.rs b/src/lib.rs index 342b01c..b626abc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,8 @@ -pub mod cmp; -pub mod context_diff; -pub mod ed_diff; -pub mod macros; -pub mod normal_diff; -pub mod params; -pub mod side_diff; -pub mod unified_diff; -pub mod utils; +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
-// Re-export the public functions/types you need -pub use context_diff::diff as context_diff; -pub use ed_diff::diff as ed_diff; -pub use normal_diff::diff as normal_diff; -pub use side_diff::diff as side_by_side_diff; -pub use unified_diff::diff as unified_diff; +// pub mod common; +// +// pub use common::validation; diff --git a/src/uu/cmp/Cargo.toml b/src/uu/cmp/Cargo.toml new file mode 100644 index 0000000..0cea317 --- /dev/null +++ b/src/uu/cmp/Cargo.toml @@ -0,0 +1,48 @@ +[package] +name = "uu_cmp" +description = "cmp ~ (uutils) compare two files byte by byte" +repository = "https://github.com/uutils/diffutils/tree/main/src/uu/cmp" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true + +# [lints] +# workspace = true + +[[bin]] +name = "cmp" +path = "src/main.rs" + +[lib] +path = "src/cmp.rs" + +[features] +# TODO How to sync over all modules? +# Instead of limiting units to KiB, MiB, etc., one can write kib, mib, Mb or any other case. 
+feat_allow_case_insensitive_number_units = [] + +[dependencies] +# const_format = { workspace = true } +diff_crate = { workspace = true } +itoa = { workspace = true } +regex = { workspace = true } +same-file = { workspace = true } +uucore = { workspace = true } +uudiff = { workspace = true } +unicode-width = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +pretty_assertions = { workspace = true } +uudiff = { workspace = true } +tempfile = { workspace = true } + +[[bench]] +name = "cmp_bench" +harness = false diff --git a/src/uu/cmp/LICENSE b/src/uu/cmp/LICENSE new file mode 120000 index 0000000..5853aae --- /dev/null +++ b/src/uu/cmp/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/cmp/benches/cmp_bench.rs b/src/uu/cmp/benches/cmp_bench.rs new file mode 100644 index 0000000..fa69bf2 --- /dev/null +++ b/src/uu/cmp/benches/cmp_bench.rs @@ -0,0 +1,141 @@ +#![allow(unused)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! Benches for all utils in diffutils. +//! +//! There is a file generator included to create files of different sizes for comparison. \ +//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ +//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. + +/// Generate test files with these sizes in KB. 
+const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB];
+const NUM_DIFF: u64 = 4;
+// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files
+const TEMP_DIR: &str = "";
+// Number of KB per MB, only used to keep FILE_SIZES_IN_KILO_BYTES readable
+const MB: u64 = 1_000;
+
+use std::sync::OnceLock;
+
+use divan::Bencher;
+use tempfile::TempDir;
+use uu_cmp::parse_params;
+use uudiff::benchmark::{
+    binary,
+    prepare_bench::{generate_test_files_bytes, BenchContext},
+    str_to_args,
+};
+
+// bench the time it takes to parse the command line arguments
+#[divan::bench]
+fn cmp_parser(bencher: Bencher) {
+    let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB";
+    let args = str_to_args(cmd).into_iter().peekable();
+    bencher
+        .with_inputs(|| args.clone())
+        .bench_values(|params| parse_params(params));
+}
+
+// test the impact on the benchmark if the cmd is converted to args inside the timed code (doubles for parse)
+#[divan::bench]
+fn cmp_parser_no_prepare() {
+    let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB";
+    let args = str_to_args(cmd).into_iter().peekable();
+    let _ = parse_params(args);
+}
+
+// bench equal, full file read
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn cmp_compare_files_equal(bencher: Bencher, kb: u64) {
+    let fp = get_context().get_files_equal_kb(kb).unwrap();
+    let cmd = format!("cmp {} {}", fp.from, fp.to);
+    let args = str_to_args(&cmd).into_iter();
+
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args.clone())
+        .bench_refs(|params| uu_cmp::uumain(params.peekable()));
+}
+
+// bench different; cmp exits on first difference
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn cmp_compare_files_different(bencher: Bencher, kb: u64) {
+    let fp = get_context().get_files_different_kb(kb).unwrap();
+    let cmd = format!("cmp -s {} {}", fp.from, fp.to);
+    let args = str_to_args(&cmd).into_iter();
+
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args.clone())
+        .bench_refs(|params| uu_cmp::uumain(params.peekable()));
+}
+
+// bench original GNU cmp
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn cmd_cmp_gnu_equal(bencher: Bencher, kb: u64) {
+    let fp = get_context().get_files_equal_kb(kb).unwrap();
+    let args_str = format!("{} {}", fp.from, fp.to);
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args_str.clone())
+        .bench_refs(|cmd_args| binary::bench_binary("cmp", cmd_args));
+}
+
+// bench the compiled release version
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn cmd_cmp_release_equal(bencher: Bencher, kb: u64) {
+    // strip a trailing "src/uu/cmp" from the current dir, then append the release binary path
+    let dir = std::env::current_dir().unwrap();
+    let path = dir.to_string_lossy();
+    let path = path.trim_end_matches("src/uu/cmp");
+    let prg = path.to_string() + "target/release/diffutils";
+
+    let fp = get_context().get_files_equal_kb(kb).unwrap();
+    let args_str = format!("cmp {} {}", fp.from, fp.to);
+
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args_str.clone())
+        .bench_refs(|cmd_args| binary::bench_binary(&prg, cmd_args));
+}
+
+// Each bench function is separate in Divan, so the test data is generated once and shared here.
+// NOTE(review): a static is never dropped, so the TempDir is presumably not deleted on exit - confirm.
+static SHARED_CONTEXT: OnceLock<BenchContext> = OnceLock::new();
+/// Creates the test files once and provides them to all tests.
+pub fn get_context() -> &'static BenchContext {
+    SHARED_CONTEXT.get_or_init(|| {
+        let mut ctx = BenchContext::default();
+        if TEMP_DIR.is_empty() {
+            let tmp_dir = TempDir::new().expect("Failed to create temp dir");
+            ctx.tmp_dir = Some(tmp_dir);
+        } else {
+            // uses the directory named by TEMP_DIR, the generated files are kept
+            let path = std::path::Path::new(TEMP_DIR);
+            // create_dir_all succeeds if the directory already exists
+            std::fs::create_dir_all(path)
+                .unwrap_or_else(|e| panic!("Path {TEMP_DIR} could not be created: {e}"));
+            ctx.dir = TEMP_DIR.to_string();
+        };
+
+        // generate the equal ("eq") and the different ("df") test files for every size
+        for kb in FILE_SIZES_IN_KILO_BYTES {
+            let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq")
+                .expect("generate_test_files failed");
+            ctx.files_equal.push(f);
+            let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df")
+                .expect("generate_test_files failed");
+            ctx.files_different.push(f);
+        }
+
+        ctx
+    })
+}
+
+fn main() {
+    // Run registered benchmarks.
+    divan::main();
+}
diff --git a/src/cmp.rs b/src/uu/cmp/src/cmp.rs
similarity index 96%
rename from src/cmp.rs
rename to src/uu/cmp/src/cmp.rs
index 587d5cc..87b1fcf 100644
--- a/src/cmp.rs
+++ b/src/uu/cmp/src/cmp.rs
@@ -3,13 +3,13 @@
 // For the full copyright and license information, please view the LICENSE-*
 // files that was distributed with this source code.
-use crate::utils::format_failure_to_read_input_file; -use std::env::{self, ArgsOs}; +use std::env::{self}; use std::ffi::OsString; use std::io::{BufRead, BufReader, BufWriter, Read, Write}; use std::iter::Peekable; -use std::process::ExitCode; use std::{cmp, fs, io}; +use uucore::error::UResult; +use uudiff::utils::{format_failure_to_read_input_file, format_io_error}; #[cfg(not(target_os = "windows"))] use std::os::fd::{AsRawFd, FromRawFd}; @@ -76,7 +76,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(_) => { return Err(format!( "{executable_str}: invalid --ignore-initial value '{skip_desc}'" - )) + )); } }; @@ -179,7 +179,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(_) => { return Err(format!( "{executable_str}: invalid --bytes value '{max_bytes}'" - )) + )); } }; params.max_bytes = Some(max_bytes); @@ -233,7 +233,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu } // Do as GNU cmp, and completely disable printing if we are - // outputing to /dev/null. + // outputting to /dev/null. 
#[cfg(not(target_os = "windows"))] if is_stdout_dev_null() { params.quiet = true; @@ -303,6 +303,7 @@ fn prepare_reader( } }; + #[allow(clippy::collapsible_if)] if let Some(skip) = skip { if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { return Err(format_failure_to_read_input_file( @@ -322,7 +323,7 @@ pub enum Cmp { Different, } -pub fn cmp(params: &Params) -> Result { +pub fn cmp_compare(params: &Params) -> Result { let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; @@ -441,7 +442,7 @@ pub fn cmp(params: &Params) -> Result { })?; output.clear(); } else { - report_difference(from_byte, to_byte, at_byte, at_line, params); + report_difference(from_byte, to_byte, at_byte, at_line, params)?; return Ok(Cmp::Different); } } @@ -473,31 +474,37 @@ pub fn cmp(params: &Params) -> Result { // An exit status of 0 means no differences were found, // 1 means some differences were found, // and 2 means trouble. 
-pub fn main(opts: Peekable) -> ExitCode { - let params = match parse_params(opts) { +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let args = args.peekable(); + let params = match parse_params(args) { Ok(param) => param, Err(e) => { eprintln!("{e}"); - return ExitCode::from(2); + uucore::error::set_exit_code(2); + return Ok(()); } }; if params.from == "-" && params.to == "-" || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) { - return ExitCode::SUCCESS; + uucore::error::set_exit_code(0); + return Ok(()); } - match cmp(¶ms) { - Ok(Cmp::Equal) => ExitCode::SUCCESS, - Ok(Cmp::Different) => ExitCode::from(1), + match cmp_compare(¶ms) { + Ok(Cmp::Equal) => uucore::error::set_exit_code(0), + Ok(Cmp::Different) => uucore::error::set_exit_code(1), Err(e) => { if !params.quiet { eprintln!("{e}"); } - ExitCode::from(2) + uucore::error::set_exit_code(2); } - } + }; + + Ok(()) } #[inline] @@ -707,9 +714,15 @@ fn is_posix_locale() -> bool { } #[inline] -fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { +fn report_difference( + from_byte: u8, + to_byte: u8, + at_byte: usize, + at_line: usize, + params: &Params, +) -> Result<(), String> { if params.quiet { - return; + return Ok(()); } let term = if is_posix_locale() && !params.print_bytes { @@ -734,7 +747,16 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, format_visible_byte(to_byte) ); } - println!(); + // Instead of println!(), which panics in case of error (> /dev/full). + let mut stdout = io::stdout(); + if let Err(e) = writeln!(stdout) { + return Err(format_io_error(&e)); + }; + if let Err(e) = stdout.flush() { + return Err(format_io_error(&e)); + }; + + Ok(()) } #[cfg(test)] diff --git a/src/uu/cmp/src/main.rs b/src/uu/cmp/src/main.rs new file mode 100644 index 0000000..35e46c5 --- /dev/null +++ b/src/uu/cmp/src/main.rs @@ -0,0 +1,19 @@ +// This file is part of the uutils coreutils package. 
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+// TODO implement macro and internationalization
+// uucore::bin!(uu_cmp);
+
+use std::io::Write;
+
+pub fn main() {
+    let code = uu_cmp::uumain(uucore::args_os());
+    if let Err(e) = std::io::stdout().flush() {
+        // Flush explicitly so that a failed write to stdout is reported;
+        // the exit code computed by the utility is kept.
+        eprintln!("Error flushing stdout: {e}");
+    }
+    std::process::exit(code);
+}
diff --git a/src/uu/diff/Cargo.toml b/src/uu/diff/Cargo.toml
new file mode 100644
index 0000000..0ef3ab0
--- /dev/null
+++ b/src/uu/diff/Cargo.toml
@@ -0,0 +1,43 @@
+[package]
+name = "uu_diff"
+description = "diff ~ (uutils) compare files line by line"
+repository = "https://github.com/uutils/diffutils/tree/main/src/uu/diff"
+version.workspace = true
+authors.workspace = true
+license.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+categories.workspace = true
+edition.workspace = true
+rust-version.workspace = true
+readme.workspace = true
+
+# [lints]
+# workspace = true
+
+[[bin]]
+name = "diff"
+path = "src/main.rs"
+
+[lib]
+path = "src/diff.rs"
+
+[dependencies]
+# const_format = { workspace = true }
+diff_crate = { workspace = true }
+regex = { workspace = true }
+same-file = { workspace = true }
+# sdiff = { workspace = true }
+uucore = { workspace = true }
+uudiff = { workspace = true }
+unicode-width = { workspace = true }
+
+[dev-dependencies]
+divan = { workspace = true }
+pretty_assertions = { workspace = true }
+rand = { workspace = true }
+tempfile = { workspace = true }
+
+[[bench]]
+name = "diff_bench"
+harness = false
diff --git a/src/uu/diff/LICENSE b/src/uu/diff/LICENSE
new file mode 120000
index 0000000..5853aae
--- /dev/null
+++ b/src/uu/diff/LICENSE
@@ -0,0 +1 @@
+../../../LICENSE
\ No newline at end of file
diff --git a/src/uu/diff/benches/diff_bench.rs b/src/uu/diff/benches/diff_bench.rs
new file mode 100644
index 0000000..0ec21fc
--- /dev/null
+++
b/src/uu/diff/benches/diff_bench.rs
@@ -0,0 +1,119 @@
+// #![allow(unused)]
+// This file is part of the uutils diffutils package.
+//
+// For the full copyright and license information, please view the LICENSE-*
+// files that was distributed with this source code.
+
+//! Benches for the diff utility.
+//!
+//! There is a file generator included to create files of different sizes for comparison. \
+//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \
+//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long.
+
+/// Generate test files with these sizes in KB.
+const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB];
+const NUM_DIFF: u64 = 4;
+// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files
+const TEMP_DIR: &str = "";
+// Number of KB per MB, only used to keep FILE_SIZES_IN_KILO_BYTES readable
+const MB: u64 = 1_000;
+
+use divan::Bencher;
+use std::{path::Path, sync::OnceLock};
+use tempfile::TempDir;
+use uudiff::benchmark::{
+    binary,
+    prepare_bench::{generate_test_files_bytes, BenchContext},
+    str_to_args,
+};
+
+// bench the time it takes to parse the command line arguments
+#[divan::bench]
+fn diff_parser(bencher: Bencher) {
+    let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100";
+    let args = str_to_args(cmd).into_iter().peekable();
+    bencher
+        .with_inputs(|| args.clone())
+        .bench_values(|data| uu_diff::params::parse_params(data));
+}
+
+// bench the actual compare
+// bench equal, full file read
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn diff_compare_files_equal(bencher: Bencher, kb: u64) {
+    let fp = get_context().get_files_equal_kb(kb).unwrap();
+    let cmd = format!("diff {} {}", fp.from, fp.to);
+    let args = str_to_args(&cmd).into_iter();
+
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args.clone())
+        .bench_refs(|params| uu_diff::uumain(params.peekable()));
+}
+
+// bench original GNU diff
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn cmd_diff_gnu_equal(bencher: Bencher, kb: u64) {
+    let fp = get_context().get_files_equal_kb(kb).unwrap();
+    let args_str = format!("{} {}", fp.from, fp.to);
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args_str.clone())
+        .bench_refs(|cmd_args| binary::bench_binary("diff", cmd_args));
+}
+
+// bench the compiled release version
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)]
+fn cmd_diff_release_equal(bencher: Bencher, kb: u64) {
+    // strip a trailing "src/uu/diff" from the current dir, then append the release binary path
+    let dir = std::env::current_dir().unwrap();
+    let path = dir.to_string_lossy();
+    let path = path.trim_end_matches("src/uu/diff");
+    let prg = path.to_string() + "target/release/diffutils";
+
+    let fp = get_context().get_files_equal_kb(kb).unwrap();
+    let args_str = format!("diff {} {}", fp.from, fp.to);
+
+    bencher
+        // each run gets a fresh clone of the arguments
+        .with_inputs(|| args_str.clone())
+        .bench_refs(|cmd_args| binary::bench_binary(&prg, cmd_args));
+}
+
+// Each bench function is separate in Divan, so the test data is generated once and shared here.
+// NOTE(review): a static is never dropped, so the TempDir is presumably not deleted on exit - confirm.
+static SHARED_CONTEXT: OnceLock<BenchContext> = OnceLock::new();
+/// Creates the test files once and provides them to all tests.
+pub fn get_context() -> &'static BenchContext {
+    SHARED_CONTEXT.get_or_init(|| {
+        let mut ctx = BenchContext::default();
+        if TEMP_DIR.is_empty() {
+            let tmp_dir = TempDir::new().expect("Failed to create temp dir");
+            ctx.tmp_dir = Some(tmp_dir);
+        } else {
+            // uses the directory named by TEMP_DIR, the generated files are kept
+            let path = Path::new(TEMP_DIR);
+            // create_dir_all succeeds if the directory already exists
+            std::fs::create_dir_all(path)
+                .unwrap_or_else(|e| panic!("Path {TEMP_DIR} could not be created: {e}"));
+            ctx.dir = TEMP_DIR.to_string();
+        };
+
+        // generate the equal ("eq") and the different ("df") test files for every size
+        for kb in FILE_SIZES_IN_KILO_BYTES {
+            let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq")
+                .expect("generate_test_files failed");
+            ctx.files_equal.push(f);
+            let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df")
+                .expect("generate_test_files failed");
+            ctx.files_different.push(f);
+        }
+
+        ctx
+    })
+}
+
+fn main() {
+    // Run registered benchmarks.
+    divan::main();
+}
diff --git a/src/context_diff.rs b/src/uu/diff/src/context_diff.rs
similarity index 94%
rename from src/context_diff.rs
rename to src/uu/diff/src/context_diff.rs
index 873fc3d..c616c03 100644
--- a/src/context_diff.rs
+++ b/src/uu/diff/src/context_diff.rs
@@ -7,8 +7,7 @@ use std::collections::VecDeque;
 use std::io::Write;
 
 use crate::params::Params;
-use crate::utils::do_write_line;
-use crate::utils::get_modification_time;
+use uudiff::utils::{do_write_line, get_modification_time};
 
 #[derive(Debug, PartialEq)]
 pub enum DiffLine {
@@ -77,9 +76,9 @@ fn make_diff(
     // Rust only allows allocations to grow to isize::MAX, and this is bigger than that.
let mut expected_lines_change_idx: usize = !0; - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if lines_since_mismatch > context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -101,7 +100,7 @@ fn make_diff( line_number_expected += 1; lines_since_mismatch = 0; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { if lines_since_mismatch > context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -132,7 +131,7 @@ fn make_diff( line_number_actual += 1; lines_since_mismatch = 0; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { expected_lines_change_idx = !0; // if one of them is missing a newline and the other isn't, then they don't actually match if (line_number_actual > actual_lines_count) @@ -381,10 +380,13 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { mod tests { use super::*; use pretty_assertions::assert_eq; + use tempfile::TempDir; #[test] fn test_permutations() { - // test all possible six-line files. 
- let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "target/context-diff/"; let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { for &b in &[0, 1, 2] { @@ -450,14 +452,16 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); assert!(output.status.success(), "{output:?}"); - //println!("{}", String::from_utf8_lossy(&output.stdout)); - //println!("{}", String::from_utf8_lossy(&output.stderr)); + // println!("{}", String::from_utf8_lossy(&output.stdout)); + // println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(format!("{target}/alef")).unwrap(); assert_eq!(alef, bet); } @@ -470,7 +474,10 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/context-diff/"; // test all possible six-line files with missing newlines. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -531,7 +538,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/ab_.diff")).unwrap()) .output() @@ -551,7 +560,10 @@ mod tests { #[test] fn test_permutations_missing_lines() { - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/context-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -615,7 +627,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/abx.diff")).unwrap()) .output() @@ -635,7 +649,10 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/context-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -702,7 +719,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/abr.diff")).unwrap()) .output() @@ -722,7 +741,7 @@ mod tests { #[test] fn test_stop_early() { - use crate::assert_diff_eq; + use uudiff::assert_diff_eq; let from_filename = "foo"; let from = ["a", "b", "c", ""].join("\n"); diff --git a/src/diff.rs b/src/uu/diff/src/diff.rs similarity index 60% rename from src/diff.rs rename to src/uu/diff/src/diff.rs index f4c0614..64565e3 100644 --- a/src/diff.rs +++ b/src/uu/diff/src/diff.rs @@ -3,26 +3,37 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. +pub mod context_diff; +pub mod ed_diff; +pub mod normal_diff; +pub mod params; +pub mod side_diff; +pub mod unified_diff; + use crate::params::{parse_params, Format}; -use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; -use std::env::ArgsOs; use std::ffi::OsString; use std::fs; use std::io::{self, stdout, Read, Write}; -use std::iter::Peekable; -use std::process::{exit, ExitCode}; +// use std::process::{ExitCode, exit}; +use uucore::error::{FromIo, UResult}; +use uudiff::utils::{format_io_error, report_failure_to_read_input_file}; // Exit codes are documented at // https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. // An exit status of 0 means no differences were found, // 1 means some differences were found, // and 2 means trouble. 
-pub fn main(opts: Peekable) -> ExitCode { - let params = parse_params(opts).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }); +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let args = args.peekable(); + let params = match parse_params(args) { + Ok(p) => p, + Err(error) => { + eprintln!("{error}"); + uucore::error::set_exit_code(2); + return Ok(()); + } + }; // if from and to are the same file, no need to perform any comparison let maybe_report_identical_files = || { if params.report_identical_files { @@ -37,7 +48,9 @@ pub fn main(opts: Peekable) -> ExitCode { || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) { maybe_report_identical_files(); - return ExitCode::SUCCESS; + // ExitCode::SUCCESS; + uucore::error::set_exit_code(0); + return Ok(()); } // read files @@ -67,7 +80,8 @@ pub fn main(opts: Peekable) -> ExitCode { } }; if io_error { - return ExitCode::from(2); + uucore::error::set_exit_code(2); + return Ok(()); } // run diff @@ -77,7 +91,8 @@ pub fn main(opts: Peekable) -> ExitCode { Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { eprintln!("{error}"); - exit(2); + uucore::error::set_exit_code(2); + std::process::exit(2); }), Format::SideBySide => { let mut output = stdout().lock(); @@ -91,12 +106,35 @@ pub fn main(opts: Peekable) -> ExitCode { params.to.to_string_lossy() ); } else { - io::stdout().write_all(&result).unwrap(); + let result = io::stdout().write_all(&result); + match result { + // This code is taken from coreutils. + // + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => { + // GNU seq prints the Broken pipe message but still exits with status 0 + // unless SIGPIPE was explicitly ignored, in which case it should fail. 
+ let err = err.map_err_context(|| "write error".into()); + uucore::show_error!("{err}"); + #[cfg(unix)] + if uucore::signals::sigpipe_was_ignored() { + uucore::error::set_exit_code(1); + } + } + Err(error) => { + eprintln!("{}", format_io_error(&error)); + uucore::error::set_exit_code(1); + return Ok(()); + } + } } if result.is_empty() { maybe_report_identical_files(); - ExitCode::SUCCESS + // ExitCode::SUCCESS; + uucore::error::set_exit_code(0); } else { - ExitCode::from(1) + uucore::error::set_exit_code(1); } + + Ok(()) } diff --git a/src/ed_diff.rs b/src/uu/diff/src/ed_diff.rs similarity index 98% rename from src/ed_diff.rs rename to src/uu/diff/src/ed_diff.rs index b8cdbc5..7e431f1 100644 --- a/src/ed_diff.rs +++ b/src/uu/diff/src/ed_diff.rs @@ -6,7 +6,7 @@ use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; +use uudiff::utils::do_write_line; #[derive(Debug, PartialEq)] struct Mismatch { @@ -71,9 +71,9 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result { + diff_crate::Result::Left(str) => { if !mismatch.actual.is_empty() { results.push(mismatch); mismatch = Mismatch::new(line_number_expected, line_number_actual); @@ -81,11 +81,11 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result { + diff_crate::Result::Right(str) => { mismatch.actual.push(str.to_vec()); line_number_actual += 1; } - diff::Result::Both(_str, _) => { + diff_crate::Result::Both(_str, _) => { line_number_expected += 1; line_number_actual += 1; if !mismatch.actual.is_empty() || !mismatch.expected.is_empty() { @@ -179,7 +179,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -259,7 +259,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -333,7 +333,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { diff --git a/src/uu/diff/src/main.rs b/src/uu/diff/src/main.rs new file mode 100644 index 0000000..e580046 --- /dev/null +++ b/src/uu/diff/src/main.rs @@ -0,0 +1,19 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// TODO implement macro and internationalization +// uucore::bin!(uu_diff); + +use std::io::Write; + +pub fn main() { + let code = uu_diff::uumain(uucore::args_os()); + if let Err(e) = std::io::stdout().flush() { + { + eprint!("Error flushing stdout: {e}"); + }; + } + std::process::exit(code); +} diff --git a/src/normal_diff.rs b/src/uu/diff/src/normal_diff.rs similarity index 98% rename from src/normal_diff.rs rename to src/uu/diff/src/normal_diff.rs index 002cd01..69a4628 100644 --- a/src/normal_diff.rs +++ b/src/uu/diff/src/normal_diff.rs @@ -6,7 +6,7 @@ use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; +use uudiff::utils::do_write_line; #[derive(Debug, PartialEq)] struct Mismatch { @@ -54,9 +54,9 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec actual_lines.pop(); } - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if 
!mismatch.actual.is_empty() && !mismatch.actual_missing_nl { results.push(mismatch); mismatch = Mismatch::new(line_number_expected, line_number_actual); @@ -65,12 +65,12 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec mismatch.expected_missing_nl = line_number_expected > expected_lines_count; line_number_expected += 1; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { mismatch.actual.push(str.to_vec()); mismatch.actual_missing_nl = line_number_actual > actual_lines_count; line_number_actual += 1; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { match ( line_number_expected > expected_lines_count, line_number_actual > actual_lines_count, @@ -228,7 +228,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -306,7 +306,7 @@ mod tests { #[test] fn test_permutations_missing_line_ending() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -400,7 +400,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -472,7 +472,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { diff --git a/src/params.rs b/src/uu/diff/src/params.rs similarity index 88% rename from src/params.rs rename to src/uu/diff/src/params.rs index 74ef3e3..6b560f6 100644 --- a/src/params.rs +++ b/src/uu/diff/src/params.rs @@ -1,9 +1,17 @@ +#![allow(clippy::collapsible_if)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + use std::ffi::OsString; use std::iter::Peekable; use std::path::PathBuf; use regex::Regex; +// use crate::side_diff; + #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum Format { #[default] @@ -45,10 +53,20 @@ impl Default for Params { } } -pub fn parse_params>(mut opts: Peekable) -> Result { +// impl From<&Params> for side_diff::Params { +// fn from(param: &Params) -> Self { +// Self { +// expand_tabs: param.expand_tabs, +// tabsize: param.tabsize, +// width: param.width, +// } +// } +// } + +pub fn parse_params>(mut args: Peekable) -> Result { // parse CLI - let Some(executable) = opts.next() else { + let Some(executable) = args.next() else { return Err("Usage: ".to_string()); }; let mut params = Params { @@ -61,8 +79,8 @@ pub fn parse_params>(mut opts: Peekable) -> Resu let mut context = None; let tabsize_re = Regex::new(r"^--tabsize=(?\d+)$").unwrap(); let width_re = Regex::new(r"--width=(?P\d+)$").unwrap(); - while let Some(param) = opts.next() { - let next_param = opts.peek(); + while let Some(param) = args.next() { + let next_param = args.peek(); if param == "--" { break; } @@ -168,7 +186,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu context = context_count; } if next_param_consumed { - opts.next(); + args.next(); } continue; } @@ -187,7 +205,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu context = context_count; } if next_param_consumed { - opts.next(); + args.next(); } continue; } @@ -210,7 +228,7 @@ pub fn 
parse_params>(mut opts: Peekable) -> Resu } params.from = if let Some(from) = from { from - } else if let Some(param) = opts.next() { + } else if let Some(param) = args.next() { param } else { return Err(format!( @@ -220,7 +238,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu }; params.to = if let Some(to) = to { to - } else if let Some(param) = opts.next() { + } else if let Some(param) = args.next() { param } else { return Err(format!( @@ -764,53 +782,65 @@ mod tests { .peekable() ) ); - assert!(parse_params( - [os("diff"), os("--tabsize"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize="), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + assert!( + parse_params( + [os("diff"), os("--tabsize"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize="), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [ + os("diff"), + os("--tabsize=92233720368547758088"), + os("foo"), + os("bar") + ] .iter() .cloned() .peekable() - ) - .is_err()); - assert!(parse_params( 
- [ - os("diff"), - os("--tabsize=92233720368547758088"), - os("foo"), - os("bar") - ] - .iter() - .cloned() - .peekable() - ) - .is_err()); + ) + .is_err() + ); } #[test] fn double_dash() { @@ -858,20 +888,24 @@ mod tests { }), parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable()) ); - assert!(parse_params( - [os("diff"), os("foo"), os("bar"), os("-")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("-"), os("-"), os("-")] - .iter() - .cloned() - .peekable() - ) - .is_err()); + assert!( + parse_params( + [os("diff"), os("foo"), os("bar"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("-"), os("-"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); } #[test] fn missing_arguments() { @@ -880,13 +914,15 @@ mod tests { } #[test] fn unknown_argument() { - assert!(parse_params( - [os("diff"), os("-g"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); + assert!( + parse_params( + [os("diff"), os("-g"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); assert!( parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err() ); @@ -907,13 +943,15 @@ mod tests { ("--normal", "-e"), ("--context", "--normal"), ] { - assert!(parse_params( - [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); + assert!( + parse_params( + [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); } } } diff --git a/src/side_diff.rs b/src/uu/diff/src/side_diff.rs similarity index 99% rename from src/side_diff.rs rename to src/uu/diff/src/side_diff.rs index 56953d2..9a1c41e 100644 --- a/src/side_diff.rs +++ b/src/uu/diff/src/side_diff.rs @@ -4,7 +4,7 @@ // files that was distributed with this source code. 
use core::cmp::{max, min}; -use diff::Result; +use diff_crate::Result; use std::{io::Write, vec}; use unicode_width::UnicodeWidthStr; @@ -348,7 +348,7 @@ pub fn diff( More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of the goals of this project : ) */ - for result in diff::slice(&left_lines, &right_lines) { + for result in diff_crate::slice(&left_lines, &right_lines) { match result { Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(), Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(), @@ -575,7 +575,7 @@ mod tests { let mut buf = vec![]; let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding - // ^ é char, start multi byte + // ^ é char, start multi byte process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter } diff --git a/src/unified_diff.rs b/src/uu/diff/src/unified_diff.rs similarity index 92% rename from src/unified_diff.rs rename to src/uu/diff/src/unified_diff.rs index 0f504a8..ba0ed31 100644 --- a/src/unified_diff.rs +++ b/src/uu/diff/src/unified_diff.rs @@ -7,8 +7,7 @@ use std::collections::VecDeque; use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; -use crate::utils::get_modification_time; +use uudiff::utils::{do_write_line, get_modification_time}; #[derive(Debug, PartialEq)] pub enum DiffLine { @@ -65,9 +64,9 @@ fn make_diff( actual_lines.pop(); } - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if lines_since_mismatch >= context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -93,7 +92,9 @@ fn make_diff( mismatch.lines.push(DiffLine::Actual(res)); mismatch.lines.push(DiffLine::MissingNL); } - _ => 
unreachable!("unterminated Left and Common lines shouldn't be followed by more Left lines"), + _ => unreachable!( + "unterminated Left and Common lines shouldn't be followed by more Left lines" + ), } } else { mismatch.lines.push(DiffLine::Expected(str.to_vec())); @@ -104,7 +105,7 @@ fn make_diff( line_number_expected += 1; lines_since_mismatch = 0; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { if lines_since_mismatch >= context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -125,7 +126,7 @@ fn make_diff( line_number_actual += 1; lines_since_mismatch = 0; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { // if one of them is missing a newline and the other isn't, then they don't actually match if (line_number_actual > actual_lines_count) && (line_number_expected > expected_lines_count) @@ -407,10 +408,11 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { mod tests { use super::*; use pretty_assertions::assert_eq; + use tempfile::TempDir; #[test] fn test_permutations() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -476,29 +478,31 @@ mod tests { fb.write_all(&bet[..]).unwrap(); let _ = fa; let _ = fb; - println!( - "diff: {:?}", - String::from_utf8(diff.clone()) - .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) - ); - println!( - "alef: {:?}", - String::from_utf8(alef.clone()) - .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) - ); - println!( - "bet: {:?}", - String::from_utf8(bet.clone()) - .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) - ); + // println!( + // "diff: {:?}", + // String::from_utf8(diff.clone()) + // .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) + // ); + // println!( + // "alef: {:?}", + // String::from_utf8(alef.clone()) + // .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) + // ); + // println!( + // "bet: {:?}", + // String::from_utf8(bet.clone()) + // .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) + // ); let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); - println!("{}", String::from_utf8_lossy(&output.stdout)); - println!("{}", String::from_utf8_lossy(&output.stderr)); + // println!("{}", String::from_utf8_lossy(&output.stdout)); + // println!("{}", String::from_utf8_lossy(&output.stderr)); assert!(output.status.success(), "{output:?}"); let alef = fs::read(format!("{target}/alef")).unwrap(); assert_eq!(alef, bet); @@ -512,7 +516,10 @@ mod tests { #[test] fn test_permutations_missing_line_ending() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files with missing newlines. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -593,7 +600,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abn.diff")).unwrap()) .output() .unwrap(); @@ -613,7 +622,10 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -689,7 +701,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/ab_.diff")).unwrap()) .output() .unwrap(); @@ -709,7 +723,10 @@ mod tests { #[test] fn test_permutations_missing_lines() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -770,7 +787,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abx.diff")).unwrap()) .output() .unwrap(); @@ -789,7 +808,10 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -856,7 +878,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abr.diff")).unwrap()) .output() .unwrap(); @@ -875,7 +899,7 @@ mod tests { #[test] fn test_stop_early() { - use crate::assert_diff_eq; + use uudiff::assert_diff_eq; let from_filename = "foo"; let from = ["a", "b", "c", ""].join("\n"); diff --git a/src/uudiff/Cargo.toml b/src/uudiff/Cargo.toml new file mode 100644 index 0000000..25b2db1 --- /dev/null +++ b/src/uudiff/Cargo.toml @@ -0,0 +1,42 @@ +# spell-checker:ignore (features) bigdecimal zerocopy extendedbigdecimal tzdb zoneinfo logind + +[package] +name = "uudiff" +description = "uutils ~ 'diff' uutils code library (cross-platform)" +repository = "https://github.com/uutils/diffutils/tree/main/src/uudiff" +authors.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +version.workspace = true + +# [package.metadata.docs.rs] +# all-features = true +# +# [lints] +# workspace = true + +[lib] +path = "src/lib/lib.rs" + +[features] +# TODO How are features centralized in this 
workspace file? +# instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +feat_allow_case_insensitive_number_units = [] + +[dependencies] +chrono.workspace = true +# coreutils.workspace = true +# regex.workspace = true +rand = { workspace = true } +same-file.workspace = true +tempfile = { workspace = true } +unicode-width.workspace = true +uucore.workspace = true + +[dev-dependencies] +pretty_assertions.workspace = true +tempfile.workspace = true diff --git a/src/uudiff/LICENSE b/src/uudiff/LICENSE new file mode 120000 index 0000000..30cff74 --- /dev/null +++ b/src/uudiff/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/uudiff/src/lib/features.rs b/src/uudiff/src/lib/features.rs new file mode 100644 index 0000000..75a11fa --- /dev/null +++ b/src/uudiff/src/lib/features.rs @@ -0,0 +1,2 @@ +// #[cfg(feature = "benchmark")] +pub mod benchmark; diff --git a/src/uudiff/src/lib/features/benchmark.rs b/src/uudiff/src/lib/features/benchmark.rs new file mode 100644 index 0000000..791dbb4 --- /dev/null +++ b/src/uudiff/src/lib/features/benchmark.rs @@ -0,0 +1,230 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Common benchmark utilities for uutils coreutils +//! +//! This module provides shared functionality for benchmarking utilities, +//! including test data generation and binary execution helpers. + +use std::ffi::OsString; + +/// Converts a String to a Vec which can be used as args \ +/// to pass to the utilities, e.g. "diff file_a file_b -w 150". +/// +/// # Returns +/// A vec OsString which can be used instead of ArgsOs. 
+pub fn str_to_args(args: &str) -> Vec { + let s: Vec = args + .split(" ") + .filter(|s| !s.is_empty()) + .map(OsString::from) + .collect(); + + s +} + +pub mod prepare_bench { + use std::{ + fs::File, + io::{BufWriter, Write}, + path::Path, + }; + + use rand::RngExt; + use tempfile::TempDir; + + /// When a file is changed to be different, a char is inserted. + const CHANGE_INDICATION_CHAR: u8 = b'#'; + // const FILE_SIZES_IN_KILO_BYTES: [u64; 2] = [100, 1 * 1000]; + + // file lines and .txt will be added + const FROM_FILE: &str = "from_file"; + const TO_FILE: &str = "to_file"; + const LINE_LENGTH: usize = 60; + + #[derive(Debug, Default)] + pub struct FilePair { + pub from: String, + pub to: String, + pub size_bytes: u64, + } + + /// Contains test data (file names) which only needs to be created once. + #[derive(Debug, Default)] + pub struct BenchContext { + /// Optional TempDir directory. When set, the dir is of no relevance. + pub tmp_dir: Option, + /// Directory path if TempDir is not set. + pub dir: String, + /// list of files in different sizes + pub files_equal: Vec, + /// list of files in different sizes + pub files_different: Vec, + } + + impl BenchContext { + pub fn get_path(&self) -> &Path { + match &self.tmp_dir { + Some(tmp) => tmp.path(), + None => Path::new(&self.dir), + } + } + + pub fn get_files_equal_kb(&self, kb: u64) -> Option<&FilePair> { + self.get_files_equal(kb * 1000) + } + + pub fn get_files_equal(&self, bytes: u64) -> Option<&FilePair> { + let p = self.files_equal.iter().find(|f| f.size_bytes == bytes)?; + Some(p) + } + + pub fn get_files_different_kb(&self, kb: u64) -> Option<&FilePair> { + self.get_files_different(kb * 1000) + } + + pub fn get_files_different(&self, bytes: u64) -> Option<&FilePair> { + let p = self + .files_different + .iter() + .find(|f| f.size_bytes == bytes)?; + Some(p) + } + } + + /// Generates two test files for comparison with size. 
+ /// + /// # Params + /// * dir: the directory where the files are created (TempDir suggested) + /// * bytes: the number of bytes the files will be long (exactly) + /// * num_differences: the number of differences inserted in the diff file + /// * id: added to the file names to differentiate for different tests + /// + /// # Returns + /// A `FilePair` with the paths of the two generated files and their size in bytes. + /// + /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. + /// If num_differences is set, '#' will be inserted between the first two words of a line, + /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. + pub fn generate_test_files_bytes( + dir: &Path, + bytes: u64, + num_differences: u64, + id: &str, + ) -> std::io::Result { + let id = if id.is_empty() { + "".to_string() + } else { + format!("{id}_") + }; + let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); + let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); + let from_path = dir.join(f1); + let to_path = dir.join(f2); + + generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; + + Ok(FilePair { + from: from_path.to_string_lossy().to_string(), + to: to_path.to_string_lossy().to_string(), + size_bytes: bytes, + }) + } + + /// Generates two test files for comparison with size. + /// + /// # Returns + /// Ok when the files were created. + /// + /// Like [generate_test_files_bytes] with specified file names. \ + /// The function must generate two files at once to quickly create + /// files with minimal differences.
+ pub fn generate_file_bytes( + from_name: &Path, + to_name: &Path, + bytes: u64, + num_differences: u64, + ) -> std::io::Result<()> { + let file_from = File::create(from_name)?; + let file_to = File::create(to_name)?; + // for int division, lines will be smaller than requested bytes + let n_lines = bytes / LINE_LENGTH as u64; + let change_every_n_lines = if num_differences == 0 { + 0 + } else { + let c = n_lines / num_differences; + if c == 0 { + 1 + } else { + c + } + }; + // Use a larger 128KB buffer for massive files + let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); + let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); + let mut rng = rand::rng(); + + // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes + let mut line_buffer = [b' '; 60]; + line_buffer[59] = b'\n'; // Set the newline once at the end + + for i in (0..n_lines).rev() { + // Fill only the letter positions, skipping spaces and the newline + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + + // Write the raw bytes directly to both files + writer_from.write_all(&line_buffer)?; + // make changes in the file + if num_differences == 0 { + writer_to.write_all(&line_buffer)?; + } else { + if i % change_every_n_lines == 0 && n_lines - i > 2 { + line_buffer[5] = CHANGE_INDICATION_CHAR; + } + writer_to.write_all(&line_buffer)?; + line_buffer[5] = b' '; + } + } + + // create last line + let missing = (bytes - n_lines * LINE_LENGTH as u64) as usize; + if missing > 0 { + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + line_buffer[missing - 1] = b'\n'; + writer_from.write_all(&line_buffer[0..missing])?; + writer_to.write_all(&line_buffer[0..missing])?; + } + + writer_from.flush()?; + writer_to.flush()?; 
+ + Ok(()) + } +} + +/// Benchmark tools which are designed to call the compiled executable. +pub mod binary { + use std::process::Command; + + use crate::benchmark::str_to_args; + + pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { + let args = str_to_args(cmd_args); + Command::new(program) + .args(args) + .status() + .expect("Failed to execute binary") + } +} diff --git a/src/uudiff/src/lib/lib.rs b/src/uudiff/src/lib/lib.rs new file mode 100644 index 0000000..b50be23 --- /dev/null +++ b/src/uudiff/src/lib/lib.rs @@ -0,0 +1,10 @@ +mod features; // feature-gated code modules +mod macros; // crate macros (macro_rules-type; exported to `crate::...`) +mod mods; // core cross-platform modules + +// pub use crate::mods::arg_parser; +pub use crate::mods::utils; + +// * feature-gated modules +// #[cfg(feature = "benchmark")] +pub use crate::features::benchmark; diff --git a/src/macros.rs b/src/uudiff/src/lib/macros.rs similarity index 100% rename from src/macros.rs rename to src/uudiff/src/lib/macros.rs diff --git a/src/uudiff/src/lib/mods.rs b/src/uudiff/src/lib/mods.rs new file mode 100644 index 0000000..d257538 --- /dev/null +++ b/src/uudiff/src/lib/mods.rs @@ -0,0 +1,2 @@ +// pub mod arg_parser; +pub mod utils; diff --git a/src/utils.rs b/src/uudiff/src/lib/mods/utils.rs similarity index 90% rename from src/utils.rs rename to src/uudiff/src/lib/mods/utils.rs index daca18d..c6e1271 100644 --- a/src/utils.rs +++ b/src/uudiff/src/lib/mods/utils.rs @@ -3,8 +3,10 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use regex::Regex; -use std::{ffi::OsString, io::Write}; +use std::{ + ffi::{OsStr, OsString}, + io::Write, +}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. 
@@ -13,11 +15,11 @@ use unicode_width::UnicodeWidthStr; #[must_use] pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { let tab = b'\t'; - let ntabs = line.iter().filter(|c| **c == tab).count(); - if ntabs == 0 { + let n_tabs = line.iter().filter(|c| **c == tab).count(); + if n_tabs == 0 { return line.to_vec(); } - let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut result = Vec::with_capacity(line.len() + n_tabs * (tabsize - 1)); let mut offset = 0; let mut iter = line.split(|c| *c == tab).peekable(); @@ -71,6 +73,11 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } +/// Checks if files are the same (same file link), which must return 'equal'. +pub fn is_same_file(from: &OsStr, to: &OsStr) -> bool { + (from == "-" && to == "-") || same_file::is_same_file(from, to).unwrap_or(false) +} + pub fn format_failure_to_read_input_file( executable: &OsString, filepath: &OsString, @@ -78,15 +85,20 @@ pub fn format_failure_to_read_input_file( ) -> String { // std::io::Error's display trait outputs "{detail} (os error {code})" // but we want only the {detail} (error string) part - let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); format!( "{}: {}: {}", executable.to_string_lossy(), filepath.to_string_lossy(), - error_code_re.replace(error.to_string().as_str(), ""), + format_io_error(&error), ) } +/// Removes the (os error x) part of the error message +pub fn format_io_error(error: &dyn std::error::Error) -> String { + let s = error.to_string(); + s.split(" (os error").next().unwrap_or(&s).to_string() +} + pub fn report_failure_to_read_input_file( executable: &OsString, filepath: &OsString, diff --git a/tests/integration.rs b/tests/integration.rs index 0e8d21e..47ee6f9 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -9,7 +9,7 @@ use std::fs::File; #[cfg(not(windows))] use std::fs::OpenOptions; use std::io::Write; -use tempfile::{tempdir, NamedTempFile}; +use 
tempfile::{NamedTempFile, tempdir}; // Integration tests for the diffutils command mod common { @@ -97,7 +97,7 @@ mod common { } mod diff { - use diffutilslib::assert_diff_eq; + use uudiff::assert_diff_eq; use super::*; From d6eef8c3882a14806823aeaabf097e9da5ec4ff5 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Thu, 12 Mar 2026 22:38:57 +0100 Subject: [PATCH 2/7] removed local dir --- src/uu/cmp/benches/cmp_bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/cmp/benches/cmp_bench.rs b/src/uu/cmp/benches/cmp_bench.rs index fa69bf2..5370cff 100644 --- a/src/uu/cmp/benches/cmp_bench.rs +++ b/src/uu/cmp/benches/cmp_bench.rs @@ -14,7 +14,7 @@ const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; const NUM_DIFF: u64 = 4; // Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files -const TEMP_DIR: &str = "/home/gunnar/DevNoSync/data"; +const TEMP_DIR: &str = ""; // just for FILE_SIZE_KILO_BYTES const MB: u64 = 1_000; From cf8cdad437d4a8f69e5cc2992d7bcd6d17fe05a5 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Sat, 14 Mar 2026 06:46:36 +0100 Subject: [PATCH 3/7] fix: cargo fmt --all -- --check --- Cargo.lock | 16 +++- src/uu/diff/src/params.rs | 164 +++++++++++++++-------------------- src/uu/diff/src/side_diff.rs | 2 +- tests/integration.rs | 2 +- 4 files changed, 88 insertions(+), 96 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b967e3e..7f00b6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,7 +211,7 @@ dependencies = [ "getrandom 0.2.17", "glob", "libc", - "nix", + "nix 0.31.2", "serde", "serde_json", "statrs", @@ -616,6 +616,18 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nix" version = "0.31.2" @@ -1111,7 +1123,7 @@ dependencies = [ "fluent", "fluent-bundle", "fluent-syntax", - "nix", + "nix 0.30.1", "os_display", "rustc-hash", "thiserror", diff --git a/src/uu/diff/src/params.rs b/src/uu/diff/src/params.rs index 6b560f6..7df7e25 100644 --- a/src/uu/diff/src/params.rs +++ b/src/uu/diff/src/params.rs @@ -782,65 +782,53 @@ mod tests { .peekable() ) ); - assert!( - parse_params( - [os("diff"), os("--tabsize"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); - assert!( - parse_params( - [os("diff"), os("--tabsize="), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); - assert!( - parse_params( - [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); - assert!( - parse_params( - [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); - assert!( - parse_params( - [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); - assert!( - parse_params( - [ - os("diff"), - os("--tabsize=92233720368547758088"), - os("foo"), - os("bar") - ] + assert!(parse_params( + [os("diff"), os("--tabsize"), os("foo"), os("bar")] .iter() .cloned() .peekable() - ) - .is_err() - ); + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize="), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + 
.iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [ + os("diff"), + os("--tabsize=92233720368547758088"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + .is_err()); } #[test] fn double_dash() { @@ -888,24 +876,20 @@ mod tests { }), parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable()) ); - assert!( - parse_params( - [os("diff"), os("foo"), os("bar"), os("-")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); - assert!( - parse_params( - [os("diff"), os("-"), os("-"), os("-")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); + assert!(parse_params( + [os("diff"), os("foo"), os("bar"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); + assert!(parse_params( + [os("diff"), os("-"), os("-"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err()); } #[test] fn missing_arguments() { @@ -914,15 +898,13 @@ mod tests { } #[test] fn unknown_argument() { - assert!( - parse_params( - [os("diff"), os("-g"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); + assert!(parse_params( + [os("diff"), os("-g"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); assert!( parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err() ); @@ -943,15 +925,13 @@ mod tests { ("--normal", "-e"), ("--context", "--normal"), ] { - assert!( - parse_params( - [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err() - ); + assert!(parse_params( + [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err()); } } } diff --git a/src/uu/diff/src/side_diff.rs b/src/uu/diff/src/side_diff.rs index 9a1c41e..d65b4a7 100644 --- a/src/uu/diff/src/side_diff.rs +++ b/src/uu/diff/src/side_diff.rs @@ -574,8 +574,8 @@ mod tests { let config = create_test_config(false, DEF_TAB_SIZE); let mut buf = vec![]; + // ↓ é char, start multi byte let 
gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding - // ^ é char, start multi byte process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter } diff --git a/tests/integration.rs b/tests/integration.rs index 47ee6f9..696d629 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -9,7 +9,7 @@ use std::fs::File; #[cfg(not(windows))] use std::fs::OpenOptions; use std::io::Write; -use tempfile::{NamedTempFile, tempdir}; +use tempfile::{tempdir, NamedTempFile}; // Integration tests for the diffutils command mod common { From b08018fe47dac87f3e3024d68bd787e13cac67c3 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Sat, 14 Mar 2026 07:26:18 +0100 Subject: [PATCH 4/7] try fixing benchmarks --- .github/workflows/codspeed.yml | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index b799ca0..4ca32ee 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -17,8 +17,19 @@ jobs: codspeed: name: Run benchmarks runs-on: ubuntu-latest + env: + CARGO_INCREMENTAL: 0 + strategy: + matrix: + type: [simulation, memory] + package: [ + uu_cmp, + uu_diff, + ] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + with: + persist-credentials: false - name: Setup rust toolchain, cache and cargo-codspeed binary uses: moonrepo/setup-rust@v1 @@ -27,11 +38,20 @@ jobs: cache-target: release bins: cargo-codspeed - - name: Build the benchmark target(s) - run: cargo codspeed build -m simulation + - name: Build benchmarks for ${{ matrix.package }} (${{ matrix.type }}) + shell: bash + run: | + echo "Building ${{ matrix.type }} benchmarks for ${{ matrix.package }}" + cargo codspeed build -m ${{ matrix.type }} -p ${{ matrix.package }} - - name: Run the benchmarks + - name: Run ${{ matrix.type }} benchmarks for ${{ matrix.package }} uses: 
CodSpeedHQ/action@v4 + env: + CODSPEED_LOG: debug with: - mode: simulation - run: cargo codspeed run + mode: ${{ matrix.type }} + run: | + echo "Running ${{ matrix.type }} benchmarks for ${{ matrix.package }}" + cargo codspeed run -p ${{ matrix.package }} > /dev/null + token: ${{ secrets.CODSPEED_TOKEN }} + From 77dd93b7a0a305ceaff39c53e6a7e26bb255129c Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Sat, 14 Mar 2026 08:01:10 +0100 Subject: [PATCH 5/7] fix: benchmarks * fix prg name and parameters * added feature to enable some benches as they should not run during github checks --- src/uu/cmp/Cargo.toml | 5 +++++ src/uu/cmp/benches/cmp_bench.rs | 12 +++++++----- src/uu/diff/Cargo.toml | 6 ++++++ src/uu/diff/benches/diff_bench.rs | 12 +++++++----- src/uudiff/src/lib/features/benchmark.rs | 2 +- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/uu/cmp/Cargo.toml b/src/uu/cmp/Cargo.toml index 0cea317..7fd2e0c 100644 --- a/src/uu/cmp/Cargo.toml +++ b/src/uu/cmp/Cargo.toml @@ -26,6 +26,11 @@ path = "src/cmp.rs" # TODO How to sync over all modules? # instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. feat_allow_case_insensitive_number_units = [] +# default = ["feat_run_binary_bench" ] +# The cmd benchmarks start the binaries and take a lot of runtime on the github checks. +# Only run them locally. 
+feat_run_binary_bench = [] + [dependencies] # const_format = { workspace = true } diff --git a/src/uu/cmp/benches/cmp_bench.rs b/src/uu/cmp/benches/cmp_bench.rs index 5370cff..1c625f0 100644 --- a/src/uu/cmp/benches/cmp_bench.rs +++ b/src/uu/cmp/benches/cmp_bench.rs @@ -24,7 +24,7 @@ use divan::Bencher; use tempfile::TempDir; use uu_cmp::parse_params; use uudiff::benchmark::{ - binary, + bench_binary, prepare_bench::{generate_test_files_bytes, BenchContext}, str_to_args, }; @@ -74,6 +74,7 @@ fn cmp_compare_files_different(bencher: Bencher, kb: u64) { } // bench original GNU cmp +#[cfg(feature = "feat_run_binary_bench")] #[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] fn cmd_cmp_gnu_equal(bencher: Bencher, kb: u64) { let fp = get_context().get_files_equal_kb(kb).unwrap(); @@ -81,25 +82,26 @@ fn cmd_cmp_gnu_equal(bencher: Bencher, kb: u64) { bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("cmp", cmd_args)); + .bench_refs(|cmd_args| bench_binary::bench_binary("cmp", cmd_args)); } // bench the compiled release version +#[cfg(feature = "feat_run_binary_bench")] #[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] fn cmd_cmp_release_equal(bencher: Bencher, kb: u64) { // search for src, then shorten path let dir = std::env::current_dir().unwrap(); let path = dir.to_string_lossy(); let path = path.trim_end_matches("src/uu/cmp"); - let prg = path.to_string() + "target/release/diffutils"; + let prg = path.to_string() + "target/release/cmp"; let fp = get_context().get_files_equal_kb(kb).unwrap(); - let args_str = format!("cmp {} {}", fp.from, fp.to); + let args_str = format!("{} {}", fp.from, fp.to); bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary(&prg, cmd_args)); + .bench_refs(|cmd_args| bench_binary::bench_binary(&prg, cmd_args)); } // Since each bench function is 
separate in Divan it is more difficult to dynamically create test data. diff --git a/src/uu/diff/Cargo.toml b/src/uu/diff/Cargo.toml index 0ef3ab0..bb778b0 100644 --- a/src/uu/diff/Cargo.toml +++ b/src/uu/diff/Cargo.toml @@ -22,6 +22,12 @@ path = "src/main.rs" [lib] path = "src/diff.rs" +[features] +# default = ["feat_run_binary_bench" ] +# The cmd benchmarks start the binaries and take a lot of runtime on the github checks. +# Only run them locally. +feat_run_binary_bench = [] + [dependencies] # const_format = { workspace = true } diff_crate = { workspace = true } diff --git a/src/uu/diff/benches/diff_bench.rs b/src/uu/diff/benches/diff_bench.rs index 0ec21fc..dc420ab 100644 --- a/src/uu/diff/benches/diff_bench.rs +++ b/src/uu/diff/benches/diff_bench.rs @@ -22,7 +22,7 @@ use divan::Bencher; use std::{path::Path, sync::OnceLock}; use tempfile::TempDir; use uudiff::benchmark::{ - binary, + bench_binary, prepare_bench::{generate_test_files_bytes, BenchContext}, str_to_args, }; @@ -52,6 +52,7 @@ fn diff_compare_files_equal(bencher: Bencher, kb: u64) { } // bench original GNU diff +#[cfg(feature = "feat_run_binary_bench")] #[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] fn cmd_diff_gnu_equal(bencher: Bencher, kb: u64) { let fp = get_context().get_files_equal_kb(kb).unwrap(); @@ -59,25 +60,26 @@ fn cmd_diff_gnu_equal(bencher: Bencher, kb: u64) { bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("diff", cmd_args)); + .bench_refs(|cmd_args| bench_binary::bench_binary("diff", cmd_args)); } // bench the compiled release version +#[cfg(feature = "feat_run_binary_bench")] #[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] fn cmd_diff_release_equal(bencher: Bencher, kb: u64) { // search for src, then shorten path let dir = std::env::current_dir().unwrap(); let path = dir.to_string_lossy(); let path = path.trim_end_matches("src/uu/diff"); - let prg = path.to_string() + 
"target/release/diffutils"; + let prg = path.to_string() + "target/release/diff"; let fp = get_context().get_files_equal_kb(kb).unwrap(); - let args_str = format!("diff {} {}", fp.from, fp.to); + let args_str = format!("{} {}", fp.from, fp.to); bencher // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary(&prg, cmd_args)); + .bench_refs(|cmd_args| bench_binary::bench_binary(&prg, cmd_args)); } // Since each bench function is separate in Divan it is more difficult to dynamically create test data. diff --git a/src/uudiff/src/lib/features/benchmark.rs b/src/uudiff/src/lib/features/benchmark.rs index 791dbb4..177144b 100644 --- a/src/uudiff/src/lib/features/benchmark.rs +++ b/src/uudiff/src/lib/features/benchmark.rs @@ -215,7 +215,7 @@ pub mod prepare_bench { } /// Benchmark tools which are designed to call the compiled executable. -pub mod binary { +pub mod bench_binary { use std::process::Command; use crate::benchmark::str_to_args; From 90035ea31d9f5742abbfa8976765cea711f96795 Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Sat, 14 Mar 2026 09:04:06 +0100 Subject: [PATCH 6/7] try fix github check release The tool dist does not allow different repository names within the workspace. 
--- .github/workflows/release.yml | 34 +++++++++++++++++----------------- Cargo.toml | 9 ++++----- dist-workspace.toml | 2 +- src/uu/cmp/Cargo.toml | 4 +++- src/uu/diff/Cargo.toml | 4 +++- 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3c59af5..d9aa406 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -56,7 +56,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive @@ -64,9 +64,9 @@ jobs: # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.31.0/cargo-dist-installer.sh | sh" - name: Cache dist - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: cargo-dist-cache path: ~/.cargo/bin/dist @@ -82,7 +82,7 @@ jobs: cat plan-dist-manifest.json echo "manifest=$(jq -c "." 
plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-plan-dist-manifest path: plan-dist-manifest.json @@ -116,7 +116,7 @@ jobs: - name: enable windows longpaths run: | git config --global core.longpaths true - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive @@ -131,7 +131,7 @@ jobs: run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -158,7 +158,7 @@ jobs: cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-build-local-${{ join(matrix.targets, '_') }} path: | @@ -175,19 +175,19 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. 
checksums) - name: Fetch local artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -205,7 +205,7 @@ jobs: cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-build-global path: | @@ -225,19 +225,19 @@ jobs: outputs: val: ${{ steps.host.outputs.manifest }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -250,14 +250,14 @@ jobs: cat dist-manifest.json echo "manifest=$(jq -c "." 
dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: # Overwrite the previous copy name: artifacts-dist-manifest path: dist-manifest.json # Create a GitHub Release while uploading all files to it - name: "Download GitHub Artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: artifacts @@ -290,7 +290,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive diff --git a/Cargo.toml b/Cargo.toml index a50b2bf..90780e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -144,11 +144,10 @@ inherits = "release" panic = "unwind" debug = true -# -# # The profile that 'dist' will build with -# [profile.dist] -# inherits = "release" -# lto = "thin" +# The profile that 'dist' will build with +[profile.dist] +inherits = "release" +lto = "thin" # [lints] # workspace = true diff --git a/dist-workspace.toml b/dist-workspace.toml index 92c4095..0ca60f0 100644 --- a/dist-workspace.toml +++ b/dist-workspace.toml @@ -4,7 +4,7 @@ members = ["cargo:."] # Config for 'dist' [dist] # The preferred dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.30.3" +cargo-dist-version = "0.31.0" # CI backends to support ci = "github" # The installers to generate for each app diff --git a/src/uu/cmp/Cargo.toml b/src/uu/cmp/Cargo.toml index 7fd2e0c..915d296 100644 --- a/src/uu/cmp/Cargo.toml +++ b/src/uu/cmp/Cargo.toml @@ -1,7 +1,9 @@ [package] name = "uu_cmp" description = "cmp ~ (uutils) decode/encode input (cmp file compare)" -repository = "https://github.com/uutils/diffutils/tree/main/src/uu/cmp" +# The tool dist does not allow different repository names within the workspace. 
+repository = "https://github.com/uutils/diffutils" +# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/cmp" version.workspace = true authors.workspace = true license.workspace = true diff --git a/src/uu/diff/Cargo.toml b/src/uu/diff/Cargo.toml index bb778b0..aa65235 100644 --- a/src/uu/diff/Cargo.toml +++ b/src/uu/diff/Cargo.toml @@ -1,7 +1,9 @@ [package] name = "uu_diff" description = "diff ~ (uutils) decode/encode input (diff file compare)" -repository = "https://github.com/uutils/diffutils/tree/main/src/uu/diff" +# The tool dist does not allow different repository names within the workspace. +# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/diff" +repository = "https://github.com/uutils/diffutils" version.workspace = true authors.workspace = true license.workspace = true From ad9b2ee8e5818f076df1849ec0ef002656f464af Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Sat, 14 Mar 2026 09:12:16 +0100 Subject: [PATCH 7/7] update chore(deps) * Replaces PR #144 and PR #145 --- .github/workflows/ci.yml | 14 +++++++------- .github/workflows/fuzzing.yml | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a57192a..94e1b2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: cargo check test: @@ -26,7 +26,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: install GNU patch on MacOS if: runner.os == 'macOS' run: | @@ -42,7 +42,7 @@ jobs: name: cargo fmt --all -- --check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: cargo fmt --all -- --check clippy: @@ -53,7 +53,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - - uses: 
actions/checkout@v4 + - uses: actions/checkout@v6 - run: cargo clippy -- -D warnings gnu-testsuite: @@ -62,7 +62,7 @@ jobs: name: GNU test suite runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: | cargo build --config=profile.release.strip=true --profile=release #-fast zstd -19 target/release/diffutils -o diffutils-x86_64-unknown-linux-gnu.zst @@ -70,7 +70,7 @@ jobs: - run: ./tests/run-upstream-testsuite.sh release || true env: TERM: xterm - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: test-results.json path: tests/test-results.json @@ -98,7 +98,7 @@ jobs: - { os: macos-latest , features: macos } - { os: windows-latest , features: windows } steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Initialize workflow variables env: # Use -Z diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index c7e0599..82afec2 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -20,7 +20,7 @@ jobs: name: Build the fuzzers runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install `cargo-fuzz` run: | echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}" @@ -50,7 +50,7 @@ jobs: - { name: fuzz_patch, should_pass: true } - { name: fuzz_side, should_pass: true } steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install `cargo-fuzz` run: | echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}"