diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a57192a..94e1b2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: cargo check test: @@ -26,7 +26,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: install GNU patch on MacOS if: runner.os == 'macOS' run: | @@ -42,7 +42,7 @@ jobs: name: cargo fmt --all -- --check runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: cargo fmt --all -- --check clippy: @@ -53,7 +53,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: cargo clippy -- -D warnings gnu-testsuite: @@ -62,7 +62,7 @@ jobs: name: GNU test suite runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - run: | cargo build --config=profile.release.strip=true --profile=release #-fast zstd -19 target/release/diffutils -o diffutils-x86_64-unknown-linux-gnu.zst @@ -70,7 +70,7 @@ jobs: - run: ./tests/run-upstream-testsuite.sh release || true env: TERM: xterm - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: test-results.json path: tests/test-results.json @@ -98,7 +98,7 @@ jobs: - { os: macos-latest , features: macos } - { os: windows-latest , features: windows } steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Initialize workflow variables env: # Use -Z diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index b799ca0..4ca32ee 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -17,8 +17,19 @@ jobs: codspeed: name: Run benchmarks runs-on: ubuntu-latest + env: + CARGO_INCREMENTAL: 0 + strategy: + matrix: + type: [simulation, memory] + package: [ + uu_cmp, + uu_diff, + ] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + with: + persist-credentials: false - name: Setup rust toolchain, cache and cargo-codspeed binary uses: moonrepo/setup-rust@v1 @@ -27,11 +38,20 @@ jobs: cache-target: release bins: cargo-codspeed - - name: Build the benchmark target(s) - run: cargo codspeed build -m simulation + - name: Build benchmarks for ${{ matrix.package }} (${{ matrix.type }}) + shell: bash + run: | + echo "Building ${{ matrix.type }} benchmarks for ${{ matrix.package }}" + cargo codspeed build -m ${{ matrix.type }} -p ${{ matrix.package }} - - name: Run the benchmarks + - name: Run ${{ matrix.type }} benchmarks for ${{ matrix.package }} uses: CodSpeedHQ/action@v4 + env: + CODSPEED_LOG: debug with: - mode: simulation - run: cargo codspeed run + mode: ${{ matrix.type }} + run: | + echo "Running ${{ matrix.type }} benchmarks for ${{ matrix.package }}" + cargo codspeed run -p ${{ matrix.package }} > /dev/null + token: ${{ secrets.CODSPEED_TOKEN }} + diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index c7e0599..82afec2 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -20,7 +20,7 @@ jobs: name: Build the fuzzers runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install `cargo-fuzz` run: | echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}" @@ -50,7 +50,7 @@ jobs: - { name: fuzz_patch, should_pass: true } - { name: fuzz_side, should_pass: true } steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install `cargo-fuzz` run: | echo "RUSTC_BOOTSTRAP=1" >> "${GITHUB_ENV}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3c59af5..d9aa406 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -56,7 +56,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive @@ -64,9 +64,9 @@ jobs: # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.31.0/cargo-dist-installer.sh | sh" - name: Cache dist - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: cargo-dist-cache path: ~/.cargo/bin/dist @@ -82,7 +82,7 @@ jobs: cat plan-dist-manifest.json echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-plan-dist-manifest path: plan-dist-manifest.json @@ -116,7 +116,7 @@ jobs: - name: enable windows longpaths run: | git config --global core.longpaths true - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive @@ -131,7 +131,7 @@ jobs: run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -158,7 +158,7 @@ jobs: cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-build-local-${{ join(matrix.targets, '_') }} path: | @@ -175,19 +175,19 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. checksums) - name: Fetch local artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -205,7 +205,7 @@ jobs: cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-build-global path: | @@ -225,19 +225,19 @@ jobs: outputs: val: ${{ steps.host.outputs.manifest }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -250,14 +250,14 @@ jobs: cat dist-manifest.json echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: # Overwrite the previous copy name: artifacts-dist-manifest path: dist-manifest.json # Create a GitHub Release while uploading all files to it - name: "Download GitHub Artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: artifacts @@ -290,7 +290,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive diff --git a/Cargo.lock b/Cargo.lock index c4795c0..7f00b6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,12 +20,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.1", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.1", +] + [[package]] name = "anyhow" version = "1.0.102" @@ -142,8 +186,10 @@ version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", "terminal_size", ] @@ -165,7 +211,7 @@ dependencies = [ "getrandom 0.2.17", "glob", "libc", - "nix", + "nix 0.31.2", "serde", "serde_json", "statrs", @@ -213,6 +259,12 @@ dependencies = [ "regex-lite", ] +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "colored" version = "2.2.0" @@ -258,20 +310,27 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "diffutils" -version = "0.5.0" +version = "0.5.1" dependencies = [ "assert_cmd", - "chrono", - "codspeed-divan-compat", - "diff", - "itoa", "predicates", - "pretty_assertions", - "rand", "regex", - "same-file", "tempfile", - "unicode-width", + "uu_cmp", + "uu_diff", + "uucore", + "uudiff", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -322,6 +381,51 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fluent" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8137a6d5a2c50d6b0ebfcb9aaa91a28154e0a70605f112d30cb0cd4a78670477" +dependencies = [ + "fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01203cb8918f5711e73891b347816d932046f95f54207710bda99beaeb423bf4" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eebbe59450baee8282d71676f3bfed5689aeab00b27545e83e5f14b1195e8b0" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0d287c53ffd184d04d8677f590f4ac5379785529e5e08b1c8083acdd5c198" +dependencies = [ + "memchr", + "thiserror", +] + [[package]] name = "foldhash" version = "0.1.5" @@ -421,6 +525,31 @@ dependencies = [ "serde_core", ] +[[package]] +name = "intl-memoizer" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310da2e345f5eb861e7a07ee182262e94975051db9e4223e909ba90f392f163f" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078ea7b7c29a2b4df841a7f6ac8775ff6074020c6776d48491ce2268e068f972" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -487,6 +616,18 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nix" version = "0.31.2" @@ -520,6 +661,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" +dependencies = [ + "unicode-width", +] + [[package]] name = "predicates" version = "3.1.4" @@ -590,9 +746,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -655,6 +811,12 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustix" version = "0.38.44" @@ -690,6 +852,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + [[package]] name = "semver" version = "1.0.27" @@ -739,6 +907,12 @@ dependencies = [ "zmij", ] +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "statrs" version = "0.18.0" @@ -749,6 +923,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.117" @@ -789,6 +969,37 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] + [[package]] name = "toml_datetime" version = "1.0.0+spec-1.1.0" @@ -819,6 +1030,33 @@ dependencies = [ "winnow", ] +[[package]] +name = "type-map" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" +dependencies = [ + "rustc-hash", +] + +[[package]] +name = "unic-langid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ba52c9b05311f4f6e62d5d9d46f094bd6e84cb8df7b3ef952748d752a7d05" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce1bf08044d4b7a94028c93786f8566047edc11110595914de93362559bc658" +dependencies = [ + "tinystr", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -837,6 +1075,86 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uu_cmp" +version = "0.5.1" +dependencies = [ + "codspeed-divan-compat", + "diff", + "itoa", + "pretty_assertions", + "regex", + "same-file", + "tempfile", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_diff" +version = "0.5.1" +dependencies = [ + "codspeed-divan-compat", + "diff", + "pretty_assertions", + "rand", + "regex", + "same-file", + "tempfile", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uucore" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8038531f506a34ab4612b93f97d5f40759768cd34a83fd2af041b84fcbde474" +dependencies = [ + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix 0.30.1", + "os_display", + "rustc-hash", + "thiserror", + "unic-langid", + "uucore_procs", + "wild", +] + +[[package]] +name = "uucore_procs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f63e2d5083ff0983193a33e2d57fd271c7e3e3e7df8e46e8f471865647b2cbc" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "uudiff" +version = "0.5.1" +dependencies = [ + "chrono", + "pretty_assertions", + "rand", + "same-file", + "tempfile", + "unicode-width", + "uucore", +] + [[package]] name = "wait-timeout" version = "0.2.0" @@ -958,6 +1276,15 @@ dependencies = [ "semver", ] +[[package]] +name = "wild" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" +dependencies = [ + "glob", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1022,6 +1349,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -1246,6 +1582,22 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "serde", + "zerofrom", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 1673839..90780e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,52 +1,165 @@ +# diffutils (uutils) +# * see the repository LICENSE, README, and CONTRIBUTING files for more information + +# spell-checker:ignore (libs) ahash bigdecimal datetime serde gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl unnested logind cfgs interner + [package] name = "diffutils" -version = "0.5.0" -edition = "2021" -description = "A CLI app for generating diff files" -license = "MIT OR Apache-2.0" +description = "diffutils ~ GNU diffutils (updated); implemented as universal (cross-platform) utils, written in Rust" +default-run = "diffutils" repository = "https://github.com/uutils/diffutils" - -[lib] -name = "diffutilslib" -path = "src/lib.rs" +edition.workspace = true +rust-version.workspace = true +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true [[bin]] name = "diffutils" -path = "src/main.rs" +path = "src/bin/diffutils.rs" -[dependencies] +[features] +# default = ["feat_common_core"] +## OS feature shortcodes +# macos = ["feat_os_macos"] +# unix = ["feat_os_unix"] +# windows = ["feat_os_windows"] +# +## (primary platforms) feature sets +# "feat_os_macos" == set of utilities which can be built/run on the MacOS platform +feat_os_macos = [ + "feat_os_unix", ## == a modern/usual *nix platform + # +# "feat_require_unix_hostid", +] +# "feat_os_unix" == set of utilities which can be built/run on modern/usual *nix platforms. +feat_os_unix = [ +# "feat_Tier1", +# # +# "feat_require_unix", +# "feat_require_unix_hostid", +# "feat_require_unix_utmpx", +] +# "feat_os_windows" == set of utilities which can be built/run on modern/usual windows platforms +feat_os_windows = [ +# "feat_Tier1", ## == "feat_os_windows_legacy" + "hostname" +] +# +# TODO How are features centralized in this workspace file? +# # instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +# feat_allow_case_insensitive_number_units = [] +# # Enables a check on options defined in NOT_YET_IMPLEMENTED. +# # If on the parser will return an error message in these cases. +# # This is preferable when running the util as unsupported options +# # are pointed out to the user, but can make tests fail. +# feat_check_not_yet_implemented = [] + +[workspace] +resolver = "3" +members = [ + ".", + "src/uu/*", +# "src/uu/stdbuf/src/libstdbuf", + "src/uudiff", +# "src/uucore_procs", +# "tests/uutests", + # "fuzz", # TODO +] + +[workspace.package] +authors = ["uutils developers"] +categories = ["command-line-utilities"] +edition = "2021" +rust-version = "1.88.0" +homepage = "https://github.com/uutils/diffutils" +description = "A CLI app for generating diff files" +keywords = ["diffutils", "uutils", "cross-platform", "cli", "utility"] +license = "MIT" +# license = "MIT OR Apache-2.0" +readme = "README.package.md" +version = "0.5.1" + + +[workspace.dependencies] +assert_cmd = "2.2.0" +const_format = "0.2.35" chrono = "0.4.38" -diff = "0.1.13" +diff_crate = { package = "diff", version = "0.1.13" } +divan = { version = "4.3.0", package = "codspeed-divan-compat" } itoa = "1.0.11" +predicates = "3.1.0" +pretty_assertions = "1.4.0" +rand = "0.10.0" regex = "1.10.4" same-file = "1.0.6" +tempfile = "3.27.0" unicode-width = "0.2.0" +uucore = "0.7.0" +uudiff = { package = "uudiff", path = "src/uudiff" } +diff = { package = "uu_diff", path = "src/uu/diff" } +# sdiff = {package = "uu_sdiff", path = "src/uu/sdiff" } + + +[dependencies] +diff.workspace = true +# sdiff.workspace = true +uucore.workspace = true +uudiff.workspace = true + +# * uutils +# uu_test = { optional = true, package = "uu_test", path = "src/uu/test" } +# +cmp = { package = "uu_cmp", path = "src/uu/cmp" } +# diff3 = { package = "uu_diff3", path = "src/uu/diff3" } +# sdiff = {optional = true, package = "uu_sdiff", path = "src/uu/sdiff" } [dev-dependencies] -assert_cmd = "2.0.14" -divan = { version = "4.3.0", package = "codspeed-divan-compat" } -pretty_assertions = "1.4.0" -predicates = "3.1.0" -rand = "0.10.0" -tempfile = "3.26.0" +assert_cmd.workspace = true +predicates.workspace = true +regex.workspace = true +tempfile.workspace = true [profile.release] -lto = "thin" +lto = true +panic = "abort" codegen-units = 1 + +# A release-like profile that is as small as possible. +[profile.release-small] +inherits = "release" +opt-level = "z" +strip = true + +[profile.release-fast] +inherits = "release" panic = "abort" -# alias profile for 'dist' +# A release-like profile with debug info for profiling. +# See https://github.com/mstange/samply . +[profile.profiling] +inherits = "release" +panic = "unwind" +debug = true + +# The profile that 'dist' will build with [profile.dist] inherits = "release" +lto = "thin" -[[bench]] -name = "bench_diffutils" -path = "benches/bench-diffutils.rs" -harness = false +# [lints] +# workspace = true +# +# # This is the linting configuration for all crates. +# # In order to use these, all crates have `[lints] workspace = true` section. +# [workspace.lints.rust] +# # Allow "fuzzing" as a "cfg" condition name and "cygwin" as a value for "target_os" +# # https://doc.rust-lang.org/nightly/rustc/check-cfg/cargo-specifics.html +# unexpected_cfgs = { level = "warn", check-cfg = [ +# 'cfg(fuzzing)', +# 'cfg(target_os, values("cygwin"))', +# ] } +# unused_qualifications = "warn" -[features] -# default = ["feat_bench_not_diff"] -# Turn bench for diffutils cmp off -feat_bench_not_cmp = [] -# Turn bench for diffutils diff off -feat_bench_not_diff = [] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..21bd444 --- /dev/null +++ b/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) uutils developers + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/LICENSE-MIT b/LICENSE-MIT deleted file mode 100644 index ba40932..0000000 --- a/LICENSE-MIT +++ /dev/null @@ -1,26 +0,0 @@ -Copyright (c) Michael Howell -Copyright (c) uutils developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/benches/bench-diffutils.rs b/benches/bench-diffutils.rs deleted file mode 100644 index e506b3f..0000000 --- a/benches/bench-diffutils.rs +++ /dev/null @@ -1,377 +0,0 @@ -// This file is part of the uutils diffutils package. -// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. - -//! Benches for all utils in diffutils. -//! -//! There is a file generator included to create files of different sizes for comparison. \ -//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ -//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. - -/// Generate test files with these sizes in KB. -const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; -// const FILE_SIZE_KILO_BYTES: [u64; 3] = [100, 1 * MB, 5 * MB]; -// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files -const TEMP_DIR: &str = ""; -const NUM_DIFF: u64 = 4; -// just for FILE_SIZE_KILO_BYTES -const MB: u64 = 1_000; -const CHANGE_CHAR: u8 = b'#'; - -#[cfg(not(feature = "feat_bench_not_cmp"))] -mod diffutils_cmp { - use std::hint::black_box; - - use diffutilslib::cmp; - use divan::Bencher; - - use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmp_compare_files_equal(bencher: Bencher, kb: u64) { - let (from, to) = get_context().get_test_files_equal(kb); - let cmd = format!("cmp {from} {to}"); - let opts = str_to_options(&cmd).into_iter().peekable(); - let params = cmp::parse_params(opts).unwrap(); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| params.clone()) - .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); - } - - // bench the actual compare; cmp exits on first difference - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmp_compare_files_different(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_different(bytes); - let cmd = format!("cmp {from} {to} -s"); - let opts = str_to_options(&cmd).into_iter().peekable(); - let params = cmp::parse_params(opts).unwrap(); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| params.clone()) - .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); - } - - // bench original GNU cmp - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_cmp_gnu_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("{from} {to}"); - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("cmp", cmd_args)); - } - - // bench the compiled release version - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_cmp_release_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("cmp {from} {to}"); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); - } -} - -#[cfg(not(feature = "feat_bench_not_diff"))] -mod diffutils_diff { - // use std::hint::black_box; - - use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - // use diffutilslib::params; - use divan::Bencher; - - // bench the actual compare - // TODO diff does not have a diff function - // #[divan::bench(args = [100_000,10_000])] - // fn diff_compare_files(bencher: Bencher, bytes: u64) { - // let (from, to) = gen_testfiles(lines, 0, "id"); - // let cmd = format!("cmp {from} {to}"); - // let opts = str_to_options(&cmd).into_iter().peekable(); - // let params = params::parse_params(opts).unwrap(); - // - // bencher - // // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - // .with_inputs(|| params.clone()) - // .bench_refs(|params| diff::diff(¶ms).unwrap()); - // } - - // bench original GNU diff - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_diff_gnu_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("{from} {to}"); - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("diff", cmd_args)); - } - - // bench the compiled release version - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_diff_release_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("diff {from} {to}"); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); - } -} - -mod parser { - use std::hint::black_box; - - use diffutilslib::{cmp, params}; - use divan::Bencher; - - use crate::prepare::str_to_options; - - // bench the time it takes to parse the command line arguments - #[divan::bench] - fn cmp_parser(bencher: Bencher) { - let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - let args = str_to_options(&cmd).into_iter().peekable(); - bencher - .with_inputs(|| args.clone()) - .bench_values(|data| black_box(cmp::parse_params(data))); - } - - // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) - // #[divan::bench] - // fn cmp_parser_no_prepare() { - // let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - // let args = str_to_options(&cmd).into_iter().peekable(); - // let _ = cmp::parse_params(args); - // } - - // bench the time it takes to parse the command line arguments - #[divan::bench] - fn diff_parser(bencher: Bencher) { - let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; - let args = str_to_options(&cmd).into_iter().peekable(); - bencher - .with_inputs(|| args.clone()) - .bench_values(|data| black_box(params::parse_params(data))); - } -} - -mod prepare { - use std::{ - ffi::OsString, - fs::{self, File}, - io::{BufWriter, Write}, - path::Path, - sync::OnceLock, - }; - - use rand::RngExt; - use tempfile::TempDir; - - use crate::{CHANGE_CHAR, FILE_SIZE_KILO_BYTES, NUM_DIFF, TEMP_DIR}; - - // file lines and .txt will be added - const FROM_FILE: &str = "from_file"; - const TO_FILE: &str = "to_file"; - const LINE_LENGTH: usize = 60; - - /// Contains test data (file names) which only needs to be created once. - #[derive(Debug, Default)] - pub struct BenchContext { - pub tmp_dir: Option, - pub dir: String, - pub files_equal: Vec<(String, String)>, - pub files_different: Vec<(String, String)>, - } - - impl BenchContext { - pub fn get_path(&self) -> &Path { - match &self.tmp_dir { - Some(tmp) => tmp.path(), - None => Path::new(&self.dir), - } - } - - pub fn get_test_files_equal(&self, kb: u64) -> &(String, String) { - let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); - &self.files_equal[p] - } - - #[allow(unused)] - pub fn get_test_files_different(&self, kb: u64) -> &(String, String) { - let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); - &self.files_different[p] - } - } - - // Since each bench function is separate in Divan it is more difficult to dynamically create test data. - // This keeps the TempDir alive until the program exits and generates the files only once. - static SHARED_CONTEXT: OnceLock = OnceLock::new(); - /// Creates the test files once and provides them to all tests. - pub fn get_context() -> &'static BenchContext { - SHARED_CONTEXT.get_or_init(|| { - let mut ctx = BenchContext::default(); - if TEMP_DIR.is_empty() { - let tmp_dir = TempDir::new().expect("Failed to create temp dir"); - ctx.tmp_dir = Some(tmp_dir); - } else { - // uses current directory, the generated files are kept - let path = Path::new(TEMP_DIR); - if !path.exists() { - fs::create_dir_all(path).expect("Path {path} could not be created"); - } - ctx.dir = TEMP_DIR.to_string(); - }; - - // generate test bytes - for kb in FILE_SIZE_KILO_BYTES { - let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") - .expect("generate_test_files failed"); - ctx.files_equal.push(f); - let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") - .expect("generate_test_files failed"); - ctx.files_different.push(f); - } - - ctx - }) - } - - pub fn str_to_options(opt: &str) -> Vec { - let s: Vec = opt - .split(" ") - .into_iter() - .filter(|s| !s.is_empty()) - .map(|s| OsString::from(s)) - .collect(); - - s - } - - /// Generates two test files for comparison with size. - /// - /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. - /// If num_differences is set, '#' will be inserted between the first two words of a line, - /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. - fn generate_test_files_bytes( - dir: &Path, - bytes: u64, - num_differences: u64, - id: &str, - ) -> std::io::Result<(String, String)> { - let id = if id.is_empty() { - "".to_string() - } else { - format!("{id}_") - }; - let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); - let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); - let from_path = dir.join(f1); - let to_path = dir.join(f2); - - generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; - - Ok(( - from_path.to_string_lossy().to_string(), - to_path.to_string_lossy().to_string(), - )) - } - - fn generate_file_bytes( - from_name: &Path, - to_name: &Path, - bytes: u64, - num_differences: u64, - ) -> std::io::Result<()> { - let file_from = File::create(from_name)?; - let file_to = File::create(to_name)?; - // for int division, lines will be smaller than requested bytes - let n_lines = bytes / LINE_LENGTH as u64; - let change_every_n_lines = if num_differences == 0 { - 0 - } else { - let c = n_lines / num_differences; - if c == 0 { - 1 - } else { - c - } - }; - // Use a larger 128KB buffer for massive files - let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); - let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); - let mut rng = rand::rng(); - - // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes - let mut line_buffer = [b' '; 60]; - line_buffer[59] = b'\n'; // Set the newline once at the end - - for i in (0..n_lines).rev() { - // Fill only the letter positions, skipping spaces and the newline - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - - // Write the raw bytes directly to both files - writer_from.write_all(&line_buffer)?; - // make changes in the file - if num_differences == 0 { - writer_to.write_all(&line_buffer)?; - } else { - if i % change_every_n_lines == 0 && n_lines - i > 2 { - line_buffer[5] = CHANGE_CHAR; - } - writer_to.write_all(&line_buffer)?; - line_buffer[5] = b' '; - } - } - - // create last line - let missing = (bytes - n_lines as u64 * LINE_LENGTH as u64) as usize; - if missing > 0 { - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - line_buffer[missing - 1] = b'\n'; - writer_from.write_all(&line_buffer[0..missing])?; - writer_to.write_all(&line_buffer[0..missing])?; - } - - writer_from.flush()?; - writer_to.flush()?; - - Ok(()) - } -} - -mod binary { - use std::process::Command; - - use crate::prepare::str_to_options; - - pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { - let args = str_to_options(cmd_args); - Command::new(program) - .args(args) - .status() - .expect("Failed to execute binary") - } -} - -fn main() { - // Run registered benchmarks. - divan::main(); -} diff --git a/dist-workspace.toml b/dist-workspace.toml index 92c4095..0ca60f0 100644 --- a/dist-workspace.toml +++ b/dist-workspace.toml @@ -4,7 +4,7 @@ members = ["cargo:."] # Config for 'dist' [dist] # The preferred dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.30.3" +cargo-dist-version = "0.31.0" # CI backends to support ci = "github" # The installers to generate for each app diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 545c6ec..3da1875 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -20,6 +20,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "arbitrary" version = "1.4.2" @@ -32,6 +82,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + [[package]] name = "bumpalo" version = "3.19.1" @@ -56,6 +112,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.42" @@ -70,25 +132,39 @@ dependencies = [ ] [[package]] -name = "const_format" -version = "0.2.35" +name = "clap" +version = "4.5.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +checksum = "52fa72306bb30daf11bc97773431628e5b4916e97aaa74b7d3f625d4d495da02" dependencies = [ - "const_format_proc_macros", + "clap_builder", ] [[package]] -name = "const_format_proc_macros" -version = "0.2.34" +name = "clap_builder" +version = "4.5.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +checksum = "2071365c5c56eae7d77414029dde2f4f4ba151cf68d5a3261c9a40de428ace93" dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", + "anstream", + "anstyle", + "clap_lex", + "strsim", + "terminal_size", ] +[[package]] +name = "clap_lex" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e78417baa3b3114dc0e95e7357389a249c4da97c3c2b540700079db6171bfd7" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -103,15 +179,33 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "diffutils" -version = "0.5.0" +version = "0.5.1" dependencies = [ - "chrono", - "const_format", - "diff", - "itoa", - "regex", - "same-file", - "unicode-width", + "uu_cmp", + "uu_diff", + "uucore", + "uudiff", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", ] [[package]] @@ -120,6 +214,51 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +[[package]] +name = "fluent" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8137a6d5a2c50d6b0ebfcb9aaa91a28154e0a70605f112d30cb0cd4a78670477" +dependencies = [ + "fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01203cb8918f5711e73891b347816d932046f95f54207710bda99beaeb423bf4" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eebbe59450baee8282d71676f3bfed5689aeab00b27545e83e5f14b1195e8b0" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0d287c53ffd184d04d8677f590f4ac5379785529e5e08b1c8083acdd5c198" +dependencies = [ + "memchr", + "thiserror", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -132,6 +271,12 @@ dependencies = [ "wasip2", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -156,6 +301,31 @@ dependencies = [ "cc", ] +[[package]] +name = "intl-memoizer" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310da2e345f5eb861e7a07ee182262e94975051db9e4223e909ba90f392f163f" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078ea7b7c29a2b4df841a7f6ac8775ff6074020c6776d48491ce2268e068f972" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itoa" version = "1.0.17" @@ -198,6 +368,12 @@ dependencies = [ "cc", ] +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "log" version = "0.4.29" @@ -210,6 +386,18 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -225,6 +413,21 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" +dependencies = [ + "unicode-width", +] + [[package]] name = "proc-macro2" version = "1.0.104" @@ -278,6 +481,25 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -293,12 +515,60 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.112" @@ -310,6 +580,74 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "terminal_size" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +dependencies = [ + "rustix", + "windows-sys 0.60.2", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] + +[[package]] +name = "type-map" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" +dependencies = [ + "rustc-hash", +] + +[[package]] +name = "unic-langid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ba52c9b05311f4f6e62d5d9d46f094bd6e84cb8df7b3ef952748d752a7d05" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce1bf08044d4b7a94028c93786f8566047edc11110595914de93362559bc658" +dependencies = [ + "tinystr", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -322,18 +660,84 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - [[package]] name = "unified-diff-fuzz" version = "0.0.0" dependencies = [ "diffutils", "libfuzzer-sys", + "uu_cmp", + "uu_diff", +] + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uu_cmp" +version = "0.5.1" +dependencies = [ + "diff", + "itoa", + "regex", + "same-file", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_diff" +version = "0.5.1" +dependencies = [ + "diff", + "regex", + "same-file", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uucore" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8038531f506a34ab4612b93f97d5f40759768cd34a83fd2af041b84fcbde474" +dependencies = [ + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix", + "os_display", + "rustc-hash", + "thiserror", + "unic-langid", + "uucore_procs", + "wild", +] + +[[package]] +name = "uucore_procs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f63e2d5083ff0983193a33e2d57fd271c7e3e3e7df8e46e8f471865647b2cbc" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "uudiff" +version = "0.5.1" +dependencies = [ + "chrono", + "same-file", + "unicode-width", + "uucore", ] [[package]] @@ -390,13 +794,22 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wild" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" +dependencies = [ + "glob", +] + [[package]] name = "winapi-util" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -458,6 +871,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -467,8 +889,89 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "serde", + "zerofrom", +] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 39efd70..be9b9d2 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,6 +11,8 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4.7" diffutils = { path = "../" } +uu_cmp = { path = "../src/uu/cmp" } +uu_diff = { path = "../src/uu/diff" } # Prevent this from interfering with workspaces [workspace] diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs index e9d0e4c..1da7366 100644 --- a/fuzz/fuzz_targets/fuzz_cmp.rs +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -1,12 +1,14 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::cmp::{self, Cmp}; +// use diffutilslib::cmp::{self, Cmp}; use std::ffi::OsString; use std::fs::File; use std::io::Write; +use uu_cmp::Cmp; + fn os(s: &str) -> OsString { OsString::from(s) } @@ -14,7 +16,7 @@ fn os(s: &str) -> OsString { fuzz_target!(|x: (Vec, Vec)| { let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"] .into_iter() - .map(|s| os(s)) + .map(os) .peekable(); let (from, to) = x; @@ -30,8 +32,8 @@ fuzz_target!(|x: (Vec, Vec)| { .unwrap(); let params = - cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); - let ret = cmp::cmp(¶ms); + uu_cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = uu_cmp::cmp_compare(¶ms); if from == to && !matches!(ret, Ok(Cmp::Equal)) { panic!( "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs index 579cf34..5522705 100644 --- a/fuzz/fuzz_targets/fuzz_cmp_args.rs +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -1,7 +1,6 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::cmp; use libfuzzer_sys::Corpus; use std::ffi::OsString; @@ -18,6 +17,6 @@ fuzz_target!(|x: Vec| -> Corpus { return Corpus::Reject; } } - let _ = cmp::parse_params(x.into_iter().peekable()); + let _ = uu_cmp::parse_params(x.into_iter().peekable()); Corpus::Keep }); diff --git a/fuzz/fuzz_targets/fuzz_ed.rs b/fuzz/fuzz_targets/fuzz_ed.rs index 7c38fda..18359a1 100644 --- a/fuzz/fuzz_targets/fuzz_ed.rs +++ b/fuzz/fuzz_targets/fuzz_ed.rs @@ -1,12 +1,11 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::ed_diff; -use diffutilslib::ed_diff::DiffError; -use diffutilslib::params::Params; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::ed_diff::{self, DiffError}; +use uu_diff::params::Params; fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { let mut output = ed_diff::diff(expected, actual, &Params::default())?; diff --git a/fuzz/fuzz_targets/fuzz_normal.rs b/fuzz/fuzz_targets/fuzz_normal.rs index 6b1e6b9..34e0512 100644 --- a/fuzz/fuzz_targets/fuzz_normal.rs +++ b/fuzz/fuzz_targets/fuzz_normal.rs @@ -1,8 +1,8 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::normal_diff; -use diffutilslib::params::Params; +use uu_diff::normal_diff; +use uu_diff::params::Params; use std::fs::{self, File}; use std::io::Write; diff --git a/fuzz/fuzz_targets/fuzz_patch.rs b/fuzz/fuzz_targets/fuzz_patch.rs index 4dea4b5..5f256d6 100644 --- a/fuzz/fuzz_targets/fuzz_patch.rs +++ b/fuzz/fuzz_targets/fuzz_patch.rs @@ -1,11 +1,11 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::params::Params; -use diffutilslib::unified_diff; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::params::Params; +use uu_diff::unified_diff; fuzz_target!(|x: (Vec, Vec, u8)| { let (from, to, context) = x; @@ -29,7 +29,7 @@ fuzz_target!(|x: (Vec, Vec, u8)| { to: "target/fuzz.file".into(), context_count: context as usize, ..Default::default() - } + }, ); File::create("target/fuzz.file.original") .unwrap() diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs index 8a69c07..6de5420 100644 --- a/fuzz/fuzz_targets/fuzz_side.rs +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -2,11 +2,11 @@ #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::side_diff; +use uu_diff::side_diff; use std::fs::File; use std::io::Write; -use diffutilslib::params::Params; +use uu_diff::params::Params; fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { let (original, new, /* width, tabsize, */ expand) = x; @@ -39,4 +39,4 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { .unwrap() .write_all(&output_buf) .unwrap(); -}); \ No newline at end of file +}); diff --git a/src/main.rs b/src/bin/diffutils.rs similarity index 79% rename from src/main.rs rename to src/bin/diffutils.rs index b7c2712..114222a 100644 --- a/src/main.rs +++ b/src/bin/diffutils.rs @@ -4,27 +4,15 @@ // files that was distributed with this source code. use std::{ - env::ArgsOs, ffi::{OsStr, OsString}, iter::Peekable, path::{Path, PathBuf}, process::ExitCode, }; -mod cmp; -mod context_diff; -mod diff; -mod ed_diff; -mod macros; -mod normal_diff; -mod params; -mod side_diff; -mod unified_diff; -mod utils; - /// # Panics /// Panics if the binary path cannot be determined -fn binary_path(args: &mut Peekable) -> PathBuf { +fn binary_path>(args: &mut Peekable) -> PathBuf { match args.peek() { Some(ref s) if !s.is_empty() => PathBuf::from(s), _ => std::env::current_exe().unwrap(), @@ -53,7 +41,7 @@ fn second_arg_error(name: &OsStr) -> ! { } fn main() -> ExitCode { - let mut args = std::env::args_os().peekable(); + let mut args = uucore::args_os().peekable(); let exe_path = binary_path(&mut args); let exe_name = name(&exe_path); @@ -69,13 +57,16 @@ fn main() -> ExitCode { OsString::from(exe_name) }; - match util_name.to_str() { - Some("diff") => diff::main(args), - Some("cmp") => cmp::main(args), + let code = match util_name.to_str() { + Some("cmp") => cmp::uumain(args), + Some("diff") => diff::uumain(args), Some(name) => { eprintln!("{name}: utility not supported"); - ExitCode::from(2) + // ExitCode::from(2) + 2 } None => second_arg_error(exe_name), - } + }; + + ExitCode::from(code as u8) } diff --git a/src/lib.rs b/src/lib.rs index 342b01c..b626abc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,8 @@ -pub mod cmp; -pub mod context_diff; -pub mod ed_diff; -pub mod macros; -pub mod normal_diff; -pub mod params; -pub mod side_diff; -pub mod unified_diff; -pub mod utils; +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. -// Re-export the public functions/types you need -pub use context_diff::diff as context_diff; -pub use ed_diff::diff as ed_diff; -pub use normal_diff::diff as normal_diff; -pub use side_diff::diff as side_by_side_diff; -pub use unified_diff::diff as unified_diff; +// pub mod common; +// +// pub use common::validation; diff --git a/src/uu/cmp/Cargo.toml b/src/uu/cmp/Cargo.toml new file mode 100644 index 0000000..915d296 --- /dev/null +++ b/src/uu/cmp/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "uu_cmp" +description = "cmp ~ (uutils) decode/encode input (cmp file compare)" +# The tool dist does not allow different repository names within the workspace. +repository = "https://github.com/uutils/diffutils" +# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/cmp" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true + +# [lints] +# workspace = true + +[[bin]] +name = "cmp" +path = "src/main.rs" + +[lib] +path = "src/cmp.rs" + +[features] +# TODO How to sync over all modules? +# instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +feat_allow_case_insensitive_number_units = [] +# default = ["feat_run_binary_bench" ] +# The cmd benchmarks start the binaries and take a lot of runtime on the github checks. +# Only run them locally. +feat_run_binary_bench = [] + + +[dependencies] +# const_format = { workspace = true } +diff_crate = { workspace = true } +itoa = { workspace = true } +regex = { workspace = true } +same-file = { workspace = true } +uucore = { workspace = true } +uudiff = { workspace = true } +unicode-width = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +pretty_assertions = { workspace = true } +uudiff = { workspace = true } +tempfile = { workspace = true } + +[[bench]] +name = "cmp_bench" +harness = false diff --git a/src/uu/cmp/LICENSE b/src/uu/cmp/LICENSE new file mode 120000 index 0000000..5853aae --- /dev/null +++ b/src/uu/cmp/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/cmp/benches/cmp_bench.rs b/src/uu/cmp/benches/cmp_bench.rs new file mode 100644 index 0000000..1c625f0 --- /dev/null +++ b/src/uu/cmp/benches/cmp_bench.rs @@ -0,0 +1,143 @@ +#![allow(unused)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! Benches for all utils in diffutils. +//! +//! There is a file generator included to create files of different sizes for comparison. \ +//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ +//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. + +/// Generate test files with these sizes in KB. +const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; +const NUM_DIFF: u64 = 4; +// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files +const TEMP_DIR: &str = ""; +// just for FILE_SIZE_KILO_BYTES +const MB: u64 = 1_000; + +use std::sync::OnceLock; + +use divan::Bencher; +use tempfile::TempDir; +use uu_cmp::parse_params; +use uudiff::benchmark::{ + bench_binary, + prepare_bench::{generate_test_files_bytes, BenchContext}, + str_to_args, +}; + +// bench the time it takes to parse the command line arguments +#[divan::bench] +fn cmp_parser(bencher: Bencher) { + let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; + let args = str_to_args(&cmd).into_iter().peekable(); + bencher.with_inputs(|| args.clone()).bench_values( + |params: std::iter::Peekable>| parse_params(params), + ); +} + +// // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) +#[divan::bench] +fn cmp_parser_no_prepare() { + let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; + let args = str_to_args(&cmd).into_iter().peekable(); + let _ = parse_params(args); +} + +// bench equal, full file read +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmp_compare_files_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let cmd = format!("cmp {} {}", fp.from, fp.to); + let args = str_to_args(&cmd).into_iter(); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args.clone()) + .bench_refs(|params| uu_cmp::uumain(params.peekable())); +} + +// bench different; cmp exits on first difference +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmp_compare_files_different(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_different_kb(kb).unwrap(); + let cmd = format!("cmp -s {} {}", fp.from, fp.to); + let args = str_to_args(&cmd).into_iter(); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args.clone()) + .bench_refs(|params| uu_cmp::uumain(params.peekable())); +} + +// bench original GNU cmp +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_cmp_gnu_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| bench_binary::bench_binary("cmp", cmd_args)); +} + +// bench the compiled release version +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_cmp_release_equal(bencher: Bencher, kb: u64) { + // search for src, then shorten path + let dir = std::env::current_dir().unwrap(); + let path = dir.to_string_lossy(); + let path = path.trim_end_matches("src/uu/cmp"); + let prg = path.to_string() + "target/release/cmp"; + + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| bench_binary::bench_binary(&prg, cmd_args)); +} + +// Since each bench function is separate in Divan it is more difficult to dynamically create test data. +// This keeps the TempDir alive until the program exits and generates the files only once. +static SHARED_CONTEXT: OnceLock = OnceLock::new(); +/// Creates the test files once and provides them to all tests. +pub fn get_context() -> &'static BenchContext { + SHARED_CONTEXT.get_or_init(|| { + let mut ctx = BenchContext::default(); + if TEMP_DIR.is_empty() { + let tmp_dir = TempDir::new().expect("Failed to create temp dir"); + ctx.tmp_dir = Some(tmp_dir); + } else { + // uses current directory, the generated files are kept + let path = std::path::Path::new(TEMP_DIR); + if !path.exists() { + std::fs::create_dir_all(path).expect("Path {path} could not be created"); + } + ctx.dir = TEMP_DIR.to_string(); + }; + + // generate test bytes + for kb in FILE_SIZES_IN_KILO_BYTES { + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") + .expect("generate_test_files failed"); + ctx.files_equal.push(f); + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") + .expect("generate_test_files failed"); + ctx.files_different.push(f); + } + + ctx + }) +} + +fn main() { + // Run registered benchmarks. + divan::main(); +} diff --git a/src/cmp.rs b/src/uu/cmp/src/cmp.rs similarity index 96% rename from src/cmp.rs rename to src/uu/cmp/src/cmp.rs index 587d5cc..87b1fcf 100644 --- a/src/cmp.rs +++ b/src/uu/cmp/src/cmp.rs @@ -3,13 +3,13 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use crate::utils::format_failure_to_read_input_file; -use std::env::{self, ArgsOs}; +use std::env::{self}; use std::ffi::OsString; use std::io::{BufRead, BufReader, BufWriter, Read, Write}; use std::iter::Peekable; -use std::process::ExitCode; use std::{cmp, fs, io}; +use uucore::error::UResult; +use uudiff::utils::{format_failure_to_read_input_file, format_io_error}; #[cfg(not(target_os = "windows"))] use std::os::fd::{AsRawFd, FromRawFd}; @@ -76,7 +76,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(_) => { return Err(format!( "{executable_str}: invalid --ignore-initial value '{skip_desc}'" - )) + )); } }; @@ -179,7 +179,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(_) => { return Err(format!( "{executable_str}: invalid --bytes value '{max_bytes}'" - )) + )); } }; params.max_bytes = Some(max_bytes); @@ -233,7 +233,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu } // Do as GNU cmp, and completely disable printing if we are - // outputing to /dev/null. + // outputting to /dev/null. #[cfg(not(target_os = "windows"))] if is_stdout_dev_null() { params.quiet = true; @@ -303,6 +303,7 @@ fn prepare_reader( } }; + #[allow(clippy::collapsible_if)] if let Some(skip) = skip { if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { return Err(format_failure_to_read_input_file( @@ -322,7 +323,7 @@ pub enum Cmp { Different, } -pub fn cmp(params: &Params) -> Result { +pub fn cmp_compare(params: &Params) -> Result { let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; @@ -441,7 +442,7 @@ pub fn cmp(params: &Params) -> Result { })?; output.clear(); } else { - report_difference(from_byte, to_byte, at_byte, at_line, params); + report_difference(from_byte, to_byte, at_byte, at_line, params)?; return Ok(Cmp::Different); } } @@ -473,31 +474,37 @@ pub fn cmp(params: &Params) -> Result { // An exit status of 0 means no differences were found, // 1 means some differences were found, // and 2 means trouble. -pub fn main(opts: Peekable) -> ExitCode { - let params = match parse_params(opts) { +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let args = args.peekable(); + let params = match parse_params(args) { Ok(param) => param, Err(e) => { eprintln!("{e}"); - return ExitCode::from(2); + uucore::error::set_exit_code(2); + return Ok(()); } }; if params.from == "-" && params.to == "-" || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) { - return ExitCode::SUCCESS; + uucore::error::set_exit_code(0); + return Ok(()); } - match cmp(¶ms) { - Ok(Cmp::Equal) => ExitCode::SUCCESS, - Ok(Cmp::Different) => ExitCode::from(1), + match cmp_compare(¶ms) { + Ok(Cmp::Equal) => uucore::error::set_exit_code(0), + Ok(Cmp::Different) => uucore::error::set_exit_code(1), Err(e) => { if !params.quiet { eprintln!("{e}"); } - ExitCode::from(2) + uucore::error::set_exit_code(2); } - } + }; + + Ok(()) } #[inline] @@ -707,9 +714,15 @@ fn is_posix_locale() -> bool { } #[inline] -fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { +fn report_difference( + from_byte: u8, + to_byte: u8, + at_byte: usize, + at_line: usize, + params: &Params, +) -> Result<(), String> { if params.quiet { - return; + return Ok(()); } let term = if is_posix_locale() && !params.print_bytes { @@ -734,7 +747,16 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, format_visible_byte(to_byte) ); } - println!(); + // Instead of println!(), which panics in case of error (> /dev/full). + let mut stdout = io::stdout(); + if let Err(e) = writeln!(stdout) { + return Err(format_io_error(&e)); + }; + if let Err(e) = stdout.flush() { + return Err(format_io_error(&e)); + }; + + Ok(()) } #[cfg(test)] diff --git a/src/uu/cmp/src/main.rs b/src/uu/cmp/src/main.rs new file mode 100644 index 0000000..35e46c5 --- /dev/null +++ b/src/uu/cmp/src/main.rs @@ -0,0 +1,19 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// TODO implement macro and internationalization +// uucore::bin!(uu_cmp); + +use std::io::Write; + +pub fn main() { + let code = uu_cmp::uumain(uucore::args_os()); + if let Err(e) = std::io::stdout().flush() { + { + eprint!("Error flushing stdout: {e}"); + }; + } + std::process::exit(code); +} diff --git a/src/uu/diff/Cargo.toml b/src/uu/diff/Cargo.toml new file mode 100644 index 0000000..aa65235 --- /dev/null +++ b/src/uu/diff/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "uu_diff" +description = "diff ~ (uutils) decode/encode input (diff file compare)" +# The tool dist does not allow different repository names within the workspace. +# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/diff" +repository = "https://github.com/uutils/diffutils" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true + +# [lints] +# workspace = true + +[[bin]] +name = "diff" +path = "src/main.rs" + +[lib] +path = "src/diff.rs" + +[features] +# default = ["feat_run_binary_bench" ] +# The cmd benchmarks start the binaries and take a lot of runtime on the github checks. +# Only run them locally. +feat_run_binary_bench = [] + +[dependencies] +# const_format = { workspace = true } +diff_crate = { workspace = true } +regex = { workspace = true } +same-file = { workspace = true } +# sdiff = { workspace = true } +uucore = { workspace = true } +uudiff = { workspace = true } +unicode-width = { workspace = true } + +[dev-dependencies] +divan = { workspace = true } +pretty_assertions = { workspace = true } +rand = { workspace = true } +tempfile = { workspace = true } + +[[bench]] +name = "diff_bench" +harness = false diff --git a/src/uu/diff/LICENSE b/src/uu/diff/LICENSE new file mode 120000 index 0000000..5853aae --- /dev/null +++ b/src/uu/diff/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/diff/benches/diff_bench.rs b/src/uu/diff/benches/diff_bench.rs new file mode 100644 index 0000000..dc420ab --- /dev/null +++ b/src/uu/diff/benches/diff_bench.rs @@ -0,0 +1,121 @@ +// #![allow(unused)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! Benches for all utils in diffutils. +//! +//! There is a file generator included to create files of different sizes for comparison. \ +//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ +//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. + +/// Generate test files with these sizes in KB. +const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; +const NUM_DIFF: u64 = 4; +// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files +const TEMP_DIR: &str = ""; +// just for FILE_SIZE_KILO_BYTES +const MB: u64 = 1_000; + +use divan::Bencher; +use std::{path::Path, sync::OnceLock}; +use tempfile::TempDir; +use uudiff::benchmark::{ + bench_binary, + prepare_bench::{generate_test_files_bytes, BenchContext}, + str_to_args, +}; + +// bench the time it takes to parse the command line arguments +#[divan::bench] +fn diff_parser(bencher: Bencher) { + let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; + let args = str_to_args(&cmd).into_iter().peekable(); + bencher + .with_inputs(|| args.clone()) + .bench_values(|data| uu_diff::params::parse_params(data)); +} + +// bench the actual compare +// bench equal, full file read +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn diff_compare_files_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let cmd = format!("diff {} {}", fp.from, fp.to); + let args = str_to_args(&cmd).into_iter(); + + bencher + // .with_inputs(|| prepare::diff_params_identical_testfiles(lines)) + .with_inputs(|| args.clone()) + .bench_refs(|params| uu_diff::uumain(params.peekable())); +} + +// bench original GNU diff +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_diff_gnu_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| bench_binary::bench_binary("diff", cmd_args)); +} + +// bench the compiled release version +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_diff_release_equal(bencher: Bencher, kb: u64) { + // search for src, then shorten path + let dir = std::env::current_dir().unwrap(); + let path = dir.to_string_lossy(); + let path = path.trim_end_matches("src/uu/diff"); + let prg = path.to_string() + "target/release/diff"; + + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| bench_binary::bench_binary(&prg, cmd_args)); +} + +// Since each bench function is separate in Divan it is more difficult to dynamically create test data. +// This keeps the TempDir alive until the program exits and generates the files only once. +static SHARED_CONTEXT: OnceLock = OnceLock::new(); +/// Creates the test files once and provides them to all tests. +pub fn get_context() -> &'static BenchContext { + SHARED_CONTEXT.get_or_init(|| { + let mut ctx = BenchContext::default(); + if TEMP_DIR.is_empty() { + let tmp_dir = TempDir::new().expect("Failed to create temp dir"); + ctx.tmp_dir = Some(tmp_dir); + } else { + // uses current directory, the generated files are kept + let path = Path::new(TEMP_DIR); + if !path.exists() { + std::fs::create_dir_all(path).expect("Path {path} could not be created"); + } + ctx.dir = TEMP_DIR.to_string(); + }; + + // generate test bytes + for kb in FILE_SIZES_IN_KILO_BYTES { + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") + .expect("generate_test_files failed"); + ctx.files_equal.push(f); + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") + .expect("generate_test_files failed"); + ctx.files_different.push(f); + } + + ctx + }) +} + +fn main() { + // Run registered benchmarks. + divan::main(); +} diff --git a/src/context_diff.rs b/src/uu/diff/src/context_diff.rs similarity index 94% rename from src/context_diff.rs rename to src/uu/diff/src/context_diff.rs index 873fc3d..c616c03 100644 --- a/src/context_diff.rs +++ b/src/uu/diff/src/context_diff.rs @@ -7,8 +7,7 @@ use std::collections::VecDeque; use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; -use crate::utils::get_modification_time; +use uudiff::utils::{do_write_line, get_modification_time}; #[derive(Debug, PartialEq)] pub enum DiffLine { @@ -77,9 +76,9 @@ fn make_diff( // Rust only allows allocations to grow to isize::MAX, and this is bigger than that. let mut expected_lines_change_idx: usize = !0; - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if lines_since_mismatch > context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -101,7 +100,7 @@ fn make_diff( line_number_expected += 1; lines_since_mismatch = 0; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { if lines_since_mismatch > context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -132,7 +131,7 @@ fn make_diff( line_number_actual += 1; lines_since_mismatch = 0; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { expected_lines_change_idx = !0; // if one of them is missing a newline and the other isn't, then they don't actually match if (line_number_actual > actual_lines_count) @@ -381,10 +380,13 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { mod tests { use super::*; use pretty_assertions::assert_eq; + use tempfile::TempDir; #[test] fn test_permutations() { - // test all possible six-line files. - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "target/context-diff/"; let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { for &b in &[0, 1, 2] { @@ -450,14 +452,16 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); assert!(output.status.success(), "{output:?}"); - //println!("{}", String::from_utf8_lossy(&output.stdout)); - //println!("{}", String::from_utf8_lossy(&output.stderr)); + // println!("{}", String::from_utf8_lossy(&output.stdout)); + // println!("{}", String::from_utf8_lossy(&output.stderr)); let alef = fs::read(format!("{target}/alef")).unwrap(); assert_eq!(alef, bet); } @@ -470,7 +474,10 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/context-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -531,7 +538,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/ab_.diff")).unwrap()) .output() @@ -551,7 +560,10 @@ mod tests { #[test] fn test_permutations_missing_lines() { - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/context-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -615,7 +627,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/abx.diff")).unwrap()) .output() @@ -635,7 +649,10 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/context-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/context-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -702,7 +719,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/abr.diff")).unwrap()) .output() @@ -722,7 +741,7 @@ mod tests { #[test] fn test_stop_early() { - use crate::assert_diff_eq; + use uudiff::assert_diff_eq; let from_filename = "foo"; let from = ["a", "b", "c", ""].join("\n"); diff --git a/src/diff.rs b/src/uu/diff/src/diff.rs similarity index 60% rename from src/diff.rs rename to src/uu/diff/src/diff.rs index f4c0614..64565e3 100644 --- a/src/diff.rs +++ b/src/uu/diff/src/diff.rs @@ -3,26 +3,37 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. +pub mod context_diff; +pub mod ed_diff; +pub mod normal_diff; +pub mod params; +pub mod side_diff; +pub mod unified_diff; + use crate::params::{parse_params, Format}; -use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; -use std::env::ArgsOs; use std::ffi::OsString; use std::fs; use std::io::{self, stdout, Read, Write}; -use std::iter::Peekable; -use std::process::{exit, ExitCode}; +// use std::process::{ExitCode, exit}; +use uucore::error::{FromIo, UResult}; +use uudiff::utils::{format_io_error, report_failure_to_read_input_file}; // Exit codes are documented at // https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. // An exit status of 0 means no differences were found, // 1 means some differences were found, // and 2 means trouble. -pub fn main(opts: Peekable) -> ExitCode { - let params = parse_params(opts).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }); +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let args = args.peekable(); + let params = match parse_params(args) { + Ok(p) => p, + Err(error) => { + eprintln!("{error}"); + uucore::error::set_exit_code(2); + return Ok(()); + } + }; // if from and to are the same file, no need to perform any comparison let maybe_report_identical_files = || { if params.report_identical_files { @@ -37,7 +48,9 @@ pub fn main(opts: Peekable) -> ExitCode { || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) { maybe_report_identical_files(); - return ExitCode::SUCCESS; + // ExitCode::SUCCESS; + uucore::error::set_exit_code(0); + return Ok(()); } // read files @@ -67,7 +80,8 @@ pub fn main(opts: Peekable) -> ExitCode { } }; if io_error { - return ExitCode::from(2); + uucore::error::set_exit_code(2); + return Ok(()); } // run diff @@ -77,7 +91,8 @@ pub fn main(opts: Peekable) -> ExitCode { Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { eprintln!("{error}"); - exit(2); + uucore::error::set_exit_code(2); + std::process::exit(2); }), Format::SideBySide => { let mut output = stdout().lock(); @@ -91,12 +106,35 @@ pub fn main(opts: Peekable) -> ExitCode { params.to.to_string_lossy() ); } else { - io::stdout().write_all(&result).unwrap(); + let result = io::stdout().write_all(&result); + match result { + // This code is taken from coreutils. + // + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => { + // GNU seq prints the Broken pipe message but still exits with status 0 + // unless SIGPIPE was explicitly ignored, in which case it should fail. + let err = err.map_err_context(|| "write error".into()); + uucore::show_error!("{err}"); + #[cfg(unix)] + if uucore::signals::sigpipe_was_ignored() { + uucore::error::set_exit_code(1); + } + } + Err(error) => { + eprintln!("{}", format_io_error(&error)); + uucore::error::set_exit_code(1); + return Ok(()); + } + } } if result.is_empty() { maybe_report_identical_files(); - ExitCode::SUCCESS + // ExitCode::SUCCESS; + uucore::error::set_exit_code(0); } else { - ExitCode::from(1) + uucore::error::set_exit_code(1); } + + Ok(()) } diff --git a/src/ed_diff.rs b/src/uu/diff/src/ed_diff.rs similarity index 98% rename from src/ed_diff.rs rename to src/uu/diff/src/ed_diff.rs index b8cdbc5..7e431f1 100644 --- a/src/ed_diff.rs +++ b/src/uu/diff/src/ed_diff.rs @@ -6,7 +6,7 @@ use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; +use uudiff::utils::do_write_line; #[derive(Debug, PartialEq)] struct Mismatch { @@ -71,9 +71,9 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result { + diff_crate::Result::Left(str) => { if !mismatch.actual.is_empty() { results.push(mismatch); mismatch = Mismatch::new(line_number_expected, line_number_actual); @@ -81,11 +81,11 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result { + diff_crate::Result::Right(str) => { mismatch.actual.push(str.to_vec()); line_number_actual += 1; } - diff::Result::Both(_str, _) => { + diff_crate::Result::Both(_str, _) => { line_number_expected += 1; line_number_actual += 1; if !mismatch.actual.is_empty() || !mismatch.expected.is_empty() { @@ -179,7 +179,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -259,7 +259,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -333,7 +333,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { diff --git a/src/uu/diff/src/main.rs b/src/uu/diff/src/main.rs new file mode 100644 index 0000000..e580046 --- /dev/null +++ b/src/uu/diff/src/main.rs @@ -0,0 +1,19 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// TODO implement macro and internationalization +// uucore::bin!(uu_diff); + +use std::io::Write; + +pub fn main() { + let code = uu_diff::uumain(uucore::args_os()); + if let Err(e) = std::io::stdout().flush() { + { + eprint!("Error flushing stdout: {e}"); + }; + } + std::process::exit(code); +} diff --git a/src/normal_diff.rs b/src/uu/diff/src/normal_diff.rs similarity index 98% rename from src/normal_diff.rs rename to src/uu/diff/src/normal_diff.rs index 002cd01..69a4628 100644 --- a/src/normal_diff.rs +++ b/src/uu/diff/src/normal_diff.rs @@ -6,7 +6,7 @@ use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; +use uudiff::utils::do_write_line; #[derive(Debug, PartialEq)] struct Mismatch { @@ -54,9 +54,9 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec actual_lines.pop(); } - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if !mismatch.actual.is_empty() && !mismatch.actual_missing_nl { results.push(mismatch); mismatch = Mismatch::new(line_number_expected, line_number_actual); @@ -65,12 +65,12 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec mismatch.expected_missing_nl = line_number_expected > expected_lines_count; line_number_expected += 1; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { mismatch.actual.push(str.to_vec()); mismatch.actual_missing_nl = line_number_actual > actual_lines_count; line_number_actual += 1; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { match ( line_number_expected > expected_lines_count, line_number_actual > actual_lines_count, @@ -228,7 +228,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -306,7 +306,7 @@ mod tests { #[test] fn test_permutations_missing_line_ending() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -400,7 +400,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -472,7 +472,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { diff --git a/src/params.rs b/src/uu/diff/src/params.rs similarity index 96% rename from src/params.rs rename to src/uu/diff/src/params.rs index 74ef3e3..7df7e25 100644 --- a/src/params.rs +++ b/src/uu/diff/src/params.rs @@ -1,9 +1,17 @@ +#![allow(clippy::collapsible_if)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + use std::ffi::OsString; use std::iter::Peekable; use std::path::PathBuf; use regex::Regex; +// use crate::side_diff; + #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub enum Format { #[default] @@ -45,10 +53,20 @@ impl Default for Params { } } -pub fn parse_params>(mut opts: Peekable) -> Result { +// impl From<&Params> for side_diff::Params { +// fn from(param: &Params) -> Self { +// Self { +// expand_tabs: param.expand_tabs, +// tabsize: param.tabsize, +// width: param.width, +// } +// } +// } + +pub fn parse_params>(mut args: Peekable) -> Result { // parse CLI - let Some(executable) = opts.next() else { + let Some(executable) = args.next() else { return Err("Usage: ".to_string()); }; let mut params = Params { @@ -61,8 +79,8 @@ pub fn parse_params>(mut opts: Peekable) -> Resu let mut context = None; let tabsize_re = Regex::new(r"^--tabsize=(?\d+)$").unwrap(); let width_re = Regex::new(r"--width=(?P\d+)$").unwrap(); - while let Some(param) = opts.next() { - let next_param = opts.peek(); + while let Some(param) = args.next() { + let next_param = args.peek(); if param == "--" { break; } @@ -168,7 +186,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu context = context_count; } if next_param_consumed { - opts.next(); + args.next(); } continue; } @@ -187,7 +205,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu context = context_count; } if next_param_consumed { - opts.next(); + args.next(); } continue; } @@ -210,7 +228,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu } params.from = if let Some(from) = from { from - } else if let Some(param) = opts.next() { + } else if let Some(param) = args.next() { param } else { return Err(format!( @@ -220,7 +238,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu }; params.to = if let Some(to) = to { to - } else if let Some(param) = opts.next() { + } else if let Some(param) = args.next() { param } else { return Err(format!( diff --git a/src/side_diff.rs b/src/uu/diff/src/side_diff.rs similarity index 99% rename from src/side_diff.rs rename to src/uu/diff/src/side_diff.rs index 56953d2..d65b4a7 100644 --- a/src/side_diff.rs +++ b/src/uu/diff/src/side_diff.rs @@ -4,7 +4,7 @@ // files that was distributed with this source code. use core::cmp::{max, min}; -use diff::Result; +use diff_crate::Result; use std::{io::Write, vec}; use unicode_width::UnicodeWidthStr; @@ -348,7 +348,7 @@ pub fn diff( More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of the goals of this project : ) */ - for result in diff::slice(&left_lines, &right_lines) { + for result in diff_crate::slice(&left_lines, &right_lines) { match result { Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(), Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(), @@ -574,8 +574,8 @@ mod tests { let config = create_test_config(false, DEF_TAB_SIZE); let mut buf = vec![]; + // ↓ é char, start multi byte let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding - // ^ é char, start multi byte process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter } diff --git a/src/unified_diff.rs b/src/uu/diff/src/unified_diff.rs similarity index 92% rename from src/unified_diff.rs rename to src/uu/diff/src/unified_diff.rs index 0f504a8..ba0ed31 100644 --- a/src/unified_diff.rs +++ b/src/uu/diff/src/unified_diff.rs @@ -7,8 +7,7 @@ use std::collections::VecDeque; use std::io::Write; use crate::params::Params; -use crate::utils::do_write_line; -use crate::utils::get_modification_time; +use uudiff::utils::{do_write_line, get_modification_time}; #[derive(Debug, PartialEq)] pub enum DiffLine { @@ -65,9 +64,9 @@ fn make_diff( actual_lines.pop(); } - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if lines_since_mismatch >= context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -93,7 +92,9 @@ fn make_diff( mismatch.lines.push(DiffLine::Actual(res)); mismatch.lines.push(DiffLine::MissingNL); } - _ => unreachable!("unterminated Left and Common lines shouldn't be followed by more Left lines"), + _ => unreachable!( + "unterminated Left and Common lines shouldn't be followed by more Left lines" + ), } } else { mismatch.lines.push(DiffLine::Expected(str.to_vec())); @@ -104,7 +105,7 @@ fn make_diff( line_number_expected += 1; lines_since_mismatch = 0; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { if lines_since_mismatch >= context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -125,7 +126,7 @@ fn make_diff( line_number_actual += 1; lines_since_mismatch = 0; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { // if one of them is missing a newline and the other isn't, then they don't actually match if (line_number_actual > actual_lines_count) && (line_number_expected > expected_lines_count) @@ -407,10 +408,11 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { mod tests { use super::*; use pretty_assertions::assert_eq; + use tempfile::TempDir; #[test] fn test_permutations() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -476,29 +478,31 @@ mod tests { fb.write_all(&bet[..]).unwrap(); let _ = fa; let _ = fb; - println!( - "diff: {:?}", - String::from_utf8(diff.clone()) - .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) - ); - println!( - "alef: {:?}", - String::from_utf8(alef.clone()) - .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) - ); - println!( - "bet: {:?}", - String::from_utf8(bet.clone()) - .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) - ); + // println!( + // "diff: {:?}", + // String::from_utf8(diff.clone()) + // .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) + // ); + // println!( + // "alef: {:?}", + // String::from_utf8(alef.clone()) + // .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) + // ); + // println!( + // "bet: {:?}", + // String::from_utf8(bet.clone()) + // .unwrap_or_else(|_| String::from("[Invalid UTF-8]")) + // ); let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); - println!("{}", String::from_utf8_lossy(&output.stdout)); - println!("{}", String::from_utf8_lossy(&output.stderr)); + // println!("{}", String::from_utf8_lossy(&output.stdout)); + // println!("{}", String::from_utf8_lossy(&output.stderr)); assert!(output.status.success(), "{output:?}"); let alef = fs::read(format!("{target}/alef")).unwrap(); assert_eq!(alef, bet); @@ -512,7 +516,10 @@ mod tests { #[test] fn test_permutations_missing_line_ending() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -593,7 +600,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abn.diff")).unwrap()) .output() .unwrap(); @@ -613,7 +622,10 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -689,7 +701,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/ab_.diff")).unwrap()) .output() .unwrap(); @@ -709,7 +723,10 @@ mod tests { #[test] fn test_permutations_missing_lines() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -770,7 +787,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abx.diff")).unwrap()) .output() .unwrap(); @@ -789,7 +808,10 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/unified-diff/"; + let dir = TempDir::new().unwrap(); + let target = &dir.path().to_string_lossy().to_string(); + // Depending where the test is started target is in a different path + // let target = "../../../target/unified-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -856,7 +878,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abr.diff")).unwrap()) .output() .unwrap(); @@ -875,7 +899,7 @@ mod tests { #[test] fn test_stop_early() { - use crate::assert_diff_eq; + use uudiff::assert_diff_eq; let from_filename = "foo"; let from = ["a", "b", "c", ""].join("\n"); diff --git a/src/uudiff/Cargo.toml b/src/uudiff/Cargo.toml new file mode 100644 index 0000000..25b2db1 --- /dev/null +++ b/src/uudiff/Cargo.toml @@ -0,0 +1,42 @@ +# spell-checker:ignore (features) bigdecimal zerocopy extendedbigdecimal tzdb zoneinfo logind + +[package] +name = "uudiff" +description = "uutils ~ 'diff' uutils code library (cross-platform)" +repository = "https://github.com/uutils/diffutils/tree/main/src/uudiff" +authors.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +version.workspace = true + +# [package.metadata.docs.rs] +# all-features = true +# +# [lints] +# workspace = true + +[lib] +path = "src/lib/lib.rs" + +[features] +# TODO How are features centralized in this workspace file? +# instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +feat_allow_case_insensitive_number_units = [] + +[dependencies] +chrono.workspace = true +# coreutils.workspace = true +# regex.workspace = true +rand = { workspace = true } +same-file.workspace = true +tempfile = { workspace = true } +unicode-width.workspace = true +uucore.workspace = true + +[dev-dependencies] +pretty_assertions.workspace = true +tempfile.workspace = true diff --git a/src/uudiff/LICENSE b/src/uudiff/LICENSE new file mode 120000 index 0000000..30cff74 --- /dev/null +++ b/src/uudiff/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/uudiff/src/lib/features.rs b/src/uudiff/src/lib/features.rs new file mode 100644 index 0000000..75a11fa --- /dev/null +++ b/src/uudiff/src/lib/features.rs @@ -0,0 +1,2 @@ +// #[cfg(feature = "benchmark")] +pub mod benchmark; diff --git a/src/uudiff/src/lib/features/benchmark.rs b/src/uudiff/src/lib/features/benchmark.rs new file mode 100644 index 0000000..177144b --- /dev/null +++ b/src/uudiff/src/lib/features/benchmark.rs @@ -0,0 +1,230 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Common benchmark utilities for uutils coreutils +//! +//! This module provides shared functionality for benchmarking utilities, +//! including test data generation and binary execution helpers. + +use std::ffi::OsString; + +/// Converts a String to a Vec which can be used as args \ +/// to pass to the utilities, e.g. "diff file_a file_b -w 150". +/// +/// # Returns +/// A vec OsString which can be used instead of ArgsOs. +pub fn str_to_args(args: &str) -> Vec { + let s: Vec = args + .split(" ") + .filter(|s| !s.is_empty()) + .map(OsString::from) + .collect(); + + s +} + +pub mod prepare_bench { + use std::{ + fs::File, + io::{BufWriter, Write}, + path::Path, + }; + + use rand::RngExt; + use tempfile::TempDir; + + /// When a file is changed to be different, a char is inserted. + const CHANGE_INDICATION_CHAR: u8 = b'#'; + // const FILE_SIZES_IN_KILO_BYTES: [u64; 2] = [100, 1 * 1000]; + + // file lines and .txt will be added + const FROM_FILE: &str = "from_file"; + const TO_FILE: &str = "to_file"; + const LINE_LENGTH: usize = 60; + + #[derive(Debug, Default)] + pub struct FilePair { + pub from: String, + pub to: String, + pub size_bytes: u64, + } + + /// Contains test data (file names) which only needs to be created once. + #[derive(Debug, Default)] + pub struct BenchContext { + /// Optional TempDir directory. When set, the dir is of no relevance. + pub tmp_dir: Option, + /// Directory path if TempDir is not set. + pub dir: String, + /// list of files in different sizes + pub files_equal: Vec, + /// list of files in different sizes + pub files_different: Vec, + } + + impl BenchContext { + pub fn get_path(&self) -> &Path { + match &self.tmp_dir { + Some(tmp) => tmp.path(), + None => Path::new(&self.dir), + } + } + + pub fn get_files_equal_kb(&self, kb: u64) -> Option<&FilePair> { + self.get_files_equal(kb * 1000) + } + + pub fn get_files_equal(&self, bytes: u64) -> Option<&FilePair> { + let p = self.files_equal.iter().find(|f| f.size_bytes == bytes)?; + Some(p) + } + + pub fn get_files_different_kb(&self, kb: u64) -> Option<&FilePair> { + self.get_files_different(kb * 1000) + } + + pub fn get_files_different(&self, bytes: u64) -> Option<&FilePair> { + let p = self + .files_different + .iter() + .find(|f| f.size_bytes == bytes)?; + Some(p) + } + } + + /// Generates two test files for comparison with size. + /// + /// # Params + /// * dir: the directory where the files are created (TempDir suggested) + /// * bytes: the number of bytes the files will be long (exactly) + /// * num_difference: the number of differences inserted in the diff file + /// * id: added to the file names to differentiate for different tests + /// + /// # Returns + /// (from_file_name, to_file_name): Two files of the specified size in bytes. + /// + /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. + /// If num_differences is set, '#' will be inserted between the first two words of a line, + /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. + pub fn generate_test_files_bytes( + dir: &Path, + bytes: u64, + num_differences: u64, + id: &str, + ) -> std::io::Result { + let id = if id.is_empty() { + "".to_string() + } else { + format!("{id}_") + }; + let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); + let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); + let from_path = dir.join(f1); + let to_path = dir.join(f2); + + generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; + + Ok(FilePair { + from: from_path.to_string_lossy().to_string(), + to: to_path.to_string_lossy().to_string(), + size_bytes: bytes, + }) + } + + /// Generates two test files for comparison with size. + /// + /// # Returns + /// Ok when the files were created. + /// + /// Like [generate_test_files_bytes] with specified file names. \ + /// The function must generate two files at once to quickly create + /// files with minimal differences. + pub fn generate_file_bytes( + from_name: &Path, + to_name: &Path, + bytes: u64, + num_differences: u64, + ) -> std::io::Result<()> { + let file_from = File::create(from_name)?; + let file_to = File::create(to_name)?; + // for int division, lines will be smaller than requested bytes + let n_lines = bytes / LINE_LENGTH as u64; + let change_every_n_lines = if num_differences == 0 { + 0 + } else { + let c = n_lines / num_differences; + if c == 0 { + 1 + } else { + c + } + }; + // Use a larger 128KB buffer for massive files + let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); + let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); + let mut rng = rand::rng(); + + // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes + let mut line_buffer = [b' '; 60]; + line_buffer[59] = b'\n'; // Set the newline once at the end + + for i in (0..n_lines).rev() { + // Fill only the letter positions, skipping spaces and the newline + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + + // Write the raw bytes directly to both files + writer_from.write_all(&line_buffer)?; + // make changes in the file + if num_differences == 0 { + writer_to.write_all(&line_buffer)?; + } else { + if i % change_every_n_lines == 0 && n_lines - i > 2 { + line_buffer[5] = CHANGE_INDICATION_CHAR; + } + writer_to.write_all(&line_buffer)?; + line_buffer[5] = b' '; + } + } + + // create last line + let missing = (bytes - n_lines * LINE_LENGTH as u64) as usize; + if missing > 0 { + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + line_buffer[missing - 1] = b'\n'; + writer_from.write_all(&line_buffer[0..missing])?; + writer_to.write_all(&line_buffer[0..missing])?; + } + + writer_from.flush()?; + writer_to.flush()?; + + Ok(()) + } +} + +/// Benchmark tools which are designed to call the compiled executable. +pub mod bench_binary { + use std::process::Command; + + use crate::benchmark::str_to_args; + + pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { + let args = str_to_args(cmd_args); + Command::new(program) + .args(args) + .status() + .expect("Failed to execute binary") + } +} diff --git a/src/uudiff/src/lib/lib.rs b/src/uudiff/src/lib/lib.rs new file mode 100644 index 0000000..b50be23 --- /dev/null +++ b/src/uudiff/src/lib/lib.rs @@ -0,0 +1,10 @@ +mod features; // feature-gated code modules +mod macros; // crate macros (macro_rules-type; exported to `crate::...`) +mod mods; // core cross-platform modules + +// pub use crate::mods::arg_parser; +pub use crate::mods::utils; + +// * feature-gated modules +// #[cfg(feature = "benchmark")] +pub use crate::features::benchmark; diff --git a/src/macros.rs b/src/uudiff/src/lib/macros.rs similarity index 100% rename from src/macros.rs rename to src/uudiff/src/lib/macros.rs diff --git a/src/uudiff/src/lib/mods.rs b/src/uudiff/src/lib/mods.rs new file mode 100644 index 0000000..d257538 --- /dev/null +++ b/src/uudiff/src/lib/mods.rs @@ -0,0 +1,2 @@ +// pub mod arg_parser; +pub mod utils; diff --git a/src/utils.rs b/src/uudiff/src/lib/mods/utils.rs similarity index 90% rename from src/utils.rs rename to src/uudiff/src/lib/mods/utils.rs index daca18d..c6e1271 100644 --- a/src/utils.rs +++ b/src/uudiff/src/lib/mods/utils.rs @@ -3,8 +3,10 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use regex::Regex; -use std::{ffi::OsString, io::Write}; +use std::{ + ffi::{OsStr, OsString}, + io::Write, +}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -13,11 +15,11 @@ use unicode_width::UnicodeWidthStr; #[must_use] pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { let tab = b'\t'; - let ntabs = line.iter().filter(|c| **c == tab).count(); - if ntabs == 0 { + let n_tabs = line.iter().filter(|c| **c == tab).count(); + if n_tabs == 0 { return line.to_vec(); } - let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut result = Vec::with_capacity(line.len() + n_tabs * (tabsize - 1)); let mut offset = 0; let mut iter = line.split(|c| *c == tab).peekable(); @@ -71,6 +73,11 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } +/// Checks if files are the same (same file link), which must return 'equal'. +pub fn is_same_file(from: &OsStr, to: &OsStr) -> bool { + (from == "-" && to == "-") || same_file::is_same_file(from, to).unwrap_or(false) +} + pub fn format_failure_to_read_input_file( executable: &OsString, filepath: &OsString, @@ -78,15 +85,20 @@ pub fn format_failure_to_read_input_file( ) -> String { // std::io::Error's display trait outputs "{detail} (os error {code})" // but we want only the {detail} (error string) part - let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); format!( "{}: {}: {}", executable.to_string_lossy(), filepath.to_string_lossy(), - error_code_re.replace(error.to_string().as_str(), ""), + format_io_error(&error), ) } +/// Removes the (os error x) part of the error message +pub fn format_io_error(error: &dyn std::error::Error) -> String { + let s = error.to_string(); + s.split(" (os error").next().unwrap_or(&s).to_string() +} + pub fn report_failure_to_read_input_file( executable: &OsString, filepath: &OsString, diff --git a/tests/integration.rs b/tests/integration.rs index 0e8d21e..696d629 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -97,7 +97,7 @@ mod common { } mod diff { - use diffutilslib::assert_diff_eq; + use uudiff::assert_diff_eq; use super::*;