From 6bcde017784e4e58008f1286bdb7f71fe0d2cdba Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 18:56:03 -0500 Subject: [PATCH 01/47] Update package dependencies: add new type definitions and libraries --- package-lock.json | 347 ++++++++++++++++++++++++++++++++++++++++++++++ package.json | 7 + 2 files changed, 354 insertions(+) diff --git a/package-lock.json b/package-lock.json index 8c88b6c..982a949 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,10 +26,14 @@ "default-shell": "^2.2.0", "delay": "^6.0.0", "diff": "^5.2.0", + "diff-match-patch": "^1.0.5", "fast-deep-equal": "^3.1.3", + "fastest-levenshtein": "^1.0.16", "globby": "^14.0.2", "isbinaryfile": "^5.0.2", + "isomorphic-git": "^1.29.0", "mammoth": "^1.8.0", + "memfs": "^4.15.3", "monaco-vscode-textmate-theme-converter": "^0.1.7", "openai": "^4.73.1", "os-name": "^6.0.0", @@ -39,6 +43,7 @@ "puppeteer-core": "^23.4.0", "serialize-error": "^11.0.3", "sound-play": "^1.1.0", + "string-similarity": "^4.0.4", "strip-ansi": "^7.1.0", "tree-sitter-wasms": "^0.1.11", "turndown": "^7.2.0", @@ -49,9 +54,11 @@ "@changesets/cli": "^2.27.10", "@changesets/types": "^6.0.0", "@types/diff": "^5.2.1", + "@types/diff-match-patch": "^1.0.36", "@types/jest": "^29.5.14", "@types/mocha": "^10.0.7", "@types/node": "20.x", + "@types/string-similarity": "^4.0.2", "@typescript-eslint/eslint-plugin": "^7.14.1", "@typescript-eslint/parser": "^7.11.0", "@vscode/test-cli": "^0.0.9", @@ -6059,6 +6066,13 @@ "integrity": "sha512-K0Oqlrq3kQMaO2RhfrNQX5trmt+XLyom88zS0u84nnIcLvFnRUMRRHmrGny5GSM+kNO9IZLARsdQHDzkhAgmrQ==", "dev": true }, + "node_modules/@types/diff-match-patch": { + "version": "1.0.36", + "resolved": "https://registry.npmjs.org/@types/diff-match-patch/-/diff-match-patch-1.0.36.tgz", + "integrity": "sha512-xFdR6tkm0MWvBfO8xXCSsinYxHcqkQUlcHeSpMC2ukzOb6lwQAfDmW+Qt0AvlGd8HpsS28qKsB+oPeJn9I39jg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/graceful-fs": { "version": "4.1.9", 
"resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", @@ -6136,6 +6150,13 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, + "node_modules/@types/string-similarity": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/string-similarity/-/string-similarity-4.0.2.tgz", + "integrity": "sha512-LkJQ/jsXtCVMK+sKYAmX/8zEq+/46f1PTQw7YtmQwb74jemS1SlNLmARM2Zml9DgdDTWKAtc5L13WorpHPDjDA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/turndown": { "version": "5.0.5", "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.5.tgz", @@ -6707,6 +6728,12 @@ "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", "dev": true }, + "node_modules/async-lock": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/async-lock/-/async-lock-1.4.1.tgz", + "integrity": "sha512-Az2ZTpuytrtqENulXwO3GGv1Bztugx6TT37NIo7imr/Qo0gsYiGtSdBa2B6fsXhTpVZDNfu1Qn3pk531e3q+nQ==", + "license": "MIT" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -7371,6 +7398,12 @@ "integrity": "sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==", "dev": true }, + "node_modules/clean-git-ref": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/clean-git-ref/-/clean-git-ref-2.0.1.tgz", + "integrity": "sha512-bLSptAy2P0s6hU4PzuIMKmMJJSE6gLXGH1cntDu7bWJUksvuM+7ReOK61mozULErYvP6a15rnYl0zFDef+pyPw==", + "license": "Apache-2.0" + }, "node_modules/cli-cursor": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-4.0.0.tgz", @@ -7611,6 +7644,18 @@ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" 
}, + "node_modules/crc-32": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", + "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", + "license": "Apache-2.0", + "bin": { + "crc32": "bin/crc32.njs" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/create-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", @@ -7758,6 +7803,21 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/dedent": { "version": "1.5.3", "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", @@ -7903,6 +7963,12 @@ "node": ">=0.3.1" } }, + "node_modules/diff-match-patch": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/diff-match-patch/-/diff-match-patch-1.0.5.tgz", + "integrity": "sha512-IayShXAgj/QMXgB0IWmKx+rOPuGMhqm5w6jvFxmVenXKIzRqTAAsbBPT3kWQeGANj3jGgvcvv4yK6SxqYmikgw==", + "license": "Apache-2.0" + }, "node_modules/diff-sequences": { "version": "29.6.3", "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", @@ -7912,6 +7978,12 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/diff3": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/diff3/-/diff3-0.0.3.tgz", + "integrity": "sha512-iSq8ngPOt0K53A6eVr4d5Kn6GNrM2nQZtC740pzIriHtn4pOQ2lyzEXQMBeVcWERN0ye7fhBsk9PbLLQOnUx/g==", + "license": "MIT" + }, "node_modules/dingbat-to-unicode": { "version": "1.0.1", 
"resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz", @@ -8797,6 +8869,15 @@ "fxparser": "src/cli/cli.js" } }, + "node_modules/fastest-levenshtein": { + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz", + "integrity": "sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==", + "license": "MIT", + "engines": { + "node": ">= 4.9.1" + } + }, "node_modules/fastq": { "version": "1.17.1", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", @@ -9573,6 +9654,15 @@ "url": "https://github.com/sponsors/typicode" } }, + "node_modules/hyperdyperid": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/hyperdyperid/-/hyperdyperid-1.2.0.tgz", + "integrity": "sha512-Y93lCzHYgGWdrJ66yIktxiaGULYc6oGiABxhcO5AufBeOyoIdZF7bIfLaOrbM0iGIOXQQgxxRrFEnb+Y6w1n4A==", + "license": "MIT", + "engines": { + "node": ">=10.18" + } + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", @@ -10200,6 +10290,55 @@ "node": ">=0.10.0" } }, + "node_modules/isomorphic-git": { + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/isomorphic-git/-/isomorphic-git-1.29.0.tgz", + "integrity": "sha512-zWGqk8901cicvVEhVpN76AwKrS/TzHak2NQCtNXIAavpMIy/yqh+d/JtC9A8AUKZAauUdOyEWKI29tuCLAL+Zg==", + "license": "MIT", + "dependencies": { + "async-lock": "^1.4.1", + "clean-git-ref": "^2.0.1", + "crc-32": "^1.2.0", + "diff3": "0.0.3", + "ignore": "^5.1.4", + "minimisted": "^2.0.0", + "pako": "^1.0.10", + "path-browserify": "^1.0.1", + "pify": "^4.0.1", + "readable-stream": "^3.4.0", + "sha.js": "^2.4.9", + "simple-get": "^4.0.1" + }, + "bin": { + "isogit": "cli.cjs" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/isomorphic-git/node_modules/pify": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/pify/-/pify-4.0.1.tgz", + "integrity": 
"sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/isomorphic-git/node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/istanbul-lib-coverage": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", @@ -11812,6 +11951,113 @@ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" }, + "node_modules/memfs": { + "version": "4.15.3", + "resolved": "https://registry.npmjs.org/memfs/-/memfs-4.15.3.tgz", + "integrity": "sha512-vR/g1SgqvKJgAyYla+06G4p/EOcEmwhYuVb1yc1ixcKf8o/sh7Zngv63957ZSNd1xrZJoinmNyDf2LzuP8WJXw==", + "license": "Apache-2.0", + "dependencies": { + "@jsonjoy.com/json-pack": "^1.0.3", + "@jsonjoy.com/util": "^1.3.0", + "tree-dump": "^1.0.1", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">= 4.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/streamich" + } + }, + "node_modules/memfs/node_modules/@jsonjoy.com/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@jsonjoy.com/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-q6XAnWQDIMA3+FTiOYajoYqySkO+JSat0ytXGSuRdq9uXE7o92gzuQwQM14xaCRlBLGq3v5miDGC4vkVTn54xA==", + "license": "Apache-2.0", + "engines": { + "node": ">=10.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/streamich" + }, + "peerDependencies": { + "tslib": "2" + } 
+ }, + "node_modules/memfs/node_modules/@jsonjoy.com/json-pack": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@jsonjoy.com/json-pack/-/json-pack-1.1.1.tgz", + "integrity": "sha512-osjeBqMJ2lb/j/M8NCPjs1ylqWIcTRTycIhVB5pt6LgzgeRSb0YRZ7j9RfA8wIUrsr/medIuhVyonXRZWLyfdw==", + "license": "Apache-2.0", + "dependencies": { + "@jsonjoy.com/base64": "^1.1.1", + "@jsonjoy.com/util": "^1.1.2", + "hyperdyperid": "^1.2.0", + "thingies": "^1.20.0" + }, + "engines": { + "node": ">=10.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/streamich" + }, + "peerDependencies": { + "tslib": "2" + } + }, + "node_modules/memfs/node_modules/@jsonjoy.com/util": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jsonjoy.com/util/-/util-1.5.0.tgz", + "integrity": "sha512-ojoNsrIuPI9g6o8UxhraZQSyF2ByJanAY4cTFbc8Mf2AXEF4aQRGY1dJxyJpuyav8r9FGflEt/Ff3u5Nt6YMPA==", + "license": "Apache-2.0", + "engines": { + "node": ">=10.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/streamich" + }, + "peerDependencies": { + "tslib": "2" + } + }, + "node_modules/memfs/node_modules/thingies": { + "version": "1.21.0", + "resolved": "https://registry.npmjs.org/thingies/-/thingies-1.21.0.tgz", + "integrity": "sha512-hsqsJsFMsV+aD4s3CWKk85ep/3I9XzYV/IXaSouJMYIoDlgyi11cBhsqYe9/geRfB0YIikBQg6raRaM+nIMP9g==", + "license": "Unlicense", + "engines": { + "node": ">=10.18" + }, + "peerDependencies": { + "tslib": "^2" + } + }, + "node_modules/memfs/node_modules/tree-dump": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.0.2.tgz", + "integrity": "sha512-dpev9ABuLWdEubk+cIaI9cHwRNNDjkBBLXTwI4UCUFdQ5xXKqNXoK4FEciw/vxf+NQ7Cb7sGUyeUtORvHIdRXQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=10.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/streamich" + }, + "peerDependencies": { + "tslib": "2" + } + }, + "node_modules/memfs/node_modules/tslib": { 
+ "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" + }, "node_modules/memorystream": { "version": "0.3.1", "resolved": "https://registry.npmjs.org/memorystream/-/memorystream-0.3.1.tgz", @@ -11886,6 +12132,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -11901,6 +12159,24 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/minimisted": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/minimisted/-/minimisted-2.0.1.tgz", + "integrity": "sha512-1oPjfuLQa2caorJUM8HV8lGgWCc0qqAO1MNv/k05G4qslmsndV/5WdNZrqCiyqiz3wohia2Ij2B7w2Dr7/IyrA==", + "license": "MIT", + "dependencies": { + "minimist": "^1.2.5" + } + }, "node_modules/minipass": { "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", @@ -12910,6 +13186,12 @@ "url": "https://github.com/inikulin/parse5?sponsor=1" } }, + "node_modules/path-browserify": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz", + "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==", + "license": "MIT" + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -13839,6 +14121,19 @@ "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" }, + "node_modules/sha.js": { + "version": "2.4.11", + "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", + "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", + "license": "(MIT AND BSD-3-Clause)", + "dependencies": { + "inherits": "^2.0.1", + "safe-buffer": "^5.0.1" + }, + "bin": { + "sha.js": "bin.js" + } + }, "node_modules/shallow-clone": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-3.0.1.tgz", @@ -13910,6 +14205,51 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/sisteransi": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", @@ -14186,6 +14526,13 @@ "node": ">=8" } }, + "node_modules/string-similarity": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-4.0.4.tgz", + "integrity": "sha512-/q/8Q4Bl4ZKAPjj8WerIBJWALKkaPRfrvhfF8k/B23i4nzrlRj2/go1m90In7nG/3XDSbOo0+pu6RvCTM9RGMQ==", + "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.", + "license": "ISC" + }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", diff --git a/package.json b/package.json index ff3bb8a..9a23ce8 100644 --- a/package.json +++ b/package.json @@ -175,9 +175,11 @@ "@changesets/cli": "^2.27.10", "@changesets/types": "^6.0.0", "@types/diff": "^5.2.1", + "@types/diff-match-patch": "^1.0.36", "@types/jest": "^29.5.14", "@types/mocha": "^10.0.7", "@types/node": "20.x", + "@types/string-similarity": "^4.0.2", "@typescript-eslint/eslint-plugin": "^7.14.1", "@typescript-eslint/parser": "^7.11.0", "@vscode/test-cli": "^0.0.9", @@ -211,10 +213,14 @@ "default-shell": "^2.2.0", "delay": "^6.0.0", "diff": "^5.2.0", + "diff-match-patch": "^1.0.5", "fast-deep-equal": "^3.1.3", + "fastest-levenshtein": "^1.0.16", "globby": "^14.0.2", "isbinaryfile": "^5.0.2", + "isomorphic-git": "^1.29.0", "mammoth": "^1.8.0", + "memfs": "^4.15.3", "monaco-vscode-textmate-theme-converter": "^0.1.7", "openai": "^4.73.1", "os-name": "^6.0.0", @@ -224,6 +230,7 @@ "puppeteer-core": "^23.4.0", "serialize-error": "^11.0.3", 
"sound-play": "^1.1.0", + "string-similarity": "^4.0.4", "strip-ansi": "^7.1.0", "tree-sitter-wasms": "^0.1.11", "turndown": "^7.2.0", From 4356cffb0d2f3c89065ae860324d12388b8dac8d Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 18:57:40 -0500 Subject: [PATCH 02/47] Refactor applyDiff method to return a Promise and add logging for diff results --- src/core/Cline.ts | 3 ++- src/core/diff/types.ts | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 840caff..8e25679 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -1277,7 +1277,7 @@ export class Cline { const originalContent = await fs.readFile(absolutePath, "utf-8") // Apply the diff to the original content - const diffResult = this.diffStrategy?.applyDiff( + const diffResult = await this.diffStrategy?.applyDiff( originalContent, diffContent, parseInt(block.params.start_line ?? ''), @@ -1286,6 +1286,7 @@ export class Cline { success: false, error: "No diff strategy available" } + console.log("diffResult", diffResult) if (!diffResult.success) { this.consecutiveMistakeCount++ const currentCount = (this.consecutiveMistakeCountForApplyDiff.get(relPath) || 0) + 1 diff --git a/src/core/diff/types.ts b/src/core/diff/types.ts index 3957a1f..7666d0f 100644 --- a/src/core/diff/types.ts +++ b/src/core/diff/types.ts @@ -28,5 +28,5 @@ export interface DiffStrategy { * @param endLine Optional line number where the search block ends. If not provided, searches the entire file. 
* @returns A DiffResult object containing either the successful result or error details */ - applyDiff(originalContent: string, diffContent: string, startLine?: number, endLine?: number): DiffResult -} + applyDiff(originalContent: string, diffContent: string, startLine?: number, endLine?: number): Promise +} \ No newline at end of file From 612d7020f3bc4ec0ec67cc0c952bb4c7035ddfdf Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 18:58:23 -0500 Subject: [PATCH 03/47] Add unit tests for NewUnifiedDiffStrategy --- .../strategies/__tests__/new-unified.test.ts | 312 ++++++++++++++++++ 1 file changed, 312 insertions(+) create mode 100644 src/core/diff/strategies/__tests__/new-unified.test.ts diff --git a/src/core/diff/strategies/__tests__/new-unified.test.ts b/src/core/diff/strategies/__tests__/new-unified.test.ts new file mode 100644 index 0000000..a7fd279 --- /dev/null +++ b/src/core/diff/strategies/__tests__/new-unified.test.ts @@ -0,0 +1,312 @@ +import { NewUnifiedDiffStrategy } from '../new-unified'; + +describe('main', () => { + + let strategy: NewUnifiedDiffStrategy + + beforeEach(() => { + strategy = new NewUnifiedDiffStrategy() + }) + + describe('getToolDescription', () => { + it('should return tool description with correct cwd', () => { + const cwd = '/test/path' + const description = strategy.getToolDescription(cwd) + + expect(description).toContain('apply_diff') + expect(description).toContain(cwd) + expect(description).toContain('Parameters:') + expect(description).toContain('Format Requirements:') + }) + }) + + it('should apply simple diff correctly', async () => { + const original = `line1 +line2 +line3`; + + const diff = `--- a/file.txt ++++ b/file.txt +@@ ... 
@@ + line1 ++new line + line2 +-line3 ++modified line3`; + + const result = await strategy.applyDiff(original, diff); + + expect(result).toBe(`line1 +new line +line2 +modified line3`); + }); + + it('should handle multiple hunks', async () => { + const original = `line1 +line2 +line3 +line4 +line5`; + + const diff = `--- a/file.txt ++++ b/file.txt +@@ ... @@ + line1 ++new line + line2 +-line3 ++modified line3 +@@ ... @@ + line4 +-line5 ++modified line5 ++new line at end`; + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(`line1 +new line +line2 +modified line3 +line4 +modified line5 +new line at end`); + } + }); + + it('should handle complex large', async () => { + const original = `line1 +line2 +line3 +line4 +line5 +line6 +line7 +line8 +line9 +line10`; + + const diff = `--- a/file.txt ++++ b/file.txt +@@ ... @@ + line1 ++header line ++another header + line2 +-line3 +-line4 ++modified line3 ++modified line4 ++extra line +@@ ... @@ + line6 ++middle section + line7 +-line8 ++changed line8 ++bonus line +@@ ... @@ + line9 +-line10 ++final line ++very last line`; + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(`line1 +header line +another header +line2 +modified line3 +modified line4 +extra line +line5 +line6 +middle section +line7 +changed line8 +bonus line +line9 +final line +very last line`); + } + }); + + it('should handle indentation changes', async () => { + const original = `first line + indented line + double indented line + back to single indent +no indent + indented again + double indent again + triple indent + back to single +last line`; + + const diff = `--- original ++++ modified +@@ ... 
@@ + first line + indented line ++ tab indented line ++ new indented line + double indented line + back to single indent + no indent + indented again + double indent again +- triple indent ++ hi there mate + back to single + last line`; + + const expected = `first line + indented line + tab indented line + new indented line + double indented line + back to single indent +no indent + indented again + double indent again + hi there mate + back to single +last line`; + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(expected); + } + }); + + it('should handle high level edits', async () => { + + const original = `def factorial(n): + if n == 0: + return 1 + else: + return n * factorial(n-1)` + const diff = `@@ ... @@ +-def factorial(n): +- if n == 0: +- return 1 +- else: +- return n * factorial(n-1) ++def factorial(number): ++ if number == 0: ++ return 1 ++ else: ++ return number * factorial(number-1)` + +const expected = `def factorial(number): + if number == 0: + return 1 + else: + return number * factorial(number-1)` + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(expected); + } + }); + + it('it should handle very complex edits', async () => { + const original = `//Initialize the array that will hold the primes +var primeArray = []; +/*Write a function that checks for primeness and + pushes those values to t*he array*/ +function PrimeCheck(candidate){ + isPrime = true; + for(var i = 2; i < candidate && isPrime; i++){ + if(candidate%i === 0){ + isPrime = false; + } else { + isPrime = true; + } + } + if(isPrime){ + primeArray.push(candidate); + } + return primeArray; +} +/*Write the code that runs the above until the + l ength of the array equa*ls the number of primes + desired*/ + +var numPrimes = prompt("How many primes?"); + +//Display the finished array of primes 
+ +//for loop starting at 2 as that is the lowest prime number keep going until the array is as long as we requested +for (var i = 2; primeArray.length < numPrimes; i++) { + PrimeCheck(i); // +} +console.log(primeArray); +` + + const diff = `--- test_diff.js ++++ test_diff.js +@@ ... @@ +-//Initialize the array that will hold the primes + var primeArray = []; +-/*Write a function that checks for primeness and +- pushes those values to t*he array*/ + function PrimeCheck(candidate){ + isPrime = true; + for(var i = 2; i < candidate && isPrime; i++){ +@@ ... @@ + return primeArray; + } +-/*Write the code that runs the above until the +- l ength of the array equa*ls the number of primes +- desired*/ + + var numPrimes = prompt("How many primes?"); + +-//Display the finished array of primes +- +-//for loop starting at 2 as that is the lowest prime number keep going until the array is as long as we requested + for (var i = 2; primeArray.length < numPrimes; i++) { +- PrimeCheck(i); // ++ PrimeCheck(i); + } + console.log(primeArray);` + + const expected = `var primeArray = []; +function PrimeCheck(candidate){ + isPrime = true; + for(var i = 2; i < candidate && isPrime; i++){ + if(candidate%i === 0){ + isPrime = false; + } else { + isPrime = true; + } + } + if(isPrime){ + primeArray.push(candidate); + } + return primeArray; +} + +var numPrimes = prompt("How many primes?"); + +for (var i = 2; primeArray.length < numPrimes; i++) { + PrimeCheck(i); +} +console.log(primeArray); +` + + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(expected); + } + }); +}); \ No newline at end of file From 420c6c72dd762885e007dffe564b71c6d72cd5a1 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 18:59:16 -0500 Subject: [PATCH 04/47] Replace SearchReplaceDiffStrategy with NewUnifiedDiffStrategy in getDiffStrategy function --- src/core/diff/DiffStrategy.ts | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/src/core/diff/DiffStrategy.ts b/src/core/diff/DiffStrategy.ts index c611856..5d71ed6 100644 --- a/src/core/diff/DiffStrategy.ts +++ b/src/core/diff/DiffStrategy.ts @@ -1,6 +1,7 @@ import type { DiffStrategy } from './types' import { UnifiedDiffStrategy } from './strategies/unified' import { SearchReplaceDiffStrategy } from './strategies/search-replace' +import { NewUnifiedDiffStrategy } from './strategies/new-unified' /** * Get the appropriate diff strategy for the given model * @param model The name of the model being used (e.g., 'gpt-4', 'claude-3-opus') @@ -9,7 +10,7 @@ import { SearchReplaceDiffStrategy } from './strategies/search-replace' export function getDiffStrategy(model: string, fuzzyMatchThreshold?: number): DiffStrategy { // For now, return SearchReplaceDiffStrategy for all models // This architecture allows for future optimizations based on model capabilities - return new SearchReplaceDiffStrategy(fuzzyMatchThreshold ?? 1.0) + return new NewUnifiedDiffStrategy() } export type { DiffStrategy } From 70cf27071bc104946d656449c098207466724e7a Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 18:59:55 -0500 Subject: [PATCH 05/47] Refactor applyDiff methods in search-replace and unified strategies to return Promises for asynchronous handling --- src/core/diff/strategies/search-replace.ts | 2 +- src/core/diff/strategies/unified.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/diff/strategies/search-replace.ts b/src/core/diff/strategies/search-replace.ts index 3990848..fd44736 100644 --- a/src/core/diff/strategies/search-replace.ts +++ b/src/core/diff/strategies/search-replace.ts @@ -126,7 +126,7 @@ Your search/replace content here ` } - applyDiff(originalContent: string, diffContent: string, startLine?: number, endLine?: number): DiffResult { + async applyDiff(originalContent: string, diffContent: string, startLine?: number, endLine?: number): Promise { // Extract the 
search and replace blocks const match = diffContent.match(/<<<<<<< SEARCH\n([\s\S]*?)\n?=======\n([\s\S]*?)\n?>>>>>>> REPLACE/); if (!match) { diff --git a/src/core/diff/strategies/unified.ts b/src/core/diff/strategies/unified.ts index 2f80a61..cd5e31e 100644 --- a/src/core/diff/strategies/unified.ts +++ b/src/core/diff/strategies/unified.ts @@ -108,7 +108,7 @@ Your diff here ` } - applyDiff(originalContent: string, diffContent: string): DiffResult { + async applyDiff(originalContent: string, diffContent: string): Promise { try { const result = applyPatch(originalContent, diffContent) if (result === false) { From 2857dd4996e306ab6b571c7132dddb2aa059928d Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 19:00:29 -0500 Subject: [PATCH 06/47] Refactor test cases in search-replace and unified strategies to use async/await for applyDiff method, ensuring consistent handling of asynchronous operations across all tests. --- .../__tests__/search-replace.test.ts | 238 +++++++++--------- .../diff/strategies/__tests__/unified.test.ts | 22 +- 2 files changed, 130 insertions(+), 130 deletions(-) diff --git a/src/core/diff/strategies/__tests__/search-replace.test.ts b/src/core/diff/strategies/__tests__/search-replace.test.ts index c016618..be81e24 100644 --- a/src/core/diff/strategies/__tests__/search-replace.test.ts +++ b/src/core/diff/strategies/__tests__/search-replace.test.ts @@ -8,7 +8,7 @@ describe('SearchReplaceDiffStrategy', () => { strategy = new SearchReplaceDiffStrategy(1.0, 5) // Default 1.0 threshold for exact matching, 5 line buffer for tests }) - it('should replace matching content', () => { + it('should replace matching content', async () => { const originalContent = 'function hello() {\n console.log("hello")\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -21,14 +21,14 @@ function hello() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, 
diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('function hello() {\n console.log("hello world")\n}\n') } }) - it('should match content with different surrounding whitespace', () => { + it('should match content with different surrounding whitespace', async () => { const originalContent = '\nfunction example() {\n return 42;\n}\n\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -41,14 +41,14 @@ function example() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('\nfunction example() {\n return 43;\n}\n\n') } }) - it('should match content with different indentation in search block', () => { + it('should match content with different indentation in search block', async () => { const originalContent = ' function test() {\n return true;\n }\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -61,14 +61,14 @@ function test() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(' function test() {\n return false;\n }\n') } }) - it('should handle tab-based indentation', () => { + it('should handle tab-based indentation', async () => { const originalContent = "function test() {\n\treturn true;\n}\n" const diffContent = `test.ts <<<<<<< SEARCH @@ -81,14 +81,14 @@ function test() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe("function test() {\n\treturn false;\n}\n") } }) - it('should preserve mixed tabs and spaces', () => { + it('should preserve mixed tabs 
and spaces', async () => { const originalContent = "\tclass Example {\n\t constructor() {\n\t\tthis.value = 0;\n\t }\n\t}" const diffContent = `test.ts <<<<<<< SEARCH @@ -105,14 +105,14 @@ function test() { \t} >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe("\tclass Example {\n\t constructor() {\n\t\tthis.value = 1;\n\t }\n\t}") } }) - it('should handle additional indentation with tabs', () => { + it('should handle additional indentation with tabs', async () => { const originalContent = "\tfunction test() {\n\t\treturn true;\n\t}" const diffContent = `test.ts <<<<<<< SEARCH @@ -126,14 +126,14 @@ function test() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe("\tfunction test() {\n\t\t// Add comment\n\t\treturn false;\n\t}") } }) - it('should preserve exact indentation characters when adding lines', () => { + it('should preserve exact indentation characters when adding lines', async () => { const originalContent = "\tfunction test() {\n\t\treturn true;\n\t}" const diffContent = `test.ts <<<<<<< SEARCH @@ -148,14 +148,14 @@ function test() { \t} >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe("\tfunction test() {\n\t\t// First comment\n\t\t// Second comment\n\t\treturn true;\n\t}") } }) - it('should handle Windows-style CRLF line endings', () => { + it('should handle Windows-style CRLF line endings', async () => { const originalContent = "function test() {\r\n return true;\r\n}\r\n" const diffContent 
= `test.ts <<<<<<< SEARCH @@ -168,14 +168,14 @@ function test() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe("function test() {\r\n return false;\r\n}\r\n") } }) - it('should return false if search content does not match', () => { + it('should return false if search content does not match', async () => { const originalContent = 'function hello() {\n console.log("hello")\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -188,19 +188,19 @@ function hello() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(false) }) - it('should return false if diff format is invalid', () => { + it('should return false if diff format is invalid', async () => { const originalContent = 'function hello() {\n console.log("hello")\n}\n' const diffContent = `test.ts\nInvalid diff format` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(false) }) - it('should handle multiple lines with proper indentation', () => { + it('should handle multiple lines with proper indentation', async () => { const originalContent = 'class Example {\n constructor() {\n this.value = 0\n }\n\n getValue() {\n return this.value\n }\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -215,14 +215,14 @@ function hello() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('class Example {\n constructor() {\n this.value = 0\n }\n\n getValue() {\n // Add logging\n console.log("Getting 
value")\n return this.value\n }\n}\n') } }) - it('should preserve whitespace exactly in the output', () => { + it('should preserve whitespace exactly in the output', async () => { const originalContent = " indented\n more indented\n back\n" const diffContent = `test.ts <<<<<<< SEARCH @@ -235,14 +235,14 @@ function hello() { end >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(" modified\n still indented\n end\n") } }) - it('should preserve indentation when adding new lines after existing content', () => { + it('should preserve indentation when adding new lines after existing content', async () => { const originalContent = ' onScroll={() => updateHighlights()}' const diffContent = `test.ts <<<<<<< SEARCH @@ -255,14 +255,14 @@ function hello() { }} >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(' onScroll={() => updateHighlights()}\n onDragOver={(e) => {\n e.preventDefault()\n e.stopPropagation()\n }}') } }) - it('should handle varying indentation levels correctly', () => { + it('should handle varying indentation levels correctly', async () => { const originalContent = ` class Example { constructor() { @@ -296,7 +296,7 @@ class Example { } >>>>>>> REPLACE`.trim(); - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(` @@ -313,7 +313,7 @@ class Example { } }) - it('should handle mixed indentation styles in the same file', () => { + it('should handle mixed indentation styles in the same file', async () => { const 
originalContent = `class Example { constructor() { this.value = 0; @@ -340,7 +340,7 @@ class Example { } >>>>>>> REPLACE`; - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`class Example { @@ -355,7 +355,7 @@ class Example { } }) - it('should handle Python-style significant whitespace', () => { + it('should handle Python-style significant whitespace', async () => { const originalContent = `def example(): if condition: do_something() @@ -376,7 +376,7 @@ class Example { process(item) >>>>>>> REPLACE`; - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`def example(): @@ -389,7 +389,7 @@ class Example { } }); - it('should preserve empty lines with indentation', () => { + it('should preserve empty lines with indentation', async () => { const originalContent = `function test() { const x = 1; @@ -409,7 +409,7 @@ class Example { if (x) { >>>>>>> REPLACE`; - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function test() { @@ -423,7 +423,7 @@ class Example { } }); - it('should handle indentation when replacing entire blocks', () => { + it('should handle indentation when replacing entire blocks', async () => { const originalContent = `class Test { method() { if (true) { @@ -450,7 +450,7 @@ class Example { } >>>>>>> REPLACE`; - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`class Test { @@ 
-467,7 +467,7 @@ class Example { } }); - it('should handle negative indentation relative to search content', () => { + it('should handle negative indentation relative to search content', async () => { const originalContent = `class Example { constructor() { if (true) { @@ -484,8 +484,8 @@ class Example { this.init(); this.setup(); >>>>>>> REPLACE`; - - const result = strategy.applyDiff(originalContent, diffContent); + + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`class Example { @@ -499,7 +499,7 @@ class Example { } }); - it('should handle extreme negative indentation (no indent)', () => { + it('should handle extreme negative indentation (no indent)', async () => { const originalContent = `class Example { constructor() { if (true) { @@ -514,7 +514,7 @@ class Example { this.init(); >>>>>>> REPLACE`; - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`class Example { @@ -527,7 +527,7 @@ this.init(); } }); - it('should handle mixed indentation changes in replace block', () => { + it('should handle mixed indentation changes in replace block', async () => { const originalContent = `class Example { constructor() { if (true) { @@ -548,7 +548,7 @@ this.init(); this.validate(); >>>>>>> REPLACE`; - const result = strategy.applyDiff(originalContent, diffContent); + const result = await strategy.applyDiff(originalContent, diffContent); expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`class Example { @@ -563,7 +563,7 @@ this.init(); } }); - it('should find matches from middle out', () => { + it('should find matches from middle out', async () => { const originalContent = ` function one() { return "target"; @@ -595,7 +595,7 @@ function five() { // Search around the 
middle (function three) // Even though all functions contain the target text, // it should match the one closest to line 9 first - const result = strategy.applyDiff(originalContent, diffContent, 9, 9) + const result = await strategy.applyDiff(originalContent, diffContent, 9, 9) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -621,15 +621,15 @@ function five() { }) }) - describe('line number stripping', () => { - describe('line number stripping', () => { + describe('line number stripping', async () => { + describe('line number stripping', async () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy() }) - it('should strip line numbers from both search and replace sections', () => { + it('should strip line numbers from both search and replace sections', async () => { const originalContent = 'function test() {\n return true;\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -642,14 +642,14 @@ function five() { 3 | } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('function test() {\n return false;\n}\n') } }) - it('should strip line numbers with leading spaces', () => { + it('should strip line numbers with leading spaces', async () => { const originalContent = 'function test() {\n return true;\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -662,14 +662,14 @@ function five() { 3 | } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('function test() {\n return false;\n}\n') } }) - it('should not strip when not all lines have numbers in either section', () => { + it('should not 
strip when not all lines have numbers in either section', async () => { const originalContent = 'function test() {\n return true;\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -682,11 +682,11 @@ function five() { 3 | } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(false) }) - it('should preserve content that naturally starts with pipe', () => { + it('should preserve content that naturally starts with pipe', async () => { const originalContent = '|header|another|\n|---|---|\n|data|more|\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -699,14 +699,14 @@ function five() { 3 | |data|updated| >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('|header|another|\n|---|---|\n|data|updated|\n') } }) - it('should preserve indentation when stripping line numbers', () => { + it('should preserve indentation when stripping line numbers', async () => { const originalContent = ' function test() {\n return true;\n }\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -719,14 +719,14 @@ function five() { 3 | } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(' function test() {\n return false;\n }\n') } }) - it('should handle different line numbers between sections', () => { + it('should handle different line numbers between sections', async () => { const originalContent = 'function test() {\n return true;\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -739,14 +739,14 @@ function five() { 22 | } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, 
diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('function test() {\n return false;\n}\n') } }) - it('should not strip content that starts with pipe but no line number', () => { + it('should not strip content that starts with pipe but no line number', async () => { const originalContent = '| Pipe\n|---|\n| Data\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -759,14 +759,14 @@ function five() { | Updated >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('| Pipe\n|---|\n| Updated\n') } }) - it('should handle mix of line-numbered and pipe-only content', () => { + it('should handle mix of line-numbered and pipe-only content', async () => { const originalContent = '| Pipe\n|---|\n| Data\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -779,7 +779,7 @@ function five() { 3 | | NewData >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('1 | | Pipe\n2 | |---|\n3 | | NewData\n') @@ -788,15 +788,15 @@ function five() { }) }); - describe('insertion/deletion', () => { + describe('insertion/deletion', async () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy() }) - describe('deletion', () => { - it('should delete code when replace block is empty', () => { + describe('deletion', async () => { + it('should delete code when replace block is empty', async () => { const originalContent = `function test() { console.log("hello"); // Comment to remove @@ -808,7 +808,7 @@ function five() { ======= >>>>>>> REPLACE` - const result = 
strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function test() { @@ -818,7 +818,7 @@ function five() { } }) - it('should delete multiple lines when replace block is empty', () => { + it('should delete multiple lines when replace block is empty', async () => { const originalContent = `class Example { constructor() { // Initialize @@ -838,7 +838,7 @@ function five() { ======= >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`class Example { @@ -848,7 +848,7 @@ function five() { } }) - it('should preserve indentation when deleting nested code', () => { + it('should preserve indentation when deleting nested code', async () => { const originalContent = `function outer() { if (true) { // Remove this @@ -865,7 +865,7 @@ function five() { ======= >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function outer() { @@ -877,8 +877,8 @@ function five() { }) }) - describe('insertion', () => { - it('should insert code at specified line when search block is empty', () => { + describe('insertion', async () => { + it('should insert code at specified line when search block is empty', async () => { const originalContent = `function test() { const x = 1; return x; @@ -889,7 +889,7 @@ function five() { console.log("Adding log"); >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 2, 2) + const result = await strategy.applyDiff(originalContent, diffContent, 2, 2) expect(result.success).toBe(true) if (result.success) { 
expect(result.content).toBe(`function test() { @@ -900,7 +900,7 @@ function five() { } }) - it('should preserve indentation when inserting at nested location', () => { + it('should preserve indentation when inserting at nested location', async () => { const originalContent = `function test() { if (true) { const x = 1; @@ -913,7 +913,7 @@ function five() { console.log("After"); >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 3, 3) + const result = await strategy.applyDiff(originalContent, diffContent, 3, 3) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function test() { @@ -926,7 +926,7 @@ function five() { } }) - it('should handle insertion at start of file', () => { + it('should handle insertion at start of file', async () => { const originalContent = `function test() { return true; }` @@ -938,7 +938,7 @@ function five() { >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 1, 1) + const result = await strategy.applyDiff(originalContent, diffContent, 1, 1) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`// Copyright 2024 @@ -950,7 +950,7 @@ function test() { } }) - it('should handle insertion at end of file', () => { + it('should handle insertion at end of file', async () => { const originalContent = `function test() { return true; }` @@ -961,7 +961,7 @@ function test() { // End of file >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 4, 4) + const result = await strategy.applyDiff(originalContent, diffContent, 4, 4) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function test() { @@ -972,7 +972,7 @@ function test() { } }) - it('should error if no start_line is provided for insertion', () => { + it('should error if no start_line is provided for insertion', async () => { const originalContent = `function test() { return true; }` @@ -982,19 +982,19 @@ 
function test() { console.log("test"); >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(false) }) }) }) - describe('fuzzy matching', () => { + describe('fuzzy matching', async () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy(0.9, 5) // 90% similarity threshold, 5 line buffer for tests }) - it('should match content with small differences (>90% similar)', () => { + it('should match content with small differences (>90% similar)', async () => { const originalContent = 'function getData() {\n const results = fetchData();\n return results.filter(Boolean);\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -1011,14 +1011,14 @@ function getData() { strategy = new SearchReplaceDiffStrategy(0.9, 5) // Use 5 line buffer for tests - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('function getData() {\n const data = fetchData();\n return data.filter(Boolean);\n}\n') } }) - it('should not match when content is too different (<90% similar)', () => { + it('should not match when content is too different (<90% similar)', async () => { const originalContent = 'function processUsers(data) {\n return data.map(user => user.name);\n}\n' const diffContent = `test.ts <<<<<<< SEARCH @@ -1031,11 +1031,11 @@ function processData(data) { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(false) }) - it('should match content with extra whitespace', () => { + it('should match content with extra whitespace', async () => { const originalContent = 'function sum(a, b) {\n return a + b;\n}' const diffContent 
= `test.ts <<<<<<< SEARCH @@ -1048,14 +1048,14 @@ function sum(a, b) { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('function sum(a, b) {\n return a + b + 1;\n}') } }) - it('should not exact match empty lines', () => { + it('should not exact match empty lines', async () => { const originalContent = 'function sum(a, b) {\n\n return a + b;\n}' const diffContent = `test.ts <<<<<<< SEARCH @@ -1065,7 +1065,7 @@ import { a } from "a"; function sum(a, b) { >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe('import { a } from "a";\nfunction sum(a, b) {\n\n return a + b;\n}') @@ -1073,14 +1073,14 @@ function sum(a, b) { }) }) - describe('line-constrained search', () => { + describe('line-constrained search', async () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy(0.9, 5) }) - it('should find and replace within specified line range', () => { + it('should find and replace within specified line range', async () => { const originalContent = ` function one() { return 1; @@ -1105,7 +1105,7 @@ function two() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 5, 7) + const result = await strategy.applyDiff(originalContent, diffContent, 5, 7) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -1122,7 +1122,7 @@ function three() { } }) - it('should find and replace within buffer zone (5 lines before/after)', () => { + it('should find and replace within buffer zone (5 lines before/after)', async () => { const originalContent = ` function one() { return 1; @@ 
-1149,7 +1149,7 @@ function three() { // Even though we specify lines 5-7, it should still find the match at lines 9-11 // because it's within the 5-line buffer zone - const result = strategy.applyDiff(originalContent, diffContent, 5, 7) + const result = await strategy.applyDiff(originalContent, diffContent, 5, 7) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -1166,7 +1166,7 @@ function three() { } }) - it('should not find matches outside search range and buffer zone', () => { + it('should not find matches outside search range and buffer zone', async () => { const originalContent = ` function one() { return 1; @@ -1201,11 +1201,11 @@ function five() { // Searching around function two() (lines 5-7) // function five() is more than 5 lines away, so it shouldn't match - const result = strategy.applyDiff(originalContent, diffContent, 5, 7) + const result = await strategy.applyDiff(originalContent, diffContent, 5, 7) expect(result.success).toBe(false) }) - it('should handle search range at start of file', () => { + it('should handle search range at start of file', async () => { const originalContent = ` function one() { return 1; @@ -1226,7 +1226,7 @@ function one() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 1, 3) + const result = await strategy.applyDiff(originalContent, diffContent, 1, 3) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -1239,7 +1239,7 @@ function two() { } }) - it('should handle search range at end of file', () => { + it('should handle search range at end of file', async () => { const originalContent = ` function one() { return 1; @@ -1260,7 +1260,7 @@ function two() { } >>>>>>> REPLACE` - const result = strategy.applyDiff(originalContent, diffContent, 5, 7) + const result = await strategy.applyDiff(originalContent, diffContent, 5, 7) expect(result.success).toBe(true) if (result.success) { 
expect(result.content).toBe(`function one() { @@ -1273,7 +1273,7 @@ function two() { } }) - it('should match specific instance of duplicate code using line numbers', () => { + it('should match specific instance of duplicate code using line numbers', async () => { const originalContent = ` function processData(data) { return data.map(x => x * 2); @@ -1306,7 +1306,7 @@ function processData(data) { >>>>>>> REPLACE` // Target the second instance of processData - const result = strategy.applyDiff(originalContent, diffContent, 10, 12) + const result = await strategy.applyDiff(originalContent, diffContent, 10, 12) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function processData(data) { @@ -1330,7 +1330,7 @@ function moreStuff() { } }) - it('should search from start line to end of file when only start_line is provided', () => { + it('should search from start line to end of file when only start_line is provided', async () => { const originalContent = ` function one() { return 1; @@ -1356,7 +1356,7 @@ function three() { >>>>>>> REPLACE` // Only provide start_line, should search from there to end of file - const result = strategy.applyDiff(originalContent, diffContent, 8) + const result = await strategy.applyDiff(originalContent, diffContent, 8) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -1373,7 +1373,7 @@ function three() { } }) - it('should search from start of file to end line when only end_line is provided', () => { + it('should search from start of file to end line when only end_line is provided', async () => { const originalContent = ` function one() { return 1; @@ -1399,7 +1399,7 @@ function one() { >>>>>>> REPLACE` // Only provide end_line, should search from start of file to there - const result = strategy.applyDiff(originalContent, diffContent, undefined, 4) + const result = await strategy.applyDiff(originalContent, diffContent, undefined, 4) 
expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -1416,7 +1416,7 @@ function three() { } }) - it('should prioritize exact line match over expanded search', () => { + it('should prioritize exact line match over expanded search', async () => { const originalContent = ` function one() { return 1; @@ -1446,7 +1446,7 @@ function process() { // Should match the second instance exactly at lines 10-12 // even though the first instance at 6-8 is within the expanded search range - const result = strategy.applyDiff(originalContent, diffContent, 10, 12) + const result = await strategy.applyDiff(originalContent, diffContent, 10, 12) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(` @@ -1468,7 +1468,7 @@ function two() { } }) - it('should fall back to expanded search only if exact match fails', () => { + it('should fall back to expanded search only if exact match fails', async () => { const originalContent = ` function one() { return 1; @@ -1494,7 +1494,7 @@ function process() { // Specify wrong line numbers (3-5), but content exists at 6-8 // Should still find and replace it since it's within the expanded range - const result = strategy.applyDiff(originalContent, diffContent, 3, 5) + const result = await strategy.applyDiff(originalContent, diffContent, 3, 5) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(`function one() { @@ -1512,21 +1512,21 @@ function two() { }) }) - describe('getToolDescription', () => { + describe('getToolDescription', async () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy() }) - it('should include the current working directory', () => { + it('should include the current working directory', async () => { const cwd = '/test/dir' - const description = strategy.getToolDescription(cwd) + const description = await strategy.getToolDescription(cwd) 
expect(description).toContain(`relative to the current working directory ${cwd}`) }) - it('should include required format elements', () => { - const description = strategy.getToolDescription('/test') + it('should include required format elements', async () => { + const description = await strategy.getToolDescription('/test') expect(description).toContain('<<<<<<< SEARCH') expect(description).toContain('=======') expect(description).toContain('>>>>>>> REPLACE') @@ -1534,8 +1534,8 @@ function two() { expect(description).toContain('') }) - it('should document start_line and end_line parameters', () => { - const description = strategy.getToolDescription('/test') + it('should document start_line and end_line parameters', async () => { + const description = await strategy.getToolDescription('/test') expect(description).toContain('start_line: (required) The line number where the search block starts.') expect(description).toContain('end_line: (required) The line number where the search block ends.') }) diff --git a/src/core/diff/strategies/__tests__/unified.test.ts b/src/core/diff/strategies/__tests__/unified.test.ts index 83a53b2..7398b40 100644 --- a/src/core/diff/strategies/__tests__/unified.test.ts +++ b/src/core/diff/strategies/__tests__/unified.test.ts @@ -20,7 +20,7 @@ describe('UnifiedDiffStrategy', () => { }) describe('applyDiff', () => { - it('should successfully apply a function modification diff', () => { + it('should successfully apply a function modification diff', async () => { const originalContent = `import { Logger } from '../logger'; function calculateTotal(items: number[]): number { @@ -58,14 +58,14 @@ function calculateTotal(items: number[]): number { export { calculateTotal };` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(expected) } }) - it('should successfully apply a diff 
adding a new method', () => { + it('should successfully apply a diff adding a new method', async () => { const originalContent = `class Calculator { add(a: number, b: number): number { return a + b; @@ -95,14 +95,14 @@ export { calculateTotal };` } }` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(expected) } }) - it('should successfully apply a diff modifying imports', () => { + it('should successfully apply a diff modifying imports', async () => { const originalContent = `import { useState } from 'react'; import { Button } from './components'; @@ -132,15 +132,15 @@ function App() { useEffect(() => { document.title = \`Count: \${count}\` }, [count]); return ; }` - - const result = strategy.applyDiff(originalContent, diffContent) + + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(expected) } }) - it('should successfully apply a diff with multiple hunks', () => { + it('should successfully apply a diff with multiple hunks', async () => { const originalContent = `import { readFile, writeFile } from 'fs'; function processFile(path: string) { @@ -198,14 +198,14 @@ async function processFile(path: string) { export { processFile };` - const result = strategy.applyDiff(originalContent, diffContent) + const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(expected) } }) - it('should handle empty original content', () => { + it('should handle empty original content', async () => { const originalContent = '' const diffContent = `--- empty.ts +++ empty.ts @@ -218,7 +218,7 @@ export { processFile };` return \`Hello, \${name}!\`; }\n` - const result = strategy.applyDiff(originalContent, diffContent) + 
const result = await strategy.applyDiff(originalContent, diffContent) expect(result.success).toBe(true) if (result.success) { expect(result.content).toBe(expected) From 594481643ba3c64d18656ca44b372dcfac7e4c17 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 7 Jan 2025 19:01:12 -0500 Subject: [PATCH 07/47] Add New Unified Diff Strategy Implementation - Introduced a new unified diff strategy with support for context matching, DMP, and Git-based edits. - Implemented helper functions for parsing unified diffs and evaluating similarity. - Added types for changes, hunks, and diffs to enhance type safety. - Created a main edit function that applies strategies sequentially based on confidence levels. - Included detailed descriptions and usage examples for the new strategy. --- .../strategies/new-unified/edit-strategies.ts | 236 ++++++++++++++++++ src/core/diff/strategies/new-unified/index.ts | 181 ++++++++++++++ .../new-unified/search-strategies.ts | 131 ++++++++++ src/core/diff/strategies/new-unified/types.ts | 14 ++ 4 files changed, 562 insertions(+) create mode 100644 src/core/diff/strategies/new-unified/edit-strategies.ts create mode 100644 src/core/diff/strategies/new-unified/index.ts create mode 100644 src/core/diff/strategies/new-unified/search-strategies.ts create mode 100644 src/core/diff/strategies/new-unified/types.ts diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts new file mode 100644 index 0000000..0cbd5c0 --- /dev/null +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -0,0 +1,236 @@ +import { diff_match_patch } from 'diff-match-patch'; +import * as git from 'isomorphic-git'; +import { fs as memfs, vol } from 'memfs'; +import { Hunk } from './types'; +import { getDMPSimilarity } from './search-strategies'; + +// Helper function to infer indentation +function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { + const 
match = line.match(/^(\s+)/); + if (match) { + return match[1]; + } + + for (const contextLine of contextLines) { + const contextMatch = contextLine.match(/^(\s+)/); + if (contextMatch) { + const currentLineDepth = (line.match(/^\s*/)?.[0] || '').length; + const contextLineDepth = contextMatch[1].length; + + if (currentLineDepth > contextLineDepth) { + return contextMatch[1] + ' '.repeat(2); + } + return contextMatch[1]; + } + } + + return previousIndent; +} + +export type EditResult = { + confidence: number; + result: string[]; + strategy: string; +}; + +// Context matching edit strategy +export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { + if (matchPosition === -1) { + return { confidence: 0, result: content, strategy: 'context' }; + } + + const newResult = [...content.slice(0, matchPosition)]; + let sourceIndex = matchPosition; + let previousIndent = ''; + + for (const change of hunk.changes) { + if (change.type === 'context') { + newResult.push(change.originalLine || (change.indent + change.content)); + previousIndent = change.indent; + sourceIndex++; + } else if (change.type === 'add') { + const indent = change.indent || inferIndentation(change.content, + hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''), + previousIndent + ); + newResult.push(indent + change.content); + previousIndent = indent; + } else if (change.type === 'remove') { + sourceIndex++; + } + } + + newResult.push(...content.slice(sourceIndex)); + + // Validate the result + const similarity = getDMPSimilarity( + content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), + newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') + ); + + return { + confidence: similarity, + result: newResult, + strategy: 'context' + }; +} + +// DMP edit strategy +export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult { + if (matchPosition === -1) { + return 
{ confidence: 0, result: content, strategy: 'dmp' }; + } + + const dmp = new diff_match_patch(); + const currentText = content.join('\n'); + const contextLines = hunk.changes + .filter(c => c.type === 'context') + .map(c => c.content); + + // Create a patch from the hunk with proper indentation + const patch = dmp.patch_make( + currentText, + hunk.changes.reduce((acc, change) => { + if (change.type === 'add') { + const indent = change.indent || inferIndentation(change.content, contextLines); + return acc + indent + change.content + '\n'; + } + if (change.type === 'remove') { + return acc.replace(change.content + '\n', ''); + } + return acc + change.content + '\n'; + }, '') + ); + + const [patchedText] = dmp.patch_apply(patch, currentText); + const similarity = getDMPSimilarity( + content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), + patchedText + ); + + return { + confidence: similarity, + result: patchedText.split('\n'), + strategy: 'dmp' + }; +} + +// Git edit strategy +export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise { + if (matchPosition === -1) { + return { confidence: 0, result: content, strategy: 'git' }; + } + + vol.reset(); + + try { + await git.init({ fs: memfs, dir: '/' }); + + const originalContent = content.join('\n'); + await memfs.promises.writeFile('/file.txt', originalContent); + + await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); + await git.commit({ + fs: memfs, + dir: '/', + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Initial commit' + }); + + await git.branch({ fs: memfs, dir: '/', ref: 'patch-branch' }); + await git.checkout({ fs: memfs, dir: '/', ref: 'patch-branch' }); + + const lines = originalContent.split('\n'); + const newLines = [...lines]; + let offset = matchPosition; + + const contextLines = hunk.changes + .filter(c => c.type === 'context') + .map(c => c.content); + + for (const change of hunk.changes) { + if (change.type === 
'add') { + const indent = change.indent || inferIndentation(change.content, contextLines); + newLines.splice(offset, 0, indent + change.content); + offset++; + } else if (change.type === 'remove') { + const index = newLines.findIndex( + (line, i) => i >= offset && line.trimLeft() === change.content + ); + if (index !== -1) { + newLines.splice(index, 1); + } + } else { + offset++; + } + } + + const modifiedContent = newLines.join('\n'); + await memfs.promises.writeFile('/file.txt', modifiedContent); + + await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); + await git.commit({ + fs: memfs, + dir: '/', + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Apply changes' + }); + + const similarity = getDMPSimilarity( + content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), + newLines.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') + ); + + return { + confidence: similarity, + result: newLines, + strategy: 'git' + }; + } catch (error) { + return { confidence: 0, result: content, strategy: 'git' }; + } finally { + vol.reset(); + } +} + +// Main edit function that tries strategies sequentially +export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise { + // Don't attempt any edits if confidence is too low and not in debug mode + const MIN_CONFIDENCE = 0.9; + if (confidence < MIN_CONFIDENCE && !debug) { + return { confidence: 0, result: content, strategy: 'none' }; + } + + // Try each strategy in sequence until one succeeds + const strategies = [ + { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, + { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, + { name: 'git', apply: () => applyGit(hunk, content, matchPosition) } + ]; + + if (debug) { + // In debug mode, try all strategies and return the first success + const results = await Promise.all(strategies.map(async strategy 
=> { + const result = await strategy.apply(); + return result; + })); + + const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); + if (successfulResults.length > 0) { + return successfulResults.reduce((best, current) => + current.confidence > best.confidence ? current : best + ); + } + } else { + // Normal mode - try strategies sequentially until one succeeds + for (const strategy of strategies) { + const result = await strategy.apply(); + if (result.confidence > MIN_CONFIDENCE) { + return result; + } + } + } + + // If all strategies fail, return failure + return { confidence: 0, result: content, strategy: 'none' }; +} diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts new file mode 100644 index 0000000..891fb96 --- /dev/null +++ b/src/core/diff/strategies/new-unified/index.ts @@ -0,0 +1,181 @@ +import { Diff, Hunk } from "./types" +import { findBestMatch, prepareSearchString } from "./search-strategies" +import { applyEdit } from "./edit-strategies" +import { DiffResult, DiffStrategy } from "../../types" + +export class NewUnifiedDiffStrategy implements DiffStrategy { + private parseUnifiedDiff(diff: string): Diff { + const lines = diff.split("\n") + const hunks: Hunk[] = [] + let currentHunk: Hunk | null = null + + let i = 0 + while (i < lines.length && !lines[i].startsWith("@@")) { + i++ + } + + for (; i < lines.length; i++) { + const line = lines[i] + + if (line.startsWith("@@")) { + if (currentHunk) { + hunks.push(currentHunk) + } + currentHunk = { changes: [] } + continue + } + + if (!currentHunk) { + continue + } + + // Extract the complete indentation for each line + const content = line.slice(1) // Remove the diff marker + const indentMatch = content.match(/^(\s*)/) + const indent = indentMatch ? 
indentMatch[0] : "" + const trimmedContent = content.slice(indent.length) + + if (line.startsWith(" ")) { + currentHunk.changes.push({ + type: "context", + content: trimmedContent, + indent, + originalLine: content, + }) + } else if (line.startsWith("+")) { + currentHunk.changes.push({ + type: "add", + content: trimmedContent, + indent, + originalLine: content, + }) + } else if (line.startsWith("-")) { + currentHunk.changes.push({ + type: "remove", + content: trimmedContent, + indent, + originalLine: content, + }) + } + } + + if (currentHunk && currentHunk.changes.length > 0) { + hunks.push(currentHunk) + } + + return { hunks } + } + + getToolDescription(cwd: string): string { + return `## apply_diff +Description: Apply a unified diff to a file at the specified path. This tool is useful when you need to make specific modifications to a file based on a set of changes provided in unified diff format (diff -U0). + +Make sure you include the first 2 lines with the file paths. +Don't include timestamps with the file paths. + +Start each hunk of changes with a \`@@ ... @@\` line. +Don't include line numbers like \`diff -U0\` does. +The user's patch tool doesn't need them. + +Indentation matters in the diffs! + +Start a new hunk for each section of the file that needs changes. + +Only output hunks that specify changes with \`+\` or \`-\` lines. +Skip any hunks that are entirely unchanging \` \` lines. + +The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file! +Think carefully and make sure you include and mark all lines that need to be removed or changed as \`-\` lines. +Make sure you mark all new or modified lines with \`+\`. +Don't leave out any lines or the diff patch won't apply correctly. + +Output hunks in whatever order makes the most sense. +Hunks don't need to be in any particular order. + +The hunks do not need line numbers. 
+ +When editing a function, method, loop, etc use a hunk to replace the *entire* code block. +Delete the entire existing version with \`-\` lines and then add a new, updated version with \`+\` lines. +This will help you generate correct code and correct diffs. + +To move code within a file, use 2 hunks: 1 to delete it from its current location, 1 to insert it in the new location. + +Parameters: +- path: (required) The path of the file to apply the diff to (relative to the current working directory ${cwd}) +- diff: (required) The diff content in unified format to apply to the file. + +For each file that needs to be changed, write out the changes similar to a unified diff like \`diff -U0\` would produce. + + +Example: +\`\`\`diff +--- mathweb/flask/app.py ++++ mathweb/flask/app.py +@@ ... @@ +-class MathWeb: ++import sympy ++ ++class MathWeb: +@@ ... @@ +-def is_prime(x): +- if x < 2: +- return False +- for i in range(2, int(math.sqrt(x)) + 1): +- if x % i == 0: +- return False +- return True +@@ ... 
@@ +-@app.route('/prime/') +-def nth_prime(n): +- count = 0 +- num = 1 +- while count < n: +- num += 1 +- if is_prime(num): +- count += 1 +- return str(num) ++@app.route('/prime/') ++def nth_prime(n): ++ count = 0 ++ num = 1 ++ while count < n: ++ num += 1 ++ if sympy.isprime(num): ++ count += 1 ++ return str(num) +\`\`\` + +Usage: + +File path here + +Your diff here + +` + } + + async applyDiff( + originalContent: string, + diffContent: string, + startLine?: number, + endLine?: number + ): Promise { + const MIN_CONFIDENCE = 0.9 + const parsedDiff = this.parseUnifiedDiff(diffContent) + let result = originalContent.split("\n") + + for (const hunk of parsedDiff.hunks) { + const contextStr = prepareSearchString(hunk.changes) + const { index: matchPosition, confidence } = findBestMatch(contextStr, result) + + const editResult = await applyEdit(hunk, result, matchPosition, confidence) + if (editResult.confidence > MIN_CONFIDENCE) { + result = editResult.result + } else { + return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` } + } + } + + return { success: true, content: result.join("\n") } + } +} diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts new file mode 100644 index 0000000..2ad8120 --- /dev/null +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -0,0 +1,131 @@ +import { compareTwoStrings } from 'string-similarity'; +import { closest } from 'fastest-levenshtein'; +import { diff_match_patch } from 'diff-match-patch'; +import { Change } from './types'; + +export type SearchResult = { + index: number; + confidence: number; + strategy: string; +}; + +//TODO: this should be configurable +const MIN_CONFIDENCE = 0.95; + +// Helper function to prepare search string from context +export function prepareSearchString(changes: Change[]): string { + const lines = changes + .filter(c => c.type === 'context' || c.type === 'remove') + .map(c => 
c.content); + return lines.join('\n'); +} + +// Helper function to evaluate similarity between two texts +export function evaluateSimilarity(original: string, modified: string): number { + return compareTwoStrings(original, modified); +} + +// Helper function to validate using diff-match-patch +export function getDMPSimilarity(original: string, modified: string): number { + const dmp = new diff_match_patch(); + const diffs = dmp.diff_main(original, modified); + dmp.diff_cleanupSemantic(diffs); + const patches = dmp.patch_make(original, diffs); + const [expectedText] = dmp.patch_apply(patches, original); + const similarity = evaluateSimilarity(expectedText, modified); + return similarity; +} + +// Exact match strategy +export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const contentStr = content.slice(startIndex).join('\n'); + const searchLines = searchStr.split('\n'); + + const exactMatch = contentStr.indexOf(searchStr); + if (exactMatch !== -1) { + const matchedContent = content.slice( + startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, + startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length + ).join('\n'); + + const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE; + return { + index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, + confidence: dmpValid ? 
1.0 : 0.9, + strategy: 'exact' + }; + } + + return { index: -1, confidence: 0, strategy: 'exact' }; +} + +// String similarity strategy +export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const searchLines = searchStr.split('\n'); + let bestScore = 0; + let bestIndex = -1; + const minScore = 0.8; + + for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { + const windowStr = content.slice(i, i + searchLines.length).join('\n'); + const score = compareTwoStrings(searchStr, windowStr); + if (score > bestScore && score >= minScore) { + const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE; + const adjustedScore = dmpValid ? score : score * 0.9; + + if (adjustedScore > bestScore) { + bestScore = adjustedScore; + bestIndex = i; + } + } + } + + return { + index: bestIndex, + confidence: bestIndex !== -1 ? bestScore : 0, + strategy: 'similarity' + }; +} + +// Levenshtein strategy +export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const searchLines = searchStr.split('\n'); + const candidates = []; + + for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { + candidates.push(content.slice(i, i + searchLines.length).join('\n')); + } + + if (candidates.length > 0) { + const closestMatch = closest(searchStr, candidates); + const index = startIndex + candidates.indexOf(closestMatch); + const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE; + return { + index, + confidence: dmpValid ? 
0.7 : 0.6, + strategy: 'levenshtein' + }; + } + + return { index: -1, confidence: 0, strategy: 'levenshtein' }; +} + +// Main search function that tries all strategies +export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const strategies = [ + findExactMatch, + findSimilarityMatch, + findLevenshteinMatch + ]; + + let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' }; + + for (const strategy of strategies) { + const result = strategy(searchStr, content, startIndex); + if (result.confidence > bestResult.confidence) { + bestResult = result; + } + } + + return bestResult; +} \ No newline at end of file diff --git a/src/core/diff/strategies/new-unified/types.ts b/src/core/diff/strategies/new-unified/types.ts new file mode 100644 index 0000000..6135d0a --- /dev/null +++ b/src/core/diff/strategies/new-unified/types.ts @@ -0,0 +1,14 @@ +export type Change = { + type: 'context' | 'add' | 'remove'; + content: string; + indent: string; + originalLine?: string; +}; + +export type Hunk = { + changes: Change[]; +}; + +export type Diff = { + hunks: Hunk[]; +}; \ No newline at end of file From 995692c48e4b67e1319e89a3f9b06558aec2a8e7 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 8 Jan 2025 12:28:31 -0500 Subject: [PATCH 08/47] Improve strategies and confidence system --- .../strategies/new-unified/edit-strategies.ts | 254 +++++++++++++----- src/core/diff/strategies/new-unified/index.ts | 3 +- .../new-unified/search-strategies.ts | 81 +++++- src/core/diff/strategies/new-unified/types.ts | 8 +- 4 files changed, 265 insertions(+), 81 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 0cbd5c0..36c9fa7 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -1,8 +1,8 @@ import { diff_match_patch } from 'diff-match-patch'; 
import * as git from 'isomorphic-git'; import { fs as memfs, vol } from 'memfs'; -import { Hunk } from './types'; -import { getDMPSimilarity } from './search-strategies'; +import { Change, EditResult, Hunk } from './types'; +import { getDMPSimilarity, validateEditResult } from './search-strategies'; // Helper function to infer indentation function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { @@ -27,12 +27,6 @@ function inferIndentation(line: string, contextLines: string[], previousIndent: return previousIndent; } -export type EditResult = { - confidence: number; - result: string[]; - strategy: string; -}; - // Context matching edit strategy export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { if (matchPosition === -1) { @@ -43,6 +37,8 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio let sourceIndex = matchPosition; let previousIndent = ''; + const hunkChanges = hunk.changes.filter(c => c.type !== 'context'); + for (const change of hunk.changes) { if (change.type === 'context') { newResult.push(change.originalLine || (change.indent + change.content)); @@ -66,10 +62,12 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio const similarity = getDMPSimilarity( content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') - ); - + ) + + const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n')); + return { - confidence: similarity, + confidence: similarity * confidence, result: newResult, strategy: 'context' }; @@ -82,41 +80,53 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): } const dmp = new diff_match_patch(); - const currentText = content.join('\n'); - const contextLines = hunk.changes - .filter(c => c.type 
=== 'context') - .map(c => c.content); + const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length); + const editText = editRegion.join('\n'); + + // Build the target text sequentially like in applyContextMatching + let targetText = ''; + let previousIndent = ''; + + for (const change of hunk.changes) { + if (change.type === 'context') { + targetText += (change.originalLine || (change.indent + change.content)) + '\n'; + previousIndent = change.indent; + } else if (change.type === 'add') { + const indent = change.indent || inferIndentation(change.content, + hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''), + previousIndent + ); + targetText += indent + change.content + '\n'; + previousIndent = indent; + } + // Skip remove changes as they shouldn't appear in target + } - // Create a patch from the hunk with proper indentation - const patch = dmp.patch_make( - currentText, - hunk.changes.reduce((acc, change) => { - if (change.type === 'add') { - const indent = change.indent || inferIndentation(change.content, contextLines); - return acc + indent + change.content + '\n'; - } - if (change.type === 'remove') { - return acc.replace(change.content + '\n', ''); - } - return acc + change.content + '\n'; - }, '') - ); + // Trim the trailing newline + targetText = targetText.replace(/\n$/, ''); - const [patchedText] = dmp.patch_apply(patch, currentText); - const similarity = getDMPSimilarity( - content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), - patchedText - ); + const patch = dmp.patch_make(editText, targetText); + const [patchedText] = dmp.patch_apply(patch, editText); + + // Construct result with edited portion + const newResult = [ + ...content.slice(0, matchPosition), + ...patchedText.split('\n'), + ...content.slice(matchPosition + hunk.changes.length) + ]; + + const similarity = getDMPSimilarity(editText, patchedText) + const confidence = validateEditResult(hunk, patchedText); return { - 
confidence: similarity, - result: patchedText.split('\n'), + confidence: similarity * confidence, + result: newResult, strategy: 'dmp' }; } -// Git edit strategy -export async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise { +// Git edit strategy with cherry-pick approach +async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise { if (matchPosition === -1) { return { confidence: 0, result: content, strategy: 'git' }; } @@ -124,26 +134,55 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num vol.reset(); try { + // Initialize git repo await git.init({ fs: memfs, dir: '/' }); - const originalContent = content.join('\n'); - await memfs.promises.writeFile('/file.txt', originalContent); - + // Create original content - only use the edit region + const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length); + const editText = editRegion.join('\n'); + await memfs.promises.writeFile('/file.txt', editText); await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); await git.commit({ fs: memfs, dir: '/', author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Initial commit' + message: 'Original' }); + const originalHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); - await git.branch({ fs: memfs, dir: '/', ref: 'patch-branch' }); - await git.checkout({ fs: memfs, dir: '/', ref: 'patch-branch' }); - - const lines = originalContent.split('\n'); - const newLines = [...lines]; - let offset = matchPosition; + // Create search content (content with removals) + const searchLines = [...editRegion]; + let offset = 0; + for (const change of hunk.changes) { + if (change.type === 'remove') { + const index = searchLines.findIndex( + (line, i) => i >= offset && line.trimLeft() === change.content + ); + if (index !== -1) { + searchLines.splice(index, 1); + } + } + if (change.type !== 'add') { + offset++; + } + } + + // Create search branch and 
commit + await git.branch({ fs: memfs, dir: '/', ref: 'search' }); + await git.checkout({ fs: memfs, dir: '/', ref: 'search' }); + await memfs.promises.writeFile('/file.txt', searchLines.join('\n')); + await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); + await git.commit({ + fs: memfs, + dir: '/', + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Search state' + }); + const searchHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); + // Create replace content (with additions) + const replaceLines = [...searchLines]; + offset = 0; const contextLines = hunk.changes .filter(c => c.type === 'context') .map(c => c.content); @@ -151,42 +190,108 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num for (const change of hunk.changes) { if (change.type === 'add') { const indent = change.indent || inferIndentation(change.content, contextLines); - newLines.splice(offset, 0, indent + change.content); + replaceLines.splice(offset, 0, indent + change.content); offset++; - } else if (change.type === 'remove') { - const index = newLines.findIndex( - (line, i) => i >= offset && line.trimLeft() === change.content - ); - if (index !== -1) { - newLines.splice(index, 1); - } - } else { + } else if (change.type !== 'remove') { offset++; } } - const modifiedContent = newLines.join('\n'); - await memfs.promises.writeFile('/file.txt', modifiedContent); - + // Create replace branch and commit + await git.branch({ fs: memfs, dir: '/', ref: 'replace' }); + await git.checkout({ fs: memfs, dir: '/', ref: 'replace' }); + await memfs.promises.writeFile('/file.txt', replaceLines.join('\n')); await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); await git.commit({ fs: memfs, dir: '/', author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Apply changes' + message: 'Replace state' }); + const replaceHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); - const similarity = getDMPSimilarity( - 
content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), - newLines.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') - ); + // Try both strategies: + // 1. OSR: Cherry-pick replace onto original + // 2. SR-SO: Apply search->replace changes to search->original - return { - confidence: similarity, - result: newLines, - strategy: 'git' - }; + // Strategy 1: OSR + await git.checkout({ fs: memfs, dir: '/', ref: originalHash }); + try { + await git.merge({ + fs: memfs, + dir: '/', + ours: originalHash, + theirs: replaceHash, + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Cherry-pick OSR' + }); + const osrResult = (await memfs.promises.readFile('/file.txt')).toString(); + const osrSimilarity = getDMPSimilarity(editText, osrResult) + + const confidence = validateEditResult(hunk, osrResult); + + if (osrSimilarity * confidence > 0.9) { + // Construct result with edited portion + const newResult = [ + ...content.slice(0, matchPosition), + ...osrResult.split('\n'), + ...content.slice(matchPosition + hunk.changes.length) + ]; + return { + confidence: osrSimilarity, + result: newResult, + strategy: 'git-osr' + }; + } + } catch (error) { + console.log('OSR strategy failed:', error); + } + + // Strategy 2: SR-SO + await git.checkout({ fs: memfs, dir: '/', ref: searchHash }); + try { + // First apply original changes + await git.merge({ + fs: memfs, + dir: '/', + ours: searchHash, + theirs: originalHash, + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Apply original changes' + }); + + // Then apply replace changes + await git.merge({ + fs: memfs, + dir: '/', + ours: 'HEAD', + theirs: replaceHash, + author: { name: 'Temp', email: 'temp@example.com' }, + message: 'Apply replace changes' + }); + + const srsoResult = (await memfs.promises.readFile('/file.txt')).toString(); + const srsoSimilarity = getDMPSimilarity(editText, srsoResult) + + const confidence = validateEditResult(hunk, srsoResult); + + // 
Construct result with edited portion + const newResult = [ + ...content.slice(0, matchPosition), + ...srsoResult.split('\n'), + ...content.slice(matchPosition + hunk.changes.length) + ]; + return { + confidence: srsoSimilarity * confidence, + result: newResult, + strategy: 'git-srso' + }; + } catch (error) { + console.log('SR-SO strategy failed:', error); + return { confidence: 0, result: content, strategy: 'git' }; + } } catch (error) { + console.log('Git strategy failed:', error); return { confidence: 0, result: content, strategy: 'git' }; } finally { vol.reset(); @@ -195,9 +300,11 @@ export async function applyGit(hunk: Hunk, content: string[], matchPosition: num // Main edit function that tries strategies sequentially export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise { + // Don't attempt any edits if confidence is too low and not in debug mode const MIN_CONFIDENCE = 0.9; - if (confidence < MIN_CONFIDENCE && !debug) { + if (confidence < MIN_CONFIDENCE) { + console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), skipping edit`); return { confidence: 0, result: content, strategy: 'none' }; } @@ -211,15 +318,18 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu if (debug) { // In debug mode, try all strategies and return the first success const results = await Promise.all(strategies.map(async strategy => { + console.log(`Attempting edit with ${strategy.name} strategy...`); const result = await strategy.apply(); + console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); return result; })); const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); if (successfulResults.length > 0) { - return successfulResults.reduce((best, current) => + const bestResult = successfulResults.reduce((best, current) => current.confidence > best.confidence ? 
current : best ); + return bestResult; } } else { // Normal mode - try strategies sequentially until one succeeds diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 891fb96..a3658aa 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -162,7 +162,8 @@ Your diff here ): Promise { const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) - let result = originalContent.split("\n") + const originalLines = originalContent.split("\n") + let result = [...originalLines] for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 2ad8120..5c7a55d 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -1,7 +1,7 @@ import { compareTwoStrings } from 'string-similarity'; import { closest } from 'fastest-levenshtein'; import { diff_match_patch } from 'diff-match-patch'; -import { Change } from './types'; +import { Change, Hunk } from './types'; export type SearchResult = { index: number; @@ -32,10 +32,70 @@ export function getDMPSimilarity(original: string, modified: string): number { dmp.diff_cleanupSemantic(diffs); const patches = dmp.patch_make(original, diffs); const [expectedText] = dmp.patch_apply(patches, original); + const similarity = evaluateSimilarity(expectedText, modified); return similarity; } +// Helper function to validate edit results using hunk information +// Returns a confidence reduction value between 0 and 1 +// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95, +// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. +// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). 
+export function validateEditResult(hunk: Hunk, result: string): number { + const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)); + + // Create skeleton of original content (context + removed lines) + const originalSkeleton = hunkDeepCopy.changes + .filter(change => change.type === 'context' || change.type === 'remove') + .map(change => change.content) + .join('\n'); + + // Create skeleton of expected result (context + added lines) + const expectedSkeleton = hunkDeepCopy.changes + .filter(change => change.type === 'context' || change.type === 'add') + .map(change => change.content) + .join('\n'); + + // Compare with original content + const originalSimilarity = evaluateSimilarity(originalSkeleton, result); + + // If result is too similar to original, it means changes weren't applied + if (originalSimilarity > 0.9) { + console.log('Result too similar to original content:', originalSimilarity); + return 0.5; // Significant confidence reduction + } + + // Compare with expected result + const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result); + console.log('Original similarity:', originalSimilarity); + console.log('Expected similarity:', expectedSimilarity); + + console.log('originalSkeleton:', originalSkeleton); + console.log('expectedSkeleton:', expectedSkeleton); + console.log('result:', result); + + // Scale between 0.98 and 1.0 (4% impact) based on expected similarity + const multiplier = expectedSimilarity < MIN_CONFIDENCE + ? 
0.96 + (0.04 * expectedSimilarity) + : 1; + + return multiplier; +} + +// Helper function to validate context lines against original content +function validateContextLines(searchStr: string, content: string): number { + // Extract just the context lines from the search string + const contextLines = searchStr.split('\n') + .filter(line => !line.startsWith('-')); // Exclude removed lines + + // Compare context lines with content + const similarity = evaluateSimilarity(contextLines.join('\n'), content); + + // Context lines must match very closely, or confidence drops significantly + return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity; +} + // Exact match strategy export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { const contentStr = content.slice(startIndex).join('\n'); @@ -48,10 +108,13 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length ).join('\n'); - const dmpValid = getDMPSimilarity(searchStr, matchedContent) >= MIN_CONFIDENCE; + const similarity = getDMPSimilarity(searchStr, matchedContent); + const contextSimilarity = validateContextLines(searchStr, matchedContent); + const confidence = Math.min(similarity, contextSimilarity); + return { index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, - confidence: dmpValid ? 1.0 : 0.9, + confidence, strategy: 'exact' }; } @@ -70,8 +133,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI const windowStr = content.slice(i, i + searchLines.length).join('\n'); const score = compareTwoStrings(searchStr, windowStr); if (score > bestScore && score >= minScore) { - const dmpValid = getDMPSimilarity(searchStr, windowStr) >= MIN_CONFIDENCE; - const adjustedScore = dmpValid ? 
score : score * 0.9; + const similarity = getDMPSimilarity(searchStr, windowStr); + const contextSimilarity = validateContextLines(searchStr, windowStr); + const adjustedScore = Math.min(similarity, contextSimilarity) * score; if (adjustedScore > bestScore) { bestScore = adjustedScore; @@ -99,10 +163,13 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start if (candidates.length > 0) { const closestMatch = closest(searchStr, candidates); const index = startIndex + candidates.indexOf(closestMatch); - const dmpValid = getDMPSimilarity(searchStr, closestMatch) >= MIN_CONFIDENCE; + const similarity = getDMPSimilarity(searchStr, closestMatch); + const contextSimilarity = validateContextLines(searchStr, closestMatch); + const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty + return { index, - confidence: dmpValid ? 0.7 : 0.6, + confidence, strategy: 'levenshtein' }; } diff --git a/src/core/diff/strategies/new-unified/types.ts b/src/core/diff/strategies/new-unified/types.ts index 6135d0a..a734f6e 100644 --- a/src/core/diff/strategies/new-unified/types.ts +++ b/src/core/diff/strategies/new-unified/types.ts @@ -11,4 +11,10 @@ export type Hunk = { export type Diff = { hunks: Hunk[]; -}; \ No newline at end of file +}; + +export type EditResult = { + confidence: number; + result: string[]; + strategy: string; +}; \ No newline at end of file From 9591ae062ad0f234bfba467d527a0f48cec44cb6 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 8 Jan 2025 17:13:46 -0500 Subject: [PATCH 09/47] Refactor edit strategies and confidence validation in unified diff processing - Enhanced the applyContextMatching and applyDMP functions to improve handling of context and edit changes. - Updated confidence validation logic to ensure stricter checks, now requiring a minimum confidence of 1 for successful edits. - Refined the way changes are processed, including better tracking of removal and addition changes. 
- Improved the validation of edit results by incorporating strategy-specific checks and logging for better debugging. - Adjusted the applyEdit function to ensure strategies are applied in a more robust manner, with clearer handling of debug mode. --- .../strategies/new-unified/edit-strategies.ts | 105 +++++++------- src/core/diff/strategies/new-unified/index.ts | 4 +- .../new-unified/search-strategies.ts | 132 ++++++++++-------- 3 files changed, 136 insertions(+), 105 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 36c9fa7..6affac0 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -36,35 +36,43 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio const newResult = [...content.slice(0, matchPosition)]; let sourceIndex = matchPosition; let previousIndent = ''; - - const hunkChanges = hunk.changes.filter(c => c.type !== 'context'); + let lastChangeWasRemove = false; // Track if last change was a remove for (const change of hunk.changes) { + if (change.type === 'context') { newResult.push(change.originalLine || (change.indent + change.content)); previousIndent = change.indent; - sourceIndex++; + if (!lastChangeWasRemove) { // Only increment if we didn't just remove a line + sourceIndex++; + } + lastChangeWasRemove = false; } else if (change.type === 'add') { const indent = change.indent || inferIndentation(change.content, - hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''), + hunk.changes.filter(c => c.type === 'context' && c.originalLine).map(c => c.originalLine || ''), previousIndent ); newResult.push(indent + change.content); previousIndent = indent; + lastChangeWasRemove = false; } else if (change.type === 'remove') { sourceIndex++; + lastChangeWasRemove = true; } } newResult.push(...content.slice(sourceIndex)); - // Validate 
the result + // Calculate the window size based on all changes + const windowSize = hunk.changes.length; + + // Validate the result using the full window size const similarity = getDMPSimilarity( - content.slice(matchPosition, matchPosition + hunk.changes.length).join('\n'), - newResult.slice(matchPosition, matchPosition + hunk.changes.length).join('\n') + content.slice(matchPosition, matchPosition + windowSize).join('\n'), + newResult.slice(matchPosition, matchPosition + windowSize).join('\n') ) - const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + hunkChanges.length + 1).join('\n')); + const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context'); return { confidence: similarity * confidence, @@ -80,45 +88,45 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): } const dmp = new diff_match_patch(); - const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length); - const editText = editRegion.join('\n'); - // Build the target text sequentially like in applyContextMatching - let targetText = ''; - let previousIndent = ''; + // Build BEFORE block (context + removals) + const beforeLines = hunk.changes + .filter(change => change.type === 'context' || change.type === 'remove') + .map(change => change.originalLine || (change.indent + change.content)); - for (const change of hunk.changes) { - if (change.type === 'context') { - targetText += (change.originalLine || (change.indent + change.content)) + '\n'; - previousIndent = change.indent; - } else if (change.type === 'add') { - const indent = change.indent || inferIndentation(change.content, - hunk.changes.filter(c => c.type === 'context').map(c => c.originalLine || ''), - previousIndent - ); - targetText += indent + change.content + '\n'; - previousIndent = indent; - } - // Skip remove changes as they shouldn't appear in target - } - - // Trim the trailing newline - 
targetText = targetText.replace(/\n$/, ''); - - const patch = dmp.patch_make(editText, targetText); - const [patchedText] = dmp.patch_apply(patch, editText); + // Build AFTER block (context + additions) + const afterLines = hunk.changes + .filter(change => change.type === 'context' || change.type === 'add') + .map(change => change.originalLine || (change.indent + change.content)); - // Construct result with edited portion + // Convert to text + const beforeText = beforeLines.join('\n'); + const afterText = afterLines.join('\n'); + + // Create the patch + const patch = dmp.patch_make(beforeText, afterText); + + // Get the target text from content + const targetText = content.slice(matchPosition, matchPosition + beforeLines.length).join('\n'); + + // Apply the patch + const [patchedText] = dmp.patch_apply(patch, targetText); + + // Split patched text back into lines + const patchedLines = patchedText.split('\n'); + + // Construct the final result const newResult = [ ...content.slice(0, matchPosition), - ...patchedText.split('\n'), - ...content.slice(matchPosition + hunk.changes.length) + ...patchedLines, + ...content.slice(matchPosition + beforeLines.length) ]; - - const similarity = getDMPSimilarity(editText, patchedText) - const confidence = validateEditResult(hunk, patchedText); - return { + // Calculate confidence + const similarity = getDMPSimilarity(beforeText, targetText); + const confidence = validateEditResult(hunk, patchedText, 'dmp'); + + return { confidence: similarity * confidence, result: newResult, strategy: 'dmp' @@ -228,7 +236,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P const osrResult = (await memfs.promises.readFile('/file.txt')).toString(); const osrSimilarity = getDMPSimilarity(editText, osrResult) - const confidence = validateEditResult(hunk, osrResult); + const confidence = validateEditResult(hunk, osrResult, 'git-osr'); if (osrSimilarity * confidence > 0.9) { // Construct result with edited portion @@ 
-273,7 +281,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P const srsoResult = (await memfs.promises.readFile('/file.txt')).toString(); const srsoSimilarity = getDMPSimilarity(editText, srsoResult) - const confidence = validateEditResult(hunk, srsoResult); + const confidence = validateEditResult(hunk, srsoResult, 'git-srso'); // Construct result with edited portion const newResult = [ @@ -299,7 +307,7 @@ async function applyGit(hunk: Hunk, content: string[], matchPosition: number): P } // Main edit function that tries strategies sequentially -export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: boolean = false): Promise { +export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise { // Don't attempt any edits if confidence is too low and not in debug mode const MIN_CONFIDENCE = 0.9; @@ -310,12 +318,12 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu // Try each strategy in sequence until one succeeds const strategies = [ - { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, + { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, { name: 'git', apply: () => applyGit(hunk, content, matchPosition) } ]; - if (debug) { + if (debug !== '') { // In debug mode, try all strategies and return the first success const results = await Promise.all(strategies.map(async strategy => { console.log(`Attempting edit with ${strategy.name} strategy...`); @@ -324,18 +332,19 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu return result; })); - const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); + /*const successfulResults = results.filter(result => result.confidence > 
MIN_CONFIDENCE); if (successfulResults.length > 0) { const bestResult = successfulResults.reduce((best, current) => current.confidence > best.confidence ? current : best ); return bestResult; - } + }*/ + return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; } else { // Normal mode - try strategies sequentially until one succeeds for (const strategy of strategies) { const result = await strategy.apply(); - if (result.confidence > MIN_CONFIDENCE) { + if (result.confidence === 1) { return result; } } diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index a3658aa..bdc1519 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -160,7 +160,7 @@ Your diff here startLine?: number, endLine?: number ): Promise { - const MIN_CONFIDENCE = 0.9 + const MIN_CONFIDENCE = 1 const parsedDiff = this.parseUnifiedDiff(diffContent) const originalLines = originalContent.split("\n") let result = [...originalLines] @@ -170,7 +170,7 @@ Your diff here const { index: matchPosition, confidence } = findBestMatch(contextStr, result) const editResult = await applyEdit(hunk, result, matchPosition, confidence) - if (editResult.confidence > MIN_CONFIDENCE) { + if (editResult.confidence >= MIN_CONFIDENCE) { result = editResult.result } else { return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` } diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 5c7a55d..9e61f87 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -15,8 +15,8 @@ const MIN_CONFIDENCE = 0.95; // Helper function to prepare search string from context export function prepareSearchString(changes: Change[]): string { const lines = changes - .filter(c => 
c.type === 'context' || c.type === 'remove') - .map(c => c.content); + .filter((c) => c.type === 'context' || c.type === 'remove') + .map((c) => c.content); return lines.join('\n'); } @@ -42,43 +42,42 @@ export function getDMPSimilarity(original: string, modified: string): number { // Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95, // returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. // If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). -export function validateEditResult(hunk: Hunk, result: string): number { +export function validateEditResult(hunk: Hunk, result: string, strategy: string): number { const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)); - + // Create skeleton of original content (context + removed lines) const originalSkeleton = hunkDeepCopy.changes - .filter(change => change.type === 'context' || change.type === 'remove') - .map(change => change.content) + .filter((change) => change.type === 'context' || change.type === 'remove') + .map((change) => change.content) .join('\n'); // Create skeleton of expected result (context + added lines) const expectedSkeleton = hunkDeepCopy.changes - .filter(change => change.type === 'context' || change.type === 'add') - .map(change => change.content) + .filter((change) => change.type === 'context' || change.type === 'add') + .map((change) => change.content) .join('\n'); // Compare with original content const originalSimilarity = evaluateSimilarity(originalSkeleton, result); - - // If result is too similar to original, it means changes weren't applied - if (originalSimilarity > 0.9) { - console.log('Result too similar to original content:', originalSimilarity); - return 0.5; // Significant confidence reduction + console.log('originalSimilarity ', strategy, originalSimilarity); + // If original similarity is 1, it means changes weren't applied + if (originalSimilarity > 0.97) { + if (originalSimilarity === 1) { + return 0.5; // Significant confidence 
reduction + } else { + return 0.8; + } } // Compare with expected result const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result); - console.log('Original similarity:', originalSimilarity); - console.log('Expected similarity:', expectedSimilarity); - console.log('originalSkeleton:', originalSkeleton); - console.log('expectedSkeleton:', expectedSkeleton); - console.log('result:', result); + console.log('expectedSimilarity', strategy, expectedSimilarity); + // Scale between 0.98 and 1.0 (4% impact) based on expected similarity - const multiplier = expectedSimilarity < MIN_CONFIDENCE - ? 0.96 + (0.04 * expectedSimilarity) - : 1; + const multiplier = + expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1; return multiplier; } @@ -86,44 +85,59 @@ export function validateEditResult(hunk: Hunk, result: string): number { // Helper function to validate context lines against original content function validateContextLines(searchStr: string, content: string): number { // Extract just the context lines from the search string - const contextLines = searchStr.split('\n') - .filter(line => !line.startsWith('-')); // Exclude removed lines - + const contextLines = searchStr + .split('\n') + .filter((line) => !line.startsWith('-')); // Exclude removed lines + // Compare context lines with content const similarity = evaluateSimilarity(contextLines.join('\n'), content); - + // Context lines must match very closely, or confidence drops significantly return similarity < MIN_CONFIDENCE ? 
similarity * 0.3 : similarity; } // Exact match strategy -export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findExactMatch( + searchStr: string, + content: string[], + startIndex: number = 0 +): SearchResult { const contentStr = content.slice(startIndex).join('\n'); const searchLines = searchStr.split('\n'); - + const exactMatch = contentStr.indexOf(searchStr); if (exactMatch !== -1) { - const matchedContent = content.slice( - startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, - startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1 + searchLines.length - ).join('\n'); - + const matchedContent = content + .slice( + startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, + startIndex + + contentStr.slice(0, exactMatch).split('\n').length - + 1 + + searchLines.length + ) + .join('\n'); + const similarity = getDMPSimilarity(searchStr, matchedContent); const contextSimilarity = validateContextLines(searchStr, matchedContent); const confidence = Math.min(similarity, contextSimilarity); - + return { - index: startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, + index: + startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, confidence, - strategy: 'exact' + strategy: 'exact', }; } - + return { index: -1, confidence: 0, strategy: 'exact' }; } // String similarity strategy -export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findSimilarityMatch( + searchStr: string, + content: string[], + startIndex: number = 0 +): SearchResult { const searchLines = searchStr.split('\n'); let bestScore = 0; let bestIndex = -1; @@ -136,7 +150,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI const similarity = getDMPSimilarity(searchStr, windowStr); const contextSimilarity = validateContextLines(searchStr, windowStr); const 
adjustedScore = Math.min(similarity, contextSimilarity) * score; - + if (adjustedScore > bestScore) { bestScore = adjustedScore; bestIndex = i; @@ -144,33 +158,37 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI } } - return { - index: bestIndex, + return { + index: bestIndex, confidence: bestIndex !== -1 ? bestScore : 0, - strategy: 'similarity' + strategy: 'similarity', }; } // Levenshtein strategy -export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findLevenshteinMatch( + searchStr: string, + content: string[], + startIndex: number = 0 +): SearchResult { const searchLines = searchStr.split('\n'); const candidates = []; - + for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { candidates.push(content.slice(i, i + searchLines.length).join('\n')); } - + if (candidates.length > 0) { const closestMatch = closest(searchStr, candidates); const index = startIndex + candidates.indexOf(closestMatch); const similarity = getDMPSimilarity(searchStr, closestMatch); const contextSimilarity = validateContextLines(searchStr, closestMatch); - const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty - - return { - index, + const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty + + return { + index, confidence, - strategy: 'levenshtein' + strategy: 'levenshtein', }; } @@ -178,21 +196,25 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start } // Main search function that tries all strategies -export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findBestMatch( + searchStr: string, + content: string[], + startIndex: number = 0 +): SearchResult { const strategies = [ findExactMatch, findSimilarityMatch, - findLevenshteinMatch + findLevenshteinMatch, ]; - + let 
bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' }; - + for (const strategy of strategies) { const result = strategy(searchStr, content, startIndex); if (result.confidence > bestResult.confidence) { bestResult = result; } } - + return bestResult; -} \ No newline at end of file +} From 44dc489e49f113584a25b850534c9583b4a731ef Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 8 Jan 2025 17:18:09 -0500 Subject: [PATCH 10/47] revert: lower the confidence levels --- src/core/diff/strategies/new-unified/edit-strategies.ts | 2 +- src/core/diff/strategies/new-unified/index.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 6affac0..c464292 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -344,7 +344,7 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu // Normal mode - try strategies sequentially until one succeeds for (const strategy of strategies) { const result = await strategy.apply(); - if (result.confidence === 1) { + if (result.confidence > MIN_CONFIDENCE) { return result; } } diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index bdc1519..a3658aa 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -160,7 +160,7 @@ Your diff here startLine?: number, endLine?: number ): Promise { - const MIN_CONFIDENCE = 1 + const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) const originalLines = originalContent.split("\n") let result = [...originalLines] @@ -170,7 +170,7 @@ Your diff here const { index: matchPosition, confidence } = findBestMatch(contextStr, result) const editResult = await applyEdit(hunk, result, matchPosition, confidence) - if 
(editResult.confidence >= MIN_CONFIDENCE) { + if (editResult.confidence > MIN_CONFIDENCE) { result = editResult.result } else { return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` } From 5351b3d98164f756211ba7ecc5047f03b12bbb61 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 9 Jan 2025 12:05:36 -0500 Subject: [PATCH 11/47] refactor: improve the prompt to prevent absurd amounts of context being generated --- src/core/diff/strategies/new-unified/index.ts | 198 ++++++++++-------- 1 file changed, 110 insertions(+), 88 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index a3658aa..6f2b62f 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -5,106 +5,128 @@ import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { private parseUnifiedDiff(diff: string): Diff { - const lines = diff.split("\n") - const hunks: Hunk[] = [] - let currentHunk: Hunk | null = null - - let i = 0 - while (i < lines.length && !lines[i].startsWith("@@")) { - i++ - } - - for (; i < lines.length; i++) { - const line = lines[i] - - if (line.startsWith("@@")) { - if (currentHunk) { - hunks.push(currentHunk) - } - currentHunk = { changes: [] } - continue - } - - if (!currentHunk) { - continue - } - - // Extract the complete indentation for each line - const content = line.slice(1) // Remove the diff marker - const indentMatch = content.match(/^(\s*)/) - const indent = indentMatch ? 
indentMatch[0] : "" - const trimmedContent = content.slice(indent.length) - - if (line.startsWith(" ")) { - currentHunk.changes.push({ - type: "context", - content: trimmedContent, - indent, - originalLine: content, - }) - } else if (line.startsWith("+")) { - currentHunk.changes.push({ - type: "add", - content: trimmedContent, - indent, - originalLine: content, - }) - } else if (line.startsWith("-")) { - currentHunk.changes.push({ - type: "remove", - content: trimmedContent, - indent, - originalLine: content, - }) - } - } - - if (currentHunk && currentHunk.changes.length > 0) { - hunks.push(currentHunk) - } - - return { hunks } - } + const MAX_CONTEXT_LINES = 3; // Number of context lines to keep before/after changes + const lines = diff.split('\n'); + const hunks: Hunk[] = []; + let currentHunk: Hunk | null = null; + + let i = 0; + while (i < lines.length && !lines[i].startsWith('@@')) { + i++; + } + + for (; i < lines.length; i++) { + const line = lines[i]; + + if (line.startsWith('@@')) { + if (currentHunk && currentHunk.changes.length > 0 && + currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { + // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes + const changes = currentHunk.changes; + let startIdx = 0; + let endIdx = changes.length - 1; + + // Find first non-context line + for (let j = 0; j < changes.length; j++) { + if (changes[j].type !== 'context') { + startIdx = Math.max(0, j - MAX_CONTEXT_LINES); + break; + } + } + + // Find last non-context line + for (let j = changes.length - 1; j >= 0; j--) { + if (changes[j].type !== 'context') { + endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES); + break; + } + } + + currentHunk.changes = changes.slice(startIdx, endIdx + 1); + hunks.push(currentHunk); + } + currentHunk = { changes: [] }; + continue; + } + + if (!currentHunk) { + continue; + } + + // Extract the complete indentation for each line + const content = line.slice(1); // Remove the 
diff marker + const indentMatch = content.match(/^(\s*)/); + const indent = indentMatch ? indentMatch[0] : ''; + const trimmedContent = content.slice(indent.length); + + if (line.startsWith(' ')) { + currentHunk.changes.push({ + type: 'context', + content: trimmedContent, + indent, + originalLine: content + }); + } else if (line.startsWith('+')) { + currentHunk.changes.push({ + type: 'add', + content: trimmedContent, + indent, + originalLine: content + }); + } else if (line.startsWith('-')) { + currentHunk.changes.push({ + type: 'remove', + content: trimmedContent, + indent, + originalLine: content + }); + } + } + + if (currentHunk && currentHunk.changes.length > 0 && + currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { + hunks.push(currentHunk); + } + + return { hunks }; + } getToolDescription(cwd: string): string { return `## apply_diff -Description: Apply a unified diff to a file at the specified path. This tool is useful when you need to make specific modifications to a file based on a set of changes provided in unified diff format (diff -U0). -Make sure you include the first 2 lines with the file paths. -Don't include timestamps with the file paths. +Description: +Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together. -Start each hunk of changes with a \`@@ ... @@\` line. -Don't include line numbers like \`diff -U0\` does. -The user's patch tool doesn't need them. +Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them. -Indentation matters in the diffs! +Key Requirements: +1. Generate compact diffs with minimal context + - Use reduced context similar to diff -U0 + - Only include hunks that contain actual changes (+ or - lines) + - Skip hunks with only unchanged lines -Start a new hunk for each section of the file that needs changes. +2. 
Use high-level, logical grouping + - When modifying code blocks (functions, methods, loops), replace the entire block in one hunk + - Delete the complete existing block with \`-\` lines + - Add the complete updated block with \`+\` lines + - Group related changes together rather than creating many small hunks -Only output hunks that specify changes with \`+\` or \`-\` lines. -Skip any hunks that are entirely unchanging \` \` lines. +3. Format requirements + - Include file paths in the first 2 lines (without timestamps) + - Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed) + - Preserve exact indentation + - The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included -The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file! -Think carefully and make sure you include and mark all lines that need to be removed or changed as \`-\` lines. -Make sure you mark all new or modified lines with \`+\`. -Don't leave out any lines or the diff patch won't apply correctly. - -Output hunks in whatever order makes the most sense. -Hunks don't need to be in any particular order. - -The hunks do not need line numbers. - -When editing a function, method, loop, etc use a hunk to replace the *entire* code block. -Delete the entire existing version with \`-\` lines and then add a new, updated version with \`+\` lines. -This will help you generate correct code and correct diffs. - -To move code within a file, use 2 hunks: 1 to delete it from its current location, 1 to insert it in the new location. +4. 
Common operations + - To move code: Create one hunk to delete from original location, another to add at new location + - To modify a block: Delete entire original block, then add entire new version + - Order hunks in whatever logical sequence makes sense Parameters: -- path: (required) The path of the file to apply the diff to (relative to the current working directory ${cwd}) -- diff: (required) The diff content in unified format to apply to the file. +- path: (required) File path relative to current working directory ${cwd} +- diff: (required) Unified format diff content to apply -For each file that needs to be changed, write out the changes similar to a unified diff like \`diff -U0\` would produce. +The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines. Example: From ab31aea55170d017df02c931e22437b5d0aa58e6 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 9 Jan 2025 16:38:32 -0500 Subject: [PATCH 12/47] refactor: enhance context handling and confidence validation in edit strategies - Improved the applyContextMatching function to better track context lines and handle changes more accurately. - Updated the applyEdit function to enforce a stricter confidence check, now requiring a confidence level of 1 for successful edits. - Refined the processing logic for addition and removal changes, ensuring more reliable diff application. - Enhanced debugging capabilities with clearer logging and validation checks for edit results. 
--- .../strategies/new-unified/edit-strategies.ts | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index c464292..da0be21 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -36,17 +36,14 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio const newResult = [...content.slice(0, matchPosition)]; let sourceIndex = matchPosition; let previousIndent = ''; - let lastChangeWasRemove = false; // Track if last change was a remove + let contextLinesProcessed = 0; for (const change of hunk.changes) { - if (change.type === 'context') { newResult.push(change.originalLine || (change.indent + change.content)); previousIndent = change.indent; - if (!lastChangeWasRemove) { // Only increment if we didn't just remove a line - sourceIndex++; - } - lastChangeWasRemove = false; + sourceIndex++; + contextLinesProcessed++; } else if (change.type === 'add') { const indent = change.indent || inferIndentation(change.content, hunk.changes.filter(c => c.type === 'context' && c.originalLine).map(c => c.originalLine || ''), @@ -54,14 +51,14 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio ); newResult.push(indent + change.content); previousIndent = indent; - lastChangeWasRemove = false; } else if (change.type === 'remove') { sourceIndex++; - lastChangeWasRemove = true; } } - newResult.push(...content.slice(sourceIndex)); + // Only append remaining content after the hunk's actual span in the original content + const remainingContentStart = matchPosition + contextLinesProcessed + hunk.changes.filter(c => c.type === 'remove').length; + newResult.push(...content.slice(remainingContentStart)); // Calculate the window size based on all changes const windowSize = hunk.changes.length; @@ -344,7 +341,7 @@ 
export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu // Normal mode - try strategies sequentially until one succeeds for (const strategy of strategies) { const result = await strategy.apply(); - if (result.confidence > MIN_CONFIDENCE) { + if (result.confidence === 1) { return result; } } From 058431eaf1632c443a09ba016f2eabe120764979 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 10 Jan 2025 17:32:54 -0500 Subject: [PATCH 13/47] chore: update dependencies - Added @types/tmp and simple-git dependencies. - Updated tmp dependency version to 0.2.3. - Removed isomorphic-git and memfs dependencies. --- package-lock.json | 364 +++++++--------------------------------------- package.json | 5 +- 2 files changed, 57 insertions(+), 312 deletions(-) diff --git a/package-lock.json b/package-lock.json index 982a949..069b02c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ "@modelcontextprotocol/sdk": "^1.0.1", "@types/clone-deep": "^4.0.4", "@types/pdf-parse": "^1.1.4", + "@types/tmp": "^0.2.6", "@types/turndown": "^5.0.5", "@types/vscode": "^1.95.0", "@vscode/codicons": "^0.0.36", @@ -31,9 +32,7 @@ "fastest-levenshtein": "^1.0.16", "globby": "^14.0.2", "isbinaryfile": "^5.0.2", - "isomorphic-git": "^1.29.0", "mammoth": "^1.8.0", - "memfs": "^4.15.3", "monaco-vscode-textmate-theme-converter": "^0.1.7", "openai": "^4.73.1", "os-name": "^6.0.0", @@ -42,9 +41,11 @@ "puppeteer-chromium-resolver": "^23.0.0", "puppeteer-core": "^23.4.0", "serialize-error": "^11.0.3", + "simple-git": "^3.27.0", "sound-play": "^1.1.0", "string-similarity": "^4.0.4", "strip-ansi": "^7.1.0", + "tmp": "^0.2.3", "tree-sitter-wasms": "^0.1.11", "turndown": "^7.2.0", "web-tree-sitter": "^0.22.6", @@ -4113,6 +4114,21 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@kwsites/file-exists": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@kwsites/file-exists/-/file-exists-1.1.1.tgz", + "integrity": 
"sha512-m9/5YGR18lIwxSFDwfE3oA7bWuq9kdau6ugN4H2rJeyhFQZcG9AgSHkQtSD15a8WvTgfz9aikZMrKPHvbpqFiw==", + "license": "MIT", + "dependencies": { + "debug": "^4.1.1" + } + }, + "node_modules/@kwsites/promise-deferred": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@kwsites/promise-deferred/-/promise-deferred-1.1.1.tgz", + "integrity": "sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw==", + "license": "MIT" + }, "node_modules/@manypkg/find-root": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@manypkg/find-root/-/find-root-1.1.0.tgz", @@ -6157,6 +6173,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/tmp": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/@types/tmp/-/tmp-0.2.6.tgz", + "integrity": "sha512-chhaNf2oKHlRkDGt+tiKE2Z5aJ6qalm7Z9rlLdBwmOiAAf09YQvvoLXjWK4HWPF1xU/fqvMgfNfpVoBscA/tKA==", + "license": "MIT" + }, "node_modules/@types/turndown": { "version": "5.0.5", "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.5.tgz", @@ -6728,12 +6750,6 @@ "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", "dev": true }, - "node_modules/async-lock": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/async-lock/-/async-lock-1.4.1.tgz", - "integrity": "sha512-Az2ZTpuytrtqENulXwO3GGv1Bztugx6TT37NIo7imr/Qo0gsYiGtSdBa2B6fsXhTpVZDNfu1Qn3pk531e3q+nQ==", - "license": "MIT" - }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -7398,12 +7414,6 @@ "integrity": "sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==", "dev": true }, - "node_modules/clean-git-ref": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/clean-git-ref/-/clean-git-ref-2.0.1.tgz", - "integrity": 
"sha512-bLSptAy2P0s6hU4PzuIMKmMJJSE6gLXGH1cntDu7bWJUksvuM+7ReOK61mozULErYvP6a15rnYl0zFDef+pyPw==", - "license": "Apache-2.0" - }, "node_modules/cli-cursor": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-4.0.0.tgz", @@ -7644,18 +7654,6 @@ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" }, - "node_modules/crc-32": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", - "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", - "license": "Apache-2.0", - "bin": { - "crc32": "bin/crc32.njs" - }, - "engines": { - "node": ">=0.8" - } - }, "node_modules/create-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", @@ -7803,21 +7801,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/decompress-response": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", - "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "license": "MIT", - "dependencies": { - "mimic-response": "^3.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/dedent": { "version": "1.5.3", "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", @@ -7978,12 +7961,6 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, - "node_modules/diff3": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/diff3/-/diff3-0.0.3.tgz", - "integrity": "sha512-iSq8ngPOt0K53A6eVr4d5Kn6GNrM2nQZtC740pzIriHtn4pOQ2lyzEXQMBeVcWERN0ye7fhBsk9PbLLQOnUx/g==", - "license": "MIT" - }, "node_modules/dingbat-to-unicode": { "version": "1.0.1", "resolved": 
"https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz", @@ -8778,6 +8755,19 @@ "node": ">=0.10.0" } }, + "node_modules/external-editor/node_modules/tmp": { + "version": "0.0.33", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz", + "integrity": "sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==", + "dev": true, + "license": "MIT", + "dependencies": { + "os-tmpdir": "~1.0.2" + }, + "engines": { + "node": ">=0.6.0" + } + }, "node_modules/extract-zip": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", @@ -9654,15 +9644,6 @@ "url": "https://github.com/sponsors/typicode" } }, - "node_modules/hyperdyperid": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/hyperdyperid/-/hyperdyperid-1.2.0.tgz", - "integrity": "sha512-Y93lCzHYgGWdrJ66yIktxiaGULYc6oGiABxhcO5AufBeOyoIdZF7bIfLaOrbM0iGIOXQQgxxRrFEnb+Y6w1n4A==", - "license": "MIT", - "engines": { - "node": ">=10.18" - } - }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", @@ -10290,55 +10271,6 @@ "node": ">=0.10.0" } }, - "node_modules/isomorphic-git": { - "version": "1.29.0", - "resolved": "https://registry.npmjs.org/isomorphic-git/-/isomorphic-git-1.29.0.tgz", - "integrity": "sha512-zWGqk8901cicvVEhVpN76AwKrS/TzHak2NQCtNXIAavpMIy/yqh+d/JtC9A8AUKZAauUdOyEWKI29tuCLAL+Zg==", - "license": "MIT", - "dependencies": { - "async-lock": "^1.4.1", - "clean-git-ref": "^2.0.1", - "crc-32": "^1.2.0", - "diff3": "0.0.3", - "ignore": "^5.1.4", - "minimisted": "^2.0.0", - "pako": "^1.0.10", - "path-browserify": "^1.0.1", - "pify": "^4.0.1", - "readable-stream": "^3.4.0", - "sha.js": "^2.4.9", - "simple-get": "^4.0.1" - }, - "bin": { - "isogit": "cli.cjs" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/isomorphic-git/node_modules/pify": { - "version": "4.0.1", - "resolved": 
"https://registry.npmjs.org/pify/-/pify-4.0.1.tgz", - "integrity": "sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/isomorphic-git/node_modules/readable-stream": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", - "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", - "license": "MIT", - "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/istanbul-lib-coverage": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", @@ -11951,113 +11883,6 @@ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" }, - "node_modules/memfs": { - "version": "4.15.3", - "resolved": "https://registry.npmjs.org/memfs/-/memfs-4.15.3.tgz", - "integrity": "sha512-vR/g1SgqvKJgAyYla+06G4p/EOcEmwhYuVb1yc1ixcKf8o/sh7Zngv63957ZSNd1xrZJoinmNyDf2LzuP8WJXw==", - "license": "Apache-2.0", - "dependencies": { - "@jsonjoy.com/json-pack": "^1.0.3", - "@jsonjoy.com/util": "^1.3.0", - "tree-dump": "^1.0.1", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">= 4.0.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/streamich" - } - }, - "node_modules/memfs/node_modules/@jsonjoy.com/base64": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@jsonjoy.com/base64/-/base64-1.1.2.tgz", - "integrity": "sha512-q6XAnWQDIMA3+FTiOYajoYqySkO+JSat0ytXGSuRdq9uXE7o92gzuQwQM14xaCRlBLGq3v5miDGC4vkVTn54xA==", - "license": "Apache-2.0", - "engines": { - "node": ">=10.0" - }, - "funding": { - "type": "github", - "url": 
"https://github.com/sponsors/streamich" - }, - "peerDependencies": { - "tslib": "2" - } - }, - "node_modules/memfs/node_modules/@jsonjoy.com/json-pack": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@jsonjoy.com/json-pack/-/json-pack-1.1.1.tgz", - "integrity": "sha512-osjeBqMJ2lb/j/M8NCPjs1ylqWIcTRTycIhVB5pt6LgzgeRSb0YRZ7j9RfA8wIUrsr/medIuhVyonXRZWLyfdw==", - "license": "Apache-2.0", - "dependencies": { - "@jsonjoy.com/base64": "^1.1.1", - "@jsonjoy.com/util": "^1.1.2", - "hyperdyperid": "^1.2.0", - "thingies": "^1.20.0" - }, - "engines": { - "node": ">=10.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/streamich" - }, - "peerDependencies": { - "tslib": "2" - } - }, - "node_modules/memfs/node_modules/@jsonjoy.com/util": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@jsonjoy.com/util/-/util-1.5.0.tgz", - "integrity": "sha512-ojoNsrIuPI9g6o8UxhraZQSyF2ByJanAY4cTFbc8Mf2AXEF4aQRGY1dJxyJpuyav8r9FGflEt/Ff3u5Nt6YMPA==", - "license": "Apache-2.0", - "engines": { - "node": ">=10.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/streamich" - }, - "peerDependencies": { - "tslib": "2" - } - }, - "node_modules/memfs/node_modules/thingies": { - "version": "1.21.0", - "resolved": "https://registry.npmjs.org/thingies/-/thingies-1.21.0.tgz", - "integrity": "sha512-hsqsJsFMsV+aD4s3CWKk85ep/3I9XzYV/IXaSouJMYIoDlgyi11cBhsqYe9/geRfB0YIikBQg6raRaM+nIMP9g==", - "license": "Unlicense", - "engines": { - "node": ">=10.18" - }, - "peerDependencies": { - "tslib": "^2" - } - }, - "node_modules/memfs/node_modules/tree-dump": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.0.2.tgz", - "integrity": "sha512-dpev9ABuLWdEubk+cIaI9cHwRNNDjkBBLXTwI4UCUFdQ5xXKqNXoK4FEciw/vxf+NQ7Cb7sGUyeUtORvHIdRXQ==", - "license": "Apache-2.0", - "engines": { - "node": ">=10.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/streamich" - }, - 
"peerDependencies": { - "tslib": "2" - } - }, - "node_modules/memfs/node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD" - }, "node_modules/memorystream": { "version": "0.3.1", "resolved": "https://registry.npmjs.org/memorystream/-/memorystream-0.3.1.tgz", @@ -12132,18 +11957,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/mimic-response": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", - "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", @@ -12159,24 +11972,6 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/minimisted": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/minimisted/-/minimisted-2.0.1.tgz", - "integrity": "sha512-1oPjfuLQa2caorJUM8HV8lGgWCc0qqAO1MNv/k05G4qslmsndV/5WdNZrqCiyqiz3wohia2Ij2B7w2Dr7/IyrA==", - "license": "MIT", - "dependencies": { - "minimist": "^1.2.5" - } - }, "node_modules/minipass": { "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", @@ -12980,6 +12775,7 @@ "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", "integrity": 
"sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g==", "dev": true, + "license": "MIT", "engines": { "node": ">=0.10.0" } @@ -13186,12 +12982,6 @@ "url": "https://github.com/inikulin/parse5?sponsor=1" } }, - "node_modules/path-browserify": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz", - "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==", - "license": "MIT" - }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -14121,19 +13911,6 @@ "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" }, - "node_modules/sha.js": { - "version": "2.4.11", - "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", - "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", - "license": "(MIT AND BSD-3-Clause)", - "dependencies": { - "inherits": "^2.0.1", - "safe-buffer": "^5.0.1" - }, - "bin": { - "sha.js": "bin.js" - } - }, "node_modules/shallow-clone": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-3.0.1.tgz", @@ -14205,49 +13982,19 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/simple-concat": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", - "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, 
- "node_modules/simple-get": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", - "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], + "node_modules/simple-git": { + "version": "3.27.0", + "resolved": "https://registry.npmjs.org/simple-git/-/simple-git-3.27.0.tgz", + "integrity": "sha512-ivHoFS9Yi9GY49ogc6/YAi3Fl9ROnF4VyubNylgCkA+RVqLaKWnDSzXOVzya8csELIaWaYNutsEuAhZrtOjozA==", "license": "MIT", "dependencies": { - "decompress-response": "^6.0.0", - "once": "^1.3.1", - "simple-concat": "^1.0.0" + "@kwsites/file-exists": "^1.1.1", + "@kwsites/promise-deferred": "^1.1.1", + "debug": "^4.3.5" + }, + "funding": { + "type": "github", + "url": "https://github.com/steveukx/git-js?sponsor=1" } }, "node_modules/sisteransi": { @@ -14875,15 +14622,12 @@ "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" }, "node_modules/tmp": { - "version": "0.0.33", - "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz", - "integrity": "sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==", - "dev": true, - "dependencies": { - "os-tmpdir": "~1.0.2" - }, + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz", + "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==", + "license": "MIT", "engines": { - "node": ">=0.6.0" + "node": ">=14.14" } }, "node_modules/tmpl": { diff --git a/package.json b/package.json index 9a23ce8..f1ad575 100644 --- a/package.json +++ b/package.json @@ -203,6 +203,7 @@ "@modelcontextprotocol/sdk": "^1.0.1", "@types/clone-deep": 
"^4.0.4", "@types/pdf-parse": "^1.1.4", + "@types/tmp": "^0.2.6", "@types/turndown": "^5.0.5", "@types/vscode": "^1.95.0", "@vscode/codicons": "^0.0.36", @@ -218,9 +219,7 @@ "fastest-levenshtein": "^1.0.16", "globby": "^14.0.2", "isbinaryfile": "^5.0.2", - "isomorphic-git": "^1.29.0", "mammoth": "^1.8.0", - "memfs": "^4.15.3", "monaco-vscode-textmate-theme-converter": "^0.1.7", "openai": "^4.73.1", "os-name": "^6.0.0", @@ -229,9 +228,11 @@ "puppeteer-chromium-resolver": "^23.0.0", "puppeteer-core": "^23.4.0", "serialize-error": "^11.0.3", + "simple-git": "^3.27.0", "sound-play": "^1.1.0", "string-similarity": "^4.0.4", "strip-ansi": "^7.1.0", + "tmp": "^0.2.3", "tree-sitter-wasms": "^0.1.11", "turndown": "^7.2.0", "web-tree-sitter": "^0.22.6", From 6d68edef3e0961671e7b34e869ff644e3f739f8e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 10 Jan 2025 17:34:44 -0500 Subject: [PATCH 14/47] refactor: implement Git fallback strategy in edit processing - Introduced a new Git fallback strategy for handling edits when confidence levels are low. - Replaced the previous Git edit strategy with a more robust approach that utilizes temporary directories and commits for original, search, and replace states. - Enhanced error handling and logging for better debugging during the edit process. - Updated the main edit function to incorporate the Git fallback strategy, ensuring it is attempted when other strategies fail. - Improved overall structure and readability of the applyGitFallback function, streamlining the process of applying changes using Git. 
--- .../strategies/new-unified/edit-strategies.ts | 293 ++++++++---------- 1 file changed, 122 insertions(+), 171 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index da0be21..3956bfa 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -1,8 +1,10 @@ import { diff_match_patch } from 'diff-match-patch'; -import * as git from 'isomorphic-git'; -import { fs as memfs, vol } from 'memfs'; -import { Change, EditResult, Hunk } from './types'; +import { EditResult, Hunk } from './types'; import { getDMPSimilarity, validateEditResult } from './search-strategies'; +import * as path from 'path'; +import simpleGit, { SimpleGit } from 'simple-git'; +import * as tmp from 'tmp'; +import * as fs from 'fs'; // Helper function to infer indentation function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { @@ -130,212 +132,156 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): }; } -// Git edit strategy with cherry-pick approach -async function applyGit(hunk: Hunk, content: string[], matchPosition: number): Promise { - if (matchPosition === -1) { - return { confidence: 0, result: content, strategy: 'git' }; - } - - vol.reset(); +// Git fallback strategy that works with full content +async function applyGitFallback(hunk: Hunk, content: string[]): Promise { + let tmpDir: tmp.DirResult | undefined; try { + // Create temporary directory + tmpDir = tmp.dirSync({ unsafeCleanup: true }); + const git: SimpleGit = simpleGit(tmpDir.name); + // Initialize git repo - await git.init({ fs: memfs, dir: '/' }); + await git.init(); + await git.addConfig('user.name', 'Temp'); + await git.addConfig('user.email', 'temp@example.com'); + + const filePath = path.join(tmpDir.name, 'file.txt'); + + // Build the search text (context + removals) + const searchLines 
= hunk.changes + .filter(change => change.type === 'context' || change.type === 'remove') + .map(change => change.originalLine || (change.indent + change.content)); - // Create original content - only use the edit region - const editRegion = content.slice(matchPosition, matchPosition + hunk.changes.length); - const editText = editRegion.join('\n'); - await memfs.promises.writeFile('/file.txt', editText); - await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); - await git.commit({ - fs: memfs, - dir: '/', - author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Original' - }); - const originalHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); + // Build the replace text (context + additions) + const replaceLines = hunk.changes + .filter(change => change.type === 'context' || change.type === 'add') + .map(change => change.originalLine || (change.indent + change.content)); - // Create search content (content with removals) - const searchLines = [...editRegion]; - let offset = 0; - for (const change of hunk.changes) { - if (change.type === 'remove') { - const index = searchLines.findIndex( - (line, i) => i >= offset && line.trimLeft() === change.content - ); - if (index !== -1) { - searchLines.splice(index, 1); - } - } - if (change.type !== 'add') { - offset++; - } - } - - // Create search branch and commit - await git.branch({ fs: memfs, dir: '/', ref: 'search' }); - await git.checkout({ fs: memfs, dir: '/', ref: 'search' }); - await memfs.promises.writeFile('/file.txt', searchLines.join('\n')); - await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); - await git.commit({ - fs: memfs, - dir: '/', - author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Search state' - }); - const searchHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); + const searchText = searchLines.join('\n'); + const replaceText = replaceLines.join('\n'); + const originalText = content.join('\n'); - // Create replace content (with 
additions) - const replaceLines = [...searchLines]; - offset = 0; - const contextLines = hunk.changes - .filter(c => c.type === 'context') - .map(c => c.content); - - for (const change of hunk.changes) { - if (change.type === 'add') { - const indent = change.indent || inferIndentation(change.content, contextLines); - replaceLines.splice(offset, 0, indent + change.content); - offset++; - } else if (change.type !== 'remove') { - offset++; - } - } - - // Create replace branch and commit - await git.branch({ fs: memfs, dir: '/', ref: 'replace' }); - await git.checkout({ fs: memfs, dir: '/', ref: 'replace' }); - await memfs.promises.writeFile('/file.txt', replaceLines.join('\n')); - await git.add({ fs: memfs, dir: '/', filepath: 'file.txt' }); - await git.commit({ - fs: memfs, - dir: '/', - author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Replace state' - }); - const replaceHash = await git.resolveRef({ fs: memfs, dir: '/', ref: 'HEAD' }); - - // Try both strategies: - // 1. OSR: Cherry-pick replace onto original - // 2. 
SR-SO: Apply search->replace changes to search->original - - // Strategy 1: OSR - await git.checkout({ fs: memfs, dir: '/', ref: originalHash }); + // Strategy 1: O->S->R, cherry-pick R onto O try { - await git.merge({ - fs: memfs, - dir: '/', - ours: originalHash, - theirs: replaceHash, - author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Cherry-pick OSR' - }); - const osrResult = (await memfs.promises.readFile('/file.txt')).toString(); - const osrSimilarity = getDMPSimilarity(editText, osrResult) + // Original commit - use full file content + fs.writeFileSync(filePath, originalText); + await git.add('file.txt'); + const originalCommit = await git.commit('original'); - const confidence = validateEditResult(hunk, osrResult, 'git-osr'); - - if (osrSimilarity * confidence > 0.9) { - // Construct result with edited portion - const newResult = [ - ...content.slice(0, matchPosition), - ...osrResult.split('\n'), - ...content.slice(matchPosition + hunk.changes.length) - ]; + // Search commit - just the search text + fs.writeFileSync(filePath, searchText); + await git.add('file.txt'); + await git.commit('search'); + + // Replace commit - just the replace text + fs.writeFileSync(filePath, replaceText); + await git.add('file.txt'); + const replaceCommit = await git.commit('replace'); + + // Go back to original and cherry-pick + await git.checkout(originalCommit.commit); + try { + await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]); + + // Read result + const newText = fs.readFileSync(filePath, 'utf-8'); + const newLines = newText.split('\n'); return { - confidence: osrSimilarity, - result: newResult, - strategy: 'git-osr' + confidence: 1, + result: newLines, + strategy: 'git-fallback' }; + } catch (cherryPickError) { + console.log('Strategy 1 failed with merge conflict'); } } catch (error) { - console.log('OSR strategy failed:', error); + console.log('Strategy 1 failed:', error); } - // Strategy 2: SR-SO - await git.checkout({ fs: memfs, dir: '/', 
ref: searchHash }); + // Strategy 2: S->R, S->O, cherry-pick R onto O try { - // First apply original changes - await git.merge({ - fs: memfs, - dir: '/', - ours: searchHash, - theirs: originalHash, - author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Apply original changes' - }); + // Reset repo + await git.init(); + await git.addConfig('user.name', 'Temp'); + await git.addConfig('user.email', 'temp@example.com'); - // Then apply replace changes - await git.merge({ - fs: memfs, - dir: '/', - ours: 'HEAD', - theirs: replaceHash, - author: { name: 'Temp', email: 'temp@example.com' }, - message: 'Apply replace changes' - }); + // Search commit - just the search text + fs.writeFileSync(filePath, searchText); + await git.add('file.txt'); + const searchCommit = await git.commit('search'); - const srsoResult = (await memfs.promises.readFile('/file.txt')).toString(); - const srsoSimilarity = getDMPSimilarity(editText, srsoResult) + // Replace commit - just the replace text + fs.writeFileSync(filePath, replaceText); + await git.add('file.txt'); + const replaceCommit = await git.commit('replace'); - const confidence = validateEditResult(hunk, srsoResult, 'git-srso'); + // Go back to search and create original with full file content + await git.checkout(searchCommit.commit); + fs.writeFileSync(filePath, originalText); + await git.add('file.txt'); + await git.commit('original'); - // Construct result with edited portion - const newResult = [ - ...content.slice(0, matchPosition), - ...srsoResult.split('\n'), - ...content.slice(matchPosition + hunk.changes.length) - ]; - return { - confidence: srsoSimilarity * confidence, - result: newResult, - strategy: 'git-srso' - }; + try { + // Cherry-pick replace onto original + await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]); + + // Read result + const newText = fs.readFileSync(filePath, 'utf-8'); + const newLines = newText.split('\n'); + return { + confidence: 1, + result: newLines, + strategy: 
'git-fallback' + }; + } catch (cherryPickError) { + console.log('Strategy 2 failed with merge conflict'); + } } catch (error) { - console.log('SR-SO strategy failed:', error); - return { confidence: 0, result: content, strategy: 'git' }; + console.log('Strategy 2 failed:', error); } + + // If both strategies fail, return no confidence + console.log('Git fallback failed'); + return { confidence: 0, result: content, strategy: 'git-fallback' }; } catch (error) { - console.log('Git strategy failed:', error); - return { confidence: 0, result: content, strategy: 'git' }; + console.log('Git fallback strategy failed:', error); + return { confidence: 0, result: content, strategy: 'git-fallback' }; } finally { - vol.reset(); + // Clean up temporary directory + if (tmpDir) { + tmpDir.removeCallback(); + } } } // Main edit function that tries strategies sequentially export async function applyEdit(hunk: Hunk, content: string[], matchPosition: number, confidence: number, debug: string = 'false'): Promise { - - // Don't attempt any edits if confidence is too low and not in debug mode + // Don't attempt regular edits if confidence is too low const MIN_CONFIDENCE = 0.9; - if (confidence < MIN_CONFIDENCE) { - console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), skipping edit`); - return { confidence: 0, result: content, strategy: 'none' }; + if (confidence < MIN_CONFIDENCE && debug === '') { + console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`); + return applyGitFallback(hunk, content); } // Try each strategy in sequence until one succeeds const strategies = [ { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, - { name: 'git', apply: () => applyGit(hunk, content, matchPosition) } + { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } ]; if (debug !== '') { - 
// In debug mode, try all strategies and return the first success - const results = await Promise.all(strategies.map(async strategy => { - console.log(`Attempting edit with ${strategy.name} strategy...`); - const result = await strategy.apply(); - console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); - return result; - })); + // In debug mode, try all strategies including git fallback + const results = await Promise.all([ + ...strategies.map(async strategy => { + console.log(`Attempting edit with ${strategy.name} strategy...`); + const result = await strategy.apply(); + console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); + return result; + }) + ]); - /*const successfulResults = results.filter(result => result.confidence > MIN_CONFIDENCE); - if (successfulResults.length > 0) { - const bestResult = successfulResults.reduce((best, current) => - current.confidence > best.confidence ? current : best - ); - return bestResult; - }*/ return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; } else { // Normal mode - try strategies sequentially until one succeeds @@ -345,8 +291,13 @@ export async function applyEdit(hunk: Hunk, content: string[], matchPosition: nu return result; } } + // If all strategies fail, try git fallback + + const result = await applyGitFallback(hunk, content); + if(result.confidence === 1) { + return result; + } } - // If all strategies fail, return failure return { confidence: 0, result: content, strategy: 'none' }; } From 0d004b5c77498c3a212a56ee417517e917f744af Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 10 Jan 2025 17:35:22 -0500 Subject: [PATCH 15/47] refactor: increase context lines in NewUnifiedDiffStrategy - Updated the maximum context lines from 3 to 6 in the parseUnifiedDiff method to provide more context around changes. 
--- src/core/diff/strategies/new-unified/index.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 6f2b62f..17b5b3f 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -5,7 +5,7 @@ import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { private parseUnifiedDiff(diff: string): Diff { - const MAX_CONTEXT_LINES = 3; // Number of context lines to keep before/after changes + const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes const lines = diff.split('\n'); const hunks: Hunk[] = []; let currentHunk: Hunk | null = null; @@ -49,9 +49,7 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { continue; } - if (!currentHunk) { - continue; - } + if (!currentHunk) {continue}; // Extract the complete indentation for each line const content = line.slice(1); // Remove the diff marker From f007f6434477689d1f1259dcd4c835e99b46aac2 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 10 Jan 2025 17:36:43 -0500 Subject: [PATCH 16/47] refactor: increase minimum confidence threshold in search strategies - Updated the minimum confidence level from 0.95 to 0.97 to enhance the accuracy of search results. - Adjusted confidence calculation in the findLevenshteinMatch function to ensure more reliable matching. 
--- src/core/diff/strategies/new-unified/search-strategies.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 9e61f87..9b1c785 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -10,7 +10,7 @@ export type SearchResult = { }; //TODO: this should be configurable -const MIN_CONFIDENCE = 0.95; +const MIN_CONFIDENCE = 0.97; // Helper function to prepare search string from context export function prepareSearchString(changes: Change[]): string { @@ -74,7 +74,6 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string) console.log('expectedSimilarity', strategy, expectedSimilarity); - // Scale between 0.98 and 1.0 (4% impact) based on expected similarity const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1; @@ -183,11 +182,10 @@ export function findLevenshteinMatch( const index = startIndex + candidates.indexOf(closestMatch); const similarity = getDMPSimilarity(searchStr, closestMatch); const contextSimilarity = validateContextLines(searchStr, closestMatch); - const confidence = Math.min(similarity, contextSimilarity) * 0.7; // Still apply Levenshtein penalty - + const confidence = Math.min(similarity, contextSimilarity) return { index, - confidence, + confidence: index !== -1 ? confidence : 0, strategy: 'levenshtein', }; } From 258024aa5aca5ee440c7b09927be875f563d2bfc Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 12:00:29 -0500 Subject: [PATCH 17/47] refactor: enhance search strategies with adaptive thresholds and overlapping windows - Introduced adaptive confidence thresholds based on file size to improve search accuracy. - Implemented overlapping window functionality in search strategies to capture matches more effectively. 
- Added helper functions for evaluating content uniqueness and creating overlapping windows. - Enhanced existing search functions (exact, similarity, and Levenshtein) to utilize new strategies for better match validation. - Improved logging for search results to facilitate debugging and analysis of search performance. --- .../new-unified/search-strategies.ts | 510 ++++++++++++------ 1 file changed, 353 insertions(+), 157 deletions(-) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 9b1c785..4dfb2c5 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -1,40 +1,71 @@ -import { compareTwoStrings } from 'string-similarity'; -import { closest } from 'fastest-levenshtein'; -import { diff_match_patch } from 'diff-match-patch'; -import { Change, Hunk } from './types'; +import { compareTwoStrings } from "string-similarity" +import { closest } from "fastest-levenshtein" +import { diff_match_patch } from "diff-match-patch" +import { Change, Hunk } from "./types" export type SearchResult = { - index: number; - confidence: number; - strategy: string; -}; + index: number + confidence: number + strategy: string +} //TODO: this should be configurable -const MIN_CONFIDENCE = 0.97; +const MIN_CONFIDENCE = 0.97 +const MIN_CONFIDENCE_LARGE_FILE = 0.9 +const LARGE_FILE_THRESHOLD = 1000 // lines +const UNIQUE_CONTENT_BOOST = 0.05 +const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows +const MAX_WINDOW_SIZE = 500 // maximum lines in a window + +// Helper function to calculate adaptive confidence threshold based on file size +function getAdaptiveThreshold(contentLength: number): number { + if (contentLength <= LARGE_FILE_THRESHOLD) { + return MIN_CONFIDENCE + } + return MIN_CONFIDENCE_LARGE_FILE +} + +// Helper function to evaluate content uniqueness +function evaluateContentUniqueness(searchStr: string, 
content: string[]): number { + const searchLines = searchStr.split("\n") + const uniqueLines = new Set(searchLines) + const contentStr = content.join("\n") + + // Calculate how many search lines are relatively unique in the content + let uniqueCount = 0 + for (const line of uniqueLines) { + const regex = new RegExp(line.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g") + const matches = contentStr.match(regex) + if (matches && matches.length <= 2) { + // Line appears at most twice + uniqueCount++ + } + } + + return uniqueCount / uniqueLines.size +} // Helper function to prepare search string from context export function prepareSearchString(changes: Change[]): string { - const lines = changes - .filter((c) => c.type === 'context' || c.type === 'remove') - .map((c) => c.content); - return lines.join('\n'); + const lines = changes.filter((c) => c.type === "context" || c.type === "remove").map((c) => c.originalLine) + return lines.join("\n") } // Helper function to evaluate similarity between two texts export function evaluateSimilarity(original: string, modified: string): number { - return compareTwoStrings(original, modified); + return compareTwoStrings(original, modified) } // Helper function to validate using diff-match-patch export function getDMPSimilarity(original: string, modified: string): number { - const dmp = new diff_match_patch(); - const diffs = dmp.diff_main(original, modified); - dmp.diff_cleanupSemantic(diffs); - const patches = dmp.patch_make(original, diffs); - const [expectedText] = dmp.patch_apply(patches, original); + const dmp = new diff_match_patch() + const diffs = dmp.diff_main(original, modified) + dmp.diff_cleanupSemantic(diffs) + const patches = dmp.patch_make(original, diffs) + const [expectedText] = dmp.patch_apply(patches, original) - const similarity = evaluateSimilarity(expectedText, modified); - return similarity; + const similarity = evaluateSimilarity(expectedText, modified) + return similarity } // Helper function to validate edit 
results using hunk information @@ -43,176 +74,341 @@ export function getDMPSimilarity(original: string, modified: string): number { // returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. // If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). export function validateEditResult(hunk: Hunk, result: string, strategy: string): number { - const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)); + const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)) - // Create skeleton of original content (context + removed lines) - const originalSkeleton = hunkDeepCopy.changes - .filter((change) => change.type === 'context' || change.type === 'remove') - .map((change) => change.content) - .join('\n'); + // Create skeleton of original content (context + removed lines) + const originalSkeleton = hunkDeepCopy.changes + .filter((change) => change.type === "context" || change.type === "remove") + .map((change) => change.content) + .join("\n") - // Create skeleton of expected result (context + added lines) - const expectedSkeleton = hunkDeepCopy.changes - .filter((change) => change.type === 'context' || change.type === 'add') - .map((change) => change.content) - .join('\n'); + // Create skeleton of expected result (context + added lines) + const expectedSkeleton = hunkDeepCopy.changes + .filter((change) => change.type === "context" || change.type === "add") + .map((change) => change.content) + .join("\n") - // Compare with original content - const originalSimilarity = evaluateSimilarity(originalSkeleton, result); - console.log('originalSimilarity ', strategy, originalSimilarity); - // If original similarity is 1, it means changes weren't applied - if (originalSimilarity > 0.97) { - if (originalSimilarity === 1) { - return 0.5; // Significant confidence reduction - } else { - return 0.8; - } - } + // Compare with original content + const originalSimilarity = evaluateSimilarity(originalSkeleton, result) + console.log("originalSimilarity ", 
strategy, originalSimilarity) - // Compare with expected result - const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result); + // Compare with expected result + const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result) - console.log('expectedSimilarity', strategy, expectedSimilarity); - - // Scale between 0.98 and 1.0 (4% impact) based on expected similarity - const multiplier = - expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1; + console.log("expectedSimilarity", strategy, expectedSimilarity) + console.log("result", result) - return multiplier; + // If original similarity is 1 and expected similarity is not 1, it means changes weren't applied + if (originalSimilarity > 0.97 && expectedSimilarity !== 1) { + if (originalSimilarity === 1) { + // If original similarity is 1, it means changes weren't applied + if (originalSimilarity > 0.97) { + if (originalSimilarity === 1) { + return 0.5 // Significant confidence reduction + } else { + return 0.8 + } + } + } else { + return 0.8 + } + } + + // Scale between 0.98 and 1.0 (4% impact) based on expected similarity + const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 
0.96 + 0.04 * expectedSimilarity : 1 + + return multiplier } // Helper function to validate context lines against original content function validateContextLines(searchStr: string, content: string): number { - // Extract just the context lines from the search string - const contextLines = searchStr - .split('\n') - .filter((line) => !line.startsWith('-')); // Exclude removed lines + // Extract just the context lines from the search string + const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines - // Compare context lines with content - const similarity = evaluateSimilarity(contextLines.join('\n'), content); + // Compare context lines with content + const similarity = evaluateSimilarity(contextLines.join("\n"), content) - // Context lines must match very closely, or confidence drops significantly - return similarity < MIN_CONFIDENCE ? similarity * 0.3 : similarity; + // Get adaptive threshold based on content size + const threshold = getAdaptiveThreshold(content.split("\n").length) + + // Calculate uniqueness boost + const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n")) + const uniquenessBoost = uniquenessScore * UNIQUE_CONTENT_BOOST + + // Adjust confidence based on threshold and uniqueness + return similarity < threshold ? 
similarity * 0.3 + uniquenessBoost : similarity + uniquenessBoost } -// Exact match strategy -export function findExactMatch( - searchStr: string, - content: string[], - startIndex: number = 0 -): SearchResult { - const contentStr = content.slice(startIndex).join('\n'); - const searchLines = searchStr.split('\n'); +// Helper function to create overlapping windows +function createOverlappingWindows( + content: string[], + searchSize: number, + overlapSize: number = DEFAULT_OVERLAP_SIZE +): { window: string[]; startIndex: number }[] { + const windows: { window: string[]; startIndex: number }[] = [] - const exactMatch = contentStr.indexOf(searchStr); - if (exactMatch !== -1) { - const matchedContent = content - .slice( - startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, - startIndex + - contentStr.slice(0, exactMatch).split('\n').length - - 1 + - searchLines.length - ) - .join('\n'); + // Ensure minimum window size is at least searchSize + const effectiveWindowSize = Math.max(searchSize, Math.min(searchSize * 2, MAX_WINDOW_SIZE)) - const similarity = getDMPSimilarity(searchStr, matchedContent); - const contextSimilarity = validateContextLines(searchStr, matchedContent); - const confidence = Math.min(similarity, contextSimilarity); + // Ensure overlap size doesn't exceed window size + const effectiveOverlapSize = Math.min(overlapSize, effectiveWindowSize - 1) - return { - index: - startIndex + contentStr.slice(0, exactMatch).split('\n').length - 1, - confidence, - strategy: 'exact', - }; - } + // Calculate step size, ensure it's at least 1 + const stepSize = Math.max(1, effectiveWindowSize - effectiveOverlapSize) - return { index: -1, confidence: 0, strategy: 'exact' }; + for (let i = 0; i < content.length; i += stepSize) { + const windowContent = content.slice(i, i + effectiveWindowSize) + if (windowContent.length >= searchSize) { + windows.push({ window: windowContent, startIndex: i }) + } + } + + return windows +} + +// Helper function to combine 
overlapping matches +function combineOverlappingMatches( + matches: (SearchResult & { windowIndex: number })[], + overlapSize: number = DEFAULT_OVERLAP_SIZE +): SearchResult[] { + if (matches.length === 0) { + return [] + } + + // Sort matches by confidence + matches.sort((a, b) => b.confidence - a.confidence) + + const combinedMatches: SearchResult[] = [] + const usedIndices = new Set() + + for (const match of matches) { + if (usedIndices.has(match.windowIndex)) {continue} + + // Find overlapping matches + const overlapping = matches.filter( + (m) => + Math.abs(m.windowIndex - match.windowIndex) === 1 && + Math.abs(m.index - match.index) <= overlapSize && + !usedIndices.has(m.windowIndex) + ) + + if (overlapping.length > 0) { + // Boost confidence if we find same match in overlapping windows + const avgConfidence = + (match.confidence + overlapping.reduce((sum, m) => sum + m.confidence, 0)) / (overlapping.length + 1) + const boost = Math.min(0.05 * overlapping.length, 0.1) // Max 10% boost + + combinedMatches.push({ + index: match.index, + confidence: Math.min(1, avgConfidence + boost), + strategy: `${match.strategy}-overlapping`, + }) + + usedIndices.add(match.windowIndex) + overlapping.forEach((m) => usedIndices.add(m.windowIndex)) + } else { + combinedMatches.push({ + index: match.index, + confidence: match.confidence, + strategy: match.strategy, + }) + usedIndices.add(match.windowIndex) + } + } + + return combinedMatches +} + +// Modified search functions to use sliding windows +export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const searchLines = searchStr.split("\n") + const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length) + const matches: (SearchResult & { windowIndex: number })[] = [] + + windows.forEach((windowData, windowIndex) => { + const windowStr = windowData.window.join("\n") + const exactMatch = windowStr.indexOf(searchStr) + + if (exactMatch !== -1) { + 
const matchedContent = windowData.window + .slice( + windowStr.slice(0, exactMatch).split("\n").length - 1, + windowStr.slice(0, exactMatch).split("\n").length - 1 + searchLines.length + ) + .join("\n") + + const similarity = getDMPSimilarity(searchStr, matchedContent) + const contextSimilarity = validateContextLines(searchStr, matchedContent) + const confidence = Math.min(similarity, contextSimilarity) + + matches.push({ + index: startIndex + windowData.startIndex + windowStr.slice(0, exactMatch).split("\n").length - 1, + confidence, + strategy: "exact", + windowIndex, + }) + } + }) + + const combinedMatches = combineOverlappingMatches(matches) + return combinedMatches.length > 0 ? combinedMatches[0] : { index: -1, confidence: 0, strategy: "exact" } } // String similarity strategy -export function findSimilarityMatch( - searchStr: string, - content: string[], - startIndex: number = 0 -): SearchResult { - const searchLines = searchStr.split('\n'); - let bestScore = 0; - let bestIndex = -1; - const minScore = 0.8; +export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const searchLines = searchStr.split("\n") + let bestScore = 0 + let bestIndex = -1 + const minScore = 0.8 - for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { - const windowStr = content.slice(i, i + searchLines.length).join('\n'); - const score = compareTwoStrings(searchStr, windowStr); - if (score > bestScore && score >= minScore) { - const similarity = getDMPSimilarity(searchStr, windowStr); - const contextSimilarity = validateContextLines(searchStr, windowStr); - const adjustedScore = Math.min(similarity, contextSimilarity) * score; + for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { + const windowStr = content.slice(i, i + searchLines.length).join("\n") + const score = compareTwoStrings(searchStr, windowStr) + if (score > bestScore && score >= minScore) { + const similarity = 
getDMPSimilarity(searchStr, windowStr) + const contextSimilarity = validateContextLines(searchStr, windowStr) + const adjustedScore = Math.min(similarity, contextSimilarity) * score - if (adjustedScore > bestScore) { - bestScore = adjustedScore; - bestIndex = i; - } - } - } + if (adjustedScore > bestScore) { + bestScore = adjustedScore + bestIndex = i + } + } + } - return { - index: bestIndex, - confidence: bestIndex !== -1 ? bestScore : 0, - strategy: 'similarity', - }; + return { + index: bestIndex, + confidence: bestIndex !== -1 ? bestScore : 0, + strategy: "similarity", + } } // Levenshtein strategy -export function findLevenshteinMatch( - searchStr: string, - content: string[], - startIndex: number = 0 -): SearchResult { - const searchLines = searchStr.split('\n'); - const candidates = []; +export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const searchLines = searchStr.split("\n") + const candidates = [] - for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { - candidates.push(content.slice(i, i + searchLines.length).join('\n')); - } + for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { + candidates.push(content.slice(i, i + searchLines.length).join("\n")) + } - if (candidates.length > 0) { - const closestMatch = closest(searchStr, candidates); - const index = startIndex + candidates.indexOf(closestMatch); - const similarity = getDMPSimilarity(searchStr, closestMatch); - const contextSimilarity = validateContextLines(searchStr, closestMatch); - const confidence = Math.min(similarity, contextSimilarity) - return { - index, - confidence: index !== -1 ? 
confidence : 0, - strategy: 'levenshtein', - }; - } + if (candidates.length > 0) { + const closestMatch = closest(searchStr, candidates) + const index = startIndex + candidates.indexOf(closestMatch) + const similarity = getDMPSimilarity(searchStr, closestMatch) + const contextSimilarity = validateContextLines(searchStr, closestMatch) + const confidence = Math.min(similarity, contextSimilarity) + return { + index, + confidence: index !== -1 ? confidence : 0, + strategy: "levenshtein", + } + } - return { index: -1, confidence: 0, strategy: 'levenshtein' }; + return { index: -1, confidence: 0, strategy: "levenshtein" } +} + +// Helper function to identify anchor lines based on uniqueness and complexity +function identifyAnchors(searchStr: string, content: string[]): { line: string; index: number; weight: number }[] { + const searchLines = searchStr.split("\n") + const contentStr = content.join("\n") + const anchors: { line: string; index: number; weight: number }[] = [] + + for (let i = 0; i < searchLines.length; i++) { + const line = searchLines[i] + if (!line.trim()) {continue} // Skip empty lines + + // Calculate line complexity (more special chars = more unique) + const specialChars = (line.match(/[^a-zA-Z0-9\s]/g) || []).length + const complexity = specialChars / line.length + + // Count occurrences in content + const regex = new RegExp(line.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g") + const matches = contentStr.match(regex) + const occurrences = matches ? matches.length : 0 + + // Calculate uniqueness weight + const uniquenessWeight = occurrences <= 1 ? 
1 : 1 / occurrences + const weight = uniquenessWeight * (0.7 + 0.3 * complexity) + + if (weight > 0.5) { + // Only consider lines with high enough weight + anchors.push({ line, index: i, weight }) + } + } + + // Sort by weight descending + return anchors.sort((a, b) => b.weight - a.weight) +} + +// Helper function to validate anchor positions +function validateAnchorPositions( + anchors: { line: string; index: number }[], + content: string[], + searchLines: string[] +): number { + for (const anchor of anchors) { + const anchorIndex = content.findIndex((line) => line === anchor.line) + if (anchorIndex !== -1) { + // Check if surrounding context matches + const contextBefore = searchLines.slice(Math.max(0, anchor.index - 2), anchor.index).join("\n") + const contextAfter = searchLines.slice(anchor.index + 1, anchor.index + 3).join("\n") + const contentBefore = content.slice(Math.max(0, anchorIndex - 2), anchorIndex).join("\n") + const contentAfter = content.slice(anchorIndex + 1, anchorIndex + 3).join("\n") + + const beforeSimilarity = evaluateSimilarity(contextBefore, contentBefore) + const afterSimilarity = evaluateSimilarity(contextAfter, contentAfter) + + if (beforeSimilarity > 0.8 && afterSimilarity > 0.8) { + return anchorIndex - anchor.index + } + } + } + return -1 +} + +// Anchor-based search strategy +export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const searchLines = searchStr.split("\n") + const anchors = identifyAnchors(searchStr, content.slice(startIndex)) + + if (anchors.length === 0) { + return { index: -1, confidence: 0, strategy: "anchor" } + } + + // Try to validate position using top anchors + const offset = validateAnchorPositions(anchors.slice(0, 3), content.slice(startIndex), searchLines) + + if (offset !== -1) { + const matchPosition = startIndex + offset + const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n") + const similarity = 
getDMPSimilarity(searchStr, matchedContent) + const contextSimilarity = validateContextLines(searchStr, matchedContent) + const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight + + return { + index: matchPosition, + confidence: Math.min(1, confidence), // Cap at 1 + strategy: "anchor", + } + } + + return { index: -1, confidence: 0, strategy: "anchor" } } // Main search function that tries all strategies -export function findBestMatch( - searchStr: string, - content: string[], - startIndex: number = 0 -): SearchResult { - const strategies = [ - findExactMatch, - findSimilarityMatch, - findLevenshteinMatch, - ]; +export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { + const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] - let bestResult: SearchResult = { index: -1, confidence: 0, strategy: 'none' }; + let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" } - for (const strategy of strategies) { - const result = strategy(searchStr, content, startIndex); - if (result.confidence > bestResult.confidence) { - bestResult = result; - } - } + for (const strategy of strategies) { + const result = strategy(searchStr, content, startIndex) + console.log("Search result:", result) + if (result.confidence > bestResult.confidence) { + bestResult = result + } + } - return bestResult; + return bestResult } From d25f37001367e74a604f34330f1a81df0bc45738 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 12:01:09 -0500 Subject: [PATCH 18/47] refactor: enhance NewUnifiedDiffStrategy for improved diff parsing and context handling - Refactored the parseUnifiedDiff method to streamline the processing of diff lines and improve context line management. - Added handling for empty context lines to ensure accurate representation in hunks. 
- Updated the tool description to clarify requirements for generating unified diffs, emphasizing the importance of preserving indentation and grouping related changes. - Improved overall readability and consistency of the code by standardizing formatting and indentation practices. --- src/core/diff/strategies/new-unified/index.ts | 232 +++++++++--------- 1 file changed, 119 insertions(+), 113 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 17b5b3f..ad9fd81 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -5,135 +5,139 @@ import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { private parseUnifiedDiff(diff: string): Diff { - const MAX_CONTEXT_LINES = 6; // Number of context lines to keep before/after changes - const lines = diff.split('\n'); - const hunks: Hunk[] = []; - let currentHunk: Hunk | null = null; - - let i = 0; - while (i < lines.length && !lines[i].startsWith('@@')) { - i++; - } - - for (; i < lines.length; i++) { - const line = lines[i]; - - if (line.startsWith('@@')) { - if (currentHunk && currentHunk.changes.length > 0 && - currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { - // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes - const changes = currentHunk.changes; - let startIdx = 0; - let endIdx = changes.length - 1; - - // Find first non-context line - for (let j = 0; j < changes.length; j++) { - if (changes[j].type !== 'context') { - startIdx = Math.max(0, j - MAX_CONTEXT_LINES); - break; - } - } - - // Find last non-context line - for (let j = changes.length - 1; j >= 0; j--) { - if (changes[j].type !== 'context') { - endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES); - break; - } - } - - currentHunk.changes = changes.slice(startIdx, endIdx + 1); - hunks.push(currentHunk); - 
} - currentHunk = { changes: [] }; - continue; - } - - if (!currentHunk) {continue}; - - // Extract the complete indentation for each line - const content = line.slice(1); // Remove the diff marker - const indentMatch = content.match(/^(\s*)/); - const indent = indentMatch ? indentMatch[0] : ''; - const trimmedContent = content.slice(indent.length); - - if (line.startsWith(' ')) { - currentHunk.changes.push({ - type: 'context', - content: trimmedContent, - indent, - originalLine: content - }); - } else if (line.startsWith('+')) { - currentHunk.changes.push({ - type: 'add', - content: trimmedContent, - indent, - originalLine: content - }); - } else if (line.startsWith('-')) { - currentHunk.changes.push({ - type: 'remove', - content: trimmedContent, - indent, - originalLine: content - }); - } - } - - if (currentHunk && currentHunk.changes.length > 0 && - currentHunk.changes.some(change => change.type === 'add' || change.type === 'remove')) { - hunks.push(currentHunk); - } - - return { hunks }; - } + const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes + const lines = diff.split("\n") + const hunks: Hunk[] = [] + let currentHunk: Hunk | null = null + + let i = 0 + while (i < lines.length && !lines[i].startsWith("@@")) { + i++ + } + + for (; i < lines.length; i++) { + const line = lines[i] + + if (line.startsWith("@@")) { + if ( + currentHunk && + currentHunk.changes.length > 0 && + currentHunk.changes.some((change) => change.type === "add" || change.type === "remove") + ) { + // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes + const changes = currentHunk.changes + let startIdx = 0 + let endIdx = changes.length - 1 + + // Find first non-context line + for (let j = 0; j < changes.length; j++) { + if (changes[j].type !== "context") { + startIdx = Math.max(0, j - MAX_CONTEXT_LINES) + break + } + } + + // Find last non-context line + for (let j = changes.length - 1; j >= 0; j--) { + if (changes[j].type !== "context") 
{ + endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES) + break + } + } + + currentHunk.changes = changes.slice(startIdx, endIdx + 1) + hunks.push(currentHunk) + } + currentHunk = { changes: [] } + continue + } + + if (!currentHunk) { + continue + } + + // Extract the complete indentation for each line + const content = line.slice(1) // Remove the diff marker + const indentMatch = content.match(/^(\s*)/) + const indent = indentMatch ? indentMatch[0] : "" + const trimmedContent = content.slice(indent.length) + + if (line.startsWith(" ")) { + currentHunk.changes.push({ + type: "context", + content: trimmedContent, + indent, + originalLine: content, + }) + } else if (line.startsWith("+")) { + currentHunk.changes.push({ + type: "add", + content: trimmedContent, + indent, + originalLine: content, + }) + } else if (line.startsWith("-")) { + currentHunk.changes.push({ + type: "remove", + content: trimmedContent, + indent, + originalLine: content, + }) + } else { + // Assume is a context line and add a space if it's empty + const finalContent = trimmedContent ? " " + trimmedContent : " " + currentHunk.changes.push({ + type: "context", + content: finalContent, + indent, + originalLine: content, + }) + } + } + + if ( + currentHunk && + currentHunk.changes.length > 0 && + currentHunk.changes.some((change) => change.type === "add" || change.type === "remove") + ) { + hunks.push(currentHunk) + } + + return { hunks } + } getToolDescription(cwd: string): string { - return `## apply_diff + return `# apply_diff Tool Rules: -Description: -Apply a unified diff to a file at the specified path. This tool generates minimal, focused diffs that group related changes together. +Generate a unified diff similar to what "diff -U0" would produce. -Important: It is not necessary to include line numbers in the @@ lines! The patch tool does not use them. +The first two lines must include the file paths, starting with "---" for the original file and "+++" for the updated file. 
Do not include timestamps with the file paths. -Key Requirements: -1. Generate compact diffs with minimal context - - Use reduced context similar to diff -U0 - - Only include hunks that contain actual changes (+ or - lines) - - Skip hunks with only unchanged lines +Each hunk of changes must start with a line containing only "@@ ... @@". Do not include line numbers or ranges in the "@@ ... @@" lines. These are not necessary for the user's patch tool. -2. Use high-level, logical grouping - - When modifying code blocks (functions, methods, loops), replace the entire block in one hunk - - Delete the complete existing block with \`-\` lines - - Add the complete updated block with \`+\` lines - - Group related changes together rather than creating many small hunks +Your output must be a correct, clean patch that applies successfully against the current file contents. Mark all lines that need to be removed or changed with "-". Mark all new or modified lines with "+". Ensure you include all necessary changes; missing or unmarked lines will result in a broken patch. -3. Format requirements - - Include file paths in the first 2 lines (without timestamps) - - Each hunk must start with ONLY \`@@ ... @@\` (line numbers are not needed) - - Preserve exact indentation - - The @@ lines should be simple separators between hunks - Line numbers or line ranges should not be included +Indentation matters! Make sure to preserve the exact indentation of both removed and added lines. -4. Common operations - - To move code: Create one hunk to delete from original location, another to add at new location - - To modify a block: Delete entire original block, then add entire new version - - Order hunks in whatever logical sequence makes sense +Start a new hunk for each section of the file that requires changes. However, include only the hunks that contain actual changes. If a hunk consists entirely of unchanged lines, skip it. 
-Parameters: -- path: (required) File path relative to current working directory ${cwd} -- diff: (required) Unified format diff content to apply +Group related changes together in the same hunk whenever possible. Output hunks in whatever logical order makes the most sense. -The output must generate correct, clean patches that apply successfully against the current file contents. All changes must be properly marked with + (new/modified) or - (removed) lines. +When editing a function, method, loop, or similar code block, replace the *entire* block in one hunk. Use "-" lines to delete the existing block and "+" lines to add the updated block. This ensures accuracy in your diffs. +If you need to move code within a file, create two hunks: one to delete the code from its original location and another to insert it at the new location. + +To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext". + +Here’s an example of the desired format: -Example: \`\`\`diff --- mathweb/flask/app.py +++ mathweb/flask/app.py @@ ... @@ -class MathWeb: +import sympy + + +class MathWeb: @@ ... @@ @@ -165,6 +169,8 @@ Example: + return str(num) \`\`\` +Be precise, consistent, and follow these rules carefully to generate correct diffs! 
+ Usage: File path here @@ -182,14 +188,14 @@ Your diff here ): Promise { const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) - const originalLines = originalContent.split("\n") + const originalLines = originalContent.split("\n") let result = [...originalLines] for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) const { index: matchPosition, confidence } = findBestMatch(contextStr, result) - const editResult = await applyEdit(hunk, result, matchPosition, confidence) + const editResult = await applyEdit(hunk, result, matchPosition, confidence, '') if (editResult.confidence > MIN_CONFIDENCE) { result = editResult.result } else { From 534888af9736b4acca330835004ac387009d1aef Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 12:20:52 -0500 Subject: [PATCH 19/47] refactor: improve NewUnifiedDiffStrategy with enhanced formatting and error handling - Cleaned up formatting in the parseUnifiedDiff method to ensure consistent indentation and spacing. - Updated documentation to clarify format requirements for generating unified diffs. - Added error handling for cases where no hunks are found in the provided diff, improving robustness. - Enhanced overall readability of the code by standardizing comments and structure. --- src/core/diff/strategies/new-unified/index.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index ad9fd81..957a454 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -102,7 +102,7 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { ) { hunks.push(currentHunk) } - + return { hunks } } @@ -129,7 +129,7 @@ If you need to move code within a file, create two hunks: one to delete the code To create a new file, show a diff from "--- /dev/null" to "+++ path/to/new/file.ext". 
-Here’s an example of the desired format: +Format Requirements: \`\`\`diff --- mathweb/flask/app.py @@ -171,6 +171,10 @@ Here’s an example of the desired format: Be precise, consistent, and follow these rules carefully to generate correct diffs! +Parameters: +- path: (required) The path of the file to apply the diff to (relative to the current working directory ${cwd}) +- diff: (required) The diff content in unified format to apply to the file. + Usage: File path here @@ -191,6 +195,10 @@ Your diff here const originalLines = originalContent.split("\n") let result = [...originalLines] + if (!parsedDiff.hunks.length) { + return { success: false, error: "No hunks found in diff" } + } + for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) const { index: matchPosition, confidence } = findBestMatch(contextStr, result) From 3d901929c24ed8093af58f39c8aeeff26dea2051 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 16:44:15 -0500 Subject: [PATCH 20/47] test: add comprehensive tests for NewUnifiedDiffStrategy error handling and edge cases --- .../strategies/__tests__/new-unified.test.ts | 225 +++++++++++++++++- 1 file changed, 223 insertions(+), 2 deletions(-) diff --git a/src/core/diff/strategies/__tests__/new-unified.test.ts b/src/core/diff/strategies/__tests__/new-unified.test.ts index a7fd279..387988b 100644 --- a/src/core/diff/strategies/__tests__/new-unified.test.ts +++ b/src/core/diff/strategies/__tests__/new-unified.test.ts @@ -35,11 +35,13 @@ line3`; +modified line3`; const result = await strategy.applyDiff(original, diff); - - expect(result).toBe(`line1 + expect(result.success).toBe(true); + if(result.success) { + expect(result.content).toBe(`line1 new line line2 modified line3`); + } }); it('should handle multiple hunks', async () => { @@ -309,4 +311,223 @@ console.log(primeArray); expect(result.content).toBe(expected); } }); + + describe('error handling and edge cases', () => { + it('should reject 
completely invalid diff format', async () => { + const original = 'line1\nline2\nline3'; + const invalidDiff = 'this is not a diff at all'; + + const result = await strategy.applyDiff(original, invalidDiff); + expect(result.success).toBe(false); + }); + + it('should reject diff with invalid hunk format', async () => { + const original = 'line1\nline2\nline3'; + const invalidHunkDiff = `--- a/file.txt ++++ b/file.txt +invalid hunk header + line1 +-line2 ++new line`; + + const result = await strategy.applyDiff(original, invalidHunkDiff); + expect(result.success).toBe(false); + }); + + it('should fail when diff tries to modify non-existent content', async () => { + const original = 'line1\nline2\nline3'; + const nonMatchingDiff = `--- a/file.txt ++++ b/file.txt +@@ ... @@ + line1 +-nonexistent line ++new line + line3`; + + const result = await strategy.applyDiff(original, nonMatchingDiff); + expect(result.success).toBe(false); + }); + + it('should handle overlapping hunks', async () => { + const original = `line1 +line2 +line3 +line4 +line5`; + const overlappingDiff = `--- a/file.txt ++++ b/file.txt +@@ ... @@ + line1 + line2 +-line3 ++modified3 + line4 +@@ ... @@ + line2 +-line3 +-line4 ++modified3and4 + line5`; + + const result = await strategy.applyDiff(original, overlappingDiff); + expect(result.success).toBe(false); + }); + + it('should handle empty lines modifications', async () => { + const original = `line1 + +line3 + +line5`; + const emptyLinesDiff = `--- a/file.txt ++++ b/file.txt +@@ ... @@ + line1 + +-line3 ++line3modified + + line5`; + + const result = await strategy.applyDiff(original, emptyLinesDiff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(`line1 + +line3modified + +line5`); + } + }); + + it('should handle mixed line endings in diff', async () => { + const original = 'line1\r\nline2\nline3\r\n'; + const mixedEndingsDiff = `--- a/file.txt ++++ b/file.txt +@@ ... 
@@ + line1\r +-line2 ++modified2\r + line3`; + + const result = await strategy.applyDiff(original, mixedEndingsDiff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe('line1\r\nmodified2\r\nline3\r\n'); + } + }); + + it('should handle partial line modifications', async () => { + const original = 'const value = oldValue + 123;'; + const partialDiff = `--- a/file.txt ++++ b/file.txt +@@ ... @@ +-const value = oldValue + 123; ++const value = newValue + 123;`; + + const result = await strategy.applyDiff(original, partialDiff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe('const value = newValue + 123;'); + } + }); + + it('should handle slightly malformed but recoverable diff', async () => { + const original = 'line1\nline2\nline3'; + // Missing space after --- and +++ + const slightlyBadDiff = `---a/file.txt ++++b/file.txt +@@ ... @@ + line1 +-line2 ++new line + line3`; + + const result = await strategy.applyDiff(original, slightlyBadDiff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe('line1\nnew line\nline3'); + } + }); + }); + + describe('similar code sections', () => { + it('should correctly modify the right section when similar code exists', async () => { + const original = `function add(a, b) { + return a + b; +} + +function subtract(a, b) { + return a - b; +} + +function multiply(a, b) { + return a + b; // Bug here +}`; + + const diff = `--- a/math.js ++++ b/math.js +@@ ... 
@@ + function multiply(a, b) { +- return a + b; // Bug here ++ return a * b; + }`; + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(`function add(a, b) { + return a + b; +} + +function subtract(a, b) { + return a - b; +} + +function multiply(a, b) { + return a * b; +}`); + } + }); + + it('should handle multiple similar sections with correct context', async () => { + const original = `if (condition) { + doSomething(); + doSomething(); + doSomething(); +} + +if (otherCondition) { + doSomething(); + doSomething(); + doSomething(); +}`; + + const diff = `--- a/file.js ++++ b/file.js +@@ ... @@ + if (otherCondition) { + doSomething(); +- doSomething(); ++ doSomethingElse(); + doSomething(); + }`; + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(`if (condition) { + doSomething(); + doSomething(); + doSomething(); +} + +if (otherCondition) { + doSomething(); + doSomethingElse(); + doSomething(); +}`); + } + }); + }); }); \ No newline at end of file From a323a1008e8e609c8731a67d2e3377606876d392 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:00:51 -0500 Subject: [PATCH 21/47] refactor: enhance Git fallback strategy in edit processing - Improved logging within the applyGitFallback function to provide clearer insights during the commit and cherry-pick processes. - Streamlined error handling to ensure more informative console outputs when operations fail. - Maintained consistent formatting and indentation for better readability and maintainability of the code. - Ensured temporary directory cleanup is handled correctly in all scenarios, preventing potential resource leaks. 
--- .../strategies/new-unified/edit-strategies.ts | 205 +++++++++--------- 1 file changed, 99 insertions(+), 106 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 3956bfa..e64173e 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -134,125 +134,118 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): // Git fallback strategy that works with full content async function applyGitFallback(hunk: Hunk, content: string[]): Promise { - let tmpDir: tmp.DirResult | undefined; - - try { - // Create temporary directory - tmpDir = tmp.dirSync({ unsafeCleanup: true }); - const git: SimpleGit = simpleGit(tmpDir.name); - - // Initialize git repo - await git.init(); - await git.addConfig('user.name', 'Temp'); - await git.addConfig('user.email', 'temp@example.com'); + let tmpDir: tmp.DirResult | undefined; + + try { + tmpDir = tmp.dirSync({ unsafeCleanup: true }); + const git: SimpleGit = simpleGit(tmpDir.name); + + await git.init(); + await git.addConfig('user.name', 'Temp'); + await git.addConfig('user.email', 'temp@example.com'); - const filePath = path.join(tmpDir.name, 'file.txt'); + const filePath = path.join(tmpDir.name, 'file.txt'); - // Build the search text (context + removals) - const searchLines = hunk.changes - .filter(change => change.type === 'context' || change.type === 'remove') - .map(change => change.originalLine || (change.indent + change.content)); - - // Build the replace text (context + additions) - const replaceLines = hunk.changes - .filter(change => change.type === 'context' || change.type === 'add') - .map(change => change.originalLine || (change.indent + change.content)); + const searchLines = hunk.changes + .filter(change => change.type === 'context' || change.type === 'remove') + .map(change => change.originalLine || (change.indent + 
change.content)); + + const replaceLines = hunk.changes + .filter(change => change.type === 'context' || change.type === 'add') + .map(change => change.originalLine || (change.indent + change.content)); - const searchText = searchLines.join('\n'); - const replaceText = replaceLines.join('\n'); - const originalText = content.join('\n'); + const searchText = searchLines.join('\n'); + const replaceText = replaceLines.join('\n'); + const originalText = content.join('\n'); - // Strategy 1: O->S->R, cherry-pick R onto O - try { - // Original commit - use full file content - fs.writeFileSync(filePath, originalText); - await git.add('file.txt'); - const originalCommit = await git.commit('original'); + try { + fs.writeFileSync(filePath, originalText); + await git.add('file.txt'); + const originalCommit = await git.commit('original'); + console.log('Strategy 1 - Original commit:', originalCommit.commit); - // Search commit - just the search text - fs.writeFileSync(filePath, searchText); - await git.add('file.txt'); - await git.commit('search'); + fs.writeFileSync(filePath, searchText); + await git.add('file.txt'); + const searchCommit1 = await git.commit('search'); + console.log('Strategy 1 - Search commit:', searchCommit1.commit); - // Replace commit - just the replace text - fs.writeFileSync(filePath, replaceText); - await git.add('file.txt'); - const replaceCommit = await git.commit('replace'); + fs.writeFileSync(filePath, replaceText); + await git.add('file.txt'); + const replaceCommit = await git.commit('replace'); + console.log('Strategy 1 - Replace commit:', replaceCommit.commit); - // Go back to original and cherry-pick - await git.checkout(originalCommit.commit); - try { - await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]); - - // Read result - const newText = fs.readFileSync(filePath, 'utf-8'); - const newLines = newText.split('\n'); - return { - confidence: 1, - result: newLines, - strategy: 'git-fallback' - }; - } catch (cherryPickError) { - 
console.log('Strategy 1 failed with merge conflict'); - } - } catch (error) { - console.log('Strategy 1 failed:', error); - } + console.log('Strategy 1 - Attempting checkout of:', originalCommit.commit); + await git.raw(['checkout', originalCommit.commit]); + try { + console.log('Strategy 1 - Attempting cherry-pick of:', replaceCommit.commit); + await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]); + + const newText = fs.readFileSync(filePath, 'utf-8'); + const newLines = newText.split('\n'); + return { + confidence: 1, + result: newLines, + strategy: 'git-fallback' + }; + } catch (cherryPickError) { + console.error('Strategy 1 failed with merge conflict'); + } + } catch (error) { + console.error('Strategy 1 failed:', error); + } - // Strategy 2: S->R, S->O, cherry-pick R onto O - try { - // Reset repo - await git.init(); - await git.addConfig('user.name', 'Temp'); - await git.addConfig('user.email', 'temp@example.com'); + try { + await git.init(); + await git.addConfig('user.name', 'Temp'); + await git.addConfig('user.email', 'temp@example.com'); - // Search commit - just the search text - fs.writeFileSync(filePath, searchText); - await git.add('file.txt'); - const searchCommit = await git.commit('search'); + fs.writeFileSync(filePath, searchText); + await git.add('file.txt'); + const searchCommit = await git.commit('search'); + const searchHash = searchCommit.commit.replace(/^HEAD /, ''); + console.log('Strategy 2 - Search commit:', searchHash); - // Replace commit - just the replace text - fs.writeFileSync(filePath, replaceText); - await git.add('file.txt'); - const replaceCommit = await git.commit('replace'); + fs.writeFileSync(filePath, replaceText); + await git.add('file.txt'); + const replaceCommit = await git.commit('replace'); + const replaceHash = replaceCommit.commit.replace(/^HEAD /, ''); + console.log('Strategy 2 - Replace commit:', replaceHash); - // Go back to search and create original with full file content - await 
git.checkout(searchCommit.commit); - fs.writeFileSync(filePath, originalText); - await git.add('file.txt'); - await git.commit('original'); + console.log('Strategy 2 - Attempting checkout of:', searchHash); + await git.raw(['checkout', searchHash]); + fs.writeFileSync(filePath, originalText); + await git.add('file.txt'); + const originalCommit2 = await git.commit('original'); + console.log('Strategy 2 - Original commit:', originalCommit2.commit); - try { - // Cherry-pick replace onto original - await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]); - - // Read result - const newText = fs.readFileSync(filePath, 'utf-8'); - const newLines = newText.split('\n'); - return { - confidence: 1, - result: newLines, - strategy: 'git-fallback' - }; - } catch (cherryPickError) { - console.log('Strategy 2 failed with merge conflict'); - } - } catch (error) { - console.log('Strategy 2 failed:', error); - } + try { + console.log('Strategy 2 - Attempting cherry-pick of:', replaceHash); + await git.raw(['cherry-pick', '--minimal', replaceHash]); + + const newText = fs.readFileSync(filePath, 'utf-8'); + const newLines = newText.split('\n'); + return { + confidence: 1, + result: newLines, + strategy: 'git-fallback' + }; + } catch (cherryPickError) { + console.error('Strategy 2 failed with merge conflict'); + } + } catch (error) { + console.error('Strategy 2 failed:', error); + } - // If both strategies fail, return no confidence - console.log('Git fallback failed'); - return { confidence: 0, result: content, strategy: 'git-fallback' }; - } catch (error) { - console.log('Git fallback strategy failed:', error); - return { confidence: 0, result: content, strategy: 'git-fallback' }; - } finally { - // Clean up temporary directory - if (tmpDir) { - tmpDir.removeCallback(); - } - } + console.error('Git fallback failed'); + return { confidence: 0, result: content, strategy: 'git-fallback' }; + } catch (error) { + console.error('Git fallback strategy failed:', error); + return { 
confidence: 0, result: content, strategy: 'git-fallback' }; + } finally { + if (tmpDir) { + tmpDir.removeCallback(); + } + } } // Main edit function that tries strategies sequentially From b30cb293585ff5090b58c0476f39ec38a4b36d4d Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:01:46 -0500 Subject: [PATCH 22/47] refactor: cleanup --- .../strategies/new-unified/search-strategies.ts | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 4dfb2c5..16b0ee8 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -76,35 +76,24 @@ export function getDMPSimilarity(original: string, modified: string): number { export function validateEditResult(hunk: Hunk, result: string, strategy: string): number { const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)) - // Create skeleton of original content (context + removed lines) const originalSkeleton = hunkDeepCopy.changes .filter((change) => change.type === "context" || change.type === "remove") .map((change) => change.content) .join("\n") - // Create skeleton of expected result (context + added lines) const expectedSkeleton = hunkDeepCopy.changes .filter((change) => change.type === "context" || change.type === "add") .map((change) => change.content) .join("\n") - // Compare with original content const originalSimilarity = evaluateSimilarity(originalSkeleton, result) - console.log("originalSimilarity ", strategy, originalSimilarity) - - // Compare with expected result const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result) - console.log("expectedSimilarity", strategy, expectedSimilarity) - console.log("result", result) - - // If original similarity is 1 and expected similarity is not 1, it means changes weren't applied if (originalSimilarity > 0.97 && 
expectedSimilarity !== 1) { if (originalSimilarity === 1) { - // If original similarity is 1, it means changes weren't applied if (originalSimilarity > 0.97) { if (originalSimilarity === 1) { - return 0.5 // Significant confidence reduction + return 0.5 } else { return 0.8 } @@ -114,7 +103,6 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string) } } - // Scale between 0.98 and 1.0 (4% impact) based on expected similarity const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1 return multiplier @@ -404,7 +392,6 @@ export function findBestMatch(searchStr: string, content: string[], startIndex: for (const strategy of strategies) { const result = strategy(searchStr, content, startIndex) - console.log("Search result:", result) if (result.confidence > bestResult.confidence) { bestResult = result } From 5c420bb7e1f4ccb18a9e7e20cff004df2fa46fbd Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:02:39 -0500 Subject: [PATCH 23/47] refactor: cleanup --- src/core/diff/strategies/new-unified/index.ts | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 957a454..e06735d 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -24,12 +24,10 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { currentHunk.changes.length > 0 && currentHunk.changes.some((change) => change.type === "add" || change.type === "remove") ) { - // Trim excess context, keeping only MAX_CONTEXT_LINES before/after changes const changes = currentHunk.changes let startIdx = 0 let endIdx = changes.length - 1 - // Find first non-context line for (let j = 0; j < changes.length; j++) { if (changes[j].type !== "context") { startIdx = Math.max(0, j - MAX_CONTEXT_LINES) @@ -37,7 +35,6 @@ export class NewUnifiedDiffStrategy implements 
DiffStrategy { } } - // Find last non-context line for (let j = changes.length - 1; j >= 0; j--) { if (changes[j].type !== "context") { endIdx = Math.min(changes.length - 1, j + MAX_CONTEXT_LINES) @@ -56,8 +53,7 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { continue } - // Extract the complete indentation for each line - const content = line.slice(1) // Remove the diff marker + const content = line.slice(1) const indentMatch = content.match(/^(\s*)/) const indent = indentMatch ? indentMatch[0] : "" const trimmedContent = content.slice(indent.length) @@ -84,7 +80,6 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { originalLine: content, }) } else { - // Assume is a context line and add a space if it's empty const finalContent = trimmedContent ? " " + trimmedContent : " " currentHunk.changes.push({ type: "context", @@ -102,7 +97,7 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { ) { hunks.push(currentHunk) } - + return { hunks } } From a211927097444cf50766b1fa784579cdc1119d32 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:09:20 -0500 Subject: [PATCH 24/47] refactor: enhance error handling and debugging in NewUnifiedDiffStrategy - Improved error messages for cases where no hunks are found in the diff, providing clearer guidance on format requirements. - Added detailed debug information for search and edit failures, including context line ratios and potential issues to assist users in troubleshooting. - Streamlined the logic for determining the cause of edit failures, distinguishing between search and content mismatch errors. - Enhanced overall readability and maintainability of the code with consistent formatting and structured comments. 
--- src/core/diff/strategies/new-unified/index.ts | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index e06735d..86495f9 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -191,18 +191,61 @@ Your diff here let result = [...originalLines] if (!parsedDiff.hunks.length) { - return { success: false, error: "No hunks found in diff" } + return { + success: false, + error: "No hunks found in diff. Please ensure your diff includes actual changes and follows the unified diff format." + } } for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) - const { index: matchPosition, confidence } = findBestMatch(contextStr, result) + const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result) const editResult = await applyEdit(hunk, result, matchPosition, confidence, '') if (editResult.confidence > MIN_CONFIDENCE) { result = editResult.result } else { - return { success: false, error: `Failed to apply edit using ${editResult.strategy} strategy` } + // Determine if the failure is due to search or edit + if (confidence < MIN_CONFIDENCE) { + // Search failure - likely due to context not matching + const contextLines = hunk.changes.filter(c => c.type === "context").length + const totalLines = hunk.changes.length + const contextRatio = contextLines / totalLines + + let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(MIN_CONFIDENCE * 100)}%)\n\n` + errorMsg += "Debug Info:\n" + errorMsg += `- Search Strategy Used: ${strategy}\n` + errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n` + + if (contextRatio < 0.2) { + errorMsg += "\nPossible Issues:\n" + errorMsg += "- Not enough context lines to 
uniquely identify the location\n" + errorMsg += "- Add a few more lines of unchanged code around your changes\n" + } else if (contextRatio > 0.5) { + errorMsg += "\nPossible Issues:\n" + errorMsg += "- Too many context lines may reduce search accuracy\n" + errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n" + } + + if (startLine && endLine) { + errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n` + } + + return { success: false, error: errorMsg } + } else { + // Edit failure - likely due to content mismatch + let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n` + errorMsg += "Debug Info:\n" + errorMsg += "- The location was found but the content didn't match exactly\n" + errorMsg += "- This usually means the file has been modified since the diff was created\n" + errorMsg += "- Or the diff may be targeting a different version of the file\n" + errorMsg += "\nPossible Solutions:\n" + errorMsg += "1. Refresh your view of the file and create a new diff\n" + errorMsg += "2. Double-check that the removed lines (-) match the current file content\n" + errorMsg += "3. Ensure your diff targets the correct version of the file" + + return { success: false, error: errorMsg } + } } } From f6e85fa1333c5f2ca58ff84c3dac98dc5c0c9be5 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:57:09 -0500 Subject: [PATCH 25/47] feat: introduce experimental diff strategy toggle and enhance diff handling - Added support for an experimental diff strategy in the Cline class, allowing users to opt for a new unified diff approach. - Updated the getDiffStrategy function to accommodate the experimental strategy, adjusting the fuzzy match threshold accordingly. - Integrated experimentalDiffStrategy into the global state management, enabling persistence across sessions. 
- Enhanced the ClineProvider and related components to handle the new experimental strategy, including UI updates for user settings. - Improved task history management to include the experimentalDiffStrategy setting, ensuring consistency in task execution. - Updated relevant interfaces and types to reflect the new experimentalDiffStrategy property. --- src/core/Cline.ts | 25 ++++++----- src/core/diff/DiffStrategy.ts | 12 ++++-- src/core/webview/ClineProvider.ts | 41 +++++++++++++++---- src/shared/ExtensionMessage.ts | 1 + src/shared/HistoryItem.ts | 1 + src/shared/WebviewMessage.ts | 1 + .../src/components/settings/SettingsView.tsx | 26 +++++++++++- .../src/context/ExtensionStateContext.tsx | 9 +++- 8 files changed, 89 insertions(+), 27 deletions(-) diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 8e25679..e76b1fb 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -51,6 +51,7 @@ import { detectCodeOmission } from "../integrations/editor/detect-omission" import { BrowserSession } from "../services/browser/BrowserSession" import { OpenRouterHandler } from "../api/providers/openrouter" import { McpHub } from "../services/mcp/McpHub" +import crypto from "crypto" const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? 
path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution @@ -105,26 +106,30 @@ export class Cline { task?: string | undefined, images?: string[] | undefined, historyItem?: HistoryItem | undefined, + experimentalDiffStrategy?: boolean, ) { - this.providerRef = new WeakRef(provider) + this.taskId = crypto.randomUUID() this.api = buildApiHandler(apiConfiguration) this.terminalManager = new TerminalManager() this.urlContentFetcher = new UrlContentFetcher(provider.context) this.browserSession = new BrowserSession(provider.context) - this.diffViewProvider = new DiffViewProvider(cwd) this.customInstructions = customInstructions this.diffEnabled = enableDiff ?? false - if (this.diffEnabled && this.api.getModel().id) { - this.diffStrategy = getDiffStrategy(this.api.getModel().id, fuzzyMatchThreshold ?? 1.0) - } + + // Prioritize experimentalDiffStrategy from history item if available + const effectiveExperimentalDiffStrategy = historyItem?.experimentalDiffStrategy ?? 
experimentalDiffStrategy + this.diffStrategy = getDiffStrategy(this.api.getModel().id, fuzzyMatchThreshold, effectiveExperimentalDiffStrategy) + this.diffViewProvider = new DiffViewProvider(cwd) + this.providerRef = new WeakRef(provider) + if (historyItem) { this.taskId = historyItem.id - this.resumeTaskFromHistory() - } else if (task || images) { - this.taskId = Date.now().toString() + } + + if (task || images) { this.startTask(task, images) - } else { - throw new Error("Either historyItem or task/images must be provided") + } else if (historyItem) { + this.resumeTaskFromHistory() } } diff --git a/src/core/diff/DiffStrategy.ts b/src/core/diff/DiffStrategy.ts index 5d71ed6..2a05417 100644 --- a/src/core/diff/DiffStrategy.ts +++ b/src/core/diff/DiffStrategy.ts @@ -7,10 +7,14 @@ import { NewUnifiedDiffStrategy } from './strategies/new-unified' * @param model The name of the model being used (e.g., 'gpt-4', 'claude-3-opus') * @returns The appropriate diff strategy for the model */ -export function getDiffStrategy(model: string, fuzzyMatchThreshold?: number): DiffStrategy { - // For now, return SearchReplaceDiffStrategy for all models - // This architecture allows for future optimizations based on model capabilities - return new NewUnifiedDiffStrategy() +export function getDiffStrategy(model: string, fuzzyMatchThreshold?: number, experimentalDiffStrategy?: boolean): DiffStrategy { + if (experimentalDiffStrategy) { + // Use the fuzzyMatchThreshold with a minimum of 0.8 (80%) + const threshold = Math.max(fuzzyMatchThreshold ?? 
1.0, 0.8) + return new NewUnifiedDiffStrategy(threshold) + } + // Default to the stable SearchReplaceDiffStrategy + return new SearchReplaceDiffStrategy() } export type { DiffStrategy } diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 579a12e..67b9208 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -85,6 +85,7 @@ type GlobalStateKey = | "mcpEnabled" | "alwaysApproveResubmit" | "requestDelaySeconds" + | "experimentalDiffStrategy" export const GlobalFileNames = { apiConversationHistory: "api_conversation_history.json", uiMessages: "ui_messages.json", @@ -233,7 +234,8 @@ export class ClineProvider implements vscode.WebviewViewProvider { apiConfiguration, customInstructions, diffEnabled, - fuzzyMatchThreshold + fuzzyMatchThreshold, + experimentalDiffStrategy } = await this.getState() this.cline = new Cline( @@ -243,7 +245,9 @@ export class ClineProvider implements vscode.WebviewViewProvider { diffEnabled, fuzzyMatchThreshold, task, - images + images, + undefined, + experimentalDiffStrategy ) } @@ -253,7 +257,8 @@ export class ClineProvider implements vscode.WebviewViewProvider { apiConfiguration, customInstructions, diffEnabled, - fuzzyMatchThreshold + fuzzyMatchThreshold, + experimentalDiffStrategy } = await this.getState() this.cline = new Cline( @@ -264,7 +269,8 @@ export class ClineProvider implements vscode.WebviewViewProvider { fuzzyMatchThreshold, undefined, undefined, - historyItem + historyItem, + experimentalDiffStrategy ) } @@ -805,6 +811,10 @@ export class ClineProvider implements vscode.WebviewViewProvider { } break } + case "experimentalDiffStrategy": + await this.updateGlobalState("experimentalDiffStrategy", message.bool ?? 
false) + await this.postStateToWebview() + break } }, null, @@ -1155,7 +1165,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { uiMessagesFilePath: string apiConversationHistory: Anthropic.MessageParam[] }> { - const history = ((await this.getGlobalState("taskHistory")) as HistoryItem[] | undefined) || [] + const history = (await this.getGlobalState("taskHistory") as HistoryItem[] | undefined) || [] const historyItem = history.find((item) => item.id === id) if (historyItem) { const taskDirPath = path.join(this.context.globalStorageUri.fsPath, "tasks", id) @@ -1220,7 +1230,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { async deleteTaskFromState(id: string) { // Remove the task from history - const taskHistory = ((await this.getGlobalState("taskHistory")) as HistoryItem[]) || [] + const taskHistory = (await this.getGlobalState("taskHistory") as HistoryItem[]) || [] const updatedTaskHistory = taskHistory.filter((task) => task.id !== id) await this.updateGlobalState("taskHistory", updatedTaskHistory) @@ -1256,6 +1266,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { mcpEnabled, alwaysApproveResubmit, requestDelaySeconds, + experimentalDiffStrategy, } = await this.getState() const allowedCommands = vscode.workspace @@ -1290,6 +1301,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { mcpEnabled: mcpEnabled ?? true, alwaysApproveResubmit: alwaysApproveResubmit ?? false, requestDelaySeconds: requestDelaySeconds ?? 5, + experimentalDiffStrategy: experimentalDiffStrategy ?? 
false, } } @@ -1397,6 +1409,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { mcpEnabled, alwaysApproveResubmit, requestDelaySeconds, + experimentalDiffStrategy, ] = await Promise.all([ this.getGlobalState("apiProvider") as Promise, this.getGlobalState("apiModelId") as Promise, @@ -1449,6 +1462,7 @@ export class ClineProvider implements vscode.WebviewViewProvider { this.getGlobalState("mcpEnabled") as Promise, this.getGlobalState("alwaysApproveResubmit") as Promise, this.getGlobalState("requestDelaySeconds") as Promise, + this.getGlobalState("experimentalDiffStrategy") as Promise, ]) let apiProvider: ApiProvider @@ -1545,16 +1559,25 @@ export class ClineProvider implements vscode.WebviewViewProvider { mcpEnabled: mcpEnabled ?? true, alwaysApproveResubmit: alwaysApproveResubmit ?? false, requestDelaySeconds: requestDelaySeconds ?? 5, + experimentalDiffStrategy: experimentalDiffStrategy ?? false, } } async updateTaskHistory(item: HistoryItem): Promise { - const history = ((await this.getGlobalState("taskHistory")) as HistoryItem[]) || [] + const history = (await this.getGlobalState("taskHistory") as HistoryItem[] | undefined) || [] const existingItemIndex = history.findIndex((h) => h.id === item.id) + + // Ensure experimentalDiffStrategy is included from current settings if not already set + const { experimentalDiffStrategy } = await this.getState() ?? {} + const updatedItem = { + ...item, + experimentalDiffStrategy: item.experimentalDiffStrategy ?? 
experimentalDiffStrategy + } + if (existingItemIndex !== -1) { - history[existingItemIndex] = item + history[existingItemIndex] = updatedItem } else { - history.push(item) + history.push(updatedItem) } await this.updateGlobalState("taskHistory", history) return history diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 6b877a0..b911e01 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -70,6 +70,7 @@ export interface ExtensionState { writeDelayMs: number terminalOutputLineLimit?: number mcpEnabled: boolean + experimentalDiffStrategy?: boolean } export interface ClineMessage { diff --git a/src/shared/HistoryItem.ts b/src/shared/HistoryItem.ts index d4539f6..4127b88 100644 --- a/src/shared/HistoryItem.ts +++ b/src/shared/HistoryItem.ts @@ -7,4 +7,5 @@ export type HistoryItem = { cacheWrites?: number cacheReads?: number totalCost: number + experimentalDiffStrategy?: boolean } diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 0ca7cb3..caeefe8 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -54,6 +54,7 @@ export interface WebviewMessage { | "searchCommits" | "alwaysApproveResubmit" | "requestDelaySeconds" + | "experimentalDiffStrategy" text?: string disabled?: boolean askResponse?: ClineAskResponse diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 956b76b..0049443 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -55,6 +55,8 @@ const SettingsView = ({ onDone }: SettingsViewProps) => { setAlwaysApproveResubmit, requestDelaySeconds, setRequestDelaySeconds, + experimentalDiffStrategy, + setExperimentalDiffStrategy, } = useExtensionState() const [apiErrorMessage, setApiErrorMessage] = useState(undefined) const [modelIdErrorMessage, setModelIdErrorMessage] = useState(undefined) @@ -89,6 +91,7 @@ const 
SettingsView = ({ onDone }: SettingsViewProps) => { vscode.postMessage({ type: "mcpEnabled", bool: mcpEnabled }) vscode.postMessage({ type: "alwaysApproveResubmit", bool: alwaysApproveResubmit }) vscode.postMessage({ type: "requestDelaySeconds", value: requestDelaySeconds }) + vscode.postMessage({ type: "experimentalDiffStrategy", bool: experimentalDiffStrategy }) onDone() } } @@ -252,7 +255,13 @@ const SettingsView = ({ onDone }: SettingsViewProps) => {
- setDiffEnabled(e.target.checked)}> + { + setDiffEnabled(e.target.checked) + if (!e.target.checked) { + // Reset experimental strategy when diffs are disabled + setExperimentalDiffStrategy(false) + } + }}> Enable editing through diffs

{ {diffEnabled && (

+
+ ⚠️ + setExperimentalDiffStrategy(e.target.checked)}> + Use experimental unified diff strategy + +
+

+ Enable the experimental unified diff strategy. This strategy might reduce the number of retries caused by model errors but may cause unexpected behavior or incorrect edits. + Only enable if you understand the risks and are willing to carefully review all changes. +

+
Match precision { {Math.round((fuzzyMatchThreshold || 1) * 100)}%
-

+

This slider controls how precisely code sections must match when applying diffs. Lower values allow more flexible matching but increase the risk of incorrect replacements. Use values below 100% with extreme caution.

diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 131364b..eef9b07 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -50,6 +50,8 @@ export interface ExtensionStateContextType extends ExtensionState { setAlwaysApproveResubmit: (value: boolean) => void requestDelaySeconds: number setRequestDelaySeconds: (value: number) => void + experimentalDiffStrategy: boolean + setExperimentalDiffStrategy: (value: boolean) => void } export const ExtensionStateContext = createContext(undefined) @@ -72,7 +74,8 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode terminalOutputLineLimit: 500, mcpEnabled: true, alwaysApproveResubmit: false, - requestDelaySeconds: 5 + requestDelaySeconds: 0, + experimentalDiffStrategy: false, }) const [didHydrateState, setDidHydrateState] = useState(false) const [showWelcome, setShowWelcome] = useState(false) @@ -208,7 +211,9 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode setTerminalOutputLineLimit: (value) => setState((prevState) => ({ ...prevState, terminalOutputLineLimit: value })), setMcpEnabled: (value) => setState((prevState) => ({ ...prevState, mcpEnabled: value })), setAlwaysApproveResubmit: (value) => setState((prevState) => ({ ...prevState, alwaysApproveResubmit: value })), - setRequestDelaySeconds: (value) => setState((prevState) => ({ ...prevState, requestDelaySeconds: value })) + setRequestDelaySeconds: (value) => setState((prevState) => ({ ...prevState, requestDelaySeconds: value })), + experimentalDiffStrategy: state.experimentalDiffStrategy ?? 
false, + setExperimentalDiffStrategy: (value) => setState((prevState) => ({ ...prevState, experimentalDiffStrategy: value })) } return {children} From e00ec0cc3f350f01c5ea2ff8bd12ad15e025302b Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:57:55 -0500 Subject: [PATCH 26/47] test: add unit tests for NewUnifiedDiffStrategy constructor and confidence threshold behavior - Introduced tests for the NewUnifiedDiffStrategy constructor to verify default and custom confidence thresholds. - Ensured that the minimum confidence threshold is enforced, preventing values below 0.8. - Enhanced test coverage for the strategy's initialization logic, improving overall reliability and maintainability. --- .../strategies/__tests__/new-unified.test.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/core/diff/strategies/__tests__/new-unified.test.ts b/src/core/diff/strategies/__tests__/new-unified.test.ts index 387988b..e1d568e 100644 --- a/src/core/diff/strategies/__tests__/new-unified.test.ts +++ b/src/core/diff/strategies/__tests__/new-unified.test.ts @@ -5,7 +5,24 @@ describe('main', () => { let strategy: NewUnifiedDiffStrategy beforeEach(() => { - strategy = new NewUnifiedDiffStrategy() + strategy = new NewUnifiedDiffStrategy(0.97) + }) + + describe('constructor', () => { + it('should use default confidence threshold when not provided', () => { + const defaultStrategy = new NewUnifiedDiffStrategy() + expect(defaultStrategy['confidenceThreshold']).toBe(0.9) + }) + + it('should use provided confidence threshold', () => { + const customStrategy = new NewUnifiedDiffStrategy(0.85) + expect(customStrategy['confidenceThreshold']).toBe(0.85) + }) + + it('should enforce minimum confidence threshold', () => { + const lowStrategy = new NewUnifiedDiffStrategy(0.7) // Below minimum of 0.8 + expect(lowStrategy['confidenceThreshold']).toBe(0.8) + }) }) describe('getToolDescription', () => { From 
e6d3db6075d51ab6b1759e5604899f2c5c89dd6d Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 17:59:46 -0500 Subject: [PATCH 27/47] refactor: use confidenceThreshold from the settings in new diff strategy --- .../strategies/new-unified/edit-strategies.ts | 88 ++++++++++--------- src/core/diff/strategies/new-unified/index.ts | 17 ++-- .../new-unified/search-strategies.ts | 21 ++--- 3 files changed, 69 insertions(+), 57 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index e64173e..05f5732 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -249,48 +249,54 @@ async function applyGitFallback(hunk: Hunk, content: string[]): Promise { - // Don't attempt regular edits if confidence is too low - const MIN_CONFIDENCE = 0.9; - if (confidence < MIN_CONFIDENCE && debug === '') { - console.log(`Search confidence (${confidence}) below minimum threshold (${MIN_CONFIDENCE}), trying git fallback...`); - return applyGitFallback(hunk, content); - } +export async function applyEdit( + hunk: Hunk, + content: string[], + matchPosition: number, + confidence: number, + debug: string = '', + minConfidence: number = 0.9 +): Promise { + // Don't attempt regular edits if confidence is too low + if (confidence < minConfidence && debug === '') { + console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`); + return applyGitFallback(hunk, content); + } - // Try each strategy in sequence until one succeeds - const strategies = [ - { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, - { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, - { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } - ]; + // Try each strategy in sequence until one succeeds + const strategies = [ + { name: 'dmp', 
apply: () => applyDMP(hunk, content, matchPosition) }, + { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, + { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } + ]; - if (debug !== '') { - // In debug mode, try all strategies including git fallback - const results = await Promise.all([ - ...strategies.map(async strategy => { - console.log(`Attempting edit with ${strategy.name} strategy...`); - const result = await strategy.apply(); - console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); - return result; - }) - ]); - - return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; - } else { - // Normal mode - try strategies sequentially until one succeeds - for (const strategy of strategies) { - const result = await strategy.apply(); - if (result.confidence === 1) { - return result; - } - } - // If all strategies fail, try git fallback - - const result = await applyGitFallback(hunk, content); - if(result.confidence === 1) { - return result; - } - } + if (debug !== '') { + // In debug mode, try all strategies including git fallback + const results = await Promise.all([ + ...strategies.map(async strategy => { + console.log(`Attempting edit with ${strategy.name} strategy...`); + const result = await strategy.apply(); + console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); + return result; + }) + ]); + + return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; + } else { + // Normal mode - try strategies sequentially until one succeeds + for (const strategy of strategies) { + const result = await strategy.apply(); + if (result.confidence === 1) { + return result; + } + } + // If all strategies fail, try git fallback + + const result = await applyGitFallback(hunk, content); + if(result.confidence === 1) { + return result; + } + } - return { 
confidence: 0, result: content, strategy: 'none' }; + return { confidence: 0, result: content, strategy: 'none' }; } diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 86495f9..7737d90 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -4,6 +4,12 @@ import { applyEdit } from "./edit-strategies" import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { + private readonly confidenceThreshold: number + + constructor(confidenceThreshold: number = 0.9) { + this.confidenceThreshold = Math.max(confidenceThreshold, 0.8) + } + private parseUnifiedDiff(diff: string): Diff { const MAX_CONTEXT_LINES = 6 // Number of context lines to keep before/after changes const lines = diff.split("\n") @@ -185,7 +191,6 @@ Your diff here startLine?: number, endLine?: number ): Promise { - const MIN_CONFIDENCE = 0.9 const parsedDiff = this.parseUnifiedDiff(diffContent) const originalLines = originalContent.split("\n") let result = [...originalLines] @@ -199,20 +204,20 @@ Your diff here for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) - const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result) + const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) - const editResult = await applyEdit(hunk, result, matchPosition, confidence, '') - if (editResult.confidence > MIN_CONFIDENCE) { + const editResult = await applyEdit(hunk, result, matchPosition, confidence, '', this.confidenceThreshold) + if (editResult.confidence >= this.confidenceThreshold) { result = editResult.result } else { // Determine if the failure is due to search or edit - if (confidence < MIN_CONFIDENCE) { + if (confidence < this.confidenceThreshold) { // Search failure - likely due to context not matching const 
contextLines = hunk.changes.filter(c => c.type === "context").length const totalLines = hunk.changes.length const contextRatio = contextLines / totalLines - let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(MIN_CONFIDENCE * 100)}%)\n\n` + let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n` errorMsg += "Debug Info:\n" errorMsg += `- Search Strategy Used: ${strategy}\n` errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n` diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 16b0ee8..05f2166 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -18,11 +18,11 @@ const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows const MAX_WINDOW_SIZE = 500 // maximum lines in a window // Helper function to calculate adaptive confidence threshold based on file size -function getAdaptiveThreshold(contentLength: number): number { +function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number { if (contentLength <= LARGE_FILE_THRESHOLD) { - return MIN_CONFIDENCE + return baseThreshold } - return MIN_CONFIDENCE_LARGE_FILE + return Math.max(baseThreshold - 0.07, 0.8) // Reduce threshold for large files but keep minimum at 80% } // Helper function to evaluate content uniqueness @@ -109,7 +109,7 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string) } // Helper function to validate context lines against original content -function validateContextLines(searchStr: string, content: string): number { +function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number { 
// Extract just the context lines from the search string const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines @@ -117,7 +117,7 @@ function validateContextLines(searchStr: string, content: string): number { const similarity = evaluateSimilarity(contextLines.join("\n"), content) // Get adaptive threshold based on content size - const threshold = getAdaptiveThreshold(content.split("\n").length) + const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold) // Calculate uniqueness boost const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n")) @@ -243,18 +243,17 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: } // String similarity strategy -export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult { const searchLines = searchStr.split("\n") let bestScore = 0 let bestIndex = -1 - const minScore = 0.8 for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { const windowStr = content.slice(i, i + searchLines.length).join("\n") const score = compareTwoStrings(searchStr, windowStr) if (score > bestScore && score >= minScore) { const similarity = getDMPSimilarity(searchStr, windowStr) - const contextSimilarity = validateContextLines(searchStr, windowStr) + const contextSimilarity = validateContextLines(searchStr, windowStr, minScore) const adjustedScore = Math.min(similarity, contextSimilarity) * score if (adjustedScore > bestScore) { @@ -385,13 +384,15 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex } // Main search function that tries all strategies -export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function 
findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult { const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" } for (const strategy of strategies) { - const result = strategy(searchStr, content, startIndex) + const result = strategy === findSimilarityMatch + ? strategy(searchStr, content, startIndex, minConfidence) + : strategy(searchStr, content, startIndex) if (result.confidence > bestResult.confidence) { bestResult = result } From f696f8e0f1a49581f598b732ebd7f8a9f54ac0c1 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Tue, 14 Jan 2025 18:03:03 -0500 Subject: [PATCH 28/47] refactor: remove debug functionality --- .../strategies/new-unified/edit-strategies.ts | 33 ++++--------------- src/core/diff/strategies/new-unified/index.ts | 2 +- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 05f5732..f8dc166 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -253,12 +253,11 @@ export async function applyEdit( hunk: Hunk, content: string[], matchPosition: number, - confidence: number, - debug: string = '', + confidence: number, minConfidence: number = 0.9 ): Promise { // Don't attempt regular edits if confidence is too low - if (confidence < minConfidence && debug === '') { + if (confidence < minConfidence) { console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`); return applyGitFallback(hunk, content); } @@ -270,30 +269,10 @@ export async function applyEdit( { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } ]; - if (debug !== '') { - // In debug mode, try all strategies including git fallback - 
const results = await Promise.all([ - ...strategies.map(async strategy => { - console.log(`Attempting edit with ${strategy.name} strategy...`); - const result = await strategy.apply(); - console.log(`Strategy ${strategy.name} succeeded with confidence ${result.confidence}`); - return result; - }) - ]); - - return results.find(result => result.strategy === debug) || { confidence: 0, result: content, strategy: 'none' }; - } else { - // Normal mode - try strategies sequentially until one succeeds - for (const strategy of strategies) { - const result = await strategy.apply(); - if (result.confidence === 1) { - return result; - } - } - // If all strategies fail, try git fallback - - const result = await applyGitFallback(hunk, content); - if(result.confidence === 1) { + // Try strategies sequentially until one succeeds + for (const strategy of strategies) { + const result = await strategy.apply(); + if (result.confidence >= minConfidence) { return result; } } diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 7737d90..3e70d98 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -206,7 +206,7 @@ Your diff here const contextStr = prepareSearchString(hunk.changes) const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) - const editResult = await applyEdit(hunk, result, matchPosition, confidence, '', this.confidenceThreshold) + const editResult = await applyEdit(hunk, result, matchPosition, confidence, this.confidenceThreshold) if (editResult.confidence >= this.confidenceThreshold) { result = editResult.result } else { From f9a453a44fef62315303e6da853800f2b53e3327 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 15 Jan 2025 10:54:25 -0500 Subject: [PATCH 29/47] refactor: update edit and search strategies to use configurable confidence thresholds - Modified applyContextMatching and 
applyDMP functions to accept a confidenceThreshold parameter, enhancing flexibility in edit strategies. - Updated validateEditResult and related functions to utilize the new confidenceThreshold, improving consistency across validation processes. - Adjusted findExactMatch, findSimilarityMatch, findLevenshteinMatch, and findAnchorMatch functions to incorporate confidenceThreshold, ensuring adaptive behavior based on user settings. --- .../strategies/new-unified/edit-strategies.ts | 20 +++---- .../new-unified/search-strategies.ts | 55 ++++++++----------- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index f8dc166..6f14f31 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -30,7 +30,7 @@ function inferIndentation(line: string, contextLines: string[], previousIndent: } // Context matching edit strategy -export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult { +export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult { if (matchPosition === -1) { return { confidence: 0, result: content, strategy: 'context' }; } @@ -71,7 +71,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio newResult.slice(matchPosition, matchPosition + windowSize).join('\n') ) - const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context'); + const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), confidenceThreshold); return { confidence: similarity * confidence, @@ -81,7 +81,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio } // DMP edit strategy -export function applyDMP(hunk: 
Hunk, content: string[], matchPosition: number): EditResult { +export function applyDMP(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult { if (matchPosition === -1) { return { confidence: 0, result: content, strategy: 'dmp' }; } @@ -123,7 +123,7 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): // Calculate confidence const similarity = getDMPSimilarity(beforeText, targetText); - const confidence = validateEditResult(hunk, patchedText, 'dmp'); + const confidence = validateEditResult(hunk, patchedText, confidenceThreshold); return { confidence: similarity * confidence, @@ -254,25 +254,25 @@ export async function applyEdit( content: string[], matchPosition: number, confidence: number, - minConfidence: number = 0.9 + confidenceThreshold: number = 0.97 ): Promise { // Don't attempt regular edits if confidence is too low - if (confidence < minConfidence) { - console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`); + if (confidence < confidenceThreshold) { + console.log(`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`); return applyGitFallback(hunk, content); } // Try each strategy in sequence until one succeeds const strategies = [ - { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) }, - { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) }, + { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition, confidenceThreshold) }, + { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition, confidenceThreshold) }, { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } ]; // Try strategies sequentially until one succeeds for (const strategy of strategies) { const result = await strategy.apply(); - if (result.confidence >= minConfidence) { + if (result.confidence >= confidenceThreshold) { 
return result; } } diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 05f2166..8b2aa70 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -9,16 +9,13 @@ export type SearchResult = { strategy: string } -//TODO: this should be configurable -const MIN_CONFIDENCE = 0.97 -const MIN_CONFIDENCE_LARGE_FILE = 0.9 const LARGE_FILE_THRESHOLD = 1000 // lines const UNIQUE_CONTENT_BOOST = 0.05 const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows const MAX_WINDOW_SIZE = 500 // maximum lines in a window // Helper function to calculate adaptive confidence threshold based on file size -function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number { +function getAdaptiveThreshold(contentLength: number, baseThreshold: number): number { if (contentLength <= LARGE_FILE_THRESHOLD) { return baseThreshold } @@ -69,11 +66,7 @@ export function getDMPSimilarity(original: string, modified: string): number { } // Helper function to validate edit results using hunk information -// Returns a confidence reduction value between 0 and 1 -// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95, -// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact. -// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction). 
-export function validateEditResult(hunk: Hunk, result: string, strategy: string): number { +export function validateEditResult(hunk: Hunk, result: string, confidenceThreshold: number): number { const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)) const originalSkeleton = hunkDeepCopy.changes @@ -90,26 +83,20 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string) const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result) if (originalSimilarity > 0.97 && expectedSimilarity !== 1) { - if (originalSimilarity === 1) { - if (originalSimilarity > 0.97) { if (originalSimilarity === 1) { return 0.5 - } else { - return 0.8 - } - } } else { return 0.8 } } - const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1 + const multiplier = expectedSimilarity < confidenceThreshold ? expectedSimilarity : 1 return multiplier } // Helper function to validate context lines against original content -function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number { +function validateContextLines(searchStr: string, content: string, confidenceThreshold: number): number { // Extract just the context lines from the search string const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines @@ -117,7 +104,7 @@ function validateContextLines(searchStr: string, content: string, baseThreshold: const similarity = evaluateSimilarity(contextLines.join("\n"), content) // Get adaptive threshold based on content size - const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold) + const threshold = getAdaptiveThreshold(content.split("\n").length, confidenceThreshold) // Calculate uniqueness boost const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n")) @@ -207,8 +194,7 @@ function combineOverlappingMatches( return combinedMatches } -// Modified search functions to use sliding windows 
-export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length) const matches: (SearchResult & { windowIndex: number })[] = [] @@ -226,7 +212,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: .join("\n") const similarity = getDMPSimilarity(searchStr, matchedContent) - const contextSimilarity = validateContextLines(searchStr, matchedContent) + const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) matches.push({ @@ -243,7 +229,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: } // String similarity strategy -export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult { +export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") let bestScore = 0 let bestIndex = -1 @@ -251,9 +237,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI for (let i = startIndex; i < content.length - searchLines.length + 1; i++) { const windowStr = content.slice(i, i + searchLines.length).join("\n") const score = compareTwoStrings(searchStr, windowStr) - if (score > bestScore && score >= minScore) { + if (score > bestScore && score >= confidenceThreshold) { const similarity = getDMPSimilarity(searchStr, windowStr) - const contextSimilarity = validateContextLines(searchStr, windowStr, minScore) + const contextSimilarity = validateContextLines(searchStr, windowStr, 
confidenceThreshold) const adjustedScore = Math.min(similarity, contextSimilarity) * score if (adjustedScore > bestScore) { @@ -271,7 +257,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI } // Levenshtein strategy -export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") const candidates = [] @@ -283,7 +269,7 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start const closestMatch = closest(searchStr, candidates) const index = startIndex + candidates.indexOf(closestMatch) const similarity = getDMPSimilarity(searchStr, closestMatch) - const contextSimilarity = validateContextLines(searchStr, closestMatch) + const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) return { index, @@ -355,7 +341,7 @@ function validateAnchorPositions( } // Anchor-based search strategy -export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult { +export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { const searchLines = searchStr.split("\n") const anchors = identifyAnchors(searchStr, content.slice(startIndex)) @@ -370,7 +356,7 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex const matchPosition = startIndex + offset const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n") const similarity = getDMPSimilarity(searchStr, matchedContent) - const contextSimilarity = validateContextLines(searchStr, matchedContent) + const contextSimilarity = validateContextLines(searchStr, 
matchedContent, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight return { @@ -384,15 +370,18 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex } // Main search function that tries all strategies -export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult { - const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] +export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { + const strategies = [ + findExactMatch, + findAnchorMatch, + findSimilarityMatch, + findLevenshteinMatch + ] let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" } for (const strategy of strategies) { - const result = strategy === findSimilarityMatch - ? strategy(searchStr, content, startIndex, minConfidence) - : strategy(searchStr, content, startIndex) + const result = strategy(searchStr, content, startIndex, confidenceThreshold) if (result.confidence > bestResult.confidence) { bestResult = result } From 12f4cc739eadab80bcfa9e2700e4b3da40eef3b3 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 15 Jan 2025 11:59:48 -0500 Subject: [PATCH 30/47] fix: correct syntax in GlobalStateKey type definition by removing trailing comma --- src/core/webview/ClineProvider.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 4394c2b..68b8d29 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -96,7 +96,7 @@ type GlobalStateKey = | "mode" | "modeApiConfigs" | "customPrompts" - | "enhancementApiConfigId", + | "enhancementApiConfigId" | "experimentalDiffStrategy" export const GlobalFileNames = { From 
22069e8056e7823b67cbd9eb6b61cc235636c64d Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 15 Jan 2025 12:22:20 -0500 Subject: [PATCH 31/47] fix: remove misplaced async --- .../strategies/__tests__/search-replace.test.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/diff/strategies/__tests__/search-replace.test.ts b/src/core/diff/strategies/__tests__/search-replace.test.ts index be81e24..ce17d31 100644 --- a/src/core/diff/strategies/__tests__/search-replace.test.ts +++ b/src/core/diff/strategies/__tests__/search-replace.test.ts @@ -621,8 +621,8 @@ function five() { }) }) - describe('line number stripping', async () => { - describe('line number stripping', async () => { + describe('line number stripping', () => { + describe('line number stripping', () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { @@ -788,14 +788,14 @@ function five() { }) }); - describe('insertion/deletion', async () => { + describe('insertion/deletion', () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy() }) - describe('deletion', async () => { + describe('deletion', () => { it('should delete code when replace block is empty', async () => { const originalContent = `function test() { console.log("hello"); @@ -877,7 +877,7 @@ function five() { }) }) - describe('insertion', async () => { + describe('insertion', () => { it('should insert code at specified line when search block is empty', async () => { const originalContent = `function test() { const x = 1; @@ -988,7 +988,7 @@ console.log("test"); }) }) - describe('fuzzy matching', async () => { + describe('fuzzy matching', () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { strategy = new SearchReplaceDiffStrategy(0.9, 5) // 90% similarity threshold, 5 line buffer for tests @@ -1073,7 +1073,7 @@ function sum(a, b) { }) }) - describe('line-constrained search', async () => { + describe('line-constrained search', 
() => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { @@ -1512,7 +1512,7 @@ function two() { }) }) - describe('getToolDescription', async () => { + describe('getToolDescription', () => { let strategy: SearchReplaceDiffStrategy beforeEach(() => { From 6c8d7f4951c31293d14e4ec3f147e74be5ff99da Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 15 Jan 2025 12:23:51 -0500 Subject: [PATCH 32/47] fix: update tests to handle new experimental diff option and increase the default confidence to 1 --- src/core/Cline.ts | 8 ++++++-- src/core/__tests__/Cline.test.ts | 4 ++-- src/core/diff/DiffStrategy.ts | 9 +++------ src/core/diff/strategies/__tests__/new-unified.test.ts | 2 +- src/core/diff/strategies/new-unified/index.ts | 4 ++-- src/core/webview/__tests__/ClineProvider.test.ts | 2 ++ 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/core/Cline.ts b/src/core/Cline.ts index ab0ab26..cbac9df 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -107,8 +107,12 @@ export class Cline { task?: string | undefined, images?: string[] | undefined, historyItem?: HistoryItem | undefined, - experimentalDiffStrategy?: boolean, + experimentalDiffStrategy: boolean = false, ) { + if (!task && !images && !historyItem) { + throw new Error('Either historyItem or task/images must be provided'); + } + this.taskId = crypto.randomUUID() this.api = buildApiHandler(apiConfiguration) this.terminalManager = new TerminalManager() @@ -119,7 +123,7 @@ export class Cline { // Prioritize experimentalDiffStrategy from history item if available const effectiveExperimentalDiffStrategy = historyItem?.experimentalDiffStrategy ?? experimentalDiffStrategy - this.diffStrategy = getDiffStrategy(this.api.getModel().id, fuzzyMatchThreshold, effectiveExperimentalDiffStrategy) + this.diffStrategy = getDiffStrategy(this.api.getModel().id, fuzzyMatchThreshold ?? 
1.0, effectiveExperimentalDiffStrategy) this.diffViewProvider = new DiffViewProvider(cwd) this.providerRef = new WeakRef(provider) diff --git a/src/core/__tests__/Cline.test.ts b/src/core/__tests__/Cline.test.ts index 66bdbf7..11e9f9d 100644 --- a/src/core/__tests__/Cline.test.ts +++ b/src/core/__tests__/Cline.test.ts @@ -322,7 +322,7 @@ describe('Cline', () => { expect(cline.diffEnabled).toBe(true); expect(cline.diffStrategy).toBeDefined(); - expect(getDiffStrategySpy).toHaveBeenCalledWith('claude-3-5-sonnet-20241022', 0.9); + expect(getDiffStrategySpy).toHaveBeenCalledWith('claude-3-5-sonnet-20241022', 0.9, false); getDiffStrategySpy.mockRestore(); }); @@ -341,7 +341,7 @@ describe('Cline', () => { expect(cline.diffEnabled).toBe(true); expect(cline.diffStrategy).toBeDefined(); - expect(getDiffStrategySpy).toHaveBeenCalledWith('claude-3-5-sonnet-20241022', 1.0); + expect(getDiffStrategySpy).toHaveBeenCalledWith('claude-3-5-sonnet-20241022', 1.0, false); getDiffStrategySpy.mockRestore(); }); diff --git a/src/core/diff/DiffStrategy.ts b/src/core/diff/DiffStrategy.ts index 2a05417..ac3a0c4 100644 --- a/src/core/diff/DiffStrategy.ts +++ b/src/core/diff/DiffStrategy.ts @@ -7,14 +7,11 @@ import { NewUnifiedDiffStrategy } from './strategies/new-unified' * @param model The name of the model being used (e.g., 'gpt-4', 'claude-3-opus') * @returns The appropriate diff strategy for the model */ -export function getDiffStrategy(model: string, fuzzyMatchThreshold?: number, experimentalDiffStrategy?: boolean): DiffStrategy { +export function getDiffStrategy(model: string, fuzzyMatchThreshold?: number, experimentalDiffStrategy: boolean = false): DiffStrategy { if (experimentalDiffStrategy) { - // Use the fuzzyMatchThreshold with a minimum of 0.8 (80%) - const threshold = Math.max(fuzzyMatchThreshold ?? 
1.0, 0.8) - return new NewUnifiedDiffStrategy(threshold) + return new NewUnifiedDiffStrategy(fuzzyMatchThreshold) } - // Default to the stable SearchReplaceDiffStrategy - return new SearchReplaceDiffStrategy() + return new SearchReplaceDiffStrategy(fuzzyMatchThreshold) } export type { DiffStrategy } diff --git a/src/core/diff/strategies/__tests__/new-unified.test.ts b/src/core/diff/strategies/__tests__/new-unified.test.ts index e1d568e..f8cb51d 100644 --- a/src/core/diff/strategies/__tests__/new-unified.test.ts +++ b/src/core/diff/strategies/__tests__/new-unified.test.ts @@ -11,7 +11,7 @@ describe('main', () => { describe('constructor', () => { it('should use default confidence threshold when not provided', () => { const defaultStrategy = new NewUnifiedDiffStrategy() - expect(defaultStrategy['confidenceThreshold']).toBe(0.9) + expect(defaultStrategy['confidenceThreshold']).toBe(1) }) it('should use provided confidence threshold', () => { diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 3e70d98..c8ca119 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -6,8 +6,8 @@ import { DiffResult, DiffStrategy } from "../../types" export class NewUnifiedDiffStrategy implements DiffStrategy { private readonly confidenceThreshold: number - constructor(confidenceThreshold: number = 0.9) { - this.confidenceThreshold = Math.max(confidenceThreshold, 0.8) + constructor(confidenceThreshold: number = 1) { + this.confidenceThreshold = Math.max(confidenceThreshold, 0.8); } private parseUnifiedDiff(diff: string): Diff { diff --git a/src/core/webview/__tests__/ClineProvider.test.ts b/src/core/webview/__tests__/ClineProvider.test.ts index ddcd7e1..389f333 100644 --- a/src/core/webview/__tests__/ClineProvider.test.ts +++ b/src/core/webview/__tests__/ClineProvider.test.ts @@ -610,6 +610,8 @@ describe('ClineProvider', () => { true, 1.0, 'Test task', + undefined, + 
undefined, undefined ); }); From 36b7da07d2ba00acf43a9c778f26308326c2f2e9 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Wed, 15 Jan 2025 12:55:44 -0500 Subject: [PATCH 33/47] fix: enhance error messaging in diff strategy to address potential issues with context lines and version targeting --- src/core/diff/strategies/new-unified/index.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index c8ca119..f212bb9 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -230,6 +230,10 @@ Your diff here errorMsg += "\nPossible Issues:\n" errorMsg += "- Too many context lines may reduce search accuracy\n" errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n" + } else { + errorMsg += "\nPossible Issues:\n" + errorMsg += "- The diff may be targeting a different version of the file\n" + errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n" } if (startLine && endLine) { From 04f6e4f03f27062eb358b64f0802169878e7bdb4 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 16 Jan 2025 12:49:02 -0500 Subject: [PATCH 34/47] feat: implement hunk splitting strategy in diff application to better handle large diffs that might fail if the search fails --- src/core/diff/strategies/new-unified/index.ts | 170 +++++++++++++----- 1 file changed, 125 insertions(+), 45 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index f212bb9..9eee4de 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -1,4 +1,4 @@ -import { Diff, Hunk } from "./types" +import { Diff, Hunk, Change } from "./types" import { findBestMatch, prepareSearchString } from "./search-strategies" import { applyEdit } from "./edit-strategies" import { 
DiffResult, DiffStrategy } from "../../types" @@ -185,6 +185,59 @@ Your diff here ` } + // Helper function to split a hunk into smaller hunks based on contiguous changes + private splitHunk(hunk: Hunk): Hunk[] { + const result: Hunk[] = [] + let currentHunk: Hunk | null = null + let contextBefore: Change[] = [] + let contextAfter: Change[] = [] + const MAX_CONTEXT_LINES = 3 // Keep 3 lines of context before/after changes + + for (let i = 0; i < hunk.changes.length; i++) { + const change = hunk.changes[i] + + if (change.type === 'context') { + if (!currentHunk) { + contextBefore.push(change) + if (contextBefore.length > MAX_CONTEXT_LINES) { + contextBefore.shift() + } + } else { + contextAfter.push(change) + if (contextAfter.length > MAX_CONTEXT_LINES) { + // We've collected enough context after changes, create a new hunk + currentHunk.changes.push(...contextAfter) + result.push(currentHunk) + currentHunk = null + // Keep the last few context lines for the next hunk + contextBefore = contextAfter + contextAfter = [] + } + } + } else { + if (!currentHunk) { + currentHunk = { changes: [...contextBefore] } + contextAfter = [] + } else if (contextAfter.length > 0) { + // Add accumulated context to current hunk + currentHunk.changes.push(...contextAfter) + contextAfter = [] + } + currentHunk.changes.push(change) + } + } + + // Add any remaining changes + if (currentHunk) { + if (contextAfter.length > 0) { + currentHunk.changes.push(...contextAfter) + } + result.push(currentHunk) + } + + return result + } + async applyDiff( originalContent: string, diffContent: string, @@ -206,55 +259,82 @@ Your diff here const contextStr = prepareSearchString(hunk.changes) const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) + if (confidence < 1.1) { + console.log('Full hunk application failed, trying sub-hunks strategy') + // Try splitting the hunk into smaller hunks + const subHunks = this.splitHunk(hunk) + let 
subHunkSuccess = true + let subHunkResult = [...result] + + for (const subHunk of subHunks) { + const subContextStr = prepareSearchString(subHunk.changes) + console.log(subContextStr) + const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold) + + if (subSearchResult.confidence >= this.confidenceThreshold) { + const subEditResult = await applyEdit(subHunk, subHunkResult, subSearchResult.index, subSearchResult.confidence, this.confidenceThreshold) + if (subEditResult.confidence >= this.confidenceThreshold) { + subHunkResult = subEditResult.result + continue + } + } + subHunkSuccess = false + break + } + + if (subHunkSuccess) { + result = subHunkResult + continue + } + + // If sub-hunks also failed, return the original error + const contextLines = hunk.changes.filter(c => c.type === "context").length + const totalLines = hunk.changes.length + const contextRatio = contextLines / totalLines + + let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n` + errorMsg += "Debug Info:\n" + errorMsg += `- Search Strategy Used: ${strategy}\n` + errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n` + errorMsg += `- Attempted to split into ${subHunks.length} sub-hunks but still failed\n` + + if (contextRatio < 0.2) { + errorMsg += "\nPossible Issues:\n" + errorMsg += "- Not enough context lines to uniquely identify the location\n" + errorMsg += "- Add a few more lines of unchanged code around your changes\n" + } else if (contextRatio > 0.5) { + errorMsg += "\nPossible Issues:\n" + errorMsg += "- Too many context lines may reduce search accuracy\n" + errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n" + } else { + errorMsg += "\nPossible Issues:\n" + errorMsg += "- The diff may be targeting a different version of the file\n" + errorMsg += "- 
There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n" + } + + if (startLine && endLine) { + errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n` + } + + return { success: false, error: errorMsg } + } + const editResult = await applyEdit(hunk, result, matchPosition, confidence, this.confidenceThreshold) if (editResult.confidence >= this.confidenceThreshold) { result = editResult.result } else { - // Determine if the failure is due to search or edit - if (confidence < this.confidenceThreshold) { - // Search failure - likely due to context not matching - const contextLines = hunk.changes.filter(c => c.type === "context").length - const totalLines = hunk.changes.length - const contextRatio = contextLines / totalLines + // Edit failure - likely due to content mismatch + let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n` + errorMsg += "Debug Info:\n" + errorMsg += "- The location was found but the content didn't match exactly\n" + errorMsg += "- This usually means the file has been modified since the diff was created\n" + errorMsg += "- Or the diff may be targeting a different version of the file\n" + errorMsg += "\nPossible Solutions:\n" + errorMsg += "1. Refresh your view of the file and create a new diff\n" + errorMsg += "2. Double-check that the removed lines (-) match the current file content\n" + errorMsg += "3. 
Ensure your diff targets the correct version of the file" - let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n` - errorMsg += "Debug Info:\n" - errorMsg += `- Search Strategy Used: ${strategy}\n` - errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n` - - if (contextRatio < 0.2) { - errorMsg += "\nPossible Issues:\n" - errorMsg += "- Not enough context lines to uniquely identify the location\n" - errorMsg += "- Add a few more lines of unchanged code around your changes\n" - } else if (contextRatio > 0.5) { - errorMsg += "\nPossible Issues:\n" - errorMsg += "- Too many context lines may reduce search accuracy\n" - errorMsg += "- Try to keep only 2-3 lines of context before and after changes\n" - } else { - errorMsg += "\nPossible Issues:\n" - errorMsg += "- The diff may be targeting a different version of the file\n" - errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n" - } - - if (startLine && endLine) { - errorMsg += `\nSearch Range: lines ${startLine}-${endLine}\n` - } - - return { success: false, error: errorMsg } - } else { - // Edit failure - likely due to content mismatch - let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n` - errorMsg += "Debug Info:\n" - errorMsg += "- The location was found but the content didn't match exactly\n" - errorMsg += "- This usually means the file has been modified since the diff was created\n" - errorMsg += "- Or the diff may be targeting a different version of the file\n" - errorMsg += "\nPossible Solutions:\n" - errorMsg += "1. Refresh your view of the file and create a new diff\n" - errorMsg += "2. Double-check that the removed lines (-) match the current file content\n" - errorMsg += "3. 
Ensure your diff targets the correct version of the file" - - return { success: false, error: errorMsg } - } + return { success: false, error: errorMsg } } } From c0d0548479bc4ce1ddd24bc1b8d072ec22422e71 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 16 Jan 2025 12:49:32 -0500 Subject: [PATCH 35/47] test: add hunk splitting tests for handling large diffs with non-contiguous changes --- .../strategies/__tests__/new-unified.test.ts | 189 ++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/src/core/diff/strategies/__tests__/new-unified.test.ts b/src/core/diff/strategies/__tests__/new-unified.test.ts index f8cb51d..c49fc80 100644 --- a/src/core/diff/strategies/__tests__/new-unified.test.ts +++ b/src/core/diff/strategies/__tests__/new-unified.test.ts @@ -547,4 +547,193 @@ if (otherCondition) { } }); }); + + describe('hunk splitting', () => { + it('should handle large diffs with multiple non-contiguous changes', async () => { + const original = `import { readFile } from 'fs'; +import { join } from 'path'; +import { Logger } from './logger'; + +const logger = new Logger(); + +async function processFile(filePath: string) { + try { + const data = await readFile(filePath, 'utf8'); + logger.info('File read successfully'); + return data; + } catch (error) { + logger.error('Failed to read file:', error); + throw error; + } +} + +function validateInput(input: string): boolean { + if (!input) { + logger.warn('Empty input received'); + return false; + } + return input.length > 0; +} + +async function writeOutput(data: string) { + logger.info('Processing output'); + // TODO: Implement output writing + return Promise.resolve(); +} + +function parseConfig(configPath: string) { + logger.debug('Reading config from:', configPath); + // Basic config parsing + return { + enabled: true, + maxRetries: 3 + }; +} + +export { + processFile, + validateInput, + writeOutput, + parseConfig +};`; + + const diff = `--- a/file.ts ++++ b/file.ts +@@ ... 
@@ +-import { readFile } from 'fs'; ++import { readFile, writeFile } from 'fs'; + import { join } from 'path'; +-import { Logger } from './logger'; ++import { Logger } from './utils/logger'; ++import { Config } from './types'; + +-const logger = new Logger(); ++const logger = new Logger('FileProcessor'); + + async function processFile(filePath: string) { + try { + const data = await readFile(filePath, 'utf8'); +- logger.info('File read successfully'); ++ logger.info(\`File \${filePath} read successfully\`); + return data; + } catch (error) { +- logger.error('Failed to read file:', error); ++ logger.error(\`Failed to read file \${filePath}:\`, error); + throw error; + } + } + + function validateInput(input: string): boolean { + if (!input) { +- logger.warn('Empty input received'); ++ logger.warn('Validation failed: Empty input received'); + return false; + } +- return input.length > 0; ++ return input.trim().length > 0; + } + +-async function writeOutput(data: string) { +- logger.info('Processing output'); +- // TODO: Implement output writing +- return Promise.resolve(); ++async function writeOutput(data: string, outputPath: string) { ++ try { ++ await writeFile(outputPath, data, 'utf8'); ++ logger.info(\`Output written to \${outputPath}\`); ++ } catch (error) { ++ logger.error(\`Failed to write output to \${outputPath}:\`, error); ++ throw error; ++ } + } + +-function parseConfig(configPath: string) { +- logger.debug('Reading config from:', configPath); +- // Basic config parsing +- return { +- enabled: true, +- maxRetries: 3 +- }; ++async function parseConfig(configPath: string): Promise { ++ try { ++ const configData = await readFile(configPath, 'utf8'); ++ logger.debug(\`Reading config from \${configPath}\`); ++ return JSON.parse(configData); ++ } catch (error) { ++ logger.error(\`Failed to parse config from \${configPath}:\`, error); ++ throw error; ++ } + } + + export { + processFile, + validateInput, + writeOutput, +- parseConfig ++ parseConfig, ++ type 
Config + };`; + + const expected = `import { readFile, writeFile } from 'fs'; +import { join } from 'path'; +import { Logger } from './utils/logger'; +import { Config } from './types'; + +const logger = new Logger('FileProcessor'); + +async function processFile(filePath: string) { + try { + const data = await readFile(filePath, 'utf8'); + logger.info(\`File \${filePath} read successfully\`); + return data; + } catch (error) { + logger.error(\`Failed to read file \${filePath}:\`, error); + throw error; + } +} + +function validateInput(input: string): boolean { + if (!input) { + logger.warn('Validation failed: Empty input received'); + return false; + } + return input.trim().length > 0; +} + +async function writeOutput(data: string, outputPath: string) { + try { + await writeFile(outputPath, data, 'utf8'); + logger.info(\`Output written to \${outputPath}\`); + } catch (error) { + logger.error(\`Failed to write output to \${outputPath}:\`, error); + throw error; + } +} + +async function parseConfig(configPath: string): Promise { + try { + const configData = await readFile(configPath, 'utf8'); + logger.debug(\`Reading config from \${configPath}\`); + return JSON.parse(configData); + } catch (error) { + logger.error(\`Failed to parse config from \${configPath}:\`, error); + throw error; + } +} + +export { + processFile, + validateInput, + writeOutput, + parseConfig, + type Config +};`; + + const result = await strategy.applyDiff(original, diff); + expect(result.success).toBe(true); + if (result.success) { + expect(result.content).toBe(expected); + } + }); + }); }); \ No newline at end of file From 3ce2e0c6bf9e24c58203cc4fb90fa262cbe7f820 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 16 Jan 2025 17:13:25 -0500 Subject: [PATCH 36/47] feat: update diff strategy dynamically - Added `updateDiffStrategy` method to dynamically adjust the diff strategy based on the current state and experimental settings. 
- Updated ClineProvider to call `updateDiffStrategy` when the experimental diff strategy is modified, ensuring real-time updates in the Cline instance. --- src/core/Cline.ts | 22 ++++++++++++++++------ src/core/webview/ClineProvider.ts | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/core/Cline.ts b/src/core/Cline.ts index cbac9df..4487521 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -72,6 +72,7 @@ export class Cline { customInstructions?: string diffStrategy?: DiffStrategy diffEnabled: boolean = false + fuzzyMatchThreshold: number = 1.0 apiConversationHistory: (Anthropic.MessageParam & { ts?: number })[] = [] clineMessages: ClineMessage[] = [] @@ -120,17 +121,17 @@ export class Cline { this.browserSession = new BrowserSession(provider.context) this.customInstructions = customInstructions this.diffEnabled = enableDiff ?? false - - // Prioritize experimentalDiffStrategy from history item if available - const effectiveExperimentalDiffStrategy = historyItem?.experimentalDiffStrategy ?? experimentalDiffStrategy - this.diffStrategy = getDiffStrategy(this.api.getModel().id, fuzzyMatchThreshold ?? 1.0, effectiveExperimentalDiffStrategy) - this.diffViewProvider = new DiffViewProvider(cwd) + this.fuzzyMatchThreshold = fuzzyMatchThreshold ?? 1.0 this.providerRef = new WeakRef(provider) + this.diffViewProvider = new DiffViewProvider(cwd) if (historyItem) { this.taskId = historyItem.id } + // Initialize diffStrategy based on current state + this.updateDiffStrategy(experimentalDiffStrategy) + if (task || images) { this.startTask(task, images) } else if (historyItem) { @@ -138,6 +139,16 @@ export class Cline { } } + // Add method to update diffStrategy + async updateDiffStrategy(experimentalDiffStrategy?: boolean) { + // If not provided, get from current state + if (experimentalDiffStrategy === undefined) { + const { experimentalDiffStrategy: stateExperimentalDiffStrategy } = await this.providerRef.deref()?.getState() ?? 
{} + experimentalDiffStrategy = stateExperimentalDiffStrategy ?? false + } + this.diffStrategy = getDiffStrategy(this.api.getModel().id, this.fuzzyMatchThreshold, experimentalDiffStrategy) + } + // Storing task to disk for history private async ensureTaskDirectoryExists(): Promise { @@ -1344,7 +1355,6 @@ export class Cline { success: false, error: "No diff strategy available" } - console.log("diffResult", diffResult) if (!diffResult.success) { this.consecutiveMistakeCount++ const currentCount = (this.consecutiveMistakeCountForApplyDiff.get(relPath) || 0) + 1 diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 68b8d29..100dcaa 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1072,6 +1072,10 @@ export class ClineProvider implements vscode.WebviewViewProvider { break case "experimentalDiffStrategy": await this.updateGlobalState("experimentalDiffStrategy", message.bool ?? false) + // Update diffStrategy in current Cline instance if it exists + if (this.cline) { + await this.cline.updateDiffStrategy(message.bool ?? 
false) + } await this.postStateToWebview() } }, From ab1a7a511fc9edf087adf5a8a3daddb0b3eb90f4 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 16 Jan 2025 17:15:44 -0500 Subject: [PATCH 37/47] fix: remove unnecessary console log in new unified diff strategy --- src/core/diff/strategies/new-unified/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 9eee4de..6585171 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -268,7 +268,6 @@ Your diff here for (const subHunk of subHunks) { const subContextStr = prepareSearchString(subHunk.changes) - console.log(subContextStr) const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold) if (subSearchResult.confidence >= this.confidenceThreshold) { From 815c266fecd6058d9c6bcbc860dee6639dbca76e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 16 Jan 2025 23:49:11 -0500 Subject: [PATCH 38/47] refactor: remove experimental diff from history since we are using the state --- src/core/webview/ClineProvider.ts | 11 ++--------- src/shared/HistoryItem.ts | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 415417d..0585f6b 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1988,18 +1988,11 @@ export class ClineProvider implements vscode.WebviewViewProvider { async updateTaskHistory(item: HistoryItem): Promise { const history = (await this.getGlobalState("taskHistory") as HistoryItem[] | undefined) || [] const existingItemIndex = history.findIndex((h) => h.id === item.id) - - // Ensure experimentalDiffStrategy is included from current settings if not already set - const { experimentalDiffStrategy } = await this.getState() ?? 
{} - const updatedItem = { - ...item, - experimentalDiffStrategy: item.experimentalDiffStrategy ?? experimentalDiffStrategy - } if (existingItemIndex !== -1) { - history[existingItemIndex] = updatedItem + history[existingItemIndex] = item } else { - history.push(updatedItem) + history.push(item) } await this.updateGlobalState("taskHistory", history) return history diff --git a/src/shared/HistoryItem.ts b/src/shared/HistoryItem.ts index 4127b88..d4539f6 100644 --- a/src/shared/HistoryItem.ts +++ b/src/shared/HistoryItem.ts @@ -7,5 +7,4 @@ export type HistoryItem = { cacheWrites?: number cacheReads?: number totalCost: number - experimentalDiffStrategy?: boolean } From df654f0284ef5097b6e360e1babbb9de38914ddb Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 00:40:33 -0500 Subject: [PATCH 39/47] refactor: enhance anchor-based search strategy in Levenshtein match - Improved the `identifyAnchors` function to return the first and last non-empty lines of the search string. - Updated the `findAnchorMatch` function to validate anchor uniqueness and context more effectively. - Removed unused complexity calculations and streamlined the anchor validation process. 
--- .../new-unified/search-strategies.ts | 140 +++++++++--------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts b/src/core/diff/strategies/new-unified/search-strategies.ts index 8b2aa70..a58a768 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -271,8 +271,9 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start const similarity = getDMPSimilarity(searchStr, closestMatch) const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) + console.log(searchStr, closestMatch, index, confidence) return { - index, + index: confidence === 0 ? -1 : index, confidence: index !== -1 ? confidence : 0, strategy: "levenshtein", } @@ -281,92 +282,91 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start return { index: -1, confidence: 0, strategy: "levenshtein" } } -// Helper function to identify anchor lines based on uniqueness and complexity -function identifyAnchors(searchStr: string, content: string[]): { line: string; index: number; weight: number }[] { - const searchLines = searchStr.split("\n") - const contentStr = content.join("\n") - const anchors: { line: string; index: number; weight: number }[] = [] +// Helper function to identify anchor lines +function identifyAnchors(searchStr: string): { first: string | null; last: string | null } { + const searchLines = searchStr.split("\n"); + let first: string | null = null; + let last: string | null = null; - for (let i = 0; i < searchLines.length; i++) { - const line = searchLines[i] - if (!line.trim()) {continue} // Skip empty lines - - // Calculate line complexity (more special chars = more unique) - const specialChars = (line.match(/[^a-zA-Z0-9\s]/g) || []).length - const complexity = specialChars / line.length - - // 
Count occurrences in content - const regex = new RegExp(line.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g") - const matches = contentStr.match(regex) - const occurrences = matches ? matches.length : 0 - - // Calculate uniqueness weight - const uniquenessWeight = occurrences <= 1 ? 1 : 1 / occurrences - const weight = uniquenessWeight * (0.7 + 0.3 * complexity) - - if (weight > 0.5) { - // Only consider lines with high enough weight - anchors.push({ line, index: i, weight }) + // Find the first non-empty line + for (const line of searchLines) { + if (line.trim()) { + first = line; + break; } } - // Sort by weight descending - return anchors.sort((a, b) => b.weight - a.weight) -} - -// Helper function to validate anchor positions -function validateAnchorPositions( - anchors: { line: string; index: number }[], - content: string[], - searchLines: string[] -): number { - for (const anchor of anchors) { - const anchorIndex = content.findIndex((line) => line === anchor.line) - if (anchorIndex !== -1) { - // Check if surrounding context matches - const contextBefore = searchLines.slice(Math.max(0, anchor.index - 2), anchor.index).join("\n") - const contextAfter = searchLines.slice(anchor.index + 1, anchor.index + 3).join("\n") - const contentBefore = content.slice(Math.max(0, anchorIndex - 2), anchorIndex).join("\n") - const contentAfter = content.slice(anchorIndex + 1, anchorIndex + 3).join("\n") - - const beforeSimilarity = evaluateSimilarity(contextBefore, contentBefore) - const afterSimilarity = evaluateSimilarity(contextAfter, contentAfter) - - if (beforeSimilarity > 0.8 && afterSimilarity > 0.8) { - return anchorIndex - anchor.index - } + // Find the last non-empty line + for (let i = searchLines.length - 1; i >= 0; i--) { + if (searchLines[i].trim()) { + last = searchLines[i]; + break; } } - return -1 + + return { first, last }; } // Anchor-based search strategy export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, 
confidenceThreshold: number = 0.97): SearchResult { - const searchLines = searchStr.split("\n") - const anchors = identifyAnchors(searchStr, content.slice(startIndex)) + const searchLines = searchStr.split("\n"); + const { first, last } = identifyAnchors(searchStr); - if (anchors.length === 0) { - return { index: -1, confidence: 0, strategy: "anchor" } + if (!first || !last) { + return { index: -1, confidence: 0, strategy: "anchor" }; } - // Try to validate position using top anchors - const offset = validateAnchorPositions(anchors.slice(0, 3), content.slice(startIndex), searchLines) + let firstIndex = -1; + let lastIndex = -1; - if (offset !== -1) { - const matchPosition = startIndex + offset - const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n") - const similarity = getDMPSimilarity(searchStr, matchedContent) - const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold) - const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight - - return { - index: matchPosition, - confidence: Math.min(1, confidence), // Cap at 1 - strategy: "anchor", + // Check if the first anchor is unique + let firstOccurrences = 0; + for (const contentLine of content) { + if (contentLine === first) { + firstOccurrences++; } } - return { index: -1, confidence: 0, strategy: "anchor" } + if (firstOccurrences !== 1) { + return { index: -1, confidence: 0, strategy: "anchor" }; + } + + // Find the first anchor + for (let i = startIndex; i < content.length; i++) { + if (content[i] === first) { + firstIndex = i; + break; + } + } + + // Find the last anchor + for (let i = content.length - 1; i >= startIndex; i--) { + if (content[i] === last) { + lastIndex = i; + break; + } + } + + if (firstIndex === -1 || lastIndex === -1 || lastIndex <= firstIndex) { + return { index: -1, confidence: 0, strategy: "anchor" }; + } + + // Validate the context + 
const expectedContext = searchLines.slice(searchLines.indexOf(first) + 1, searchLines.indexOf(last)).join("\n"); + const actualContext = content.slice(firstIndex + 1, lastIndex).join("\n"); + const contextSimilarity = evaluateSimilarity(expectedContext, actualContext); + + if (contextSimilarity < getAdaptiveThreshold(content.length, confidenceThreshold)) { + return { index: -1, confidence: 0, strategy: "anchor" }; + } + + const confidence = 1; + + return { + index: firstIndex, + confidence: confidence, + strategy: "anchor", + }; } // Main search function that tries all strategies From c710f558f60abd994ca628e0e681f5eab6b3b84b Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 00:42:03 -0500 Subject: [PATCH 40/47] test: add comprehensive tests for search strategies --- .../__tests__/search-strategies.test.ts | 237 ++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts diff --git a/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts b/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts new file mode 100644 index 0000000..24cd3e2 --- /dev/null +++ b/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts @@ -0,0 +1,237 @@ +import { findAnchorMatch, findExactMatch, findSimilarityMatch, findLevenshteinMatch } from "../search-strategies"; + +type SearchStrategy = (searchStr: string, content: string[], startIndex?: number) => { + index: number; + confidence: number; + strategy: string; +}; + +const testCases = [ + { + name: "should return no match if the search string is not found", + searchStr: "not found", + content: ["line1", "line2", "line3"], + expected: { index: -1, confidence: 0 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match if the search string is found", + searchStr: "line2", + content: ["line1", "line2", "line3"], + expected: { index: 1, confidence: 1 
}, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with correct index when startIndex is provided", + searchStr: "line3", + content: ["line1", "line2", "line3", "line4", "line3"], + startIndex: 3, + expected: { index: 4, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match even if there are more lines in content", + searchStr: "line2", + content: ["line1", "line2", "line3", "line4", "line5"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match even if the search string is at the beginning of the content", + searchStr: "line1", + content: ["line1", "line2", "line3"], + expected: { index: 0, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match even if the search string is at the end of the content", + searchStr: "line3", + content: ["line1", "line2", "line3"], + expected: { index: 2, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match for a multi-line search string", + searchStr: "line2\nline3", + content: ["line1", "line2", "line3", "line4"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return no match if a multi-line search string is not found", + searchStr: "line2\nline4", + content: ["line1", "line2", "line3", "line4"], + expected: { index: -1, confidence: 0 }, + strategies: ["exact", "similarity"], + }, + { + name: "should return a match with indentation", + searchStr: " line2", + content: ["line1", " line2", "line3"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with more complex indentation", + searchStr: " line3", + content: [" line1", " line2", " line3", " line4"], + expected: { index: 2, 
confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with mixed indentation", + searchStr: "\tline2", + content: [" line1", "\tline2", " line3"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with mixed indentation and multi-line", + searchStr: " line2\n\tline3", + content: ["line1", " line2", "\tline3", " line4"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return no match if mixed indentation and multi-line is not found", + searchStr: " line2\n line4", + content: ["line1", " line2", "\tline3", " line4"], + expected: { index: -1, confidence: 0 }, + strategies: ["exact", "similarity"], + }, + { + name: "should return a match with leading and trailing spaces", + searchStr: " line2 ", + content: ["line1", " line2 ", "line3"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with leading and trailing tabs", + searchStr: "\tline2\t", + content: ["line1", "\tline2\t", "line3"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with mixed leading and trailing spaces and tabs", + searchStr: " \tline2\t ", + content: ["line1", " \tline2\t ", "line3"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return a match with mixed leading and trailing spaces and tabs and multi-line", + searchStr: " \tline2\t \n line3 ", + content: ["line1", " \tline2\t ", " line3 ", "line4"], + expected: { index: 1, confidence: 1 }, + strategies: ["exact", "similarity", "levenshtein"], + }, + { + name: "should return no match if mixed leading and trailing spaces and tabs and multi-line is not found", + searchStr: " \tline2\t \n line4 ", + 
content: ["line1", " \tline2\t ", " line3 ", "line4"], + expected: { index: -1, confidence: 0 }, + strategies: ["exact", "similarity"], + }, +]; + + +describe("findExactMatch", () => { + testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => { + if (!strategies?.includes("exact")) {return;} + it(name, () => { + const result = findExactMatch(searchStr, content, startIndex); + expect(result.index).toBe(expected.index); + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); + expect(result.strategy).toMatch(/exact(-overlapping)?/); + }); + }); +}); + +describe("findAnchorMatch", () => { + const anchorTestCases = [ + { + name: "should return no match if no anchors are found", + searchStr: " \n \n ", + content: ["line1", "line2", "line3"], + expected: { index: -1, confidence: 0 }, + }, + { + name: "should return no match if anchor positions cannot be validated", + searchStr: "unique line\ncontext line 1\ncontext line 2", + content: ["different line 1", "different line 2", "different line 3", "another unique line", "context line 1", "context line 2"], + expected: { index: -1, confidence: 0 }, + }, + { + name: "should return a match if anchor positions can be validated", + searchStr: "unique line\ncontext line 1\ncontext line 2", + content: ["line1", "line2", "unique line", "context line 1", "context line 2", "line 6"], + expected: { index: 2, confidence: 1 }, + }, + { + name: "should return a match with correct index when startIndex is provided", + searchStr: "unique line\ncontext line 1\ncontext line 2", + content: ["line1", "line2", "line3", "unique line", "context line 1", "context line 2", "line 7"], + startIndex: 3, + expected: { index: 3, confidence: 1 }, + }, + { + name: "should return a match even if there are more lines in content", + searchStr: "unique line\ncontext line 1\ncontext line 2", + content: ["line1", "line2", "unique line", "context line 1", "context line 2", "line 6", "extra line 1", "extra line 2"], 
+ expected: { index: 2, confidence: 1 }, + }, + { + name: "should return a match even if the anchor is at the beginning of the content", + searchStr: "unique line\ncontext line 1\ncontext line 2", + content: ["unique line", "context line 1", "context line 2", "line 6"], + expected: { index: 0, confidence: 1 }, + }, + { + name: "should return a match even if the anchor is at the end of the content", + searchStr: "unique line\ncontext line 1\ncontext line 2", + content: ["line1", "line2", "unique line", "context line 1", "context line 2"], + expected: { index: 2, confidence: 1 }, + }, + { + name: "should return no match if no valid anchor is found", + searchStr: "non-unique line\ncontext line 1\ncontext line 2", + content: ["line1", "line2", "non-unique line", "context line 1", "context line 2", "non-unique line"], + expected: { index: -1, confidence: 0 }, + }, + ]; + + anchorTestCases.forEach(({ name, searchStr, content, startIndex, expected }) => { + it(name, () => { + const result = findAnchorMatch(searchStr, content, startIndex); + expect(result.index).toBe(expected.index); + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); + expect(result.strategy).toBe("anchor"); + }); + }); +}); + +describe("findSimilarityMatch", () => { + testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => { + if (!strategies?.includes("similarity")) {return;} + it(name, () => { + const result = findSimilarityMatch(searchStr, content, startIndex); + expect(result.index).toBe(expected.index); + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); + expect(result.strategy).toBe("similarity"); + }); + }); +}); + +describe("findLevenshteinMatch", () => { + testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => { + if (!strategies?.includes("levenshtein")) {return;} + it(name, () => { + const result = findLevenshteinMatch(searchStr, content, startIndex); + 
expect(result.index).toBe(expected.index); + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); + expect(result.strategy).toBe("levenshtein"); + }); + }); +}); \ No newline at end of file From 9857e31e9cda3c993468d2023df72ceb63b8dfd5 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 00:57:56 -0500 Subject: [PATCH 41/47] fix: use actual confidence threshold --- src/core/diff/strategies/new-unified/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 6585171..29fa5c8 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -259,7 +259,7 @@ Your diff here const contextStr = prepareSearchString(hunk.changes) const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) - if (confidence < 1.1) { + if (confidence < this.confidenceThreshold) { console.log('Full hunk application failed, trying sub-hunks strategy') // Try splitting the hunk into smaller hunks const subHunks = this.splitHunk(hunk) From 71fdf88672fbc7e0f3cbab0a5e9e1d85ddf6e773 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 01:38:44 -0500 Subject: [PATCH 42/47] refactor: simplify indentation inference and enhance edit strategies - Refactored the `inferIndentation` function to streamline indentation handling for context and added lines. - Improved the `applyContextMatching` and `applyDMP` functions for better clarity and efficiency in processing changes. 
- Code was formatted --- .../strategies/new-unified/edit-strategies.ts | 388 ++++++++++-------- 1 file changed, 206 insertions(+), 182 deletions(-) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index 6f14f31..e7f3d85 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -1,249 +1,271 @@ -import { diff_match_patch } from 'diff-match-patch'; -import { EditResult, Hunk } from './types'; -import { getDMPSimilarity, validateEditResult } from './search-strategies'; -import * as path from 'path'; -import simpleGit, { SimpleGit } from 'simple-git'; -import * as tmp from 'tmp'; -import * as fs from 'fs'; +import { diff_match_patch } from "diff-match-patch" +import { EditResult, Hunk } from "./types" +import { getDMPSimilarity, validateEditResult } from "./search-strategies" +import * as path from "path" +import simpleGit, { SimpleGit } from "simple-git" +import * as tmp from "tmp" +import * as fs from "fs" -// Helper function to infer indentation -function inferIndentation(line: string, contextLines: string[], previousIndent: string = ''): string { - const match = line.match(/^(\s+)/); - if (match) { - return match[1]; - } +// Helper function to infer indentation - simplified version +function inferIndentation(line: string, contextLines: string[], previousIndent: string = ""): string { + // If the line has explicit indentation in the change, use it exactly + const lineMatch = line.match(/^(\s+)/) + if (lineMatch) { + return lineMatch[1] + } - for (const contextLine of contextLines) { - const contextMatch = contextLine.match(/^(\s+)/); + // If we have context lines, use the indentation from the first context line + const contextLine = contextLines[0] + if (contextLine) { + const contextMatch = contextLine.match(/^(\s+)/) if (contextMatch) { - const currentLineDepth = (line.match(/^\s*/)?.[0] || '').length; - const 
contextLineDepth = contextMatch[1].length; - - if (currentLineDepth > contextLineDepth) { - return contextMatch[1] + ' '.repeat(2); - } - return contextMatch[1]; - } - } + return contextMatch[1] + } + } - return previousIndent; + // Fallback to previous indent + return previousIndent } // Context matching edit strategy -export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult { +export function applyContextMatching( + hunk: Hunk, + content: string[], + matchPosition: number, +): EditResult { if (matchPosition === -1) { - return { confidence: 0, result: content, strategy: 'context' }; - } + return { confidence: 0, result: content, strategy: "context" } + } - const newResult = [...content.slice(0, matchPosition)]; - let sourceIndex = matchPosition; - let previousIndent = ''; - let contextLinesProcessed = 0; + const newResult = [...content.slice(0, matchPosition)] + let sourceIndex = matchPosition for (const change of hunk.changes) { - if (change.type === 'context') { - newResult.push(change.originalLine || (change.indent + change.content)); - previousIndent = change.indent; - sourceIndex++; - contextLinesProcessed++; - } else if (change.type === 'add') { - const indent = change.indent || inferIndentation(change.content, - hunk.changes.filter(c => c.type === 'context' && c.originalLine).map(c => c.originalLine || ''), - previousIndent - ); - newResult.push(indent + change.content); - previousIndent = indent; - } else if (change.type === 'remove') { - sourceIndex++; - } - } + if (change.type === "context") { + // Use the original line from content if available + if (sourceIndex < content.length) { + newResult.push(content[sourceIndex]) + } else { + const line = change.indent ? 
change.indent + change.content : change.content + newResult.push(line) + } + sourceIndex++ + } else if (change.type === "add") { + // Use exactly the indentation from the change + const baseIndent = change.indent || "" - // Only append remaining content after the hunk's actual span in the original content - const remainingContentStart = matchPosition + contextLinesProcessed + hunk.changes.filter(c => c.type === 'remove').length; - newResult.push(...content.slice(remainingContentStart)); - - // Calculate the window size based on all changes - const windowSize = hunk.changes.length; - - // Validate the result using the full window size - const similarity = getDMPSimilarity( - content.slice(matchPosition, matchPosition + windowSize).join('\n'), - newResult.slice(matchPosition, matchPosition + windowSize).join('\n') - ) + // Handle multi-line additions + const lines = change.content.split("\n").map((line) => { + // If the line already has indentation, preserve it relative to the base indent + const lineIndentMatch = line.match(/^(\s*)(.*)/) + if (lineIndentMatch) { + const [, lineIndent, content] = lineIndentMatch + // Only add base indent if the line doesn't already have it + return lineIndent ? 
line : baseIndent + content + } + return baseIndent + line + }) - const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), confidenceThreshold); + newResult.push(...lines) + } else if (change.type === "remove") { + // Handle multi-line removes by incrementing sourceIndex for each line + const removedLines = change.content.split("\n").length + sourceIndex += removedLines + } + } - return { - confidence: similarity * confidence, - result: newResult, - strategy: 'context' - }; + // Append remaining content + newResult.push(...content.slice(sourceIndex)) + + // Calculate confidence based on the actual changes + const afterText = newResult.slice(matchPosition, newResult.length - (content.length - sourceIndex)).join("\n") + + const confidence = validateEditResult(hunk, afterText) + + return { + confidence, + result: newResult, + strategy: "context" + } } // DMP edit strategy -export function applyDMP(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult { +export function applyDMP( + hunk: Hunk, + content: string[], + matchPosition: number, +): EditResult { if (matchPosition === -1) { - return { confidence: 0, result: content, strategy: 'dmp' }; - } + return { confidence: 0, result: content, strategy: "dmp" } + } - const dmp = new diff_match_patch(); + const dmp = new diff_match_patch() + + // Calculate total lines in before block accounting for multi-line content + const beforeLineCount = hunk.changes + .filter((change) => change.type === "context" || change.type === "remove") + .reduce((count, change) => count + change.content.split("\n").length, 0) // Build BEFORE block (context + removals) const beforeLines = hunk.changes - .filter(change => change.type === 'context' || change.type === 'remove') - .map(change => change.originalLine || (change.indent + change.content)); + .filter((change) => change.type === "context" || change.type === "remove") + .map((change) => { + if 
(change.originalLine) { + return change.originalLine + } + return change.indent ? change.indent + change.content : change.content + }) // Build AFTER block (context + additions) const afterLines = hunk.changes - .filter(change => change.type === 'context' || change.type === 'add') - .map(change => change.originalLine || (change.indent + change.content)); - - // Convert to text - const beforeText = beforeLines.join('\n'); - const afterText = afterLines.join('\n'); - - // Create the patch - const patch = dmp.patch_make(beforeText, afterText); - - // Get the target text from content - const targetText = content.slice(matchPosition, matchPosition + beforeLines.length).join('\n'); - - // Apply the patch - const [patchedText] = dmp.patch_apply(patch, targetText); - - // Split patched text back into lines - const patchedLines = patchedText.split('\n'); - - // Construct the final result + .filter((change) => change.type === "context" || change.type === "add") + .map((change) => { + if (change.originalLine) { + return change.originalLine + } + return change.indent ? 
change.indent + change.content : change.content + }) + + // Convert to text with proper line endings + const beforeText = beforeLines.join("\n") + const afterText = afterLines.join("\n") + + // Create and apply patch + const patch = dmp.patch_make(beforeText, afterText) + const targetText = content.slice(matchPosition, matchPosition + beforeLineCount).join("\n") + const [patchedText] = dmp.patch_apply(patch, targetText) + + // Split result and preserve line endings + const patchedLines = patchedText.split("\n") + + // Construct final result const newResult = [ ...content.slice(0, matchPosition), ...patchedLines, - ...content.slice(matchPosition + beforeLines.length) - ]; + ...content.slice(matchPosition + beforeLineCount), + ] - // Calculate confidence - const similarity = getDMPSimilarity(beforeText, targetText); - const confidence = validateEditResult(hunk, patchedText, confidenceThreshold); + const confidence = validateEditResult(hunk, patchedText) return { - confidence: similarity * confidence, + confidence, result: newResult, - strategy: 'dmp' - }; + strategy: "dmp", + } } // Git fallback strategy that works with full content async function applyGitFallback(hunk: Hunk, content: string[]): Promise { - let tmpDir: tmp.DirResult | undefined; - - try { - tmpDir = tmp.dirSync({ unsafeCleanup: true }); - const git: SimpleGit = simpleGit(tmpDir.name); - - await git.init(); - await git.addConfig('user.name', 'Temp'); - await git.addConfig('user.email', 'temp@example.com'); + let tmpDir: tmp.DirResult | undefined - const filePath = path.join(tmpDir.name, 'file.txt'); + try { + tmpDir = tmp.dirSync({ unsafeCleanup: true }) + const git: SimpleGit = simpleGit(tmpDir.name) + + await git.init() + await git.addConfig("user.name", "Temp") + await git.addConfig("user.email", "temp@example.com") + + const filePath = path.join(tmpDir.name, "file.txt") const searchLines = hunk.changes - .filter(change => change.type === 'context' || change.type === 'remove') - .map(change => 
change.originalLine || (change.indent + change.content)); + .filter((change) => change.type === "context" || change.type === "remove") + .map((change) => change.originalLine || change.indent + change.content) const replaceLines = hunk.changes - .filter(change => change.type === 'context' || change.type === 'add') - .map(change => change.originalLine || (change.indent + change.content)); + .filter((change) => change.type === "context" || change.type === "add") + .map((change) => change.originalLine || change.indent + change.content) - const searchText = searchLines.join('\n'); - const replaceText = replaceLines.join('\n'); - const originalText = content.join('\n'); + const searchText = searchLines.join("\n") + const replaceText = replaceLines.join("\n") + const originalText = content.join("\n") try { - fs.writeFileSync(filePath, originalText); - await git.add('file.txt'); - const originalCommit = await git.commit('original'); - console.log('Strategy 1 - Original commit:', originalCommit.commit); + fs.writeFileSync(filePath, originalText) + await git.add("file.txt") + const originalCommit = await git.commit("original") + console.log("Strategy 1 - Original commit:", originalCommit.commit) - fs.writeFileSync(filePath, searchText); - await git.add('file.txt'); - const searchCommit1 = await git.commit('search'); - console.log('Strategy 1 - Search commit:', searchCommit1.commit); + fs.writeFileSync(filePath, searchText) + await git.add("file.txt") + const searchCommit1 = await git.commit("search") + console.log("Strategy 1 - Search commit:", searchCommit1.commit) - fs.writeFileSync(filePath, replaceText); - await git.add('file.txt'); - const replaceCommit = await git.commit('replace'); - console.log('Strategy 1 - Replace commit:', replaceCommit.commit); + fs.writeFileSync(filePath, replaceText) + await git.add("file.txt") + const replaceCommit = await git.commit("replace") + console.log("Strategy 1 - Replace commit:", replaceCommit.commit) - console.log('Strategy 1 - 
Attempting checkout of:', originalCommit.commit); - await git.raw(['checkout', originalCommit.commit]); + console.log("Strategy 1 - Attempting checkout of:", originalCommit.commit) + await git.raw(["checkout", originalCommit.commit]) try { - console.log('Strategy 1 - Attempting cherry-pick of:', replaceCommit.commit); - await git.raw(['cherry-pick', '--minimal', replaceCommit.commit]); - - const newText = fs.readFileSync(filePath, 'utf-8'); - const newLines = newText.split('\n'); + console.log("Strategy 1 - Attempting cherry-pick of:", replaceCommit.commit) + await git.raw(["cherry-pick", "--minimal", replaceCommit.commit]) + + const newText = fs.readFileSync(filePath, "utf-8") + const newLines = newText.split("\n") return { confidence: 1, result: newLines, - strategy: 'git-fallback' - }; + strategy: "git-fallback", + } } catch (cherryPickError) { - console.error('Strategy 1 failed with merge conflict'); + console.error("Strategy 1 failed with merge conflict") } } catch (error) { - console.error('Strategy 1 failed:', error); + console.error("Strategy 1 failed:", error) } try { - await git.init(); - await git.addConfig('user.name', 'Temp'); - await git.addConfig('user.email', 'temp@example.com'); + await git.init() + await git.addConfig("user.name", "Temp") + await git.addConfig("user.email", "temp@example.com") - fs.writeFileSync(filePath, searchText); - await git.add('file.txt'); - const searchCommit = await git.commit('search'); - const searchHash = searchCommit.commit.replace(/^HEAD /, ''); - console.log('Strategy 2 - Search commit:', searchHash); + fs.writeFileSync(filePath, searchText) + await git.add("file.txt") + const searchCommit = await git.commit("search") + const searchHash = searchCommit.commit.replace(/^HEAD /, "") + console.log("Strategy 2 - Search commit:", searchHash) - fs.writeFileSync(filePath, replaceText); - await git.add('file.txt'); - const replaceCommit = await git.commit('replace'); - const replaceHash = replaceCommit.commit.replace(/^HEAD 
/, ''); - console.log('Strategy 2 - Replace commit:', replaceHash); + fs.writeFileSync(filePath, replaceText) + await git.add("file.txt") + const replaceCommit = await git.commit("replace") + const replaceHash = replaceCommit.commit.replace(/^HEAD /, "") + console.log("Strategy 2 - Replace commit:", replaceHash) - console.log('Strategy 2 - Attempting checkout of:', searchHash); - await git.raw(['checkout', searchHash]); - fs.writeFileSync(filePath, originalText); - await git.add('file.txt'); - const originalCommit2 = await git.commit('original'); - console.log('Strategy 2 - Original commit:', originalCommit2.commit); + console.log("Strategy 2 - Attempting checkout of:", searchHash) + await git.raw(["checkout", searchHash]) + fs.writeFileSync(filePath, originalText) + await git.add("file.txt") + const originalCommit2 = await git.commit("original") + console.log("Strategy 2 - Original commit:", originalCommit2.commit) try { - console.log('Strategy 2 - Attempting cherry-pick of:', replaceHash); - await git.raw(['cherry-pick', '--minimal', replaceHash]); - - const newText = fs.readFileSync(filePath, 'utf-8'); - const newLines = newText.split('\n'); + console.log("Strategy 2 - Attempting cherry-pick of:", replaceHash) + await git.raw(["cherry-pick", "--minimal", replaceHash]) + + const newText = fs.readFileSync(filePath, "utf-8") + const newLines = newText.split("\n") return { confidence: 1, result: newLines, - strategy: 'git-fallback' - }; + strategy: "git-fallback", + } } catch (cherryPickError) { - console.error('Strategy 2 failed with merge conflict'); + console.error("Strategy 2 failed with merge conflict") } } catch (error) { - console.error('Strategy 2 failed:', error); + console.error("Strategy 2 failed:", error) } - console.error('Git fallback failed'); - return { confidence: 0, result: content, strategy: 'git-fallback' }; + console.error("Git fallback failed") + return { confidence: 0, result: content, strategy: "git-fallback" } } catch (error) { - 
console.error('Git fallback strategy failed:', error); - return { confidence: 0, result: content, strategy: 'git-fallback' }; + console.error("Git fallback strategy failed:", error) + return { confidence: 0, result: content, strategy: "git-fallback" } } finally { if (tmpDir) { - tmpDir.removeCallback(); + tmpDir.removeCallback() } } } @@ -258,24 +280,26 @@ export async function applyEdit( ): Promise { // Don't attempt regular edits if confidence is too low if (confidence < confidenceThreshold) { - console.log(`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`); - return applyGitFallback(hunk, content); + console.log( + `Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...` + ) + return applyGitFallback(hunk, content) } // Try each strategy in sequence until one succeeds const strategies = [ - { name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition, confidenceThreshold) }, - { name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition, confidenceThreshold) }, - { name: 'git-fallback', apply: () => applyGitFallback(hunk, content) } - ]; + { name: "dmp", apply: () => applyDMP(hunk, content, matchPosition) }, + { name: "context", apply: () => applyContextMatching(hunk, content, matchPosition) }, + { name: "git-fallback", apply: () => applyGitFallback(hunk, content) }, + ] // Try strategies sequentially until one succeeds for (const strategy of strategies) { - const result = await strategy.apply(); + const result = await strategy.apply() if (result.confidence >= confidenceThreshold) { - return result; + return result } } - return { confidence: 0, result: content, strategy: 'none' }; + return { confidence: 0, result: content, strategy: "none" } } From fa49bd804b4034d274082306eff6047fcff169b3 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 01:39:31 -0500 Subject: [PATCH 43/47] refactor: standardize code 
formatting --- src/core/diff/strategies/new-unified/index.ts | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/core/diff/strategies/new-unified/index.ts b/src/core/diff/strategies/new-unified/index.ts index 29fa5c8..42d87bd 100644 --- a/src/core/diff/strategies/new-unified/index.ts +++ b/src/core/diff/strategies/new-unified/index.ts @@ -7,7 +7,7 @@ export class NewUnifiedDiffStrategy implements DiffStrategy { private readonly confidenceThreshold: number constructor(confidenceThreshold: number = 1) { - this.confidenceThreshold = Math.max(confidenceThreshold, 0.8); + this.confidenceThreshold = Math.max(confidenceThreshold, 0.8) } private parseUnifiedDiff(diff: string): Diff { @@ -196,7 +196,7 @@ Your diff here for (let i = 0; i < hunk.changes.length; i++) { const change = hunk.changes[i] - if (change.type === 'context') { + if (change.type === "context") { if (!currentHunk) { contextBefore.push(change) if (contextBefore.length > MAX_CONTEXT_LINES) { @@ -251,16 +251,20 @@ Your diff here if (!parsedDiff.hunks.length) { return { success: false, - error: "No hunks found in diff. Please ensure your diff includes actual changes and follows the unified diff format." + error: "No hunks found in diff. 
Please ensure your diff includes actual changes and follows the unified diff format.", } } for (const hunk of parsedDiff.hunks) { const contextStr = prepareSearchString(hunk.changes) - const { index: matchPosition, confidence, strategy } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) + const { + index: matchPosition, + confidence, + strategy, + } = findBestMatch(contextStr, result, 0, this.confidenceThreshold) if (confidence < this.confidenceThreshold) { - console.log('Full hunk application failed, trying sub-hunks strategy') + console.log("Full hunk application failed, trying sub-hunks strategy") // Try splitting the hunk into smaller hunks const subHunks = this.splitHunk(hunk) let subHunkSuccess = true @@ -271,7 +275,13 @@ Your diff here const subSearchResult = findBestMatch(subContextStr, subHunkResult, 0, this.confidenceThreshold) if (subSearchResult.confidence >= this.confidenceThreshold) { - const subEditResult = await applyEdit(subHunk, subHunkResult, subSearchResult.index, subSearchResult.confidence, this.confidenceThreshold) + const subEditResult = await applyEdit( + subHunk, + subHunkResult, + subSearchResult.index, + subSearchResult.confidence, + this.confidenceThreshold + ) if (subEditResult.confidence >= this.confidenceThreshold) { subHunkResult = subEditResult.result continue @@ -287,16 +297,20 @@ Your diff here } // If sub-hunks also failed, return the original error - const contextLines = hunk.changes.filter(c => c.type === "context").length + const contextLines = hunk.changes.filter((c) => c.type === "context").length const totalLines = hunk.changes.length const contextRatio = contextLines / totalLines - let errorMsg = `Failed to find a matching location in the file (${Math.floor(confidence * 100)}% confidence, needs ${Math.floor(this.confidenceThreshold * 100)}%)\n\n` + let errorMsg = `Failed to find a matching location in the file (${Math.floor( + confidence * 100 + )}% confidence, needs ${Math.floor(this.confidenceThreshold * 
100)}%)\n\n` errorMsg += "Debug Info:\n" errorMsg += `- Search Strategy Used: ${strategy}\n` - errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor(contextRatio * 100)}%)\n` + errorMsg += `- Context Lines: ${contextLines} out of ${totalLines} total lines (${Math.floor( + contextRatio * 100 + )}%)\n` errorMsg += `- Attempted to split into ${subHunks.length} sub-hunks but still failed\n` - + if (contextRatio < 0.2) { errorMsg += "\nPossible Issues:\n" errorMsg += "- Not enough context lines to uniquely identify the location\n" @@ -308,7 +322,8 @@ Your diff here } else { errorMsg += "\nPossible Issues:\n" errorMsg += "- The diff may be targeting a different version of the file\n" - errorMsg += "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n" + errorMsg += + "- There may be too many changes in a single hunk, try splitting the changes into multiple hunks\n" } if (startLine && endLine) { @@ -323,7 +338,9 @@ Your diff here result = editResult.result } else { // Edit failure - likely due to content mismatch - let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor(editResult.confidence * 100)}% confidence)\n\n` + let errorMsg = `Failed to apply the edit using ${editResult.strategy} strategy (${Math.floor( + editResult.confidence * 100 + )}% confidence)\n\n` errorMsg += "Debug Info:\n" errorMsg += "- The location was found but the content didn't match exactly\n" errorMsg += "- This usually means the file has been modified since the diff was created\n" From fa9e055ea938a2ee046765cb28e282ea23fb077b Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 01:40:42 -0500 Subject: [PATCH 44/47] refactor: streamline edit result validation and format code --- .../new-unified/search-strategies.ts | 143 ++++++++++-------- 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/src/core/diff/strategies/new-unified/search-strategies.ts 
b/src/core/diff/strategies/new-unified/search-strategies.ts index a58a768..7bee5ba 100644 --- a/src/core/diff/strategies/new-unified/search-strategies.ts +++ b/src/core/diff/strategies/new-unified/search-strategies.ts @@ -66,33 +66,29 @@ export function getDMPSimilarity(original: string, modified: string): number { } // Helper function to validate edit results using hunk information -export function validateEditResult(hunk: Hunk, result: string, confidenceThreshold: number): number { - const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk)) +export function validateEditResult(hunk: Hunk, result: string): number { + // Build the expected text from the hunk + const expectedText = hunk.changes + .filter(change => change.type === "context" || change.type === "add") + .map(change => change.indent ? change.indent + change.content : change.content) + .join("\n"); - const originalSkeleton = hunkDeepCopy.changes - .filter((change) => change.type === "context" || change.type === "remove") - .map((change) => change.content) - .join("\n") + // Calculate similarity between the result and expected text + const similarity = getDMPSimilarity(expectedText, result); - const expectedSkeleton = hunkDeepCopy.changes - .filter((change) => change.type === "context" || change.type === "add") - .map((change) => change.content) - .join("\n") + // If the result is unchanged from original, return low confidence + const originalText = hunk.changes + .filter(change => change.type === "context" || change.type === "remove") + .map(change => change.indent ? 
change.indent + change.content : change.content) + .join("\n"); - const originalSimilarity = evaluateSimilarity(originalSkeleton, result) - const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result) - - if (originalSimilarity > 0.97 && expectedSimilarity !== 1) { - if (originalSimilarity === 1) { - return 0.5 - } else { - return 0.8 - } + const originalSimilarity = getDMPSimilarity(originalText, result); + if (originalSimilarity > 0.97 && similarity !== 1) { + return 0.8 * similarity; // Some confidence since we found the right location } - - const multiplier = expectedSimilarity < confidenceThreshold ? expectedSimilarity : 1 - - return multiplier + + // For partial matches, scale the confidence but keep it high if we're close + return similarity; } // Helper function to validate context lines against original content @@ -157,7 +153,9 @@ function combineOverlappingMatches( const usedIndices = new Set() for (const match of matches) { - if (usedIndices.has(match.windowIndex)) {continue} + if (usedIndices.has(match.windowIndex)) { + continue + } // Find overlapping matches const overlapping = matches.filter( @@ -194,7 +192,12 @@ function combineOverlappingMatches( return combinedMatches } -export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { +export function findExactMatch( + searchStr: string, + content: string[], + startIndex: number = 0, + confidenceThreshold: number = 0.97 +): SearchResult { const searchLines = searchStr.split("\n") const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length) const matches: (SearchResult & { windowIndex: number })[] = [] @@ -229,7 +232,12 @@ export function findExactMatch(searchStr: string, content: string[], startIndex: } // String similarity strategy -export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { 
+export function findSimilarityMatch( + searchStr: string, + content: string[], + startIndex: number = 0, + confidenceThreshold: number = 0.97 +): SearchResult { const searchLines = searchStr.split("\n") let bestScore = 0 let bestIndex = -1 @@ -257,7 +265,12 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI } // Levenshtein strategy -export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { +export function findLevenshteinMatch( + searchStr: string, + content: string[], + startIndex: number = 0, + confidenceThreshold: number = 0.97 +): SearchResult { const searchLines = searchStr.split("\n") const candidates = [] @@ -271,7 +284,6 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start const similarity = getDMPSimilarity(searchStr, closestMatch) const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold) const confidence = Math.min(similarity, contextSimilarity) - console.log(searchStr, closestMatch, index, confidence) return { index: confidence === 0 ? -1 : index, confidence: index !== -1 ? 
confidence : 0, @@ -284,99 +296,104 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start // Helper function to identify anchor lines function identifyAnchors(searchStr: string): { first: string | null; last: string | null } { - const searchLines = searchStr.split("\n"); - let first: string | null = null; - let last: string | null = null; + const searchLines = searchStr.split("\n") + let first: string | null = null + let last: string | null = null // Find the first non-empty line for (const line of searchLines) { if (line.trim()) { - first = line; - break; + first = line + break } } // Find the last non-empty line for (let i = searchLines.length - 1; i >= 0; i--) { if (searchLines[i].trim()) { - last = searchLines[i]; - break; + last = searchLines[i] + break } } - return { first, last }; + return { first, last } } // Anchor-based search strategy -export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { - const searchLines = searchStr.split("\n"); - const { first, last } = identifyAnchors(searchStr); +export function findAnchorMatch( + searchStr: string, + content: string[], + startIndex: number = 0, + confidenceThreshold: number = 0.97 +): SearchResult { + const searchLines = searchStr.split("\n") + const { first, last } = identifyAnchors(searchStr) if (!first || !last) { - return { index: -1, confidence: 0, strategy: "anchor" }; + return { index: -1, confidence: 0, strategy: "anchor" } } - let firstIndex = -1; - let lastIndex = -1; + let firstIndex = -1 + let lastIndex = -1 // Check if the first anchor is unique - let firstOccurrences = 0; + let firstOccurrences = 0 for (const contentLine of content) { if (contentLine === first) { - firstOccurrences++; + firstOccurrences++ } } if (firstOccurrences !== 1) { - return { index: -1, confidence: 0, strategy: "anchor" }; + return { index: -1, confidence: 0, strategy: "anchor" } } // Find the first anchor 
for (let i = startIndex; i < content.length; i++) { if (content[i] === first) { - firstIndex = i; - break; + firstIndex = i + break } } // Find the last anchor for (let i = content.length - 1; i >= startIndex; i--) { if (content[i] === last) { - lastIndex = i; - break; + lastIndex = i + break } } if (firstIndex === -1 || lastIndex === -1 || lastIndex <= firstIndex) { - return { index: -1, confidence: 0, strategy: "anchor" }; + return { index: -1, confidence: 0, strategy: "anchor" } } // Validate the context - const expectedContext = searchLines.slice(searchLines.indexOf(first) + 1, searchLines.indexOf(last)).join("\n"); - const actualContext = content.slice(firstIndex + 1, lastIndex).join("\n"); - const contextSimilarity = evaluateSimilarity(expectedContext, actualContext); + const expectedContext = searchLines.slice(searchLines.indexOf(first) + 1, searchLines.indexOf(last)).join("\n") + const actualContext = content.slice(firstIndex + 1, lastIndex).join("\n") + const contextSimilarity = evaluateSimilarity(expectedContext, actualContext) if (contextSimilarity < getAdaptiveThreshold(content.length, confidenceThreshold)) { - return { index: -1, confidence: 0, strategy: "anchor" }; + return { index: -1, confidence: 0, strategy: "anchor" } } - const confidence = 1; + const confidence = 1 return { index: firstIndex, confidence: confidence, strategy: "anchor", - }; + } } // Main search function that tries all strategies -export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult { - const strategies = [ - findExactMatch, - findAnchorMatch, - findSimilarityMatch, - findLevenshteinMatch - ] +export function findBestMatch( + searchStr: string, + content: string[], + startIndex: number = 0, + confidenceThreshold: number = 0.97 +): SearchResult { + const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch] let bestResult: SearchResult = { index: -1, 
confidence: 0, strategy: "none" } From b089e7ff7cb912d479930f11b668f039a5615886 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 01:41:41 -0500 Subject: [PATCH 45/47] style: format code --- .../__tests__/search-strategies.test.ts | 109 +++++++++++------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts b/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts index 24cd3e2..6c4aba5 100644 --- a/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts +++ b/src/core/diff/strategies/new-unified/__tests__/search-strategies.test.ts @@ -1,10 +1,14 @@ -import { findAnchorMatch, findExactMatch, findSimilarityMatch, findLevenshteinMatch } from "../search-strategies"; +import { findAnchorMatch, findExactMatch, findSimilarityMatch, findLevenshteinMatch } from "../search-strategies" -type SearchStrategy = (searchStr: string, content: string[], startIndex?: number) => { - index: number; - confidence: number; - strategy: string; -}; +type SearchStrategy = ( + searchStr: string, + content: string[], + startIndex?: number +) => { + index: number + confidence: number + strategy: string +} const testCases = [ { @@ -134,20 +138,21 @@ const testCases = [ expected: { index: -1, confidence: 0 }, strategies: ["exact", "similarity"], }, -]; - +] describe("findExactMatch", () => { testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => { - if (!strategies?.includes("exact")) {return;} + if (!strategies?.includes("exact")) { + return + } it(name, () => { - const result = findExactMatch(searchStr, content, startIndex); - expect(result.index).toBe(expected.index); - expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); - expect(result.strategy).toMatch(/exact(-overlapping)?/); - }); - }); -}); + const result = findExactMatch(searchStr, content, startIndex) + expect(result.index).toBe(expected.index) + 
expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence) + expect(result.strategy).toMatch(/exact(-overlapping)?/) + }) + }) +}) describe("findAnchorMatch", () => { const anchorTestCases = [ @@ -160,7 +165,14 @@ describe("findAnchorMatch", () => { { name: "should return no match if anchor positions cannot be validated", searchStr: "unique line\ncontext line 1\ncontext line 2", - content: ["different line 1", "different line 2", "different line 3", "another unique line", "context line 1", "context line 2"], + content: [ + "different line 1", + "different line 2", + "different line 3", + "another unique line", + "context line 1", + "context line 2", + ], expected: { index: -1, confidence: 0 }, }, { @@ -179,7 +191,16 @@ describe("findAnchorMatch", () => { { name: "should return a match even if there are more lines in content", searchStr: "unique line\ncontext line 1\ncontext line 2", - content: ["line1", "line2", "unique line", "context line 1", "context line 2", "line 6", "extra line 1", "extra line 2"], + content: [ + "line1", + "line2", + "unique line", + "context line 1", + "context line 2", + "line 6", + "extra line 1", + "extra line 2", + ], expected: { index: 2, confidence: 1 }, }, { @@ -200,38 +221,42 @@ describe("findAnchorMatch", () => { content: ["line1", "line2", "non-unique line", "context line 1", "context line 2", "non-unique line"], expected: { index: -1, confidence: 0 }, }, - ]; + ] anchorTestCases.forEach(({ name, searchStr, content, startIndex, expected }) => { it(name, () => { - const result = findAnchorMatch(searchStr, content, startIndex); - expect(result.index).toBe(expected.index); - expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); - expect(result.strategy).toBe("anchor"); - }); - }); -}); + const result = findAnchorMatch(searchStr, content, startIndex) + expect(result.index).toBe(expected.index) + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence) + expect(result.strategy).toBe("anchor") + 
}) + }) +}) describe("findSimilarityMatch", () => { testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => { - if (!strategies?.includes("similarity")) {return;} + if (!strategies?.includes("similarity")) { + return + } it(name, () => { - const result = findSimilarityMatch(searchStr, content, startIndex); - expect(result.index).toBe(expected.index); - expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); - expect(result.strategy).toBe("similarity"); - }); - }); -}); + const result = findSimilarityMatch(searchStr, content, startIndex) + expect(result.index).toBe(expected.index) + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence) + expect(result.strategy).toBe("similarity") + }) + }) +}) describe("findLevenshteinMatch", () => { testCases.forEach(({ name, searchStr, content, startIndex, expected, strategies }) => { - if (!strategies?.includes("levenshtein")) {return;} + if (!strategies?.includes("levenshtein")) { + return + } it(name, () => { - const result = findLevenshteinMatch(searchStr, content, startIndex); - expect(result.index).toBe(expected.index); - expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence); - expect(result.strategy).toBe("levenshtein"); - }); - }); -}); \ No newline at end of file + const result = findLevenshteinMatch(searchStr, content, startIndex) + expect(result.index).toBe(expected.index) + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence) + expect(result.strategy).toBe("levenshtein") + }) + }) +}) From 5a35d7fb2a0e251835433c2e92a2d0998c63a103 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 02:00:22 -0500 Subject: [PATCH 46/47] test: add comprehensive tests for applyContextMatching and applyDMP functions --- .../__tests__/edit-strategies.test.ts | 259 ++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts diff --git 
a/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts b/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts new file mode 100644 index 0000000..459445f --- /dev/null +++ b/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts @@ -0,0 +1,259 @@ +import { applyContextMatching, applyDMP } from "../edit-strategies" +import { Hunk } from "../types" + +const testCases = [ + { + name: "should return original content if no match is found", + hunk: { + changes: [ + { type: "context", content: "line1" }, + { type: "add", content: "line2" }, + ], + } as Hunk, + content: ["line1", "line3"], + matchPosition: -1, + expected: { + confidence: 0, + result: ["line1", "line3"], + }, + expectedResult: "line1\nline3", + strategies: ["context", "dmp"], + }, + { + name: "should apply a simple add change", + hunk: { + changes: [ + { type: "context", content: "line1" }, + { type: "add", content: "line2" }, + ], + } as Hunk, + content: ["line1", "line3"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["line1", "line2", "line3"], + }, + expectedResult: "line1\nline2\nline3", + strategies: ["context", "dmp"], + }, + { + name: "should apply a simple remove change", + hunk: { + changes: [ + { type: "context", content: "line1" }, + { type: "remove", content: "line2" }, + ], + } as Hunk, + content: ["line1", "line2", "line3"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["line1", "line3"], + }, + expectedResult: "line1\nline3", + strategies: ["context", "dmp"], + }, + { + name: "should apply a simple context change", + hunk: { + changes: [{ type: "context", content: "line1" }], + } as Hunk, + content: ["line1", "line2", "line3"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["line1", "line2", "line3"], + }, + expectedResult: "line1\nline2\nline3", + strategies: ["context", "dmp"], + }, + { + name: "should apply a multi-line add change", + hunk: { + changes: [ + { type: "context", content: 
"line1" }, + { type: "add", content: "line2\nline3" }, + ], + } as Hunk, + content: ["line1", "line4"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["line1", "line2\nline3", "line4"], + }, + expectedResult: "line1\nline2\nline3\nline4", + strategies: ["context", "dmp"], + }, + { + name: "should apply a multi-line remove change", + hunk: { + changes: [ + { type: "context", content: "line1" }, + { type: "remove", content: "line2\nline3" }, + ], + } as Hunk, + content: ["line1", "line2", "line3", "line4"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["line1", "line4"], + }, + expectedResult: "line1\nline4", + strategies: ["context", "dmp"], + }, + { + name: "should apply a multi-line context change", + hunk: { + changes: [ + { type: "context", content: "line1" }, + { type: "context", content: "line2\nline3" }, + ], + } as Hunk, + content: ["line1", "line2", "line3", "line4"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["line1", "line2\nline3", "line4"], + }, + expectedResult: "line1\nline2\nline3\nline4", + strategies: ["context", "dmp"], + }, + { + name: "should apply a change with indentation", + hunk: { + changes: [ + { type: "context", content: " line1" }, + { type: "add", content: " line2" }, + ], + } as Hunk, + content: [" line1", " line3"], + matchPosition: 0, + expected: { + confidence: 1, + result: [" line1", " line2", " line3"], + }, + expectedResult: " line1\n line2\n line3", + strategies: ["context", "dmp"], + }, + { + name: "should apply a change with mixed indentation", + hunk: { + changes: [ + { type: "context", content: "\tline1" }, + { type: "add", content: " line2" }, + ], + } as Hunk, + content: ["\tline1", " line3"], + matchPosition: 0, + expected: { + confidence: 1, + result: ["\tline1", " line2", " line3"], + }, + expectedResult: "\tline1\n line2\n line3", + strategies: ["context", "dmp"], + }, + { + name: "should apply a change with mixed indentation and multi-line", + hunk: { + changes: [ + { 
type: "context", content: " line1" }, + { type: "add", content: "\tline2\n line3" }, + ], + } as Hunk, + content: [" line1", " line4"], + matchPosition: 0, + expected: { + confidence: 1, + result: [" line1", "\tline2\n line3", " line4"], + }, + expectedResult: " line1\n\tline2\n line3\n line4", + strategies: ["context", "dmp"], + }, + { + name: "should apply a complex change with mixed indentation and multi-line", + hunk: { + changes: [ + { type: "context", content: " line1" }, + { type: "remove", content: " line2" }, + { type: "add", content: "\tline3\n line4" }, + { type: "context", content: " line5" }, + ], + } as Hunk, + content: [" line1", " line2", " line5", " line6"], + matchPosition: 0, + expected: { + confidence: 1, + result: [" line1", "\tline3\n line4", " line5", " line6"], + }, + expectedResult: " line1\n\tline3\n line4\n line5\n line6", + strategies: ["context", "dmp"], + }, + { + name: "should apply a complex change with mixed indentation and multi-line and context", + hunk: { + changes: [ + { type: "context", content: " line1" }, + { type: "remove", content: " line2" }, + { type: "add", content: "\tline3\n line4" }, + { type: "context", content: " line5" }, + { type: "context", content: " line6" }, + ], + } as Hunk, + content: [" line1", " line2", " line5", " line6", " line7"], + matchPosition: 0, + expected: { + confidence: 1, + result: [" line1", "\tline3\n line4", " line5", " line6", " line7"], + }, + expectedResult: " line1\n\tline3\n line4\n line5\n line6\n line7", + strategies: ["context", "dmp"], + }, + { + name: "should apply a complex change with mixed indentation and multi-line and context and a different match position", + hunk: { + changes: [ + { type: "context", content: " line1" }, + { type: "remove", content: " line2" }, + { type: "add", content: "\tline3\n line4" }, + { type: "context", content: " line5" }, + { type: "context", content: " line6" }, + ], + } as Hunk, + content: [" line0", " line1", " line2", " line5", " line6", " 
line7"], + matchPosition: 1, + expected: { + confidence: 1, + result: [" line0", " line1", "\tline3\n line4", " line5", " line6", " line7"], + }, + expectedResult: " line0\n line1\n\tline3\n line4\n line5\n line6\n line7", + strategies: ["context", "dmp"], + }, +] + +describe("applyContextMatching", () => { + testCases.forEach(({ name, hunk, content, matchPosition, expected, strategies, expectedResult }) => { + if (!strategies?.includes("context")) { + return + } + it(name, () => { + const result = applyContextMatching(hunk, content, matchPosition) + expect(result.result.join("\n")).toEqual(expectedResult) + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence) + expect(result.strategy).toBe("context") + }) + }) +}) + +describe("applyDMP", () => { + testCases.forEach(({ name, hunk, content, matchPosition, expected, strategies, expectedResult }) => { + if (!strategies?.includes("dmp")) { + return + } + it(name, () => { + const result = applyDMP(hunk, content, matchPosition) + expect(result.result.join("\n")).toEqual(expectedResult) + expect(result.confidence).toBeGreaterThanOrEqual(expected.confidence) + expect(result.strategy).toBe("dmp") + }) + }) +}) From 82a0ffe67ae69d54b64ba809565883f283e3f5c9 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Fri, 17 Jan 2025 02:04:34 -0500 Subject: [PATCH 47/47] feat: add tests for the git fallback strategy --- .../__tests__/edit-strategies.test.ts | 38 ++++++++++++++++++- .../strategies/new-unified/edit-strategies.ts | 2 +- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts b/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts index 459445f..2ed1cc9 100644 --- a/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts +++ b/src/core/diff/strategies/new-unified/__tests__/edit-strategies.test.ts @@ -1,4 +1,4 @@ -import { applyContextMatching, applyDMP } from "../edit-strategies" +import { 
applyContextMatching, applyDMP, applyGitFallback } from "../edit-strategies" import { Hunk } from "../types" const testCases = [ @@ -257,3 +257,39 @@ describe("applyDMP", () => { }) }) }) + +describe("applyGitFallback", () => { + it("should successfully apply changes using git operations", async () => { + const hunk = { + changes: [ + { type: "context", content: "line1", indent: "" }, + { type: "remove", content: "line2", indent: "" }, + { type: "add", content: "new line2", indent: "" }, + { type: "context", content: "line3", indent: "" } + ] + } as Hunk + + const content = ["line1", "line2", "line3"] + const result = await applyGitFallback(hunk, content) + + expect(result.result.join("\n")).toEqual("line1\nnew line2\nline3") + expect(result.confidence).toBe(1) + expect(result.strategy).toBe("git-fallback") + }) + + it("should return original content with 0 confidence when changes cannot be applied", async () => { + const hunk = { + changes: [ + { type: "context", content: "nonexistent", indent: "" }, + { type: "add", content: "new line", indent: "" } + ] + } as Hunk + + const content = ["line1", "line2", "line3"] + const result = await applyGitFallback(hunk, content) + + expect(result.result).toEqual(content) + expect(result.confidence).toBe(0) + expect(result.strategy).toBe("git-fallback") + }) +}) diff --git a/src/core/diff/strategies/new-unified/edit-strategies.ts b/src/core/diff/strategies/new-unified/edit-strategies.ts index e7f3d85..0828c83 100644 --- a/src/core/diff/strategies/new-unified/edit-strategies.ts +++ b/src/core/diff/strategies/new-unified/edit-strategies.ts @@ -155,7 +155,7 @@ export function applyDMP( } // Git fallback strategy that works with full content -async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResult> { +export async function applyGitFallback(hunk: Hunk, content: string[]): Promise<EditResult> { let tmpDir: tmp.DirResult | undefined try {