diff --git a/package-lock.json b/package-lock.json index 9d53e344..fa159b09 100644 --- a/package-lock.json +++ b/package-lock.json @@ -58,7 +58,6 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.27.4.tgz", "integrity": "sha512-bXYxrXFubeYdvB0NhD/NBB3Qi6aZeV20GOWVI47t2dkecCEoneR4NPVcb7abpXDEvejgrUfFtG6vG/zxAKmg+g==", "license": "MIT", - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ -98,7 +97,6 @@ "resolved": "https://registry.npmjs.org/@babel/eslint-parser/-/eslint-parser-7.27.5.tgz", "integrity": "sha512-HLkYQfRICudzcOtjGwkPvGc5nF1b4ljLZh1IRDj50lRZ718NAKVgQpIAUX8bfg6u/yuSKY3L7E0YzIV+OxrB8Q==", "license": "MIT", - "peer": true, "dependencies": { "@nicolo-ribaudo/eslint-scope-5-internals": "5.1.1-v1", "eslint-visitor-keys": "^2.1.0", @@ -1815,7 +1813,6 @@ "resolved": "https://registry.npmjs.org/@salesforce/eslint-plugin-lightning/-/eslint-plugin-lightning-1.0.1.tgz", "integrity": "sha512-oyUVSNUA0WkkQr3BRtcAYhYotzIpqZtfMpUVMhROPN8YjDGu6CzCoC3/1i4ySIevgmH3J83KypwoqvRfoQf8Ww==", "license": "MIT", - "peer": true, "peerDependencies": { "eslint": "^7 || ^8" } @@ -2040,18 +2037,16 @@ "license": "MIT" }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.47.0.tgz", - "integrity": "sha512-fe0rz9WJQ5t2iaLfdbDc9T80GJy0AeO453q8C3YCilnGozvOyCG5t+EZtg7j7D88+c3FipfP/x+wzGnh1xp8ZA==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.50.0.tgz", + "integrity": "sha512-O7QnmOXYKVtPrfYzMolrCTfkezCJS9+ljLdKW/+DCvRsc3UAz+sbH6Xcsv7p30+0OwUbeWfUDAQE0vpabZ3QLg==", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.47.0", - "@typescript-eslint/type-utils": "8.47.0", - "@typescript-eslint/utils": "8.47.0", - "@typescript-eslint/visitor-keys": "8.47.0", - "graphemer": "^1.4.0", + "@typescript-eslint/scope-manager": "8.50.0", + "@typescript-eslint/type-utils": "8.50.0", + "@typescript-eslint/utils": "8.50.0", + "@typescript-eslint/visitor-keys": "8.50.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", "ts-api-utils": "^2.1.0" @@ -2064,22 +2059,21 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.47.0", + "@typescript-eslint/parser": "^8.50.0", "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/parser": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.47.0.tgz", - "integrity": "sha512-lJi3PfxVmo0AkEY93ecfN+r8SofEqZNGByvHAI3GBLrvt1Cw6H5k1IM02nSzu0RfUafr2EvFSw0wAsZgubNplQ==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.50.0.tgz", + "integrity": "sha512-6/cmF2piao+f6wSxUsJLZjck7OQsYyRtcOZS02k7XINSNlz93v6emM8WutDQSXnroG2xwYlEVHJI+cPA7CPM3Q==", "license": "MIT", - "peer": true, "dependencies": { - "@typescript-eslint/scope-manager": "8.47.0", - "@typescript-eslint/types": "8.47.0", - "@typescript-eslint/typescript-estree": "8.47.0", - "@typescript-eslint/visitor-keys": "8.47.0", + "@typescript-eslint/scope-manager": "8.50.0", + "@typescript-eslint/types": "8.50.0", + "@typescript-eslint/typescript-estree": "8.50.0", + "@typescript-eslint/visitor-keys": "8.50.0", "debug": "^4.3.4" }, "engines": { @@ -2095,13 +2089,13 @@ } }, 
"node_modules/@typescript-eslint/project-service": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.47.0.tgz", - "integrity": "sha512-2X4BX8hUeB5JcA1TQJ7GjcgulXQ+5UkNb0DL8gHsHUHdFoiCTJoYLTpib3LtSDPZsRET5ygN4qqIWrHyYIKERA==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.50.0.tgz", + "integrity": "sha512-Cg/nQcL1BcoTijEWyx4mkVC56r8dj44bFDvBdygifuS20f3OZCHmFbjF34DPSi07kwlFvqfv/xOLnJ5DquxSGQ==", "license": "MIT", "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.47.0", - "@typescript-eslint/types": "^8.47.0", + "@typescript-eslint/tsconfig-utils": "^8.50.0", + "@typescript-eslint/types": "^8.50.0", "debug": "^4.3.4" }, "engines": { @@ -2116,13 +2110,13 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.47.0.tgz", - "integrity": "sha512-a0TTJk4HXMkfpFkL9/WaGTNuv7JWfFTQFJd6zS9dVAjKsojmv9HT55xzbEpnZoY+VUb+YXLMp+ihMLz/UlZfDg==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.50.0.tgz", + "integrity": "sha512-xCwfuCZjhIqy7+HKxBLrDVT5q/iq7XBVBXLn57RTIIpelLtEIZHXAF/Upa3+gaCpeV1NNS5Z9A+ID6jn50VD4A==", "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.47.0", - "@typescript-eslint/visitor-keys": "8.47.0" + "@typescript-eslint/types": "8.50.0", + "@typescript-eslint/visitor-keys": "8.50.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2133,9 +2127,9 @@ } }, "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.47.0.tgz", - "integrity": "sha512-ybUAvjy4ZCL11uryalkKxuT3w3sXJAuWhOoGS3T/Wu+iUu1tGJmk5ytSY8gbdACNARmcYEB0COksD2j6hfGK2g==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.50.0.tgz", + "integrity": "sha512-vxd3G/ybKTSlm31MOA96gqvrRGv9RJ7LGtZCn2Vrc5htA0zCDvcMqUkifcjrWNNKXHUU3WCkYOzzVSFBd0wa2w==", "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2149,14 +2143,14 @@ } }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.47.0.tgz", - "integrity": "sha512-QC9RiCmZ2HmIdCEvhd1aJELBlD93ErziOXXlHEZyuBo3tBiAZieya0HLIxp+DoDWlsQqDawyKuNEhORyku+P8A==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.50.0.tgz", + "integrity": "sha512-7OciHT2lKCewR0mFoBrvZJ4AXTMe/sYOe87289WAViOocEmDjjv8MvIOT2XESuKj9jp8u3SZYUSh89QA4S1kQw==", "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.47.0", - "@typescript-eslint/typescript-estree": "8.47.0", - "@typescript-eslint/utils": "8.47.0", + "@typescript-eslint/types": "8.50.0", + "@typescript-eslint/typescript-estree": "8.50.0", + "@typescript-eslint/utils": "8.50.0", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, @@ -2173,9 +2167,9 @@ } }, "node_modules/@typescript-eslint/types": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.47.0.tgz", - "integrity": "sha512-nHAE6bMKsizhA2uuYZbEbmp5z2UpffNrPEqiKIeN7VsV6UY/roxanWfoRrf6x/k9+Obf+GQdkm0nPU+vnMXo9A==", + "version": "8.50.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/types/-/types-8.50.0.tgz", + "integrity": "sha512-iX1mgmGrXdANhhITbpp2QQM2fGehBse9LbTf0sidWK6yg/NE+uhV5dfU1g6EYPlcReYmkE9QLPq/2irKAmtS9w==", "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2186,20 +2180,19 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.47.0.tgz", - "integrity": "sha512-k6ti9UepJf5NpzCjH31hQNLHQWupTRPhZ+KFF8WtTuTpy7uHPfeg2NM7cP27aCGajoEplxJDFVCEm9TGPYyiVg==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.50.0.tgz", + "integrity": "sha512-W7SVAGBR/IX7zm1t70Yujpbk+zdPq/u4soeFSknWFdXIFuWsBGBOUu/Tn/I6KHSKvSh91OiMuaSnYp3mtPt5IQ==", "license": "MIT", "dependencies": { - "@typescript-eslint/project-service": "8.47.0", - "@typescript-eslint/tsconfig-utils": "8.47.0", - "@typescript-eslint/types": "8.47.0", - "@typescript-eslint/visitor-keys": "8.47.0", + "@typescript-eslint/project-service": "8.50.0", + "@typescript-eslint/tsconfig-utils": "8.50.0", + "@typescript-eslint/types": "8.50.0", + "@typescript-eslint/visitor-keys": "8.50.0", "debug": "^4.3.4", - "fast-glob": "^3.3.2", - "is-glob": "^4.0.3", "minimatch": "^9.0.4", "semver": "^7.6.0", + "tinyglobby": "^0.2.15", "ts-api-utils": "^2.1.0" }, "engines": { @@ -2214,15 +2207,15 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.47.0.tgz", - "integrity": "sha512-g7XrNf25iL4TJOiPqatNuaChyqt49a/onq5YsJ9+hXeugK+41LVg7AxikMfM02PC6jbNtZLCJj6AUcQXJS/jGQ==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.50.0.tgz", + "integrity": "sha512-87KgUXET09CRjGCi2Ejxy3PULXna63/bMYv72tCAlDJC3Yqwln0HiFJ3VJMst2+mEtNtZu5oFvX4qJGjKsnAgg==", "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.47.0", - "@typescript-eslint/types": "8.47.0", - "@typescript-eslint/typescript-estree": "8.47.0" + "@typescript-eslint/scope-manager": "8.50.0", + "@typescript-eslint/types": "8.50.0", + "@typescript-eslint/typescript-estree": "8.50.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2237,12 +2230,12 @@ } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.47.0.tgz", - "integrity": "sha512-SIV3/6eftCy1bNzCQoPmbWsRLujS8t5iDIZ4spZOBHqrM+yfX2ogg8Tt3PDTAVKw3sSCiUgg30uOAvK2r9zGjQ==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.50.0.tgz", + "integrity": "sha512-Xzmnb58+Db78gT/CCj/PVCvK+zxbnsw6F+O1oheYszJbBSdEjVhQi3C/Xttzxgi/GLmpvOggRs1RFpiJ8+c34Q==", "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.47.0", + "@typescript-eslint/types": "8.50.0", "eslint-visitor-keys": "^4.2.1" }, "engines": { @@ -2545,7 +2538,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2785,9 +2777,9 @@ } }, "node_modules/astronomical": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/astronomical/-/astronomical-2.0.1.tgz", - "integrity": 
"sha512-fBpcshxuu2x79LgFFl4qYWjw86NMkUgQeSA8mkw/XG6b5ccTD1EvdGhuRs9AZ0Njk3AcEbSNgi6K2qR4SOAwDQ==", + "version": "2.1.1-rc.1", + "resolved": "https://registry.npmjs.org/astronomical/-/astronomical-2.1.1-rc.1.tgz", + "integrity": "sha512-V7TftgQgscImaI4+PftXau5SaFbQ9IZ9otYxHjl/tqLEQpdpSBCThYL3Ca42rZ/8/044MF2Ki9DnyNIt/fUDCw==", "license": "Apache-2.0", "dependencies": { "meriyah": "^6.0.3" @@ -2987,7 +2979,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -3780,7 +3771,6 @@ "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", "deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -3882,7 +3872,6 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.30.0.tgz", "integrity": "sha512-/mHNE9jINJfiD2EKkg1BKyPyUk4zdnT54YgbOgfjSakWT5oyX/qQLVNTkehyfpcMxZXMy1zyonZ2v7hZTX43Yw==", "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.8", @@ -3963,11 +3952,10 @@ } }, "node_modules/eslint-plugin-jest": { - "version": "29.2.1", - "resolved": "https://registry.npmjs.org/eslint-plugin-jest/-/eslint-plugin-jest-29.2.1.tgz", - "integrity": "sha512-0WLIezrIxitUGbjMIGwznVzSIp0uFJV0PZ2fiSvpyVcxe+QMXKUt7MRhUpzdbctnnLwiOTOFkACplgB0wAglFw==", + "version": "29.5.0", + "resolved": "https://registry.npmjs.org/eslint-plugin-jest/-/eslint-plugin-jest-29.5.0.tgz", + "integrity": "sha512-DAi9H8xN/TUuNOt+xDP1RqpCJLsSxBb5u1zXSpCyp0VAWGL8MBAg5t7/Dk+76iX7d1LhWu4DDH77IQNUolLDyg==", "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/utils": "^8.0.0" }, @@ -5469,18 +5457,6 @@ "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==", "license": "MIT" }, - "node_modules/isbinaryfile": { - "version": "5.0.7", - "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.7.tgz", - "integrity": "sha512-gnWD14Jh3FzS3CPhF0AxNOJ8CxqeblPTADzI38r0wt8ZyQl5edpy75myt08EG2oKvpyiqSqsx+Wkz9vtkbTqYQ==", - "license": "MIT", - "engines": { - "node": ">= 18.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/gjtorikian/" - } - }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -5580,7 +5556,6 @@ "integrity": "sha512-F26gjC0yWN8uAA5m5Ss8ZQf5nDHWGlN/xWZIh8S5SRbsEKBovwZhxGd6LJlbZYxBgCYOtreSUyb8hpXyGC5O4A==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "@jest/core": "30.2.0", "@jest/types": "30.2.0", @@ -7229,13 +7204,13 @@ } }, "node_modules/retire": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/retire/-/retire-5.3.0.tgz", - "integrity": "sha512-NHstwLMZQCCDW3iGcF1fuAwOlaYDPCcUqu1RgSw47aypPSDpIYWsDitK36HvQdMVNK3rUi14mYYUagCe7uumaQ==", + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/retire/-/retire-5.4.0.tgz", + "integrity": "sha512-vLz3Fc2zPqgUjcR01eRoPM/Hnh/+37zLAAfEs0B8mJK1dfUYZSQ3AfNzDDtYir8Qrn0ukvU5DhMbGIh/BVLyCg==", "license": "Apache-2.0", "dependencies": { "ansi-colors": "^4.1.1", - "astronomical": "^2.0.1", + "astronomical": "^2.1.1-rc.1", "commander": "^10.0.1", "proxy-agent": "^6.4.0", "uuid": "^9.0.1", @@ -8034,6 +8009,51 @@ "integrity": 
"sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", "license": "MIT" }, + "node_modules/tinyglobby": { + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinyglobby/node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/tinyglobby/node_modules/picomatch": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -8066,9 +8086,9 @@ } }, "node_modules/ts-jest": { - "version": "29.4.5", - "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.4.5.tgz", - "integrity": "sha512-HO3GyiWn2qvTQA4kTgjDcXiMwYQt68a1Y8+JuLRVpdIzm+UOLSHgl/XqR4c6nzJkq5rOkjc02O2I7P7l/Yof0Q==", + "version": "29.4.6", + "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.4.6.tgz", + "integrity": "sha512-fSpWtOO/1AjSNQguk43hb/JCo16oJDnMJf3CdEGNkqsEX3t0KX96xvyX1D7PfLCpVoKu4MfVrqUkFyblYoY4lA==", "dev": true, "license": "MIT", "dependencies": { @@ -8284,7 +8304,6 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -8294,15 +8313,15 @@ } }, "node_modules/typescript-eslint": { - "version": "8.47.0", - "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.47.0.tgz", - "integrity": "sha512-Lwe8i2XQ3WoMjua/r1PHrCTpkubPYJCAfOurtn+mtTzqB6jNd+14n9UN1bJ4s3F49x9ixAm0FLflB/JzQ57M8Q==", + "version": "8.50.0", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.50.0.tgz", + "integrity": "sha512-Q1/6yNUmCpH94fbgMUMg2/BSAr/6U7GBk61kZTv1/asghQOWOjTlp9K8mixS5NcJmm2creY+UFfGeW/+OcA64A==", "license": "MIT", "dependencies": { - "@typescript-eslint/eslint-plugin": "8.47.0", - "@typescript-eslint/parser": "8.47.0", - "@typescript-eslint/typescript-estree": "8.47.0", - "@typescript-eslint/utils": "8.47.0" + "@typescript-eslint/eslint-plugin": "8.50.0", + "@typescript-eslint/parser": "8.50.0", + "@typescript-eslint/typescript-estree": "8.50.0", + "@typescript-eslint/utils": "8.50.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -8845,7 +8864,7 @@ "version": "0.41.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": 
"^20.0.0", "csv-stringify": "^6.6.0", "js-yaml": "^4.1.1", @@ -8853,18 +8872,18 @@ "xmlbuilder": "^15.1.1" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", "@types/js-yaml": "^4.0.9", "@types/sarif": "^2.1.7", "@types/semver": "^7.7.1", "cross-env": "^10.1.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -8895,9 +8914,9 @@ } }, "packages/code-analyzer-core/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -8919,9 +8938,9 @@ } }, "packages/code-analyzer-core/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -8931,7 +8950,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -9101,20 +9120,20 @@ }, "packages/code-analyzer-engine-api": { "name": "@salesforce/code-analyzer-engine-api", - "version": "0.32.0", + "version": "0.33.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { "@types/node": "^20.0.0" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -9145,9 +9164,9 @@ } }, "packages/code-analyzer-engine-api/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -9169,9 +9188,9 @@ } }, "packages/code-analyzer-engine-api/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": 
"sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -9181,7 +9200,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -9351,27 +9370,27 @@ }, "packages/code-analyzer-eslint-engine": { "name": "@salesforce/code-analyzer-eslint-engine", - "version": "0.37.1", + "version": "0.38.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@lwc/eslint-plugin-lwc": "^3.3.0", "@lwc/eslint-plugin-lwc-platform": "^6.3.0", "@salesforce-ux/eslint-plugin-slds": "^1.0.7", - "@salesforce/code-analyzer-engine-api": "0.32.0", - "@salesforce/code-analyzer-eslint8-engine": "0.9.0", - "@salesforce/eslint-config-lwc": "^4.1.1", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", + "@salesforce/code-analyzer-eslint8-engine": "0.10.0-SNAPSHOT", + "@salesforce/eslint-config-lwc": "^4.1.2", "@salesforce/eslint-plugin-lightning": "^2.0.0", "@types/node": "^20.0.0", - "@typescript-eslint/eslint-plugin": "^8.47.0", - "@typescript-eslint/parser": "^8.47.0", - "eslint": "^9.39.1", + "@typescript-eslint/eslint-plugin": "^8.50.0", + "@typescript-eslint/parser": "^8.50.0", + "eslint": "^9.39.2", "eslint-plugin-import": "^2.32.0", - "eslint-plugin-jest": "^29.2.0", + "eslint-plugin-jest": "^29.5.0", "globals": "^16.5.0", "semver": "^7.7.3", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "devDependencies": { "@types/jest": "^30.0.0", @@ -9379,8 +9398,8 @@ "@types/unzipper": "^0.10.11", "cross-env": "^10.1.0", "jest": "^30.2.0", - "rimraf": "^6.0.1", - "ts-jest": "^29.4.5", + "rimraf": "^6.1.2", + "ts-jest": "^29.4.6", "unzipper": "^0.12.3" }, "engines": { @@ -9431,7 +9450,6 @@ "resolved": "https://registry.npmjs.org/@babel/eslint-parser/-/eslint-parser-7.25.9.tgz", "integrity": "sha512-5UXfgpK0j0Xr/xIdgdLEhOFxaDZ0bRPWJJchRpqOSur/3rZoPbqqki5mm0p4NE2cs28krBEiSM2MB7//afRSQQ==", "license": "MIT", - "peer": true, "dependencies": { "@nicolo-ribaudo/eslint-scope-5-internals": "5.1.1-v1", "eslint-visitor-keys": "^2.1.0", @@ -9512,9 +9530,9 @@ } }, "packages/code-analyzer-eslint-engine/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -9582,9 +9600,9 @@ } }, "packages/code-analyzer-eslint-engine/node_modules/@salesforce/eslint-config-lwc": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/@salesforce/eslint-config-lwc/-/eslint-config-lwc-4.1.1.tgz", - "integrity": "sha512-twgNRNnFgD0rwIgtROQEJ0m9QQhEAVuQ4DgO8fmSPZiKlsJsLduULeLZtVzok6qSWLY3ZImetgMBPF7mv+I1kw==", + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/@salesforce/eslint-config-lwc/-/eslint-config-lwc-4.1.2.tgz", + "integrity": "sha512-GMoXOiqdSLYYuup8i6HWJYIScTGyOdgJCXXNzWb76Xa0RehqViigVAroTYB5OKvp4F5T7OLnGq06dARduphauQ==", "license": "MIT", "dependencies": { "@babel/core": 
"~7.26.0", @@ -9622,7 +9640,6 @@ "resolved": "https://registry.npmjs.org/@salesforce/eslint-plugin-lightning/-/eslint-plugin-lightning-2.0.0.tgz", "integrity": "sha512-lC3GL2j6B2wAGeTFWT0h47BFg+0R7naqqlQW+ANvNSaIC/qEB+tNSRcdAZ8DRTojsI3GRdpgq3FTB1llbrFBng==", "license": "MIT", - "peer": true, "engines": { "node": ">=10.0.0" }, @@ -9643,11 +9660,10 @@ } }, "packages/code-analyzer-eslint-engine/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -9655,7 +9671,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -9707,7 +9723,6 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.32.0.tgz", "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -9900,7 +9915,7 @@ }, "packages/code-analyzer-eslint8-engine": { "name": "@salesforce/code-analyzer-eslint8-engine", - "version": "0.9.0", + "version": "0.10.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { "@babel/core": "7.27.4", @@ -9908,7 +9923,7 @@ "@eslint/js": "8.57.1", "@lwc/eslint-plugin-lwc": "2.2.0", "@lwc/eslint-plugin-lwc-platform": "5.2.0", - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@salesforce/eslint-config-lwc": "3.7.2", "@salesforce/eslint-plugin-lightning": "1.0.1", "@types/node": "^20.0.0", @@ -9927,7 +9942,7 @@ "cross-env": "^10.1.0", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "unzipper": "^0.12.3" }, "engines": { @@ -10027,7 +10042,6 @@ "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.30.1.tgz", "integrity": "sha512-v+VWphxMjn+1t48/jO4t950D6KR8JaJuNXzi33Ve6P8sEmPr5k6CEXjdGwT6+LodVnEa91EQCtwjWNUCPweo+Q==", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.10.0", "@typescript-eslint/scope-manager": "8.30.1", @@ -10103,7 +10117,6 @@ "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.30.1.tgz", "integrity": "sha512-H+vqmWwT5xoNrXqWs/fesmssOW70gxFlgcMlYcBaWNPIEWDgLa4W9nkSPmhuOgLnXq9QYgkZ31fhDyLhleCsAg==", "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.30.1", "@typescript-eslint/types": "8.30.1", @@ -10213,7 +10226,6 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-jest/-/eslint-plugin-jest-28.10.0.tgz", "integrity": "sha512-hyMWUxkBH99HpXT3p8hc7REbEZK3D+nk8vHXGgpB+XXsi0gO4PxMSP+pjfUzb67GnV9yawV9a53eUmcde1CCZA==", "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/utils": "^6.0.0 || ^7.0.0 || ^8.0.0" }, @@ -10248,7 +10260,6 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", "integrity": 
"sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -10304,23 +10315,23 @@ }, "packages/code-analyzer-flow-engine": { "name": "@salesforce/code-analyzer-flow-engine", - "version": "0.31.1", + "version": "0.32.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", "@types/semver": "^7.7.1", - "semver": "^7.7.2" + "semver": "^7.7.3" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -10351,9 +10362,9 @@ } }, "packages/code-analyzer-flow-engine/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -10375,9 +10386,9 @@ } }, "packages/code-analyzer-flow-engine/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -10387,7 +10398,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -10557,23 +10568,23 @@ }, "packages/code-analyzer-pmd-engine": { "name": "@salesforce/code-analyzer-pmd-engine", - "version": "0.33.0", + "version": "0.34.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", "@types/semver": "^7.7.1", - "semver": "^7.7.2" + "semver": "^7.7.3" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -10604,9 +10615,9 @@ } }, "packages/code-analyzer-pmd-engine/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + 
"integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -10628,9 +10639,9 @@ } }, "packages/code-analyzer-pmd-engine/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -10640,7 +10651,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -10810,23 +10821,23 @@ }, "packages/code-analyzer-regex-engine": { "name": "@salesforce/code-analyzer-regex-engine", - "version": "0.30.0", + "version": "0.31.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", - "isbinaryfile": "^5.0.7", + "isbinaryfile": "^4.0.10", "p-limit": "^3.1.0" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -10857,9 +10868,9 @@ } }, "packages/code-analyzer-regex-engine/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -10881,9 +10892,9 @@ } }, "packages/code-analyzer-regex-engine/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -10893,7 +10904,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -11048,6 +11059,18 @@ "node": ">= 4" } }, + "packages/code-analyzer-regex-engine/node_modules/isbinaryfile": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-4.0.10.tgz", + "integrity": "sha512-iHrqe5shvBUcFbmZq9zOQHBoeOhZJu6RQGrDpBgenUm/Am+F3JM2MgQj+rK3Z601fzrL5gLZWtAPH2OBaSVcyw==", + "license": "MIT", + "engines": { + "node": ">= 8.0.0" + 
}, + "funding": { + "url": "https://github.com/sponsors/gjtorikian/" + } + }, "packages/code-analyzer-regex-engine/node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -11063,24 +11086,24 @@ }, "packages/code-analyzer-retirejs-engine": { "name": "@salesforce/code-analyzer-retirejs-engine", - "version": "0.29.0", + "version": "0.30.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", - "isbinaryfile": "^5.0.7", + "isbinaryfile": "^4.0.10", "node-stream-zip": "^1.15.0", - "retire": "^5.3.0" + "retire": "^5.4.0" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -11111,9 +11134,9 @@ } }, "packages/code-analyzer-retirejs-engine/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -11135,9 +11158,9 @@ } }, "packages/code-analyzer-retirejs-engine/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -11147,7 +11170,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -11302,6 +11325,18 @@ "node": ">= 4" } }, + "packages/code-analyzer-retirejs-engine/node_modules/isbinaryfile": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-4.0.10.tgz", + "integrity": "sha512-iHrqe5shvBUcFbmZq9zOQHBoeOhZJu6RQGrDpBgenUm/Am+F3JM2MgQj+rK3Z601fzrL5gLZWtAPH2OBaSVcyw==", + "license": "MIT", + "engines": { + "node": ">= 8.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/gjtorikian/" + } + }, "packages/code-analyzer-retirejs-engine/node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -11317,23 +11352,23 @@ }, "packages/code-analyzer-sfge-engine": { "name": "@salesforce/code-analyzer-sfge-engine", - "version": "0.15.0", + "version": "0.16.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", - "semver": "^7.7.2" + "semver": "^7.7.3" }, "devDependencies": { - "@eslint/js": 
"^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", "@types/semver": "^7.7.1", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -11364,9 +11399,9 @@ } }, "packages/code-analyzer-sfge-engine/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -11388,9 +11423,9 @@ } }, "packages/code-analyzer-sfge-engine/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -11400,7 +11435,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", @@ -11573,18 +11608,18 @@ "version": "0.1.0-SNAPSHOT", "license": "BSD-3-Clause", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" @@ -11615,9 +11650,9 @@ } }, "packages/ENGINE-TEMPLATE/node_modules/@eslint/js": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", - "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", + "integrity": "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==", "dev": true, "license": "MIT", "engines": { @@ -11639,9 +11674,9 @@ } }, "packages/ENGINE-TEMPLATE/node_modules/eslint": { - "version": "9.39.1", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", - "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", + "version": "9.39.2", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.2.tgz", + "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==", "dev": true, "license": "MIT", "dependencies": { @@ -11651,7 +11686,7 @@ "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": 
"9.39.1", + "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", diff --git a/packages/ENGINE-TEMPLATE/package.json b/packages/ENGINE-TEMPLATE/package.json index 2542adb6..e362e9fa 100644 --- a/packages/ENGINE-TEMPLATE/package.json +++ b/packages/ENGINE-TEMPLATE/package.json @@ -14,17 +14,17 @@ "types": "dist/index.d.ts", "dependencies": { "@types/node": "^20.0.0", - "@salesforce/code-analyzer-engine-api": "0.32.0" + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-core/package.json b/packages/code-analyzer-core/package.json index a513d0d0..2db421a9 100644 --- a/packages/code-analyzer-core/package.json +++ b/packages/code-analyzer-core/package.json @@ -16,7 +16,7 @@ }, "types": "dist/index.d.ts", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", "csv-stringify": "^6.6.0", "js-yaml": "^4.1.1", @@ -24,18 +24,18 @@ "xmlbuilder": "^15.1.1" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/js-yaml": "^4.0.9", "@types/jest": "^30.0.0", "@types/sarif": "^2.1.7", "@types/semver": "^7.7.1", "cross-env": "^10.1.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-engine-api/package.json b/packages/code-analyzer-engine-api/package.json index 3561a71c..46057098 100644 --- a/packages/code-analyzer-engine-api/package.json +++ b/packages/code-analyzer-engine-api/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-engine-api", "description": "Engine API Package for the Salesforce Code Analyzer", - "version": "0.32.0", + "version": "0.33.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -19,14 +19,14 @@ "@types/node": "^20.0.0" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-eslint-engine/package.json b/packages/code-analyzer-eslint-engine/package.json index b2770e65..9ff3743e 100644 --- a/packages/code-analyzer-eslint-engine/package.json +++ b/packages/code-analyzer-eslint-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-eslint-engine", "description": "Plugin package that adds 'eslint' as an engine into Salesforce Code Analyzer", - "version": "0.37.1", + "version": "0.38.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": 
"https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -13,24 +13,24 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "dependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@lwc/eslint-plugin-lwc": "^3.3.0", "@lwc/eslint-plugin-lwc-platform": "^6.3.0", "@salesforce-ux/eslint-plugin-slds": "^1.0.7", - "@salesforce/code-analyzer-engine-api": "0.32.0", - "@salesforce/code-analyzer-eslint8-engine": "0.9.0", - "@salesforce/eslint-config-lwc": "^4.1.1", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", + "@salesforce/code-analyzer-eslint8-engine": "0.10.0-SNAPSHOT", + "@salesforce/eslint-config-lwc": "^4.1.2", "@salesforce/eslint-plugin-lightning": "^2.0.0", "@types/node": "^20.0.0", - "@typescript-eslint/eslint-plugin": "^8.47.0", - "@typescript-eslint/parser": "^8.47.0", - "eslint": "^9.39.1", + "@typescript-eslint/eslint-plugin": "^8.50.0", + "@typescript-eslint/parser": "^8.50.0", + "eslint": "^9.39.2", "eslint-plugin-import": "^2.32.0", - "eslint-plugin-jest": "^29.2.0", + "eslint-plugin-jest": "^29.5.0", "globals": "^16.5.0", "semver": "^7.7.3", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "devDependencies": { "@types/jest": "^30.0.0", @@ -38,8 +38,8 @@ "@types/unzipper": "^0.10.11", "cross-env": "^10.1.0", "jest": "^30.2.0", - "rimraf": "^6.0.1", - "ts-jest": "^29.4.5", + "rimraf": "^6.1.2", + "ts-jest": "^29.4.6", "unzipper": "^0.12.3" }, "engines": { diff --git a/packages/code-analyzer-eslint-engine/src/rule-mappings.ts b/packages/code-analyzer-eslint-engine/src/rule-mappings.ts index 390aa18b..fd524b9a 100644 --- a/packages/code-analyzer-eslint-engine/src/rule-mappings.ts +++ b/packages/code-analyzer-eslint-engine/src/rule-mappings.ts @@ -1476,6 +1476,10 @@ export const RULE_MAPPINGS: Record bool: - """ - Determine whether this is a supported function that may skip propagation + """Determine whether this is a supported function that may skip propagation. + Args: - function_name: + function_name: Name of the function to check. - Returns: True if supported function + Returns: + True if the function is supported and may skip propagation. """ msg = function_name.upper() @@ -63,13 +65,13 @@ def _should_propagate_by_arg(function_name: str | None, arg_num: int, last_arg: """Determine transfer policy for arguments passed to functions. Args: - function_name: name of function (must be uppercase) - arg_num: position of argument (starting at 1) - last_arg: True if this is the last argument + function_name: Name of function (must be uppercase). + arg_num: Position of argument (starting at 1). + last_arg: True if this is the last argument. Returns: True if argument is propagated, False otherwise. - Note that for unrecognized functions we default to True + Note that for unrecognized functions we default to True. """ # TODO: should we be stricter and raise an argument error if too many arguments are passed? if function_name is None: @@ -89,7 +91,7 @@ def _should_propagate_by_arg(function_name: str | None, arg_num: int, last_arg: # must have at least 1 CASE if arg_num == 1: return False - if last_arg is True: + if last_arg: return True if arg_num % 2 == 0: return False @@ -118,29 +120,38 @@ def _should_propagate_by_arg(function_name: str | None, arg_num: int, last_arg: # parse_utils.parse_expression(txt) def _strip_quoted(msg: str) -> str: - """ - Replaces quoted values with empty strings - Args: - msg: string to be processed + """Replace quoted values with empty strings. 
- Returns: message where all quoted strings are empty + Args: + msg: String to be processed. + Returns: + Message where all quoted strings are replaced with empty strings. """ no_doubles = re.sub(double_re, '""', msg) return re.sub(single_re, '\'\'', no_doubles) def _strip_whitespace(msg: str) -> str: + """Remove all whitespace from a string. + + Args: + msg: String to process. + + Returns: + String with all whitespace removed. + """ return re.sub(r'\s+', '', msg) def extract_expression(txt: str) -> list[str]: - """ + """Extract variable references from an expression using regex. + Args: - txt: expression in which merge-fields may be present + txt: Expression in which merge-fields may be present. Returns: - List of elementRef names (empty list if no matches) + List of elementRef names (empty list if no matches). """ accum = [] res = re.finditer(reg, txt) @@ -152,14 +163,15 @@ def parse_expression(expression: str) -> list[str]: - """Main entry point for parsing expressions. Do not use this on templates - in which expressions are mixed with text or html. + """Main entry point for parsing expressions. + + Do not use this on templates in which expressions are mixed with text or HTML. Args: - expression: expression to be evaluated. + expression: Expression to be evaluated. Returns: - list of variables that data influence the expression + List of variables that data-influence the expression. """ # TODO: might as well extract variables directly here and save the grep try: @@ -171,13 +183,13 @@ def process_expression(expression: str) -> list[str]: - """Process expression to return list of data influencing variables + """Process expression to return list of data-influencing variables. Args: - expression: expression to be processed + expression: Expression to be processed. Returns: - list of variable names that data influence the expression + List of variable names that data-influence the expression. """ expr = _strip_whitespace(expression) # Handle degenerate cases @@ -213,12 +225,13 @@ def _extract_results_from_context(context: Context) -> list[str]: - """returns list of variables names from context + """Extract list of variable names from context. Args: - context: + context: Parsing context containing processed arguments. - Returns: list of variable names (de-duped) + Returns: + List of variable names (de-duplicated). """ res_list = util.safe_list_add(context.prev_arguments_text_array, context.current_argument_text_array) @@ -231,14 +244,14 @@ def _update_parent_context(parent_ctx: Context, child_ctx: Context) -> Context: - """Updates the parent context after child context has finished processing + """Update the parent context after child context has finished processing. Args: - parent_ctx: parent context - child_ctx: child context - - Returns: parent_ctx + parent_ctx: Parent context to update. + child_ctx: Child context that has finished processing. + Returns: + Updated parent context.
""" # Add the processed segments to the parent parent_ctx.current_argument_text_array = util.safe_list_add( @@ -260,16 +273,18 @@ def _update_parent_context(parent_ctx: Context, child_ctx: Context) -> Context: def _parse_function(ctx: Context) -> Context | None: - """Enter this function after the first parenthesis - and call with function name and skip policy in context + """Parse a function call within an expression. + + Enter this function after the first parenthesis + and call with function name and skip policy in context. Args: - ctx: function parsing context + ctx: Function parsing context. Returns: - None if the entire function has completed processing + None if the entire function has completed processing, or a new context if processing was interrupted with a function call - in which case it resumes in current position + in which case it resumes in current position. """ if ctx.current_position + 1 == len(ctx.expression): # we are done processing, so collect arguments @@ -294,7 +309,7 @@ def _parse_function(ctx: Context) -> Context | None: if i > 0: ctx.current_position += 1 - if empty_call is True: + if empty_call: # We're in a FOO() situation and want to skip over it empty_call = False continue @@ -340,20 +355,20 @@ def _parse_function(ctx: Context) -> Context | None: return None else: continue + return None # we've finished processing this function def _handle_argument_end(ctx: Context, is_comma=True) -> Context: - """Decides whether to flush or add to processed buffers the current - portion of the argument being scanned. + """Decide whether to flush or add to processed buffers the current argument. Args: - ctx: current context - is_comma: True if comma, False if parenthesis + ctx: Current parsing context. + is_comma: True if comma separator, False if closing parenthesis. Returns: - copy of the current context + Updated context. """ # dispose of last argument should_propagate = ctx.function_propagate_policy @@ -363,7 +378,7 @@ def _handle_argument_end(ctx: Context, is_comma=True) -> Context: ctx.current_argument_no, last_arg=(not is_comma)) - if should_propagate is True: + if should_propagate: # add existing text array to processed buffer ctx.prev_arguments_text_array = util.safe_list_add( ctx.current_argument_text_array, @@ -383,20 +398,21 @@ def _handle_argument_end(ctx: Context, is_comma=True) -> Context: def _handle_open_paren(ctx: Context, is_bracket=False) -> Context: - """When encountering an open parenthesis, we halt current + """Handle an open parenthesis in the expression. + + When encountering an open parenthesis, we halt current argument processing up to the function name start, if any. Args: - ctx: context of current function with index at open paren + ctx: Context of current function with index at open paren. is_bracket: True if this is a bracket pseudo-function so that we don't search for a function identifier to precede it. - Caution: - Make sure we are not at the end of the expression - Returns: - new context to process + New context to process the nested function. + + .. warning:: Make sure we are not at the end of the expression. """ # current position is a ( segment = ctx.expression[ctx.start_of_current_argument_processing: ctx.current_position] @@ -433,14 +449,16 @@ def _handle_open_paren(ctx: Context, is_bracket=False) -> Context: def _get_function_name(msg: str) -> str: - """ - Assumes the string terminates with a ( but the ( is not - passed into the msg - Args: - msg: + """Extract function name from a string. 
- Returns: name of the function + Assumes the string terminates with a '(' but the '(' is not + passed into the msg. + Args: + msg: String containing function name (without the opening parenthesis). + + Returns: + Name of the function. """ res = re.findall(func_name, msg) assert len(res) >= 1 @@ -449,6 +467,30 @@ @dataclass(init=True, kw_only=True) class Context: + """Context for parsing expressions with nested function calls. + + This dataclass tracks the state of expression parsing, including + current position, function names, and argument processing. + + Attributes: + expression: The master expression we are working with. + length: Total length of this expression. + current_position: Current position in the expression. + start_of_current_argument_processing: Where in the expression the first + character (after previous comma) appears OR where we resumed + processing within the current argument. + current_argument_text_array: Text of current argument (only append string + when receiving values from function call return). + current_function_name: Name of current function being parsed (None if + just in a parenthesis or unknown function context). + function_propagate_policy: Whether it is known that all arguments do or + do not propagate. If unknown, set to None. + current_argument_no: Which argument we are on, starting at 1. + is_last_argument: Whether this is the last argument (only relevant for + the CASE statement). + prev_arguments_text_array: Already pruned text from previous arguments + (or None). + """ # The expression is the master expression we are working with expression: str diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py index 9a9979ae..b14dd479 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py @@ -1,5 +1,7 @@ -"""Module for xml parsing of flow global attributes +"""Module for XML parsing of flow global attributes. +This module provides the Parser class for parsing Salesforce Flow XML files +and extracting global attributes, variable types, and flow metadata. """ from __future__ import annotations @@ -42,20 +44,19 @@ def get_root(path: str) -> El: - """Get flow root + """Get the root element of an XML flow file. Args: - path: path of xml file to load + path: Path of XML file to load. Returns: - the root of the xml file - + The root element of the XML file. """ return CP.get_root(path) class Parser(FlowParser): - """API for parsing global lexical attributes of flow xml files. + """API for parsing global lexical attributes of flow XML files. Parser instances do not contain any branch-dependent data. In particular when a variable is seen by the parser, that does not mean it has been @@ -65,10 +66,9 @@ class Parser(FlowParser): Parsers should not have any data modified after initialization except new member resolutions (e.g. Account.foo where 'foo' is new). - Parsers should always be built with the :meth:`Parser.from_file` class method - except for testing where from_string can be used, but then be sure to provide + Parsers should always be built with the :meth:`from_file` class method + except for testing where :meth:`from_string` can be used, but then be sure to provide a dummy filename.
- """ def __init__(self, root): @@ -101,7 +101,7 @@ def __init__(self, root): self.all_named_elems: frozenset[El] | None = None #: set of all names (names of named elements) - self.all_names: tuple[str,] | None = None + self.all_names: tuple[str,...] | None = None #: all traversable elements that are not crawlable self.not_crawlable_elems: list[El] | None = None @@ -131,30 +131,68 @@ def __init__(self, root): self.tainted_inputs: Optional[set[tuple[str, str]]] = None def get_all_named_elems(self) -> frozenset[tuple[str, str]] | None: + """Get all named elements in the flow. + + Returns: + Frozenset of all named elements, or None if not yet computed. + """ return self.all_named_elems def get_all_names(self) -> tuple[str] | None: + """Get all element names in the flow. + + Returns: + Tuple of all element names, or None if not yet computed. + """ return self.all_names def get_effective_run_mode(self) -> RunMode: + """Get the effective run mode (considering inheritance). + + Returns: + Effective RunMode enum value. + """ return self.effective_run_mode def get_declared_run_mode(self) -> RunMode: + """Get the declared run mode (without inheritance). + + Returns: + Declared RunMode enum value. + """ return self.declared_run_mode def get_filename(self) -> str: + """Get the filename/path of the flow. + + Returns: + Flow file path as string. + """ return self.flow_path def get_root(self) -> El: + """Get the root XML element of the flow. + + Returns: + Root XML element. + """ return self.root def get_literal_var(self) -> VariableType: + """Get the literal variable type for string literals. + + Returns: + VariableType instance for string literals. + """ return self.literal_var def get_traversable_inbound(self) -> dict[str, list[str]]: - """Returns dict from element name to list of all inbound element names - will be empty list if no inbound. + """Get dictionary mapping element names to inbound element names. + + Returns: + Dictionary from element name to list of all inbound element names. + Will be empty list if no inbound elements exist. """ if self.traversable_inbound is None: self.set_traversable_inbound() @@ -162,6 +200,7 @@ def get_traversable_inbound(self) -> dict[str, list[str]]: return self.traversable_inbound def set_traversable_inbound(self) -> None: + """Set the traversable_inbound mapping for all flow elements.""" start_el = self.get_start_elem() node_els = self.get_all_traversable_flow_elements() node_els.append(start_el) @@ -183,8 +222,11 @@ def set_traversable_inbound(self) -> None: def get_action_call_map(self) -> dict[str, list[tuple[El, str]]] | None: - """Gets all actionCalls in the flow element - Returns: actionCall type -> (action_element, action name) + """Get all actionCalls in the flow element. + + Returns: + Dictionary mapping actionCall type to list of (action_element, action_name) tuples, + or None if no actionCalls are found. """ accum = {} action_call_els = parse_utils.get_by_tag(self.root, 'actionCalls') @@ -207,6 +249,11 @@ def get_action_call_map(self) -> dict[str, list[tuple[El, str]]] | None: return accum def get_async_scheduled_paths(self) -> list[str]: + """Get list of async scheduled path target references. + + Returns: + List of target reference names for async scheduled paths. + """ accum = [] start = self.get_start_elem() if start is None or start.tag != f'{ns}start': @@ -224,6 +271,11 @@ def get_async_scheduled_paths(self) -> list[str]: return accum def get_trigger_object(self) -> str | None: + """Get the trigger object name for this flow. 
+ + Returns: + Trigger object name as string, or None if not a trigger or unknown. + """ if self.trigger_object is True: return None elif isinstance(self.trigger_object, str): @@ -256,6 +308,11 @@ def get_trigger_object(self) -> str | None: return obj_name def get_trigger_type(self) -> TriggerType: + """Get the trigger type for this flow. + + Returns: + TriggerType enum value indicating the type of trigger, or NotTrigger if not a trigger. + """ if self.trigger_type is not None: return self.trigger_type @@ -307,13 +364,13 @@ def get_trigger_type(self) -> TriggerType: def get_flow_type(self) -> FlowType: - """Returns type of flow + """Get the type of flow. If the flow_type member attribute is not set, it is determined, set and returned. Returns: - FlowType + FlowType enum value indicating the type of flow. """ if self.flow_type is not None: return self.flow_type @@ -362,20 +419,19 @@ def get_flow_type(self) -> FlowType: def resolve_by_name(self, name: str, path: str | None = None, strict: bool = False) -> Optional[(str, str, VariableType)]: - """Resolves name to variable, property, VariableType. Does not store anything. + """Resolve name to variable, property, and VariableType. - Examples:: + Does not store anything in the cache. This is a pure resolution function. Args: - name: raw name as it is used in the flow xml file (e.g. foo.bar.baz) - path: filename in which to resolve - strict: whether to resolve unknown variables to None (which can cause program execution + name: Raw name as it is used in the flow XML file (e.g. foo.bar.baz). + path: Filename in which to resolve (defaults to current flow path). + strict: Whether to resolve unknown variables to None (which can cause program execution to terminate) or to create a best effort 'unknown' variable type resolution. Returns: - ``None`` if the name cannot be resolved, - else the triple (parent name, member, type) - + None if the name cannot be resolved, + else the triple (parent_name, member, VariableType). """ """ @@ -481,6 +537,15 @@ def resolve_by_name(self, name: str, path: str | None = None, @classmethod def from_file(cls, filepath: str, old_parser: Parser = None) -> Parser: + """Build a Parser instance from a file path. + + Args: + filepath: Path to the flow XML file. + old_parser: Optional previous parser to inherit data from. + + Returns: + New Parser instance. + """ root = CP.get_root(filepath) parser = Parser(root) parser.flow_path = filepath @@ -490,6 +555,19 @@ def from_file(cls, filepath: str, old_parser: Parser = None) -> Parser: @classmethod def from_string(cls, xml_string: str | bytes, filepath_to_use: str, old_parser: Parser = None) -> Parser: + """Build a Parser instance from an XML string. + + Args: + xml_string: XML content as string or bytes. + filepath_to_use: Dummy filename to use for this parser (required for testing). + old_parser: Optional previous parser to inherit data from. + + Returns: + New Parser instance. + + Raises: + ValueError: If xml_string is not str or bytes. + """ if isinstance(xml_string, str): root = CP.get_root_from_string(xml_string.encode()) elif isinstance(xml_string, bytes): @@ -503,17 +581,15 @@ def from_string(cls, xml_string: str | bytes, filepath_to_use: str, return parser def update(self, old_parser: Parser = None, is_return=False) -> Parser: - """Parse flow root and populate default values + """Parse flow root and populate default values. 
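A hedged usage sketch of the two builder classmethods; the import path and the minimal XML are assumptions, and a real flow file carries more metadata than shown:

from flow_parser.parse import Parser  # assumed import path

xml = b"<Flow><label>Demo</label><apiVersion>59.0</apiVersion></Flow>"  # toy flow

# from_string needs a dummy filename and is intended for tests.
p1 = Parser.from_string(xml, filepath_to_use="Demo.flow-meta.xml")

# from_file is the recommended builder for real flows.
p2 = Parser.from_file("force-app/main/default/flows/Demo.flow-meta.xml")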
Args: - old_parser: when updating a new parser from an old, to copy over elements - and update run-mode - - is_return: are we returning from a function call? + old_parser: When updating a new parser from an old, to copy over elements + and update run-mode. + is_return: Are we returning from a function call? Returns: - None - + Self (for method chaining). """ if old_parser is not None: self.top_flow_path = old_parser.flow_path @@ -567,23 +643,45 @@ def update(self, old_parser: Parser = None, is_return=False) -> Parser: return self def get_output_variables(self, path: str | None = None) -> set[tuple[str, str]]: + """Get output variables for a specific path. + + Args: + path: Flow path to filter by (defaults to current flow path). + + Returns: + Set of (flow_path, variable_name) tuples for output variables. + """ if path is None: path = self.flow_path return {(x, y) for (x, y) in self.output_variables if x == path} def get_input_variables(self, path: str | None = None) -> set[tuple[str, str]]: + """Get input variables for a specific path. + + Args: + path: Flow path to filter by (defaults to current flow path). + + Returns: + Set of (flow_path, variable_name) tuples for input variables. + """ if path is None: path = self.flow_path return {(x, y) for (x, y) in self.input_variables if x == path} def get_input_field_elems(self) -> set[El] | None: + """Get all input field elements from the flow. + + Returns: + Set of input field XML elements, or None if none found. + """ return parse_utils.get_input_fields(self.root) def get_input_output_elems(self) -> dict[str, set[El]]: - """ - Returns:: - {"input": input variable elements, - "output": output variable elements } + """Get input and output variable elements. + + Returns: + Dictionary with keys "input" and "output", each mapping to + a set of XML elements representing input or output variables. """ vars_ = self.get_all_variable_elems() input_accum = set() @@ -602,7 +700,15 @@ def get_input_output_elems(self) -> dict[str, set[El]]: } def get_by_name(self, name_to_match: str, scope: El | None = None) -> El | None: - """returns the first elem with the given name that is a child of the scope element""" + """Get the first element with the given name within the scope. + + Args: + name_to_match: Name of element to find (use '*' for start element). + scope: XML element to search within (defaults to root). + + Returns: + First matching XML element, or None if not found. + """ if name_to_match == '*': return self.get_start_elem() @@ -621,7 +727,14 @@ def get_by_name(self, name_to_match: str, scope: El | None = None) -> El | None: return None def get_flow_name(self) -> str: - """we assume there is always a flow label.""" + """Get the flow name from the label element. + + Returns: + Flow name as string. + + Raises: + InvalidFlowException: If flow has no label element. + """ res = get_by_tag(self.root, 'label') if len(res) == 0: raise InvalidFlowException(f"Flow {self.flow_path} has no name, skipping..", flow_path=self.flow_path) @@ -629,11 +742,10 @@ def get_flow_name(self) -> str: return res[0].text def get_run_mode(self) -> RunMode: - """Get effective context of flow + """Get effective context (run mode) of flow. Returns: - RunMode public enum - + RunMode enum value indicating the execution context. """ flow_type = self.get_flow_type() @@ -654,14 +766,28 @@ def get_run_mode(self) -> RunMode: return RunMode[elems[0].text] def get_api_version(self) -> str: + """Get the API version of the flow. + + Returns: + API version string. 
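The input/output variable getters above reduce to a path filter over (flow_path, variable_name) tuples; the same idea in isolation:

def filter_by_path(pairs: set[tuple[str, str]], path: str) -> set[tuple[str, str]]:
    return {(x, y) for (x, y) in pairs if x == path}

pairs = {("a.flow-meta.xml", "recordId"), ("b.flow-meta.xml", "out")}
assert filter_by_path(pairs, "a.flow-meta.xml") == {("a.flow-meta.xml", "recordId")}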
+ """ return get_by_tag(self.root, 'apiVersion')[0].text def get_all_traversable_flow_elements(self) -> list[El]: - """ ignore start""" + """Get all traversable flow elements (excluding start element). + + Returns: + List of traversable flow XML elements. + """ return [child for child in self.root if get_tag(child) in parse_utils.CTRL_FLOW_ELEM] def get_all_variable_elems(self) -> list[El] | None: + """Get all variable elements from the flow. + + Returns: + List of variable XML elements, or None if none found. + """ elems = get_by_tag(self.root, 'variables') if len(elems) == 0: return None @@ -669,37 +795,58 @@ def get_all_variable_elems(self) -> list[El] | None: return elems def get_templates(self) -> list[El]: - """Grabs all template elements. - Returns empty list if none found + """Get all template elements. + + Returns: + List of template XML elements (empty list if none found). """ templates = get_by_tag(self.root, 'textTemplates') return templates def get_formulas(self) -> list[El]: - """Grabs all formula elements. - Returns empty list if none found + """Get all formula elements. + + Returns: + List of formula XML elements (empty list if none found). """ formulas = get_by_tag(self.root, 'formulas') return formulas def get_choices(self) -> list[El]: + """Get all choice elements. + + Returns: + List of choice XML elements. + """ choices = get_by_tag(self.root, 'choices') return choices def get_dynamic_choice_sets(self) -> list[El]: + """Get all dynamic choice set elements. + + Returns: + List of dynamic choice set XML elements. + """ dcc = get_by_tag(self.root, 'dynamicChoiceSets') return dcc def get_constants(self) -> list[El]: + """Get all constant elements. + + Returns: + List of constant XML elements. + """ constants = get_by_tag(self.root, 'constants') return constants def get_start_elem(self) -> El: - """Get first element of flow - Raises InvalidFlowException if no start element + """Get the first element of the flow. + Returns: - element or element pointed to in + Start XML element or element pointed to in startElementReference. + Raises: + InvalidFlowException: If no start element is found. """ res = parse_utils.get_start_element(self.root) if res is None: @@ -708,10 +855,12 @@ def get_start_elem(self) -> El: return res def get_all_indirect_tuples(self) -> list[tuple[str, El]]: - """returns a list of tuples of all indirect references, e.g. - str, elem, where str influences elem. - The elem is a formula or template element and - str is an extracted merge-field from the elem + """Get all indirect references from formulas and templates. + + Returns: + List of (variable_name, element) tuples where the variable influences + the element. The element is a formula or template element and the + variable name is an extracted merge-field from the element. """ accum = [] elems = self.get_templates() + self.get_formulas() @@ -739,16 +888,16 @@ def get_all_indirect_tuples(self) -> list[tuple[str, El]]: return accum def get_cached_resolution(self, name: str, path: str | None = None) -> tuple[str, str | None, VariableType] | None: - """Gets the VariableType for the named Flow Element + """Get the VariableType for the named Flow Element from cache. - Only looks in cache. + Only looks in cache, does not perform new resolution. Args: - name: name of Flow Element to retrieve - path: filename to use (if None, use current path) + name: Name of Flow Element to retrieve. + path: Filename to use (if None, use current path). 
Returns: - VariableType or None if not present in cache + Tuple of (parent_name, member, VariableType) or None if not present in cache. """ if name == STRING_LITERAL_TOKEN: return name, None, self.literal_var @@ -762,7 +911,13 @@ def get_cached_resolution(self, name: str, path: str | None = None) -> tuple[str return None def get_called_descendents(self, elem_name: str) -> list[str]: - """Returns empty list if no descendents + """Get all called descendant element names. + + Args: + elem_name: Name of element to get descendants for. + + Returns: + List of descendant element names (empty list if none). """ el = self.get_by_name(elem_name) target_map = get_conn_target_map(el) @@ -771,8 +926,15 @@ def get_called_descendents(self, elem_name: str) -> list[str]: else: return [x[0] for x in get_conn_target_map(el).values()] - def get_traversable_descendents_of_elem(self, elem_name: str) -> list[str]: - """includes the original elem name, elem_tag""" + def get_traversable_descendants_of_elem(self, elem_name: str) -> list[str]: + """Get all traversable descendants of an element. + + Args: + elem_name: Name of element to get descendants for. + + Returns: + List of element names including the original element name. + """ visited = [] worklist = [] curr_name = elem_name @@ -789,14 +951,11 @@ def get_traversable_descendents_of_elem(self, elem_name: str) -> list[str]: return visited def get_tainted_inputs(self) -> set[tuple[str, str]] | None: - """Looks for sources - Args: - parser: parser instance for flow - start: whether this is the first flow being scanned + """Get tainted input variables (user-controlled sources). Returns: - ((path, varname), ) corresponding to sources of taint - + Set of (path, varname) tuples corresponding to sources of taint, + or None if not yet computed. """ start = self.flow_path == self.top_flow_path @@ -835,7 +994,7 @@ def get_tainted_inputs(self) -> set[tuple[str, str]] | None: return self.tainted_inputs def build_vartype_from_elem(elem: El) -> VariableType | None: - """Build VariableType from XML Element + """Build VariableType from XML Element. The purpose of this function is to assign types to named flow elements, in order to assist in object resolution @@ -850,12 +1009,12 @@ def build_vartype_from_elem(elem: El) -> VariableType | None: variable from this flow element. Args: - elem: must be a *named* Flow element (e.g. an element with a tag that - is a child of the element root) + elem: Must be a *named* Flow element (e.g. an element with a tag that + is a child of the element root). Returns: - VariableType instance containing type information for the element or None - If the element is not a named Flow element or is unknown to the parser. + VariableType instance containing type information for the element, or None + if the element is not a named Flow element or is unknown to the parser. """ if elem is None: return None @@ -1116,8 +1275,22 @@ def build_vartype_from_elem(elem: El) -> VariableType | None: return None def _get_global_flow_data(flow_path: str, root: El) \ - -> tuple[list[El], tuple[str,...], dict[tuple[str, str], VariableType], + -> tuple[frozenset[El], tuple[str,...], dict[tuple[str, str], VariableType], frozenset[tuple[str, str]], frozenset[tuple[str, str]]]: + """Extract global flow data from the XML root. + + Args: + flow_path: Path of the flow file. + root: Root XML element of the flow. 
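The worklist traversal behind get_traversable_descendants_of_elem, reduced to plain data (a dict lookup stands in for get_called_descendents):

def descendants_sketch(start: str, called: dict[str, list[str]]) -> list[str]:
    visited: list[str] = []
    worklist = [start]
    while worklist:
        curr = worklist.pop()
        if curr in visited:
            continue
        visited.append(curr)  # includes the original element name
        worklist.extend(called.get(curr, []))
    return visited

edges = {"A": ["B", "C"], "B": ["C"]}
assert set(descendants_sketch("A", edges)) == {"A", "B", "C"}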
+ + Returns: + Tuple containing: + - frozenset of all named elements + - tuple of all element names + - dictionary mapping (flow_path, name) to VariableType + - frozenset of (flow_path, name) tuples for input variables + - frozenset of (flow_path, name) tuples for output variables + """ all_named = get_named_elems(root) @@ -1159,6 +1332,6 @@ def _get_global_flow_data(flow_path: str, root: El) \ if var.is_output: outputs.append((flow_path, name_dict[x])) - return all_named, all_names, vars_, frozenset(inputs), frozenset(outputs) + return frozenset(all_named), all_names, vars_, frozenset(inputs), frozenset(outputs) diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/ESAPI.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/ESAPI.py index 76b99701..b87b14bc 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/ESAPI.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/ESAPI.py @@ -1,14 +1,18 @@ -"""Security Encoding library +"""Security Encoding library. + +Provides functions for HTML encoding, SQL escaping, and related security +utilities for safe output handling. """ def html_encode(msg: str) -> str | int | None: - """Performs html encoding + """Perform HTML encoding on a message. + Args: - msg: unicode message to encode + msg: Unicode message to encode. Returns: - html encoded message + HTML-encoded message, or original value if None or int. """ if msg is None: return msg @@ -25,13 +29,13 @@ def html_encode(msg: str) -> str | int | None: def sql_escape(msg: str) -> str: - """simple sql escape (unicode) + """Simple SQL escape for Unicode strings. Args: - msg: string to escape + msg: String to escape. Returns: - escapes \\ and ' + Escaped string with backslashes and single quotes escaped. """ msg = msg.replace("\\", "\\\\") msg = msg.replace("'", "\\'") @@ -41,26 +45,26 @@ def sql_escape(msg: str) -> str: def legal_sql_escape(msg: str) -> str: """Escape single quotes with two single quotes. - This is the SQL standard escaping. + This is the SQL standard escaping method. Args: - msg: string to escape - - Returns: escaped string + msg: String to escape. + Returns: + Escaped string with single quotes doubled. """ msg = msg.replace("'", "''") return msg def sql_enc_html_dec(msg: str) -> str: - """Decodes html-encoded text + """Decode HTML-encoded text and apply SQL escaping. Args: - msg: string to decode - - Returns: decoded string + msg: HTML-encoded string to decode. + Returns: + Decoded and SQL-escaped string. 
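To make the difference between the two SQL escaping styles concrete, a standalone re-implementation with expected outputs (local copies so the snippet runs on its own):

def sql_escape_sketch(msg: str) -> str:
    # backslash first, then single quote, as in sql_escape above
    msg = msg.replace("\\", "\\\\")
    return msg.replace("'", "\\'")

def legal_sql_escape_sketch(msg: str) -> str:
    # SQL-standard style: double every single quote
    return msg.replace("'", "''")

assert sql_escape_sketch("O'Brien\\x") == "O\\'Brien\\\\x"
assert legal_sql_escape_sketch("O'Brien") == "O''Brien"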
""" msg = msg.replace('&', '&') msg = msg.replace('>', '>') diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py index 0e7b5087..9a7ffb16 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py @@ -6,11 +6,13 @@ import re import sys import traceback +import uuid import flow_scanner.executor as executor import flow_scanner.query_manager import flow_scanner.util as util import flow_scanner.version as version +import flow_scanner.db_storage as db_storage from flow_scanner.query_manager import validate_qry_list, get_all_queries from flow_scanner.util import make_id @@ -151,11 +153,12 @@ def get_validated_queries(data: list[str]) -> list[str]: return found else: for issue, data in [('Duplicate', duplicates), ('Unrecognized', missed)]: - if data is not None and len(data) == 1: - raise argparse.ArgumentTypeError(f"{issue} query requested: %s" % data[0]) - else: - raise argparse.ArgumentTypeError(f"{issue} queries requested: %s" % - ",".join(data)) + if data is not None and len(data) > 0: + if len(data) == 1: + raise argparse.ArgumentTypeError(f"{issue} query requested: %s" % data[0]) + else: + raise argparse.ArgumentTypeError(f"{issue} queries requested: %s" % + ",".join(data)) def unsplit(msg: str) -> list[str]: @@ -299,6 +302,10 @@ def parse_args(my_args: list[str], default: str = None) -> argparse.Namespace: type=check_not_exist) parser.add_argument("-t", "--html", required=False, help="Path to store html report", type=check_not_exist) + parser.add_argument("--db", required=False, nargs='?', const=True, + help="path to SQLite database file for storing results. " + "If --db is provided without a path, a database with a UUID-based name will be created in the current directory.", + default=None) parser.add_argument("-c", "--chunk", required=False, help=(f"chunk scan into groups of files, with one report generated for each group. " "Reports will be appended with the chunk number. Useful for processing " @@ -437,8 +444,8 @@ def main(argv: list[str] = None) -> str | None: query_manager = None - # make sure a report has been chosen - if args.html is None and args.xml is None and args.json is None: + # make sure a report has been chosen (including database) + if args.html is None and args.xml is None and args.json is None and args.db is None: raise argparse.ArgumentTypeError("No report format chosen") chunk_counter = 0 @@ -461,56 +468,98 @@ def main(argv: list[str] = None) -> str | None: f"you use the `--chunk` switch to break this scan up into smaller pieces to avoid excessively large" f"reports and to reduce scan memory usage. Chunked scans have no reduction in scan accuracy.") - for (index, flow_path) in enumerate(flow_paths): - - status_message = get_status_msg(index, total_paths) - print(f"{status_message} scanning {flow_path}...") + # Set up database connection and run_id if database output is requested + db_conn = None + run_id = None + if args.db is not None: + # Determine database path + # With nargs='?' 
and const=True: + # - If --db is not provided: args.db = None + # - If --db is provided without value: args.db = True (const) + # - If --db is provided with value: args.db = that value (string) + if args.db is True: + # User provided --db without path, generate UUID-based name + db_path = f"flow_scanner_results_{uuid.uuid4()}.db" + else: + # User provided a path + db_path = args.db + # Ensure parent directory exists if path has directory component + db_abs_path = os.path.abspath(db_path) + db_dir = os.path.dirname(db_abs_path) + if db_dir and not os.path.exists(db_dir): + os.makedirs(db_dir, exist_ok=True) + db_path = db_abs_path + try: - # top level loop in case something goes wrong - # specifically we have noticed it's now possible - # to save malformed flows :( - query_manager = executor.parse_flow(flow_path, - requestor=args.requestor, - report_label=label, - result_id=args.id, - service_version=args.service_version, - help_url=args.url, - query_manager=query_manager, - query_module_path=args.query_path, - query_class_name=args.query_class, - query_preset=args.preset, - queries=qry_l, - crawl_dir=args.crawl_dir, - resolver=resolver) - - except KeyboardInterrupt: - # Program could be long-running and should be interruptible by the user - return - - except: - msg = (f"error processing flow {flow_path}" - f"{traceback.format_exc()}" - "...continuing to next flow..") - print(msg) - - if (index % chunk == 0 and index > 0) or index == total_paths-1: - chunk_counter += 1 + db_conn = db_storage.create_database(db_path) + # Create run with description + description = args.label or f"scan of {len(flow_paths)} flows" + run_id = db_storage.create_run(db_conn, description=description) + print(f"Database initialized at {db_path}, run_id={run_id}") + except Exception as e: + print(f"Error setting up database: {e}") + raise + + try: + for (index, flow_path) in enumerate(flow_paths): + + status_message = get_status_msg(index, total_paths) + print(f"{status_message} scanning {flow_path}...") try: - gen_reports(args, query_manager, chunk_counter, number_chunks) + # top level loop in case something goes wrong + # specifically we have noticed it's now possible + # to save malformed flows :( + query_manager = executor.parse_flow(flow_path, + requestor=args.requestor, + report_label=label, + result_id=args.id, + service_version=args.service_version, + help_url=args.url, + query_manager=query_manager, + query_module_path=args.query_path, + query_class_name=args.query_class, + query_preset=args.preset, + queries=qry_l, + crawl_dir=args.crawl_dir, + resolver=resolver) except KeyboardInterrupt: + # Program could be long-running and should be interruptible by the user return except: - print("error generating reports") - print(traceback.format_exc()) - - query_manager = None - - print("scanning complete.") - print(f"{STATUS_LABEL} {STATUS_COMPLETE}") + msg = (f"error processing flow {flow_path}" + f"{traceback.format_exc()}" + "...continuing to next flow..") + print(msg) + + if (index % chunk == 0 and index > 0) or index == total_paths-1: + chunk_counter += 1 + try: + gen_reports(args, query_manager, chunk_counter, number_chunks, db_conn=db_conn, run_id=run_id) + + except KeyboardInterrupt: + return + + except: + print("error generating reports") + print(traceback.format_exc()) + + query_manager = None + + print("scanning complete.") + print(f"{STATUS_LABEL} {STATUS_COMPLETE}") + finally: + # Ensure database connection is closed when processing ends + # This will execute even if there's an early return or 
exception + if db_conn is not None: + try: + db_conn.close() + except Exception as e: + # Log but don't raise - connection might already be closed + logging.warning(f"Error closing database connection: {e}") -def gen_reports(args, query_manager, chunk_counter, number_chunks): +def gen_reports(args, query_manager, chunk_counter, number_chunks, db_conn=None, run_id=None): # we are not chunking, we are generating a single report for everything if query_manager is None: @@ -552,6 +601,16 @@ def gen_reports(args, query_manager, chunk_counter, number_chunks): print(f"json result file written to {rep_path}") + # Dump results to database if connection and run_id are provided + if db_conn is not None and run_id is not None: + try: + num_stored = query_manager.results.dump_result_to_db(db_conn, run_id) + print(f"stored {num_stored} query results to database for run_id={run_id}") + except Exception as e: + print(f"error storing results to database: {e}") + print(traceback.format_exc()) + # Continue execution even if database dump fails + def add_chunk_to_path(old_path: str, to_insert)-> str: if to_insert == '': return old_path diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py index be176e76..7fb5046e 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py @@ -91,20 +91,20 @@ class BranchState(State): - """Instances of BranchState track dataflows within a given flow + """Instances of BranchState track dataflows within a given flow. - * the current element being processed - * all influence flows seen on this branch and up to this element + Tracks: + - The current element being processed + - All influence flows seen on this branch and up to this element All interaction with influence flows must be done via public APIs exposed by BranchState. Instantiate only with a builder method. - A shallow copy of the influence map is made at each crawl - step (the influence map contains only immutable elements). + A shallow copy of the influence map is made at each crawl step + (the influence map contains only immutable elements). Prior to exiting a subflow, all branches must be consolidated so that all execution paths are available as return values. - """ def __init__(self, parser: parse.Parser): @@ -140,16 +140,16 @@ def __init__(self, parser: parse.Parser): @classmethod def from_parser(cls, parser: parse.Parser) -> BranchState: - """Returns a state instance with variable defaults populated + """Create a state instance with variable defaults populated. This instance is *not* ready to be used until it is loaded with a crawl step. Only the defaults have been added. Args: - parser: parser instance for this flow + parser: Parser instance for this flow. Returns: - Branch State instance + BranchState instance with defaults populated. """ state = BranchState(parser=parser) state.flow_path = parser.flow_path @@ -171,25 +171,26 @@ def get_parser(self) -> parse.Parser: return self.parser def get_current_elem(self) -> ET.Element: - """Get current element being processed + """Get current element being processed. Returns: - xml element associated to the flow's crawl step + XML element associated to the flow's crawl step. """ return self.current_elem def get_current_elem_name(self) -> str: - """Get name of element + """Get name of current element. 
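The connection-lifecycle pattern used in main, in isolation; the database path and table here are placeholders:

import sqlite3

conn = sqlite3.connect("example_results.db")  # placeholder path
try:
    conn.execute("CREATE TABLE IF NOT EXISTS demo (id INTEGER PRIMARY KEY)")
    conn.commit()
finally:
    # Mirrors the scanner's cleanup: close even on early return or exception.
    try:
        conn.close()
    except sqlite3.Error:
        pass  # connection may already be closed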
        Returns:
-            Flow Element name of the current crawl step
+            Flow Element name of the current crawl step.
         """
         return self.current_elem_name

-    def filter_maps(self, steps: list[CrawlStep]):
-        """Removes all influence maps except those in `steps`
+    def filter_maps(self, steps: list[CrawlStep]) -> None:
+        """Remove all influence maps except those in steps.

-        .. WARNING:: Destructive operation, only call after flow
+        .. warning::
+            Destructive operation, only call after flow
             processing is complete.

         Args:
@@ -778,7 +779,7 @@ def _get_vector(self, flow_path: str, name: str, step: CrawlStep = None) -> Flow
         return dict.get(self._get_influence_map(crawl_step=step), (flow_path, name))

     def _init_vec_from_elem(self, elem: ET.Element, store=True) -> FlowVector | None:
-        """Initializes a FlowVector from the provided (named) xml element
+        """Initializes a FlowVector from the provided (named) XML element

         Args:
             elem: XML Element whose name is the vector's parent
@@ -928,7 +929,7 @@ def _build_path_from_history(parser: parse.Parser, history: tuple[InfluenceState
     Args:
         parser: parser that can convert variables in statements
                 to element names and properties, as well as extract type
-                information from the flow xml file.
+                information from the flow XML file.
         history: tuple of DataflowInfluenceStatement
         type_replacements: [expert use] name/value pairs for property overrides
diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py
index d627518f..9dbedf31 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py
@@ -64,16 +64,14 @@ class Segment(JSONSerializable, AbstractSegment):

     def accept(self, visitor: BranchVisitor) -> list[BranchVisitor] | None:
-        """does the node accept the visitor
-
-        Also updates visitor state
+        """Check if the node accepts the visitor and update visitor state.

         Args:
-            visitor: Branch Visitor trying to jump into node
+            visitor: Branch Visitor trying to jump into node.

         Returns:
-            list of labels to process or None
-
+            List of BranchVisitor instances to process next, or None if
+            visitor is rejected (e.g., due to cycles).
         """
         if not self.jumps:
             return None
@@ -95,7 +93,15 @@
         return self._send_outbound(visitor)

-    def _send_outbound(self, visitor):
+    def _send_outbound(self, visitor: BranchVisitor) -> list[BranchVisitor]:
+        """Generate outbound visitors from this segment.
+
+        Args:
+            visitor: BranchVisitor entering this segment.
+
+        Returns:
+            List of BranchVisitor instances for outbound jumps.
+        """
         jumps = self.jumps
         to_return = []
@@ -158,14 +165,14 @@
     # noinspection PyTypeChecker
     @classmethod
     def build_from_parser(cls, parser: parse.Parser, start_elem: El) -> Segment:
-        """Build a segment starting at this element
+        """Build a segment starting at this element.

         Args:
-            parser: flow parser instance
-            start_elem: first element in this segment
+            parser: Flow parser instance.
+            start_elem: First element in this segment.

         Returns:
-            segment
+            Segment instance built from the element.
""" inbound_map = parser.get_traversable_inbound() label = get_name(start_elem) @@ -228,8 +235,17 @@ def build_from_parser(cls, parser: parse.Parser, start_elem: El) -> Segment: raise InvalidFlowException("Could not crawl flow", flow_path=parser.get_filename()) -def get_jumps_and_terminal(el_name: str, el_tag:str, elem: El) -> tuple[list[Jump], bool]: - """Return list of jumps for this element, is_terminal (bool)""" +def get_jumps_and_terminal(el_name: str, el_tag: str, elem: El) -> tuple[list[Jump], bool]: + """Get list of jumps for an element and determine if it's terminal. + + Args: + el_name: Name of the element. + el_tag: XML tag of the element. + elem: XML element to analyze. + + Returns: + Tuple of (list of Jump objects, is_terminal boolean). + """ jumps = [] conns = get_conn_target_map(elem) @@ -298,7 +314,15 @@ class ControlFlowGraph(JSONSerializable, AbstractControlFlowGraph): segment_map: dict[str, Segment] @classmethod - def from_parser(cls, parser: parse.Parser): + def from_parser(cls, parser: parse.Parser) -> 'ControlFlowGraph': + """Build a ControlFlowGraph from a parser. + + Args: + parser: Flow parser instance. + + Returns: + ControlFlowGraph instance. + """ start_elem = parser.get_start_elem() start_label = get_name(start_elem) visited_labels = [] @@ -344,14 +368,16 @@ def get_crawl_data(cfg: ControlFlowGraph) -> \ tuple[tuple[CrawlStep, ...], tuple[CrawlStep, ...], dict[str, list[CrawlStep]]]: - """Builds crawl schedule + """Build crawl schedule from control flow graph. Args: - cfg: Control Flow Graph + cfg: Control Flow Graph to build schedule from. Returns: - (tuple of crawl steps, tuple of terminal steps, dict of element to list of crawl steps) - + Tuple of: + - Tuple of all crawl steps in execution order. + - Tuple of terminal crawl steps. + - Dictionary mapping element names to lists of crawl steps. """ generator = _crawl_iter(cfg) @@ -396,7 +422,15 @@ def get_crawl_data(cfg: ControlFlowGraph) -> \ return tuple(crawl_steps), tuple(terminal_steps), el_2_cs -def get_visits_statistics(visit_map: dict[str, list[Jump] | None], cfg: ControlFlowGraph): +def get_visits_statistics(visit_map: dict[str, list[Jump] | None], cfg: ControlFlowGraph) -> None: + """Print statistics about CFG traversal visits. + + Checks that every label has been visited and every jump has been traversed. + + Args: + visit_map: Dictionary mapping segment labels to lists of visits. + cfg: Control Flow Graph to analyze. + """ # first check that every label has been visited: missed = [] for label in cfg.segment_map: @@ -440,13 +474,13 @@ def get_visits_statistics(visit_map: dict[str, list[Jump] | None], cfg: ControlF def _get_crawl_visits(cfg: ControlFlowGraph) -> dict[str, list[BranchVisitor]]: - """For testing and analysis. + """Get visit map for testing and analysis. Args: - cfg: control flow graph + cfg: Control flow graph to analyze. Returns: - map from label to BranchVisitor + Dictionary mapping segment labels to lists of BranchVisitor instances. """ # for testing and analysis # initialize visits @@ -460,14 +494,13 @@ def _get_crawl_visits(cfg: ControlFlowGraph) -> dict[str, list[BranchVisitor]]: def _crawl_iter(cfg: ControlFlowGraph) -> Generator[tuple[BranchVisitor, Segment], None, None]: - """crawls CFG + """Crawl a control flow graph. Args: - cfg: control flow graph + cfg: Control flow graph to crawl. 
Yields: - current Branch visitor (that points to the current segment), - the segment (list of flow elements to process, and outgoing visitors) + Tuples of (BranchVisitor, Segment) as the graph is traversed. """ label = cfg.start_label @@ -530,19 +563,15 @@ def _crawl_iter(cfg: ControlFlowGraph) -> Generator[tuple[BranchVisitor, Segment def _find_segments_with_elem(val: str, segment_map: dict[str, Segment]) -> list[tuple[str, Segment, int]]: - """Find segments that also contain an element. + """Find segments that contain a specific element. Args: - val: string name of element - segment_map: label -> segment + val: String name of element to find. + segment_map: Dictionary mapping labels to Segment instances. Returns: - - * list of segments that have this element along with their label - and the index of the found element in the form - (label, segment, dupe_index) - - * Empty set if no segments found + List of (label, segment, index) tuples where the element appears. + Returns empty list if no segments found. """ if segment_map is None or len(segment_map) == 0: @@ -562,7 +591,18 @@ def _find_segments_with_elem(val: str, segment_map: dict[str, Segment]) -> list[ return to_return class CrawlEncoder(json.JSONEncoder): + """JSON encoder for crawl-related objects.""" + def default(self, obj): + """Encode an object to JSON. + + Args: + obj: Object to encode. + + Returns: + Dictionary representation for JSONSerializable, BranchVisitor, + or CrawlStep objects, otherwise falls back to default JSON encoding. + """ if (isinstance(obj, JSONSerializable) or isinstance(obj, BranchVisitor) or isinstance(obj, CrawlStep)): return obj.to_dict() @@ -571,26 +611,32 @@ def default(self, obj): class Crawler(AbstractCrawler): - """Class representing the crawl of a graph + """Class representing the crawl of a control flow graph. + Manages the crawl schedule, terminal steps, and traversal state + for symbolic execution of a flow. """ def __init__(self, total_steps: int, cfg: ControlFlowGraph, - crawl_schedule: tuple[CrawlStep,...], - terminal_steps: tuple[CrawlStep,...], + crawl_schedule: tuple[CrawlStep, ...], + terminal_steps: tuple[CrawlStep, ...], history_maps: dict[tuple[tuple[str, str], ...], CrawlStep] | None, flow_path: str, el_2_cs: dict[str, list[CrawlStep]] | None = None): - """Constructor + """Initialize Crawler instance. - .. WARNING:: For module use only + .. warning:: + For module use only. Use :meth:`from_parser` instead. Args: - total_steps: how many steps in crawl - cfg: control flow graph - crawl_schedule: tuple of :class:`public.data_obj.CrawlStep` in order of execution - terminal_steps: tuple of :class:`public.data_obj.CrawlStep` - that can end program (note, *not* in any specific order) + total_steps: Total number of steps in crawl. + cfg: Control flow graph. + crawl_schedule: Tuple of :class:`public.data_obj.CrawlStep` in order of execution. + terminal_steps: Tuple of :class:`public.data_obj.CrawlStep` that can end program + (note, *not* in any specific order). + history_maps: Dictionary mapping history tuples to crawl steps, or None. + flow_path: File path of the flow. + el_2_cs: Dictionary mapping element names to crawl steps, or None. 
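CrawlEncoder uses the standard json.JSONEncoder extension point; the same mechanism with a toy type:

import json

class Point:
    def __init__(self, x: int, y: int) -> None:
        self.x, self.y = x, y

    def to_dict(self) -> dict:
        return {"x": self.x, "y": self.y}

class PointEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Point):
            return obj.to_dict()
        return json.JSONEncoder.default(self, obj)  # fall back for other types

assert json.dumps(Point(1, 2), cls=PointEncoder) == '{"x": 1, "y": 2}'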
""" #: int current step of crawl self.current_step: int = 0 @@ -628,15 +674,14 @@ def __init__(self, total_steps: int, cfg: ControlFlowGraph, self.crawlable_elem_tuples: list[tuple[str, str]] | None = None @classmethod - def from_parser(cls, parser: parse.Parser): - """Builds a crawl schedule (recommended builder) + def from_parser(cls, parser: parse.Parser) -> 'Crawler': + """Build a crawl schedule from a parser (recommended builder). Args: - parser: :obj:`flow_parser.parse.Parser` instance + parser: :obj:`flow_parser.parse.Parser` instance. Returns: - :obj:`Crawler` instance - + :obj:`Crawler` instance with crawl schedule built. """ cfg = ControlFlowGraph.from_parser(parser) crawl_schedule, terminal_steps, el_2_cs = get_crawl_data(cfg) @@ -652,35 +697,54 @@ def from_parser(cls, parser: parse.Parser): el_2_cs=el_2_cs ) - def get_crawl_schedule(self)->tuple[CrawlStep, ...]: + def get_crawl_schedule(self) -> tuple[CrawlStep, ...]: + """Get the crawl schedule. + + Returns: + Tuple of all crawl steps in execution order. + """ return self.crawl_schedule def get_flow_path(self) -> str | None: + """Get the file path of the current flow. + + Returns: + Flow file path, or None if not set. + """ return self.flow_path def get_subflow_parents(self) -> list[tuple[El, str]]: - """READ ONLY + """Get history of subflow parents (read-only). Returns: - history of crawlers encountered during crawl, together with the current step (int) + List of (element, flow_path) tuples representing the history of + crawlers encountered during crawl, together with the current step when they entered a child flow. """ return self.subflow_parents - def get_cfg(self)-> ControlFlowGraph: + def get_cfg(self) -> ControlFlowGraph: + """Get the control flow graph. + + Returns: + ControlFlowGraph instance. + """ return self.cfg - def get_current_step_index(self)->int: - """Retrieve current crawl step (read-only)""" + def get_current_step_index(self) -> int: + """Get current crawl step index (read-only). + + Returns: + Current step index in the crawl schedule. + """ return self.current_step def load_crawl_step(self) -> CrawlStep | None: - """Retrieve the current crawl step and advance counter (irreversible) + """Retrieve the current crawl step and advance counter (irreversible). Returns: - :obj:`public.data_obj.BranchVisitor` and flow element name to process - + :obj:`public.data_obj.CrawlStep` instance, or None if crawl is complete. """ if self.current_step >= self.total_steps: return None @@ -691,17 +755,16 @@ def load_crawl_step(self) -> CrawlStep | None: return to_return - def get_last_ancestor(self, crawl_step) -> CrawlStep | None: - """Get latest ancestor branch that was last visited at crawl_step + def get_last_ancestor(self, crawl_step: CrawlStep) -> CrawlStep | None: + """Get latest ancestor branch that was last visited at crawl_step. - Useful for knowing which influence map to clone + Useful for knowing which influence map to clone. Args: - crawl_step: step whose history is sought + crawl_step: CrawlStep whose history is sought. Returns: - CrawlStep instance or None - + CrawlStep instance representing the last visited ancestor, or None. """ history = crawl_step.visitor.history @@ -721,15 +784,14 @@ def get_last_ancestor(self, crawl_step) -> CrawlStep | None: return res def get_elem_to_crawl_step(self, elem_name: str) -> list[CrawlStep]: - """returns a list of all crawl steps in which this element has been visited - during the crawl of this flow. If not visited, the empty list is returned. 
+ """Get all crawl steps in which this element has been visited. Args: - elem_name (str): element name (use '*' for the start element) + elem_name: Element name (use '*' for the start element). Returns: - list of :obj:`CrawlStep` instances that visit this element - + List of :obj:`CrawlStep` instances that visit this element. + Returns empty list if not visited. """ if self.el_2_cs is None: logger.error(f"requested element to crawlstep but " @@ -739,7 +801,10 @@ def get_elem_to_crawl_step(self, elem_name: str) -> list[CrawlStep]: return dict.get(self.el_2_cs, elem_name, list()) def get_crawlable_elem_tuples(self) -> list[tuple[str, str]] | None: - """Returns all traversable element name, tag tuples that are connected to the start element + """Get all traversable element name, tag tuples connected to the start element. + + Returns: + List of (element_name, element_tag) tuples, or None if none found. """ if self.crawlable_elem_tuples is None: accum = [] @@ -751,15 +816,22 @@ def get_crawlable_elem_tuples(self) -> list[tuple[str, str]] | None: def get_call_chain(self, source_el: El, source_path: str, sink_el: El, source_parser: FlowParser) -> list[tuple[El, str]] | None: - """sink_el must be in the current flow. source_el can be in an ancestor - flow. Only returns paths currently crawled, so this must be called - every time a specific frame is loaded. + """Get the call chain from a source element to a sink element. - Returns: - A list starting with the source and ending with the sink in which the each is an - ancestor caller of the succeeding element. - [(element, element flow path)] + The sink_el must be in the current flow. The source_el can be in an + ancestor flow. Only returns paths currently crawled, so this must be + called every time a specific frame is loaded. + Args: + source_el: Source XML element. + source_path: Flow path of the source element. + sink_el: Sink XML element (must be in the current flow). + source_parser: Parser instance for the source flow path. + + Returns: + List of (element, element flow path) tuples starting with the source + and ending with the sink, where each is an ancestor caller of the + succeeding element. Returns None if no call chain is found. """ source_el_tag = parse_utils.get_tag(source_el) source_el_name = parse_utils.get_name(source_el) @@ -768,7 +840,7 @@ def get_call_chain(self, source_el: El, source_path: str, if source_el_tag in parse_utils.START_ELEMS: local_source_influenced = [x[0] for x in self.get_crawlable_elem_tuples()] else: - local_source_influenced = source_parser.get_traversable_descendents_of_elem(source_el_name) + local_source_influenced = source_parser.get_traversable_descendants_of_elem(source_el_name) if not local_source_influenced: return None @@ -803,15 +875,11 @@ def get_call_chain(self, source_el: El, source_path: str, def dump_cfg(cfg: ControlFlowGraph, fp: SupportsWrite[str]) -> None: - """Writes to file pointer + """Serialize control flow graph to JSON and write to file pointer. Args: - cfg (ControlFlowGraph): graph to serialize (JSON) - fp (TextIO): file pointer: - - Returns: - None - + cfg: ControlFlowGraph to serialize. + fp: File pointer to write JSON to. """ json.dump(cfg, indent=4, fp=fp, cls=CrawlEncoder) @@ -863,17 +931,18 @@ def validate_cfg(cfg: ControlFlowGraph, def _get_connector_map(elem: El, parser: Parser) -> dict[El, tuple[str, ConnType, bool]]: - """ + """Get connector map with validation. + Wrapper for getting connectors that handles start elements and missing - connector targets, which requires a parser. 
-
+    connector targets, which requires a parser.
+
     Args:
-        elem: element to search for connectors
-        parser: parser containing global file data
+        elem: Element to search for connectors.
+        parser: Parser containing global file data.

     Returns:
-        connector map (connector elem: name of target, type of connector, is_optional)
-
+        Dictionary mapping connector elements to (target_name, connector_type, is_optional).
+        Only includes connectors whose targets exist in the parser.
     """

     raw = get_conn_target_map(elem)
@@ -881,19 +950,44 @@
     return {x: v for x, v in raw.items() if v[0] in parser.all_names}

 def tuple_trace(x: tuple[tuple[str, str], ...]) -> frozenset[tuple[str, str]]:
-    return frozenset([t for t in x])
+    """Convert a tuple of tuples to a frozenset.
+
+    Args:
+        x: Tuple of (str, str) tuples.

-def _right_find(my_iter: tuple[str, ConnType], val_to_find) -> int:
+    Returns:
+        Frozenset of the tuples.
     """
-    returns -1 if val_to_find is not in the second value of my_iter
+    return frozenset([t for t in x])
+
+
+def _right_find(my_iter: tuple[tuple[str, ConnType], ...], val_to_find: ConnType) -> int:
+    """Find the entry matching the rightmost occurrence of a ConnType value.
+
+    Args:
+        my_iter: Tuple of (element_name, ConnType) tuples.
+        val_to_find: ConnType value to find.
+
+    Returns:
+        Forward index of the earliest entry whose (element_name, ConnType) pair
+        matches the rightmost occurrence of val_to_find (the loop-context entry
+        to pop back to), or -1 if val_to_find does not occur.
     """
     iter_len = len(my_iter)
     if iter_len == 0:
         return -1
     else:
+        best = None
+        el_name = None
         for index, x in enumerate(reversed(my_iter)):
-            if x[1] == val_to_find:
-                return iter_len - index
-        return -1
+            if el_name is None:
+                if x[1] == val_to_find:
+                    el_name = x[0]  # name of loop_context to pop
+                    best = index
+            elif x == (el_name, val_to_find):
+                best = index
+
+        if best is not None:
+            return (iter_len - 1) - best
+        else:
+            return -1
+

diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/db_storage.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/db_storage.py
new file mode 100644
index 00000000..e3044f7e
--- /dev/null
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/db_storage.py
@@ -0,0 +1,942 @@
+"""SQLite database storage for QueryResult objects.
+
+This module provides functionality to create and manage an SQLite database
+for storing QueryResult objects from flow scanner analysis.
+""" + +from __future__ import annotations + +import json +import logging +import os +import sqlite3 +import subprocess +from pathlib import Path +from typing import TYPE_CHECKING + +# Import for runtime use (not just type checking) +from public.data_obj import InfluenceStatement, InfluenceStatementEncoder + +if TYPE_CHECKING: + from public.data_obj import QueryResult, InfluencePath + from public.enums import FlowType + +#: module logger +logger = logging.getLogger(__name__) + + +class QueryResultEncoder(json.JSONEncoder): + """Custom JSON encoder for QueryResult and related objects.""" + + def default(self, obj): + """Encode objects to JSON-serializable format.""" + # Handle InfluenceStatement + if isinstance(obj, InfluenceStatement): + raw_dict = obj.to_dict() + # For public display, we replace flow_path with source_path + cleaned_dict = {s: raw_dict[s] for s in raw_dict.keys() if ( + s != 'flow_path' and s != 'source_path')} + cleaned_dict['flow_path'] = raw_dict['source_path'] + return cleaned_dict + + # Handle InfluencePath - convert to dict representation + # Check for InfluencePath by looking for characteristic attributes + if hasattr(obj, 'history') and hasattr(obj, 'influenced_name') and hasattr(obj, 'influencer_name'): + # This is an InfluencePath + return { + 'history': [self.default(stmt) for stmt in obj.history] if obj.history else [], + 'influenced_name': obj.influenced_name, + 'influenced_property': obj.influenced_property, + 'influencer_name': obj.influencer_name, + 'influencer_property': obj.influencer_property, + 'influenced_filepath': obj.influenced_filepath, + 'influencer_filepath': obj.influencer_filepath, + 'influenced_type_info': self.default(obj.influenced_type_info) if obj.influenced_type_info else None + } + + # Handle FlowType enum (and other enums) + if hasattr(obj, 'name') and hasattr(obj, 'value'): + # This is an Enum + return obj.name + + # Handle frozenset + if isinstance(obj, frozenset): + return list(obj) + + # Handle VariableType if present + if hasattr(obj, 'tag') and hasattr(obj, 'datatype'): + # This is a VariableType + return {s: getattr(obj, s) for s in obj.__slots__ if hasattr(obj, s)} + + return json.JSONEncoder.default(self, obj) + + +def create_database(db_path: str | Path) -> sqlite3.Connection: + """Create a new SQLite database with QueryResult schema. + + Args: + db_path: Path to the SQLite database file. If the file exists, + it will be opened. If it doesn't exist, it will be created + with the appropriate schema. 
+ + Returns: + sqlite3.Connection: Connection to the database + + Raises: + sqlite3.Error: If database creation fails + """ + db_path = Path(db_path) + + # Ensure parent directory exists + db_path.parent.mkdir(parents=True, exist_ok=True) + + # Connect to database (creates file if it doesn't exist) + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + + # Create the runs table if it doesn't exist + conn.execute(""" + CREATE TABLE IF NOT EXISTS runs ( + run_id INTEGER PRIMARY KEY AUTOINCREMENT, + description TEXT, + git_hash TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Create index on git_hash for filtering by code version + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_git_hash ON runs(git_hash) + """) + + # Create index on created_at for chronological queries + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_runs_created_at ON runs(created_at) + """) + + # Create the query_results table if it doesn't exist + # Note: run_id is INTEGER to match runs.run_id (INTEGER PRIMARY KEY) + # Foreign key constraint ensures referential integrity + conn.execute(""" + CREATE TABLE IF NOT EXISTS query_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id INTEGER NOT NULL, + query_id TEXT NOT NULL, + flow_type TEXT NOT NULL, + influence_statement TEXT, + paths TEXT, + elem_code TEXT, + elem_line_no INTEGER, + elem_name TEXT, + field TEXT, + filename TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (run_id) REFERENCES runs(run_id) ON DELETE CASCADE + ) + """) + + # Create index on run_id for faster lookups and comparisons between runs + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_run_id ON query_results(run_id) + """) + + # Create index on query_id for faster lookups + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_query_id ON query_results(query_id) + """) + + # Create composite index on run_id and query_id for efficient run comparisons + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_run_query ON query_results(run_id, query_id) + """) + + # Create index on flow_type for filtering + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_flow_type ON query_results(flow_type) + """) + + # Create index on filename for filtering by file + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_filename ON query_results(filename) + """) + + conn.commit() + logger.info(f"Database initialized at {db_path}") + + return conn + + +def create_run(conn: sqlite3.Connection, description: str | None = None) -> int: + """Create a new run record in the database. + + This function creates a new run entry that can be used to group query results + together for snapshot testing and comparison. The function automatically detects + the current git hash of the codebase. 
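A hedged end-to-end sketch of initializing storage and opening a run; the database filename is a placeholder, and git hash detection happens inside create_run:

import flow_scanner.db_storage as db_storage  # as imported in __main__.py

conn = db_storage.create_database("snapshot_demo.db")  # placeholder path
run_id = db_storage.create_run(conn, description="nightly scan")
print(f"created run {run_id}")
conn.close()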
+ + Args: + conn: SQLite database connection + description: Optional description of the run (e.g., "Nightly scan", "Pre-commit check") + + Returns: + int: The run_id (integer primary key) of the newly created run + + Raises: + sqlite3.Error: If insertion fails + """ + # Automatically detect git hash + # Security: All command arguments are hardcoded, not user-controlled + git_hash = None + try: + # Find a safe working directory - use the directory containing this module + # This ensures we're running git from a known location, not an untrusted cwd + module_dir = Path(__file__).parent.resolve() + + # Try to find git repo root by walking up from module directory + # This is safer than using current working directory which could be untrusted + git_repo_root = None + current_path = module_dir + max_levels = 10 # Limit search depth to prevent infinite loops + for _ in range(max_levels): + if (current_path / '.git').exists() or (current_path / '.git').is_dir(): + git_repo_root = current_path + break + if current_path.parent == current_path: # Reached filesystem root + break + current_path = current_path.parent + + # If no git repo found, use module directory as fallback + # Git will fail gracefully if not in a repo + cwd = git_repo_root if git_repo_root else module_dir + + # Security: Use minimal environment to prevent environment variable attacks + # Only include PATH and essential variables, exclude GIT_* variables that could be malicious + safe_env = { + 'PATH': os.environ.get('PATH', ''), + 'HOME': os.environ.get('HOME', ''), + 'USER': os.environ.get('USER', ''), + } + + # Security: All command arguments are hardcoded literals, not user-controlled + # 'git', 'rev-parse', 'HEAD' are all hardcoded strings + result = subprocess.run( + ['git', 'rev-parse', 'HEAD'], # Hardcoded command - no user input + cwd=str(cwd), # Use safe working directory + env=safe_env, # Use minimal, safe environment + capture_output=True, + text=True, + check=True, + timeout=5 # Prevent hanging + ) + + git_hash = result.stdout.strip() + # Validate output - should be a 40-character hex string (SHA-1) or longer (SHA-256) + if not git_hash or len(git_hash) < 7: # Minimum valid short hash is 7 chars + logger.warning("Git command returned invalid hash, setting to None") + git_hash = None + # Additional validation: check it looks like a git hash (hex characters) + elif not all(c in '0123456789abcdefABCDEF' for c in git_hash): + logger.warning(f"Git hash contains invalid characters: {git_hash}, setting to None") + git_hash = None + except subprocess.TimeoutExpired: + logger.warning("Git command timed out while getting hash, setting to None") + git_hash = None + except subprocess.CalledProcessError as e: + logger.warning(f"Error getting git hash (exit code {e.returncode}): {e.stderr.strip()}, setting to None") + git_hash = None + except FileNotFoundError: + logger.warning("Git command not found, setting git_hash to None") + git_hash = None + except Exception as e: + logger.warning(f"Unexpected error getting git hash: {e}, setting to None") + git_hash = None + + cursor = conn.execute(""" + INSERT INTO runs (description, git_hash) + VALUES (?, ?) + """, (description, git_hash)) + + conn.commit() + run_id = cursor.lastrowid + logger.info(f"Created run with run_id={run_id}, description={description}, git_hash={git_hash}") + return run_id + + +def get_runs(conn: sqlite3.Connection) -> list[dict]: + """Retrieve all runs from the database, sorted by creation time. 
+ + Returns all runs sorted by timestamp in descending order (most recent first). + Each run is returned as a dictionary with keys: run_id, description, git_hash, created_at. + + Args: + conn: SQLite database connection + + Returns: + list[dict]: List of run dictionaries, sorted by created_at descending. + Each dict contains: run_id (int), description (str | None), + git_hash (str | None), created_at (str). + Returns empty list if no runs exist. + + Raises: + sqlite3.Error: If query fails + """ + cursor = conn.execute(""" + SELECT run_id, description, git_hash, created_at + FROM runs + ORDER BY created_at DESC + """) + + rows = cursor.fetchall() + + if not rows: + return [] + + # Convert rows to list of dictionaries + runs = [] + for row in rows: + runs.append({ + 'run_id': int(row['run_id']), # Keep as integer to match database type + 'description': row['description'], + 'git_hash': row['git_hash'], + 'created_at': row['created_at'] + }) + + return runs + + +def drop_run(conn: sqlite3.Connection, run_id: int) -> tuple[int, bool]: + """Delete a run and all its associated query results. + + This function performs a cascading delete: first deletes all query results + associated with the run_id, then deletes the run itself. The operation is + atomic - if either deletion fails, the transaction is rolled back. + + Note: Due to FOREIGN KEY CASCADE, query results will be automatically deleted + when the run is deleted, but we delete them explicitly first for clarity. + + Args: + conn: SQLite database connection + run_id: The run_id (integer) of the run to delete + + Returns: + tuple[int, bool]: A tuple containing: + - int: Number of query results deleted + - bool: True if the run was successfully deleted, False otherwise + + Raises: + sqlite3.Error: If deletion fails + ValueError: If run_id is None or invalid + """ + if run_id is None: + raise ValueError("run_id cannot be None") + + try: + # Start a transaction for atomicity + # First, delete all query results associated with this run + cursor = conn.execute(""" + DELETE FROM query_results + WHERE run_id = ? + """, (run_id,)) + + num_results_deleted = cursor.rowcount + + # Then, delete the run itself + cursor = conn.execute(""" + DELETE FROM runs + WHERE run_id = ? + """, (run_id,)) + + run_deleted = cursor.rowcount > 0 + + # Commit the transaction + conn.commit() + + if run_deleted: + logger.info(f"Deleted run {run_id} and {num_results_deleted} associated query results") + else: + logger.warning(f"Run {run_id} not found, but deleted {num_results_deleted} query results (if any)") + + return num_results_deleted, run_deleted + + except sqlite3.Error as e: + # Rollback on error + conn.rollback() + logger.error(f"Error deleting run {run_id}: {e}") + raise + + +def _query_result_to_comparable_dict(row: sqlite3.Row) -> dict: + """Convert a database row to a comparable dictionary for set operations. + + This function deserializes JSON fields and normalizes the data structure + so that QueryResults can be compared as sets, with special handling for + paths (comparing as sets, handling None values). 
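+
+    For example (illustrative), a stored paths value of
+    ``'[{"b": 2}, {"a": 1}]'`` normalizes to the hashable tuple
+    ``('{"a": 1}', '{"b": 2}')``, so two rows whose paths differ only in
+    order compare as equal.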
+ + Args: + row: SQLite Row object from query_results table + + Returns: + dict: Normalized dictionary suitable for comparison + """ + # Deserialize JSON fields + influence_statement = None + if row['influence_statement']: + try: + influence_statement = json.loads(row['influence_statement']) + except (json.JSONDecodeError, TypeError): + influence_statement = None + + paths = None + if row['paths']: + try: + paths_data = json.loads(row['paths']) + # paths_data is a list from JSON, convert to frozenset for comparison + # We'll compare as a set of serialized paths since we can't easily + # reconstruct InfluencePath objects from JSON + if paths_data: + # Sort and convert to tuple for hashability in set comparison + paths = tuple(sorted(json.dumps(p, sort_keys=True) for p in paths_data)) + else: + paths = None + except (json.JSONDecodeError, TypeError): + paths = None + + # Create a comparable dict with all fields + # Use tuple for paths to make it hashable for set operations + return { + 'query_id': row['query_id'], + 'flow_type': row['flow_type'], + 'influence_statement': json.dumps(influence_statement, sort_keys=True) if influence_statement else None, + 'paths': paths, # Already a tuple (hashable) or None + 'elem_code': row['elem_code'], + 'elem_line_no': row['elem_line_no'], + 'elem_name': row['elem_name'], + 'field': row['field'], + 'filename': row['filename'] + } + + +def _query_results_equal(result1: dict, result2: dict) -> bool: + """Compare two query result dictionaries for equality. + + Two QueryResults are considered equal if all their fields match, with + special handling for paths which are compared as sets (ignoring order). + + Args: + result1: First query result dictionary + result2: Second query result dictionary + + Returns: + bool: True if the results are equal, False otherwise + """ + # Compare all fields except paths first + fields_to_compare = ['query_id', 'flow_type', 'influence_statement', + 'elem_code', 'elem_line_no', 'elem_name', 'field', 'filename'] + + for field in fields_to_compare: + if result1.get(field) != result2.get(field): + return False + + # Special handling for paths: compare as sets (both can be None) + paths1 = result1.get('paths') + paths2 = result2.get('paths') + + # Both None -> equal + if paths1 is None and paths2 is None: + return True + + # One None, one not None -> not equal + if paths1 is None or paths2 is None: + return False + + # Both are tuples (from _query_result_to_comparable_dict), compare as sets + return set(paths1) == set(paths2) + + +def compare_runs(conn: sqlite3.Connection, run_id1: int, run_id2: int) -> tuple[bool, dict]: + """Compare two runs to determine if they have identical query results. + + This is the core of snapshot testing functionality. Two runs are considered + equal if they contain the same set of QueryResults (ignoring order), where + individual QueryResults are compared with special handling for paths (compared + as sets, handling None values). 
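+
+    Example (illustrative snapshot check; assumes both run IDs exist)::
+
+        equal, details = compare_runs(conn, baseline_run_id, new_run_id)
+        if not equal:
+            print(f"New findings: {len(details['only_in_run2'])}")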
+
+    Args:
+        conn: SQLite database connection
+        run_id1: First run ID to compare
+        run_id2: Second run ID to compare
+
+    Returns:
+        tuple[bool, dict]: A tuple containing:
+            - bool: True if runs are identical, False otherwise
+            - dict: Detailed comparison results with keys:
+                - 'equal': bool (same as return value)
+                - 'run1_count': int (number of results in run1)
+                - 'run2_count': int (number of results in run2)
+                - 'only_in_run1': list[dict] (results only in run1)
+                - 'only_in_run2': list[dict] (results only in run2)
+                - 'different': list[tuple[dict, dict]] (results with same query_id but different content)
+
+    Raises:
+        sqlite3.Error: If query fails
+        ValueError: If run_id is None or invalid
+    """
+    if run_id1 is None or run_id2 is None:
+        raise ValueError("run_id cannot be None")
+
+    # Fetch all query results for both runs
+    cursor1 = conn.execute("""
+        SELECT query_id, flow_type, influence_statement, paths,
+               elem_code, elem_line_no, elem_name, field, filename
+        FROM query_results
+        WHERE run_id = ?
+        ORDER BY id
+    """, (run_id1,))
+
+    cursor2 = conn.execute("""
+        SELECT query_id, flow_type, influence_statement, paths,
+               elem_code, elem_line_no, elem_name, field, filename
+        FROM query_results
+        WHERE run_id = ?
+        ORDER BY id
+    """, (run_id2,))
+
+    rows1 = cursor1.fetchall()
+    rows2 = cursor2.fetchall()
+
+    # Convert to comparable dictionaries
+    results1 = [_query_result_to_comparable_dict(row) for row in rows1]
+    results2 = [_query_result_to_comparable_dict(row) for row in rows2]
+
+    # Create sets for comparison (need to use tuples since dicts aren't hashable)
+    # We'll use a tuple of all field values as the key
+    def make_hashable(result: dict) -> tuple:
+        """Convert a result dict to a hashable tuple for set operations."""
+        return (
+            result['query_id'],
+            result['flow_type'],
+            result['influence_statement'],
+            result['paths'],  # Already a tuple or None
+            result['elem_code'],
+            result['elem_line_no'],
+            result['elem_name'],
+            result['field'],
+            result['filename']
+        )
+
+    set1 = {make_hashable(r) for r in results1}
+    set2 = {make_hashable(r) for r in results2}
+
+    # Find differences
+    only_in_run1 = [r for r in results1 if make_hashable(r) not in set2]
+    only_in_run2 = [r for r in results2 if make_hashable(r) not in set1]
+
+    # Find results with the same query_id but different content.
+    # Group by query_id to find potential matches.
+    results1_by_query = {}
+    results2_by_query = {}
+
+    for r in results1:
+        results1_by_query.setdefault(r['query_id'], []).append(r)
+
+    for r in results2:
+        results2_by_query.setdefault(r['query_id'], []).append(r)
+
+    different = []
+    all_query_ids = set(results1_by_query.keys()) | set(results2_by_query.keys())
+
+    for qid in all_query_ids:
+        r1_list = results1_by_query.get(qid, [])
+        r2_list = results2_by_query.get(qid, [])
+
+        # Results under this query_id with no exact counterpart in the other
+        # run; pairing them up yields "same query, changed content" entries.
+        # (These entries also appear in the only_in_* lists above.)
+        unmatched1 = [r1 for r1 in r1_list
+                      if not any(_query_results_equal(r1, r2) for r2 in r2_list)]
+        unmatched2 = [r2 for r2 in r2_list
+                      if not any(_query_results_equal(r2, r1) for r1 in r1_list)]
+        different.extend(zip(unmatched1, unmatched2))
+
+    # Runs are equal iff the two result sets are identical (set equality
+    # already implies the only_in_* and different lists are empty)
+    are_equal = set1 == set2
+
+    return are_equal, {
+        'equal': are_equal,
+        'run1_count': len(results1),
+        'run2_count': len(results2),
+        'only_in_run1': only_in_run1,
+        'only_in_run2': only_in_run2,
+        'different': different
+    }
+
+
+def insert_query_result(conn: sqlite3.Connection, query_result: 'QueryResult', run_id: int) -> int:
+    """Insert a QueryResult object into the database.
+
+    Args:
+        conn: SQLite database connection
+        query_result: QueryResult object to insert
+        run_id: Integer identifier for this scan run (must exist in runs table)
+
+    Returns:
+        int: The count of inserted records (always 1 for this function)
+
+    Raises:
+        sqlite3.Error: If insertion fails (including foreign key constraint violations)
+        ValueError: If run_id is None or invalid
+    """
+    if run_id is None:
+        raise ValueError("run_id cannot be None")
+
+    # Ensure run_id is an integer
+    try:
+        run_id = int(run_id)
+    except (ValueError, TypeError):
+        raise ValueError(f"run_id must be an integer, got: {type(run_id).__name__}")
+
+    # Serialize influence_statement to JSON
+    influence_statement_json = None
+    if query_result.influence_statement is not None:
+        influence_statement_json = json.dumps(
+            query_result.influence_statement,
+            cls=QueryResultEncoder
+        )
+
+    # Serialize paths to JSON
+    paths_json = None
+    if query_result.paths is not None:
+        paths_json = json.dumps(
+            list(query_result.paths),
+            cls=QueryResultEncoder
+        )
+
+    # Get flow_type as string (enum name)
+    flow_type_str = query_result.flow_type.name if hasattr(query_result.flow_type, 'name') else str(query_result.flow_type)
+
+    cursor = conn.execute("""
+        INSERT INTO query_results (
+            run_id,
+            query_id,
+            flow_type,
+            influence_statement,
+            paths,
+            elem_code,
+            elem_line_no,
+            elem_name,
+            field,
+            filename
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    """, (
+        run_id,
+        query_result.query_id,
+        flow_type_str,
+        influence_statement_json,
+        paths_json,
+        query_result.elem_code,
+        query_result.elem_line_no,
+        query_result.elem_name,
+        query_result.field,
+        query_result.filename
+    ))
+
+    conn.commit()
+    return 1
+
+
+def insert_query_results(conn: sqlite3.Connection, query_results: list['QueryResult'], run_id: int) -> list[int]:
+    """Insert multiple QueryResult objects into the database using batch insert.
+
+    This function performs a single database operation to insert all query results,
+    which is much more efficient than inserting them one at a time. All results
+    will be associated with the same run_id for snapshot testing.
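+
+    Example (illustrative; assumes ``results`` is a list of QueryResult
+    objects produced by a scan)::
+
+        run_id = create_run(conn, description="CI snapshot")
+        row_ids = insert_query_results(conn, results, run_id)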
+ + Args: + conn: SQLite database connection + query_results: List of QueryResult objects to insert + run_id: Integer identifier for this scan run (must exist in runs table) + + Returns: + list[int]: List of row IDs of the inserted records + + Raises: + sqlite3.Error: If insertion fails (including foreign key constraint violations) + ValueError: If run_id is None or invalid + """ + if not query_results: + return [] + + if run_id is None: + raise ValueError("run_id cannot be None") + + # Ensure run_id is an integer + try: + run_id = int(run_id) + except (ValueError, TypeError): + raise ValueError(f"run_id must be an integer, got: {type(run_id).__name__}") + + # Prepare all data for batch insert + rows_data = [] + + for query_result in query_results: + # Serialize influence_statement to JSON + influence_statement_json = None + if query_result.influence_statement is not None: + influence_statement_json = json.dumps( + query_result.influence_statement, + cls=QueryResultEncoder + ) + + # Serialize paths to JSON + paths_json = None + if query_result.paths is not None: + paths_json = json.dumps( + list(query_result.paths), + cls=QueryResultEncoder + ) + + # Get flow_type as string (enum name) + flow_type_str = query_result.flow_type.name if hasattr(query_result.flow_type, 'name') else str(query_result.flow_type) + + rows_data.append(( + run_id, + query_result.query_id, + flow_type_str, + influence_statement_json, + paths_json, + query_result.elem_code, + query_result.elem_line_no, + query_result.elem_name, + query_result.field, + query_result.filename + )) + + # Perform batch insert + cursor = conn.executemany(""" + INSERT INTO query_results ( + run_id, + query_id, + flow_type, + influence_statement, + paths, + elem_code, + elem_line_no, + elem_name, + field, + filename + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, rows_data) + + conn.commit() + + # Get the row IDs of inserted records + # Note: executemany() doesn't set lastrowid reliably, so we query for the IDs + # by using the fact that they were just inserted and are sequential + num_rows = len(query_results) + if num_rows == 0: + return [] + + # Query for the last num_rows rows inserted (they should be the ones we just inserted) + cursor = conn.execute(""" + SELECT id FROM query_results + WHERE run_id = ? + ORDER BY id DESC + LIMIT ? + """, (run_id, num_rows)) + + row_ids = [row[0] for row in cursor.fetchall()] + # Reverse to get them in insertion order + row_ids.reverse() + + return row_ids + + +def get_results_json(conn: sqlite3.Connection, run_id: int = 0) -> str: + """Retrieve all query results for a run and generate a JSON string. + + This function retrieves all QueryResult objects from the database for the + given run_id and formats them as a JSON string matching the format produced + by flow_result.dump_json(). The format includes a results dictionary organized + by query_id, with each entry containing query result information. 
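+
+    Example (illustrative)::
+
+        json_str = get_results_json(conn, run_id=run_id)
+        Path("snapshot.json").write_text(json_str, encoding="utf-8")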
+ + Args: + conn: SQLite database connection + run_id: Integer identifier for the run (default: 0, which retrieves all runs) + + Returns: + str: JSON string containing the results in the same format as flow_result.dump_json() + + Raises: + sqlite3.Error: If query fails + ValueError: If run_id is None or invalid + """ + if run_id is None: + raise ValueError("run_id cannot be None") + + # Ensure run_id is an integer + try: + run_id = int(run_id) + except (ValueError, TypeError): + raise ValueError(f"run_id must be an integer, got: {type(run_id).__name__}") + + # Fetch all query results + if run_id == 0: + # Retrieve all results from all runs + cursor = conn.execute(""" + SELECT query_id, flow_type, influence_statement, paths, + elem_code, elem_line_no, elem_name, field, filename + FROM query_results + ORDER BY id + """) + else: + cursor = conn.execute(""" + SELECT query_id, flow_type, influence_statement, paths, + elem_code, elem_line_no, elem_name, field, filename + FROM query_results + WHERE run_id = ? + ORDER BY id + """, (run_id,)) + + rows = cursor.fetchall() + + if not rows: + # Return empty results structure matching the format + job_result = { + "preset": None, + "help_url": None, + "result_id": None, + "service_version": None, + "flow_scanner_version": None, + "report_label": None, + "email": None, + "scan_start": None, + "scan_end": None, + "results": {} + } + return json.dumps(job_result, indent=4, cls=InfluenceStatementEncoder) + + # Build results_dict similar to gen_result_dict() + results_dict = {} + counter = 0 + + for row in rows: + query_id = row['query_id'] + flow_type = row['flow_type'] + + # Deserialize influence_statement + influence_statement = None + if row['influence_statement']: + try: + influence_statement = json.loads(row['influence_statement']) + except (json.JSONDecodeError, TypeError): + influence_statement = None + + # Deserialize paths + paths = None + if row['paths']: + try: + paths = json.loads(row['paths']) + except (json.JSONDecodeError, TypeError): + paths = None + + # Initialize query_id entry if needed + if query_id not in results_dict: + results_dict[query_id] = [] + + # Determine source code, line, filename, elem_name from influence_statement or row + if influence_statement is not None: + src_code = influence_statement.get('source_text', row['elem_code']) + src_line = influence_statement.get('line_no', row['elem_line_no']) + file_name = influence_statement.get('flow_path', row['filename']) + elem_name = influence_statement.get('element_name', row['elem_name']) + field_end = influence_statement.get('influenced_var', row['field'] or row['elem_name']) + else: + src_code = row['elem_code'] + src_line = row['elem_line_no'] + file_name = row['filename'] + elem_name = row['elem_name'] + field_end = row['field'] or row['elem_name'] + + # Build base result entry + to_append = { + "query_id": query_id, + "query_name": query_id, # Use query_id as name since we don't have preset + "severity": "", # Empty since we don't have preset + "description": "", # Empty since we don't have preset + "counter": counter, + "elem_name": elem_name, + "field": field_end, + "elem_code": src_code, + "elem_line_no": src_line, + "filename": file_name, + "flow_type": flow_type + } + + # Build flow field (tuple of InfluenceStatement dicts) + if paths is None or len(paths) == 0: + if influence_statement is None: + # Lexical query only - no flow + to_append["flow"] = None + results_dict[query_id].append(to_append) + counter += 1 + continue + else: + # Single statement, no paths + 
+                statements = [(influence_statement,)]
+        else:
+            # Build statements from paths
+            statements = []
+            for path_dict in paths:
+                # Get history from path (list of InfluenceStatement dicts)
+                history = path_dict.get('history', [])
+                # Filter out "[builtin]" statements
+                pruned_history = [stmt for stmt in history if stmt.get('source_text') != "[builtin]"]
+
+                if influence_statement is not None:
+                    # Check if last history item matches influence_statement
+                    if pruned_history and pruned_history[-1] == influence_statement:
+                        statements.append(tuple(pruned_history))
+                    elif influence_statement.get('source_text') != "[builtin]":
+                        statements.append(tuple(pruned_history) + (influence_statement,))
+                    else:
+                        statements.append(tuple(pruned_history))
+                else:
+                    statements.append(tuple(pruned_history))
+
+        # Create entries for each statement tuple
+        for path_tuple in statements:
+            new_path = to_append.copy()
+            new_path["flow"] = path_tuple
+            new_path["counter"] = counter
+            results_dict[query_id].append(new_path)
+
+        counter += 1
+
+    # Build job_result structure matching _make_job_result()
+    job_result = {
+        "preset": None,
+        "help_url": None,
+        "result_id": None,
+        "service_version": None,
+        "flow_scanner_version": None,
+        "report_label": None,
+        "email": None,
+        "scan_start": None,
+        "scan_end": None,
+        "results": results_dict
+    }
+
+    # Serialize to JSON using InfluenceStatementEncoder to match dump_json() format
+    return json.dumps(job_result, indent=4, cls=InfluenceStatementEncoder)
+
diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py
index 0dcba66b..39a6616d 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py
@@ -1,9 +1,9 @@
-"""Performs symbolic execution and vulnerability analysis
-
-    @author: rsussland@salesforce.com
+"""Performs symbolic execution and vulnerability analysis.
 
 The executor handles the element crawl and dispatches
-query handlers and dataflow updates as appropriate
+query handlers and dataflow updates as appropriate.
+
+@author: rsussland@salesforce.com
 """
 
 from __future__ import annotations
@@ -68,20 +68,17 @@ class Stack(object):
     """The Stack handles subflow invocation.
 
     When we pass to a subflow a new frame is pushed on the stack and when we
-    return it is popped."""
+    return it is popped.
+    """
 
     def __init__(self, root_flow_path: str, resolver: Resolver,
                  query_manager: QueryManager):
-        """Constructor (can be used)
+        """Initialize a Stack instance.
 
         Args:
-            root_flow_path: current filename of flow being processed
-            resolver: map[flow_name] -> flow_path of all files in scope
-            query_manager: invokes queries and stores results
-
-        Results:
-            result instance object
-
+            root_flow_path: Current filename of flow being processed.
+            resolver: Map from flow_name to flow_path of all files in scope.
+            query_manager: Invokes queries and stores results.
         """
 
         #: tracks list of frames that need to be processed *after* current frame
@@ -110,11 +107,10 @@ def __init__(self, root_flow_path: str, resolver: Resolver,
         self.query_manager: QueryManager = query_manager
 
     def pop(self) -> Frame | None:
-        """Get next frame from stack
+        """Get next frame from stack.
 
         Returns:
-            frame or None if empty
-
+            Frame or None if stack is empty.
""" if len(self.__frame_stack) > 0: return self.__frame_stack.pop(0) @@ -122,24 +118,21 @@ def pop(self) -> Frame | None: return None def push(self, frame: Frame): - """Pushes a frame onto stack, call when invoking a subflow + """Push a frame onto stack. - Args: - frame: frame to push to the top of the stack - - Returns: - None + Call this when invoking a subflow. + Args: + frame: Frame to push to the top of the stack. """ self.__frame_stack.insert(0, frame) return def run(self) -> QueryManager: - """Main entry point for symbolic execution of an initialized stack + """Main entry point for symbolic execution of an initialized stack. Returns: - Query Manager object - + QueryManager object after execution completes. """ while True: next_frame = self.current_frame.execute() @@ -191,14 +184,13 @@ def run(self) -> QueryManager: self.current_frame = next_frame def is_circular_reference(self, next_frame: Frame) -> bool: - """Checks if the next frame is in the previously - processed frames. + """Check if the next frame is in the previously processed frames. Args: - next_frame: next frame to process + next_frame: Next frame to process. Returns: - True if the next frame is in the old frames + True if the next frame is in the old frames (circular reference detected). """ if next_frame is None: return False @@ -227,16 +219,15 @@ def add_inputs_to_call_cache(cache: dict[str, list[list[flow_vec_g]]], sub_path: str, val: flow_vec_g, ) -> dict[str, list[list[flow_vec_g]]]: - """Store input values to subflow in cache + """Store input values to subflow in cache. Args: - cache: cached return values (subflow name -> flow_vec_g) - sub_path: path of subflow - val: input values to store + cache: Cached return values (subflow name -> flow_vec_g). + sub_path: Path of subflow. + val: Input values to store. Returns: - updated cache - + Updated cache dictionary. """ if cache is None: cache = {sub_path: [[val]]} @@ -255,17 +246,16 @@ def add_outputs_to_call_cache(cache: dict[str, list[list[flow_vec_g]]], inputs: flow_vec_g, added: flow_vec_g, flow_path: str) -> dict[str,list[list[flow_vec_g]]]: - """Store return values of subflow in call cache + """Store return values of subflow in call cache. Args: - cache: cached subflow inputs and outputs - inputs: inputs whose outputs are being added to cache - added: vars to flow vectors to add to cache - flow_path: filename of flow + cache: Cached subflow inputs and outputs. + inputs: Inputs whose outputs are being added to cache. + added: Variables to flow vectors to add to cache. + flow_path: Filename of flow. Returns: - the updated cache - + The updated cache dictionary. """ # outputs should only be added after inputs assert cache is not None and flow_path in cache @@ -278,18 +268,17 @@ def call_carnac(input_cache: dict[str, list[list[flow_vec_g]]] | None, vector_map: flow_vec_g, subflow_path: str, outputs: flow_vec_g = None) -> flow_vec_g | None: - """Predicts what the subflow will return + """Predict what the subflow will return based on cached inputs/outputs. Args: - input_cache: cache of previous flow inputs - subflow_path ->[[input1, output1], [input2, output2], ] - vector_map: subflow inputs being called now - subflow_path: filepath of subflow to be called - outputs: outputs to add + input_cache: Cache of previous flow inputs/outputs. + Format: subflow_path -> [[input1, output1], [input2, output2], ...] + vector_map: Subflow inputs being called now. + subflow_path: Filepath of subflow to be called. + outputs: Outputs to add to cache (optional). 
         Returns:
-            output vector map (return values from subflow)
-
+            Output vector map (return values from subflow) if found in cache,
+            None otherwise.
     """
     if input_cache is None or subflow_path not in input_cache:
         # Carnac not ready as cache is not populated yet
@@ -319,7 +308,7 @@ class Frame(object):
     """Frame is responsible for managing program analysis within a single flow.
 
     Execution happens along each branch, which is assigned a branch state
-    and branching xml element (e.g. Loop element or Decision element)
+    and branching XML element (e.g. Loop element or Decision element)
 
     Branch management is via maintaining a worklist consisting of branches
     that need to be processed, with each branch a tuple (State, branch elem)
@@ -350,14 +339,14 @@ def __init__(self, current_flow_path: str | None = None, resolver: Resolver = No
         #: placeholder for fast-forward scans (not currently used)
         self.resolved_subflows: dict[Any, Any] = {}
 
-        #: path of flow we are working on, needed when labelling inputs/outputs
+        #: path of flow we are working on, needed when labeling inputs/outputs
        self.flow_path: str = current_flow_path
 
         #: name of flow we are working on. Needed for loading subflows
         self.flow_name: str | None = None
 
-        #: XML parser instance
-        self.parser: parse.Parser | None = None
+        #: XML parser instance (the union type quiets type-checker complaints)
+        self.parser: parse.Parser | parse.FlowParser | None = None
 
         #: supplies next element and branch-state to process
         self.crawler: Crawler | None = None
@@ -409,19 +398,22 @@ def build(cls, current_flow_path: str | None = None,
               resolved_subflows: dict[Any, Any] = None,
               parent_subflow: El = None,
               query_manager: QueryManager = None) -> Frame:
-        """Call this whenever program analysis starts or a subflow is reached
+        """Build a new Frame instance.
+
+        Call this whenever program analysis starts or a subflow is reached.
 
         Args:
-            current_flow_path: current path of flow
-            resolver: Resolves subflows to be scanned
-            resolved_subflows: subflows that have been already processed
-            parent_subflow: current subflow element that spawned this
-                frame
-            query_manager: manages query instances
+            current_flow_path: Current path of flow.
+            resolver: Resolves subflows to be scanned.
+            resolved_subflows: Subflows that have been already processed.
+            parent_subflow: Current subflow element that spawned this frame.
+            query_manager: Manages query instances.
 
         Returns:
-            new Frame
+            New Frame instance.
+
+        Raises:
+            ValueError: If current_flow_path is None.
         """
 
         if current_flow_path is None:
@@ -453,20 +445,18 @@ def build(cls, current_flow_path: str | None = None,
         return frame
 
     def update_parent_frame(self, parent_frame: Frame, output_vector_map) -> None:
-        """Updates the provided parent frame with the return values of the current frame.
+        """Update the provided parent frame with the return values of the current frame.
 
-        * Query Manager updated to have new parser
+        * Query Manager updated to have new parser.
         * New Influence Paths that flow into the output
           variables of the subflow are pushed into the parent.
 
         Args:
-            output_vector_map: map from tuples to output vectors of the child subflow
-            parent_frame: frame which spawned the current frame via a
-                subflow
-
-        Returns:
-            None
+            output_vector_map: Map from tuples to output vectors of the child subflow.
+            parent_frame: Frame which spawned the current frame via a subflow.
+
+        Raises:
+            RuntimeError: If parent_frame or self.parent_subflow is None.
""" if parent_frame is None or self.parent_subflow is None: raise RuntimeError("Attempted to update a null parent frame") @@ -514,14 +504,13 @@ def update_parent_frame(self, parent_frame: Frame, output_vector_map) -> None: """ def get_consolidated_output_vars(self) -> dict[tuple[str, str], flows.FlowVector]: - """get all output variable vectors from all terminal BranchStates. + """Get all output variable vectors from all terminal BranchStates. Call this method after flow processing has completed for a subflow in order to return all possible output variables to the parent. Returns: - a map (flow_path, variable name) -> FlowVector - + A map from (flow_path, variable name) to FlowVector. """ # grab from current state @@ -556,7 +545,6 @@ def spawn_child_frame(self, subflow: El, ) -> Frame: """Spawn a child frame when entering subflow. - Function Call and Return ============================ @@ -575,19 +563,18 @@ def spawn_child_frame(self, subflow: El, 4. set self.child_spawned = True (so when we return, we don't spawn again!) 5. return the new Frame. - .. NOTE:: the parser object remembers all the sources and sinks from the parent + .. note:: The parser object remembers all the sources and sinks from the parent and these are available to the child as well (the old parser is propagated - to the child, as is the old result instance) + to the child, as is the old result instance). Args: - sub_path: filepath of subflow being called - input_map: map of output variables in child to input variables of subflow elem in parent - vector_map: map from tuple to the flow vectors that will be pushed into the child - subflow: subflow xml element + sub_path: Filepath of subflow being called. + input_map: Map of output variables in child to input variables of subflow elem in parent. + vector_map: Map from tuple to the flow vectors that will be pushed into the child. + subflow: Subflow XML element. Returns: - updated child frame ready to begin processing - + Updated child frame ready to begin processing. """ # build a parser for new subflow, which inherits variable info new_parser = parse.Parser.from_file(filepath=sub_path, old_parser=self.parser) @@ -620,17 +607,16 @@ def spawn_child_frame(self, subflow: El, return new_frame def handle_subflows(self, current_elem: El) -> Frame | None: - """Checks whether we have encountered a subflow elem. + """Check whether we have encountered a subflow element. Different behavior required if we are returning from the element or entering into it. Args: - current_elem: Flow element to check + current_elem: Flow element to check. Returns: - new Frame in case we are entering a new subflow, or None in + New Frame in case we are entering a new subflow, or None in case we are returning from the subflow. - """ if not FOLLOW_SUBFLOWS: @@ -649,12 +635,11 @@ def handle_subflows(self, current_elem: El) -> Frame | None: return None def execute(self) -> Frame | None: - """Performs symbolic execution on the Frame. + """Perform symbolic execution on the Frame. Returns: - new Frame to process in case a subflow has been launched, + New Frame to process in case a subflow has been launched, and finally returning None when the Frame's processing is complete. - """ # once, we run queries at flow start: @@ -686,11 +671,18 @@ def execute(self) -> Frame | None: return child_frame - def process_subflow(self, current_elem): + def process_subflow(self, current_elem) -> Frame | None: + """Process a subflow element. 
- # If there is a problem, we return None and the parent - # continues on as if the subflow did not exist + If there is a problem, we return None and the parent + continues on as if the subflow did not exist. + Args: + current_elem: Subflow XML element to process. + + Returns: + Child Frame if subflow should be processed, None otherwise. + """ try: sub_name = parse_utils.get_subflow_name(current_elem) sub_path = self.resolver.get_subflow_path(sub_name=sub_name, flow_path=self.flow_path) @@ -768,27 +760,27 @@ def parse_flow(flow_path: str, crawl_dir: str = None, resolver: Resolver = None, debug_query: str | None = None) -> QueryManager: - """Main loop that performs control and dataflow analysis + """Main loop that performs control and dataflow analysis. Args: - flow_path: path (on filesystem) of flow-meta.xml file - requestor: email address of scan recipient (optional) - report_label: human-readable name for report (optional) - result_id: id of report (for use in a jobs management system) (optional) - service_version: version of jobs management system (optional) - help_url: url to display on report for more info about results (optional) - query_module_path: path of module where custom queries are stored - query_class_name: name of query class to instantiate - query_preset: name of preset to run - queries: list of optional queries to run - query_manager: existing instance that invokes queries across entire run. Start with None - and one will be created. - crawl_dir: directory of where to store crawl specifications - resolver: used for looking up flow paths of subflows - debug_query (str): pass this string to the query_manager constructor + flow_path: Path (on filesystem) of flow-meta.xml file. + requestor: Email address of scan recipient (optional). + report_label: Human-readable name for report (optional). + result_id: ID of report (for use in a jobs management system) (optional). + service_version: Version of jobs management system (optional). + help_url: URL to display on report for more info about results (optional). + query_module_path: Path of module where custom queries are stored. + query_class_name: Name of query class to instantiate. + query_preset: Name of preset to run. + queries: List of optional queries to run. + query_manager: Existing instance that invokes queries across entire run. + Start with None and one will be created. + crawl_dir: Directory of where to store crawl specifications. + resolver: Used for looking up flow paths of subflows. + debug_query: Pass this string to the query_manager constructor. Returns: - instance of ger_report.Result class that can be used to generate reports + Instance of QueryManager that can be used to generate reports or passed to other flows. """ @@ -897,7 +889,15 @@ def parse_flow(flow_path: str, def report(state: BranchState, current_step: int, total_steps: int) -> None: - # TODO: this will be made pretty later + """Report progress during symbolic execution. + + Args: + state: Current branch state. + current_step: Current step number. + total_steps: Total number of steps. + + .. todo:: This will be made pretty later. + """ msg = (f"flow: {state.flow_name}" f"step: {current_step}" f"total steps: {total_steps}" @@ -907,6 +907,15 @@ def report(state: BranchState, current_step: int, total_steps: int) -> None: def get_output_variable_map(subflow_elem: El, subflow_output_vars: list[var_g]) -> dict[str, str]: + """Get output variable map from subflow element. + + Args: + subflow_elem: Subflow XML element. 
+        subflow_output_vars: List of (flow_path, variable_name) tuples for output variables.
+
+    Returns:
+        Dictionary mapping child variable name to parent variable name.
+    """
     # output_variable_map: child name --> parent name the child influences
     auto, output_variable_map = public.parse_utils.get_subflow_output_map(subflow_elem)
     if auto:
@@ -920,6 +929,14 @@ def get_output_variable_map(subflow_elem: El, subflow_output_vars: list[var_g])
 
 
 def _consolidate_collected_frames(old_frames: list[Frame]) -> tuple[BranchState,]:
+    """Consolidate collected frames by filtering to terminal steps.
+
+    Args:
+        old_frames: List of Frame instances to consolidate.
+
+    Returns:
+        Tuple of BranchState instances from terminal steps.
+    """
     to_return = []
     for frame in old_frames:
         to_keep = list(frame.crawler.terminal_steps)
@@ -929,4 +946,12 @@ def _consolidate_collected_frames(old_frames: list[Frame]) -> tuple[BranchState,
 
 
 def report_map(vec_map: flow_vec_g) -> str:
+    """Generate a string report from a flow vector map.
+
+    Args:
+        vec_map: Dictionary mapping (flow_path, variable_name) to FlowVector.
+
+    Returns:
+        Multi-line string report of all flow vectors.
+    """
     return '\n'.join([x.short_report() for x in vec_map.values()])
diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_metrics.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_metrics.py
index 058c793b..5600e10c 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_metrics.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_metrics.py
@@ -1,9 +1,7 @@
-"""
-    Python module for parsing xml result file and generating html reports.
-    @author: rsussland@salesforce.com
-
-    This file will read from the package
+"""Python module for parsing XML result file and generating HTML reports.
 
+This module handles parsing of XML scan results and generation of HTML reports
+for flow scanner results. It reads query descriptions from the package data.
 """
 
 from __future__ import annotations
@@ -26,8 +24,8 @@
 if TYPE_CHECKING:
     from public.data_obj import QueryDescription
 
-# Compatability:
-# Python 3 doesn't have a 'unicode' function
+# Compatibility:
+# Python 3 doesn't have a 'unicode' function
 #
 
 # set up logging
@@ -65,20 +63,16 @@
 
 
 def add_to_query_config(list_of_desc: list[QueryDescription]) -> None:
-    """Adds query descriptions to module-level config file if not present
+    """Add query descriptions to module-level config file if not present.
 
-    Call after loading any queries from disk. Must pass
-    an object of type public.data_obj.QueryDescription
+    Call after loading any queries from disk. Must pass objects of type
+    public.data_obj.QueryDescription.
 
     Args:
-        list_of_desc:
-
-    Returns:
-        None
+        list_of_desc: List of QueryDescription objects to add to config.
 
     Raises:
-        ValueError if query descriptions are already present
-
+        ValueError: If query descriptions are already present in config.
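+
+    Example (illustrative; assumes ``descs`` is a list of QueryDescription
+    objects loaded from a custom query module)::
+
+        add_to_query_config(descs)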
""" global QUERY_DESC if len(QUERY_DESC) == 1: @@ -94,7 +88,7 @@ def add_to_query_config(list_of_desc: list[QueryDescription]) -> None: key_name = desc.query_id raw_severity = desc.severity.name.strip() severity = raw_severity.replace("_", " ") - if desc.is_security is True: + if desc.is_security: security = "1" else: security = "0" @@ -112,7 +106,12 @@ def add_to_query_config(list_of_desc: list[QueryDescription]) -> None: QUERY_DESC.set(key_name, "security", security) -def load_query_desc_from_config(path: str | None): +def load_query_desc_from_config(path: str | None) -> None: + """Load query descriptions from a config file. + + Args: + path: Path to config file. If None, uses DEFAULT_DESC_CONFIG_PATH. + """ if path is None: path = DEFAULT_DESC_CONFIG_PATH if not os.path.exists(path): @@ -122,33 +121,53 @@ def load_query_desc_from_config(path: str | None): QUERY_DESC.read_string( pkgutil.get_data(__name__, os.path.join("data", path)).decode() ) + return None def add_to_presets(presets: list[str], preset_name: str) -> None: + """Add a preset to the software presets dictionary. + + Args: + presets: List of query names in the preset. + preset_name: Name of the preset to add. + """ global SOFTWARE_PRESETS SOFTWARE_PRESETS[preset_name] = presets pass def get_software_presets(preset_name: str) -> list[str]: - """Returns empty list if no software presets found with this name + """Get a software preset by name. Args: - preset_name: name of preset to retrieve + preset_name: Name of preset to retrieve. Returns: - List of query names in this preset + List of query names in this preset, or empty list if not found. """ return dict.get(SOFTWARE_PRESETS, preset_name, []) -def serialize(portion, elem): - """ serializes element from iterparse. - (N.B: iterparse returns bytestrings, not code points. This is true for events, tags, etc. - Because we are using unicode literals, this means a decoding will occur. Decoding automatically - occurs when concatenating a string with a unicode code point) - """ - out = '' +def serialize(portion: str | None, elem) -> str: + """Serialize element from iterparse to string representation. + + .. note:: + iterparse returns bytestrings, not code points. This is true for events, + tags, etc. Because we are using Unicode literals, decoding will occur. + Decoding automatically occurs when concatenating a string with a Unicode + code point. + + Args: + portion: Portion to serialize ('start', 'end', or None). + elem: XML Element to serialize. + + Returns: + Serialized string representation of the element. + + Raises: + RuntimeError: If portion is invalid. + """ + line_end = '\n' if os.name == 'nt': line_end = '\r' + line_end @@ -177,7 +196,15 @@ def serialize(portion, elem): raise RuntimeError('Called with invalid portion' + portion) -def escape(msg): +def escape(msg: str) -> str: + """Escape HTML special characters in a string. + + Args: + msg: String to escape. + + Returns: + Escaped string with HTML entities. + """ msg = msg.replace("<", "<") msg = msg.replace(">", ">") msg = msg.replace("&", "&") @@ -185,7 +212,17 @@ def escape(msg): return msg -def normalize_query_path(s): +def normalize_query_path(s: str) -> str: + """Normalize a query path string. + + Removes version information and normalizes path separators and spaces. + + Args: + s: Query path string to normalize. + + Returns: + Normalized query path. 
+ """ index = s.rfind('Version:') if index > 1: t = s[:s.find('Version:')].strip() @@ -194,8 +231,8 @@ def normalize_query_path(s): return t.replace('\\', '.').replace(' ', '_') -def _get_fallback_query(s): - """Gets default query in case no match found +def _get_fallback_query(s: str) -> str | None: + """Get default query in case no exact match found. If we override a query, the path changes, e.g. from:: @@ -205,16 +242,15 @@ def _get_fallback_query(s): 'Apex.Corp.General.SOQL_SOSL_Injection' - but we don't want to make a new query description entry - so look through the description file to see if - there is an existing description + but we don't want to make a new query description entry. So we look + through the description file to see if there is an existing description + with the same short name. Args: - s: the normalized corp query found + s: The normalized query path found. Returns: - original normalized cx query path - + Original normalized query path if found, None otherwise. """ try: QUERY_DESC.get(s, 'name') @@ -228,7 +264,15 @@ def _get_fallback_query(s): return None -def get_query_for_config(path): +def get_query_for_config(path: str) -> str: + """Get query path for config lookup, using fallback if needed. + + Args: + path: Original query path. + + Returns: + Query path to use for config lookup (original or fallback). + """ original_query = normalize_query_path(path) fallback = _get_fallback_query(original_query) if fallback is None: @@ -238,18 +282,43 @@ def get_query_for_config(path): return query_path -def normalize_time(s): +def normalize_time(s: str | None) -> str | None: + """Normalize a timestamp string. + + Args: + s: Timestamp string to normalize. + + Returns: + Normalized timestamp string, or None if input is None. + """ if s is not None and 'Z' in s: return s.replace('T', ' ').replace('Z', '')[:-4] else: return s -def truncate(msg, size=40): +def truncate(msg: str, size: int = 40) -> str: + """Truncate a string to a maximum length. + + Args: + msg: String to truncate. + size: Maximum length before truncation. Defaults to 40. + + Returns: + Truncated string with "..." appended if truncated, original string otherwise. + """ return (msg[:size] + "...") if len(msg) > size else msg -def _make_scanner_help(help_url): +def _make_scanner_help(help_url: str | None) -> str: + """Generate HTML help message for scanner reports. + + Args: + help_url: Optional URL to scanner help page. + + Returns: + HTML string with help message. + """ if help_url is not None: msg = ('
' 'For any questions about this service, please consult the scanner help page at' @@ -260,26 +329,53 @@ def _make_scanner_help(help_url): return msg -def reverse_map(vuln_map): - """Go from 1) a dict of field names to vuln name list - to 2) a dict of vuln names to field name lists +def reverse_map(vuln_map: dict) -> dict: + """Reverse a vulnerability mapping dictionary. + + Converts from a dict of field names to vulnerability name lists, + to a dict of vulnerability names to field name lists. + + Args: + vuln_map: Dictionary mapping field names to lists of vulnerability names. + + Returns: + Dictionary mapping vulnerability names to lists of field names. """ - reverse_map = {} + rev_map = {} for key in vuln_map: for val in vuln_map[key]: - if val not in reverse_map: - reverse_map[val] = [] - reverse_map[val].append(key) - return reverse_map + if val not in rev_map: + rev_map[val] = [] + rev_map[val].append(key) + return rev_map + +def _bail(msg: str, exception: type[Exception] = Exception) -> None: + """Log a critical error and raise an exception. + + Args: + msg: Error message. + exception: Exception class to raise. Defaults to Exception. -def _bail(msg, exception=Exception): + Raises: + exception: Always raises the specified exception type. + """ logger.critical(PARSE_ERROR + msg) raise exception(msg) -def _safe_append(fp, data): - """Accepts file pointer and unicode data to append. Does not throw exceptions.""" +def _safe_append(fp, data: str) -> bool: + """Safely append Unicode data to a file pointer. + + Does not throw exceptions - logs errors instead. + + Args: + fp: File pointer to write to. + data: Unicode string data to append. + + Returns: + True if write succeeded, False otherwise. + """ # TODO: email exception? try: fp.write(data) @@ -290,8 +386,18 @@ def _safe_append(fp, data): return False -def _safe_prepend(filepath, data): - """Accepts filepath and data to prepend. Does not throw exceptions.""" +def _safe_prepend(filepath: str, data: str) -> bool: + """Safely prepend data to a file. + + Does not throw exceptions - logs errors instead. + + Args: + filepath: Path to file to prepend to. + data: String data to prepend. + + Returns: + True if prepend succeeded, False otherwise. + """ # TODO: email exception? try: temp_file = filepath + "_tmp" @@ -310,14 +416,14 @@ def _safe_prepend(filepath, data): return False -def count_issues(scan_results): - """ +def count_issues(scan_results) -> tuple[int, int]: + """Count security and quality issues from scan results. + Args: - scan_results: scan_results: List + scan_results: List of QueryData objects from scan. Returns: - tuple: security issues, quality issues total counts from - scan_results + Tuple of (security_issues_count, quality_issues_count). """ sec_count = 0 @@ -334,18 +440,35 @@ def count_issues(scan_results): # noinspection PyPep8 class JobInfo(object): + """Stores metadata about a scan job. + + Attributes: + email_add: Email address for the scan. + friendly_name: Friendly name/description of the scan. + job_type: Type of job (e.g., 'Portal'). + preset: Preset name used for the scan. + scan_start: Scan start timestamp. + scan_end: Scan end timestamp. + result_id: Scan result ID. + cx_version: Checkmarx version. + service_version: Service version string. + lightning_api_version: Lightning API version if applicable. + lightning_api_too_low: Whether Lightning API version is too low. + help_url: URL to help documentation. 
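+
+    Example (illustrative)::
+
+        info = JobInfo(None, 'My Scan', 'Portal', 'default', None, None,
+                       'result-1', '1.0')
+        info.update()  # fill any remaining None fields with defaults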
+    """
+
     def __init__(self,
-                 email_add,
-                 friendly_name,
-                 job_type,
-                 preset,
-                 scan_start,
-                 scan_end,
-                 result_id,
-                 service_version,
-                 lightning_api_version=None,
-                 lightning_api_too_low=False,
-                 help_url=None):
+                 email_add: str | None,
+                 friendly_name: str | None,
+                 job_type: str | None,
+                 preset: str | None,
+                 scan_start: str | None,
+                 scan_end: str | None,
+                 result_id: str | None,
+                 service_version: str,
+                 lightning_api_version: str | None = None,
+                 lightning_api_too_low: bool = False,
+                 help_url: str | None = None):
 
         self.email_add = None if email_add is None else email_add
         self.friendly_name = None if friendly_name is None else friendly_name
@@ -360,39 +483,11 @@ def __init__(self,
         self.lightning_api_too_low = lightning_api_too_low
         self.help_url = help_url
 
-    def update(self, root_elem):
-        r"""Attempts to populate params from root xml element:
-
-
-
-        JobInfo(
-            email_add,
-            friendly_name,
-            job_type,
-            preset,
-            scan_start,
-            scan_end,
-            result_id
-        )
-        """
+    def update(self) -> None:
+        """Populate missing parameters with default values.
+
+        Any attribute that is still None after construction is given a
+        placeholder default (e.g. 'N/A') so report generation can proceed.
+        """
         if self.email_add is None:
             self.email_add = 'N/A'
@@ -415,8 +510,15 @@
         if self.result_id is None:
             self.result_id = "default"
 
-    def make_html(self, scan_results):
-        """Generates report metadata html"""
+    def make_html(self, scan_results) -> str:
+        """Generate HTML report metadata section.
+
+        Args:
+            scan_results: List of QueryData objects from scan.
+
+        Returns:
+            HTML string containing report metadata.
+        """
         security_count, quality_count = count_issues(scan_results)
         data = ('
' '
' @@ -467,7 +569,20 @@ def make_html(self, scan_results): class QueryData(object): - def __init__(self, path): + """Stores data about a query and its results. + + Attributes: + query_path: Normalized query path. + success: Whether query executed successfully. + tallies: Number of issues found. + name: Query name from config. + group: Query group/severity from config. + references: Reference URLs from config. + security: Whether this is a security query ('1') or quality ('0'). + help_name: Short name for help lookup. + """ + + def __init__(self, path: str): if len(QUERY_DESC) == 1: load_query_desc_from_config(path=None) @@ -481,28 +596,55 @@ def __init__(self, path): tmp = self.query_path.split('.') self.help_name = tmp[len(tmp) - 1] - def get_name(self): + def get_name(self) -> str: + """Get formatted query name. + + Returns: + Query name with underscores replaced by spaces. + """ # TODO: Have a real dictionary, but currently we get rid of underscores only return self.name.replace('_', ' ') - def get_group(self): + def get_group(self) -> str: + """Get formatted query group. + + Returns: + Query group with underscores replaced by spaces. + """ # TODO: Have a real dictionary as above return self.group.replace('_', ' ') - def found_issues(self): + def found_issues(self) -> int: + """Check if query found issues. + + Returns: + 0 if issues found, 1 otherwise (for sorting). + """ if self.success and (int(self.tallies) > 0): return 0 else: return 1 - def isSecurity(self): + def isSecurity(self) -> bool: + """Check if this is a security query. + + Returns: + True if security query, False if quality query. + """ return self.security == '1' -def _report_append(element, report_fp, source_dir='None', tallies=None): - """Converts element to appropriate HTML report line. - In the future, should use XSLT, but currently our transforms are very simple, and on a streaming per-element - basis so there is not a lot of structure to the tag transforms that happen. +def _report_append(element, report_fp, tallies: int | None = None) -> None: + """Convert element to appropriate HTML report line. + + In the future, should use XSLT, but currently our transforms are very + simple and on a streaming per-element basis, so there is not a lot of + structure to the tag transforms that happen. + + Args: + element: XML Element to convert. + report_fp: File pointer to write HTML to. + tallies: Optional tally count for path elements. """ logger.debug("_report_append invoked with element tag: " + element.tag) data = None @@ -537,7 +679,7 @@ def _report_append(element, report_fp, source_dir='None', tallies=None): '
\n') if element.tag == "PathNode": - node_data = {} + source = None snippet = element.find('Snippet') if snippet is not None: @@ -545,7 +687,6 @@ def _report_append(element, report_fp, source_dir='None', tallies=None): filename = element.find('FileName').text flow_type = element.find('FlowType').text - node_id = str(element.find('NodeId').text) name = element.find('Name').text column = str(element.find('Column').text) line_no = str(element.find('Line').text) @@ -569,9 +710,16 @@ def _report_append(element, report_fp, source_dir='None', tallies=None): '
' + ESAPI.html_encode(source) + '
\n') _safe_append(report_fp, data) + return None -def _append_overflow(report_fp, max_results): +def _append_overflow(report_fp, max_results: int) -> None: + """Append overflow message to report when results are truncated. + + Args: + report_fp: File pointer to write to. + max_results: Maximum number of results shown. + """ data = ('
' '
' 'Only the first ' + str(max_results) + @@ -581,28 +729,29 @@ def _append_overflow(report_fp, max_results): _safe_append(report_fp, data) -def _add_source(source_dir, filename, target_line_no, obj_name): - """OBSOLETE: - Adds line from source, if possible. As the Cx XML line no has off by one errors, - we first look for the object in the provided line, and if not present, we look - for the object in the previous line. +def _add_source(source_dir: str, filename: str, target_line_no: int, obj_name: str) -> tuple[int, str | None]: + """Add source line from file (OBSOLETE). - If we still cannot find the object, - we log the issue and return the original source line. + .. deprecated:: + This function is obsolete and may be removed in future versions. - The XML file is written on a windows server. While popcrab may be running on a - windows OS, testing versions may not. But source_dir is the location of the - staging version of the code which may be in unix format. + Adds line from source if possible. As the XML line number has off-by-one + errors, we first look for the object in the provided line, and if not + present, we look for the object in the previous line. - Therefore we convert the FileName - in the xml file to be suitable to the filename on the popcrab host. - """ + Args: + source_dir: Directory containing source files. + filename: Name of source file. + target_line_no: Target line number to extract. + obj_name: Object name to search for in line. - source = None + Returns: + Tuple of (line_number, source_line). Returns (-1, None) on error. + """ try: if os.sep != u'\\': - # we are running on linux/mac + # we are running on Linux/Mac normalized_path = os.path.join(source_dir, filename.replace(u'\\', u'/')) else: normalized_path = os.path.join(source_dir, filename) @@ -637,19 +786,22 @@ def _add_source(source_dir, filename, target_line_no, obj_name): return -1, None -def _update_results(scan_results, failed_scans, preset): - """Newer versions of CX omit nodes in the xml file when - the query succeeds and no issues are found or when the query fails. +def _update_results(scan_results, failed_scans: list[str] | None, preset: str): + """Update scan results with failed and missing queries. + + Newer versions of CX omit nodes in the XML file when the query + succeeds and no issues are found or when the query fails. This function + patches the results to include these missing queries. Args: - scan_results: are results from this (reduced) xml file (List) - failed_scans: is a list of QueryPaths from logfile (may be None) - preset: is the name of the preset used. + scan_results: Results from the (reduced) XML file (set of QueryData). + failed_scans: List of QueryPaths from logfile, or None. + preset: Name of the preset used. Returns: - an enlarged scan_results with additional failed QueryData or missing QueryData - - This should be called to patch scan_results before report html table is generated + Enlarged scan_results set with additional failed QueryData or missing + QueryData. This should be called to patch scan_results before report + HTML table is generated. 
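+
+    Example (illustrative; ``failed_queries`` comes from the scan log)::
+
+        scan_results = _update_results(scan_results, failed_queries, 'default')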
""" keys = [q.query_path for q in scan_results] failed = [] @@ -662,10 +814,6 @@ def _update_results(scan_results, failed_scans, preset): q.success = False scan_results.add(q) - # Get all queries - all_d = [] - # with codecs.open(os.path.join(FLOW_SCANNER_HOME, 'data', preset + '_preset.txt'), encoding='utf-8') as fp: - # all_d = [query_path.strip() for query_path in fp] disk_preset = os.path.join('data', preset + "_preset.txt") if os.path.exists(disk_preset): preset_str = pkgutil.get_data(__name__, os.path.join('data', preset + "_preset.txt")).decode().strip() @@ -682,7 +830,15 @@ def _update_results(scan_results, failed_scans, preset): return scan_results -def _make_query_desc(query_path): +def _make_query_desc(query_path: str) -> str: + """Generate HTML description section for a query. + + Args: + query_path: Query path to get description for. + + Returns: + HTML string containing query description and references. + """ description = QUERY_DESC.get(query_path, 'description') references = QUERY_DESC.get(query_path, 'references') data = ('
' @@ -714,8 +870,16 @@ def _make_query_desc(query_path): return data -def _make_header(scan_results, jobinfo): - """TODO: change to file builder""" +def _make_header(scan_results, jobinfo: JobInfo) -> str: + """Generate HTML header section for report. + + Args: + scan_results: List of QueryData objects. + jobinfo: JobInfo object with scan metadata. + + Returns: + HTML string containing report header. + """ logger.debug("_make_header invoked with scan_results of length:" + str(len(scan_results))) # with open(os.path.join(FLOW_SCANNER_HOME, 'data', 'header.out'), mode='r', encoding="utf-8") as fp: # data = fp.read() @@ -725,20 +889,19 @@ def _make_header(scan_results, jobinfo): return data -def _present_query_results(scan_results): - """Builds html table summarizing query results. +def _present_query_results(scan_results) -> str: + """Build HTML table summarizing query results. Results are sorted on: - Security, Quality with decreasing severity levels: - Critical, Serious, Warning - finally by number of issues + - Security, Quality + - Decreasing severity levels: Critical, Serious, Warning + - Finally by number of issues Args: - scan_results: List + scan_results: List of QueryData objects. Returns: - string (html file) - + HTML string containing results table. """ data = ('
' '' @@ -769,7 +932,15 @@ def _present_query_results(scan_results): return data -def _make_footer(report_fp): +def _make_footer(report_fp) -> None: + """Write footer to report file. + + Args: + report_fp: File pointer to write footer to. + + Raises: + Exception: If footer cannot be written. + """ report_path = os.path.join(FLOW_SCANNER_HOME, 'data', 'footer.out') # with codecs.open(report_path, 'r') as fp: # data = fp.read() @@ -780,14 +951,32 @@ def _make_footer(report_fp): _bail('failed to write footer for report file at ' + report_path) -def _clean_up(element): +def _clean_up(element) -> None: + """Clean up XML element to free memory. + + Args: + element: XML Element to clean up. + """ if element is not None: element.clear() # while element.getprevious() is not None: # del element.getparent()[0] -def _get_signature(element): +def _get_signature(element) -> tuple[str, str] | str | None: + """Get signature from an XML element. + + Args: + element: XML Element to extract signature from. + + Returns: + For PathNode: (filename, line_no) tuple. + For Path: SimilarityId string. + None for other elements or on error. + + Raises: + RuntimeError: If element is None. + """ if element is None: raise RuntimeError('tried to get signature of None element') elif element.tag == 'PathNode': @@ -800,6 +989,7 @@ def _get_signature(element): elif element.tag == 'Path': return element.attrib['SimilarityId'] + return None def parse_results(xml_file=None, @@ -820,7 +1010,7 @@ def parse_results(xml_file=None, min_api_version=40.0, help_url=None ): - """Parses Cx xml results file and generates HTML report. + """Parses XML results file and generates HTML report. Parsing policy: @@ -835,16 +1025,15 @@ def parse_results(xml_file=None, Attributes set during start events but not tag contents. Args: - xml_file: unicode path of xml file containing results - xml_report_str: unicode str of xml report (if file not provided) - report_path: (required for report gen) unicode path where the + xml_file: unicode path of XML file containing results + xml_report_str: unicode str of XML report (if file not provided) + report_path: (required for report gen) Unicode path where the HTML report should be stored failed_queries: pulled from log file. list of query_paths that failed. throttle: Boolean (whether to limit the number of issues found per query) - source_dir: unicode directory where source code is stored (on - Cx) + source_dir: unicode directory where source code is stored email_add: unicode email address to which report should be sent friendly_name: unicode friendly name of scan job_type: unicode job type (TZ, Portal) @@ -868,17 +1057,8 @@ def parse_results(xml_file=None, result_id, service_version, help_url) scan_results = set() - query_data = None report_fp = None query_data = None - context = None - root = None # reference - - if scan_start is not None: - scan_start = normalize_time(scan_start) - - if scan_end is not None: - scan_end = normalize_time(scan_end) if report_path is not None: report_fp = open(report_path, mode='a', encoding='utf-8') @@ -896,7 +1076,7 @@ def parse_results(xml_file=None, event, root = next(context) # grab root. c.f. 
http://effbot.org/zone/element-iterparse.htm - jobinfo.update(root) + jobinfo.update() logger.debug('preset is: ' + jobinfo.preset) parent = root @@ -920,11 +1100,10 @@ def parse_results(xml_file=None, query_printed is False): # render parent (result) info query_printed = True - _report_append(element.getparent(), report_fp, source_dir) + _report_append(element.getparent(), report_fp) if report_fp is not None: _report_append(element, report_fp, - source_dir, query_data.tallies) if event == 'end': @@ -970,21 +1149,17 @@ def parse_results(xml_file=None, return jobinfo, scan_results -def get_issues_for_org(scan_results, vuln_map): - """Counts findings for each query +def get_issues_for_org(scan_results, vuln_map: dict) -> dict: + """Count findings for each query by organization field. Args: - scan_results: List list of found issues - vuln_map: map of scan info fields to CX issues e.g. - vuln_map[StoredXSS]=[cx_desc1,cx_desc2,] etc. - - (A bit inefficient since we loop through all queries rather than - removing already used queries, but this is not a concern here.) + scan_results: List of QueryData objects with found issues. + vuln_map: Map of scan info fields to issue descriptions, e.g. + vuln_map[StoredXSS] = [desc1, desc2, ...]. Returns: - a dictionary with keys = issue types in scan info and number of - issues found - + Dictionary with keys = issue types in scan info and values = number + of issues found, plus 'type' key set to SFDC_OBJECT_NAME. """ d = dict() diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py index 572f1495..a9dbd47c 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py @@ -21,7 +21,8 @@ from flow_scanner import flow_metrics from flow_scanner.version import __version__ from public.data_obj import QueryResult, Preset, InfluenceStatementEncoder, InfluenceStatement -from public.enums import FlowType +import flow_scanner.db_storage as db_storage + DEFAULT_HELP_URL = "https://security.secure.force.com/security/tools/forcecom/scannerhelp" DEFAULT_JOB_TYPE = "FlowSecurityCLI" @@ -33,11 +34,11 @@ class ResultsProcessor(object): """Class storing all the information necessary for a report. - This includes labelling information like the report requested, - scan start time, etc., as well as the results of the findings. + This includes labeling information like the report requested, + scan start time, etc., as well as the results of the findings. - The class contains methods to take this information and generate - json, xml and html reports. + The class contains methods to take this information and generate + JSON, XML and HTML reports. """ def __init__(self, preset: Preset = None, requestor="System", report_label=None, @@ -69,7 +70,17 @@ def __init__(self, preset: Preset = None, requestor="System", report_label=None, # xml report string self.report_xml: str | None = None - def get_root(self, filepath: str): + def get_root(self, filepath: str) -> ET.Element | None: + """Get root XML element for a file path. + + Caches results in root_map for efficiency. + + Args: + filepath: Path to flow file. + + Returns: + Root XML Element if found, None on error. 
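+
+        Example (illustrative; the flow file path is hypothetical)::
+
+            root = processor.get_root('flows/MyFlow.flow-meta.xml')
+            if root is not None:
+                logger.debug(root.tag)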
+ """ if self.root_map is not None and filepath not in self.root_map: return self.root_map[filepath] else: @@ -86,14 +97,16 @@ def get_root(self, filepath: str): return None def write_html(self, html_report_path: str): - """Writes html report to disk + """Write HTML report to disk. Args: - html_report_path: where to write html report + html_report_path: Path where to write HTML report. Returns: - metrics (results) of issues sorted and counted. + Tuple of (jobinfo, scan_results) with metrics of issues sorted and counted. + Raises: + RuntimeError: If no valid preset is set. """ if self.report_xml is None: self.get_cx_xml_str() @@ -131,31 +144,29 @@ def write_html(self, html_report_path: str): return results def dump_json(self, fp: TextIO) -> None: - """Write json string of results to file pointer - - Returns: - None + """Dump results as JSON to a file pointer. + Args: + fp: Text file pointer to write JSON to. """ job_result = self._make_job_result() json.dump(job_result, indent=4, fp=fp, cls=InfluenceStatementEncoder) def get_json_str(self) -> str: - """get json result string + """Get JSON result string. Returns: - string that serializes list of QueryResult objects - + JSON string that serializes list of QueryResult objects. """ job_result = self._make_job_result() return json.dumps(job_result, indent=4, cls=InfluenceStatementEncoder) - def get_cx_xml_str(self): - """Converts results to popcrab compatible report format + def get_cx_xml_str(self) -> str: + """Convert results to popcrab compatible report format. Returns: - report xml string + Report XML string in CxXMLResults format. """ id2path_dict = self._make_query_id_to_path_dict() @@ -244,19 +255,16 @@ def get_cx_xml_str(self): return self.report_xml def add_results(self, query_results: list[QueryResult]) -> None: - """Add results to processor + """Add results to processor. Stores results internally for simple de-duplication. All we do is use datapath equality, so please don't put unique comment strings containing things like step number - or timestamps into influence statements, as they wont be - de-duped. + in the influence statements, as this will prevent proper + de-duplication. Args: - query_results: list of Query-Result objects - - Returns: - None + query_results: List of QueryResult objects to add. """ query_results = _validate_qr(query_results) @@ -268,28 +276,25 @@ def add_results(self, query_results: list[QueryResult]) -> None: self.stored_results = list(set(self.stored_results + query_results)) def gen_result_dict(self) -> dict[str, dict[str, str]]: - """Sorts results into query buckets + """Sort results into query buckets. - Used internally to generate popcrab compatible - xml and html report formats. - - Also useful for testing + Used internally to generate popcrab compatible XML and HTML report formats. + Also useful for testing. 
         Returns:
-            dictionary of the form::
-
-            query_id -> {flow: tuple of DataInfluenceStatements or None (in case this is a dataflow)
-                         query_name: (human_readable),
-                         counter: (fake similarity id),
-                         elem: source code of element,
-                         elem_name: name of Flow Element,
-                         elem_code: source code of element,
-                         elem_line_no: line number of element,
-                         field: name of influenced variable (if any) within the element,
-                         }
-
+            Dictionary of the form::
+
+                query_id -> {
+                    flow: tuple of DataInfluenceStatements or None (in case this is a dataflow),
+                    query_name: (human_readable),
+                    counter: (fake similarity id),
+                    elem: source code of element,
+                    elem_name: name of Flow Element,
+                    elem_code: source code of element,
+                    elem_line_no: line number of element,
+                    field: name of influenced variable (if any) within the element,
+                }
         """
-
         query_results = self.stored_results
         accum = {}
         if query_results is None or len(query_results) == 0:
@@ -385,17 +390,22 @@ def gen_result_dict(self) -> dict[str, dict[str, str]]:
         return accum
 
     def _make_query_id_to_path_dict(self) -> dict[str, str]:
-        """Generate a dictionary from query_id to query_path
+        """Generate a dictionary from query_id to query_path.
 
-        e.g. foo bar -> foo\\bar: Version X
+        Example: 'foo.bar' -> 'foo\\bar Version: X'
 
         Returns:
-            dictionary
+            Dictionary mapping query_id to formatted query path string.
         """
         return {x.query_id: x.query_id.strip().replace(".", "\\") + f" Version: {x.query_version.strip()}"
                 for x in self.preset.queries}
 
-    def _make_job_result(self):
+    def _make_job_result(self) -> dict:
+        """Generate job result dictionary for JSON/XML output.
+
+        Returns:
+            Dictionary containing job metadata and results.
+        """
         if self.results_dict is None:
             self.gen_result_dict()
 
@@ -412,22 +422,68 @@
         }
         return job_result
 
-    def _get_query_desc_from_id(self, query_id: str):
+    def _get_query_desc_from_id(self, query_id: str) -> QueryDescription:
+        """Get QueryDescription by query ID from preset.
+
+        Args:
+            query_id: Query ID to look up.
+
+        Returns:
+            QueryDescription object matching the query_id.
+
+        Raises:
+            ValueError: If no query with the given ID is in the preset.
+        """
         descriptions = self.preset.queries
         for x in descriptions:
             if x.query_id == query_id:
                 return x
         raise ValueError(f"No query with id {query_id} is in the preset provided")
 
+    def dump_result_to_db(self, conn, run_id: int) -> int:
+        """Dump query results to the database.
+
+        This function stores all QueryResult objects from this ResultsProcessor
+        into the database, associated with the given run_id. The results_dict
+        will be generated if it doesn't already exist.
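+
+        A minimal call sequence (illustrative; the exact signatures of the
+        db_storage helpers named under Args below are assumed, and the file
+        name is hypothetical)::
+
+            conn = db_storage.create_database('scan_results.db')
+            run_id = db_storage.create_run(conn)
+            stored = processor.dump_result_to_db(conn, run_id)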
+
+        Args:
+            conn: SQLite database connection (from db_storage.create_database)
+            run_id: Integer run ID to associate results with (from db_storage.create_run)
+
+        Returns:
+            int: Number of QueryResult objects stored in the database
+                (0 if there are no stored results)
+
+        Raises:
+            sqlite3.Error: If database insertion fails
+        """
+        # Ensure results_dict is generated if it doesn't already exist
+        if self.results_dict is None:
+            self.gen_result_dict()
+
+        # Get the stored QueryResult objects
+        if self.stored_results is None or len(self.stored_results) == 0:
+            logger.warning("No query results to dump to database")
+            return 0
+
+        # Insert all results into the database
+        row_ids = db_storage.insert_query_results(conn, self.stored_results, run_id)
+
+        num_stored = len(row_ids)
+        logger.info(f"Stored {num_stored} query results to database for run_id={run_id}")
+
+        return num_stored
+
 
 def _validate_and_prettify_xml(xml_str: str) -> str:
-    """Pretty print and validate generated xml string
+    """Pretty print and validate generated XML string.
 
     Args:
-        xml_str: string to validate
+        xml_str: XML string to validate and prettify.
 
     Returns:
-        validated/beautified xml_string
+        Validated and beautified XML string.
     """
     my_root = CP.get_root_from_string(bytes(xml_str, encoding='utf-8'))
     ET.indent(my_root)
@@ -435,6 +491,16 @@
 def render_normal_dataflow_html(statements: tuple[InfluenceStatement, ...], flow_type: str, start_node_id: int = 0) -> str:
+    """Render a normal dataflow as HTML PathNode elements.
+
+    Args:
+        statements: Tuple of InfluenceStatement objects representing the dataflow.
+        flow_type: Type of flow (e.g., 'Screen', 'AutoLaunched').
+        start_node_id: Starting node ID for numbering. Defaults to 0.
+
+    Returns:
+        HTML string containing PathNode elements.
+    """
     result_str = ''
     for index, node in enumerate(statements, start=start_node_id):
         filename = node.source_path
@@ -450,6 +516,19 @@
 def render_html_pathnode(filename: str, flow_type: str, influenced_var: str, line: int, node_id: int, code: str) -> str:
+    """Render a single PathNode as HTML.
+
+    Args:
+        filename: Path to the flow file.
+        flow_type: Type of flow.
+        influenced_var: Name of influenced variable ('*' becomes 'start').
+        line: Line number in source.
+        node_id: Unique node identifier.
+        code: Source code snippet.
+
+    Returns:
+        HTML string for a PathNode element.
+    """
     if influenced_var == '*':
         influenced_var = 'start'
 
@@ -465,19 +544,29 @@ def render_html_pathnode(filename: str, flow_type: str, influenced_var: str, lin
 
 def make_path_node_header(filename: str, flow_type: str, similarity_id: int = 0) -> str:
+    """Generate HTML header for a path node.
+
+    Args:
+        filename: Path to the flow file.
+        flow_type: Type of flow.
+        similarity_id: Similarity ID for the path. Defaults to 0.
+
+    Returns:
+        HTML string containing Result and Path opening tags.
+    """
     return (f''
             f'')
 
 
 def _validate_qr(qr_list: list[QueryResult]) -> list[QueryResult] | None:
-    """Checks query result for correctness
+    """Check query results for correctness.
 
     Args:
-        qr_list: Query Result list to validate
+        qr_list: Query Result list to validate.
 
     Returns:
-        list of valid QueryResults with invalid results removed
-        None if the list was None
+        List of valid QueryResults with invalid results removed,
+        or None if the input list was None or empty.
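+
+    Example (illustrative)::
+
+        valid = _validate_qr(query_results)
+        if valid is None:
+            logger.info("no usable query results were supplied")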
""" if qr_list is None or len(qr_list) == 0: return None @@ -510,13 +599,23 @@ def _validate_qr(qr_list: list[QueryResult]) -> list[QueryResult] | None: def fix_names(x: InfluenceStatement) -> InfluenceStatement: + """Fix special variable names in an InfluenceStatement. + + Replaces '*' with 'start' for both influenced and influencer variables. + + Args: + x: InfluenceStatement to fix. + + Returns: + New InfluenceStatement with fixed names. + """ new_influenced = None new_influencer = None if x.influenced_var == '*': new_influenced = 'start' - elif x.influenced_var == '*': + elif x.influencer_var == '*': new_influencer = 'start' return dataclasses.replace(x, diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flows.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flows.py index d2918876..34e759be 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flows.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flows.py @@ -1,11 +1,10 @@ -"""Flow propagation data structures and algorithms - - @author: rsussland@salesforce.com +"""Flow propagation data structures and algorithms. +This module implements the FlowVector data structure and algorithms for +propagating data influence paths through flow elements. """ from __future__ import annotations import logging -import copy import json import typing from collections.abc import Callable @@ -19,50 +18,64 @@ #: module logger logger = logging.getLogger(__name__) +# --- Type Definitions --- +# A map of property names to the sets of paths influencing them +# Note: Values can be None, representing no overrides for that property +PropertyOverrides = dict[str, set[InfluencePath] | None] -@dataclass(frozen=True, eq=True, slots=True) -class FlowVector(AbstractFlowVector): - """Common data structure for both vectors and scalars. +# A map of Default Paths to their specific Property Overrides +# Note: Values can be None, representing no property overrides for that default +VectorPropertyMap = dict[InfluencePath, PropertyOverrides | None] - FlowVector supports vectorization, so that we can accurately - track taint as follows:: - taint --> Case.Subject - Case --> Case2 - Case2.Status --> sink1 // detect not tainted - Case2.Subject --> sink2 // detect tainted +def _copy_property_maps(prop_maps: VectorPropertyMap) -> VectorPropertyMap: + """Shallow copy of property_maps structure. + Since InfluencePath and all nested structures contain only immutable objects, + we only need to copy the container structure, not the contents. """ - # For each default path, this list has the overrides. - # An override is a map: "property name" --> {DataInfluencePaths} that - # influence this property - property_maps: dict[InfluencePath, dict[str, set[InfluencePath]]] + result = {} + for key, value in prop_maps.items(): + if value is None: + result[key] = None + else: + # Shallow copy the inner dict: keys are strings (immutable), + # values are sets of InfluencePath (immutable) or None + result[key] = {prop: flows.copy() if flows is not None else None + for prop, flows in value.items()} + return result - # TODO: revisit this later if a property spec is needed - # property_spec: set[str] | None - @classmethod - def from_flows(cls, default: set[InfluencePath] = None) -> FlowVector: - """Builds a vector from the provided flows. 
+def _copy_property_map_entry(entry: PropertyOverrides | None) -> PropertyOverrides | None: + """Copy a single property map entry (None or dict).""" + if entry is None: + return None + # Shallow copy: dict keys are strings (immutable), set elements are InfluencePath (immutable) + return {prop: flows.copy() if flows is not None else None + for prop, flows in entry.items()} - Flows must all have the same influencer_name and no flow can have a non-null influencer_property. - Notes: - Do not use FlowVector constructor outside the ``flows`` module, use this method instead - and build FlowVectors up using the provided instance methods as a result of parsing program - elements. +@dataclass(frozen=True, eq=True, slots=True) +class FlowVector(AbstractFlowVector): + """Common data structure for both vectors and scalars.""" - This method should be used to initialize variables by building the initialization flow - (where a variable influences itself) + # For each default path, this list has the overrides. + property_maps: VectorPropertyMap + + @classmethod + def from_flows(cls, default: set[InfluencePath] = None) -> FlowVector: + """Build a vector from the provided flows. Args: - default {`DataInfluencePath`}: each of these paths should be assigned to their own default + default: Set of InfluencePath objects to initialize the vector with. + Can also be a single InfluencePath (converted to set). Returns: - FlowVector instance with the provided flows as defaults + New FlowVector instance. Raises: - ValueError if the flows have different influenced_name, or if default is empty. + ValueError: If default is empty, None, or flows don't influence + the same variable name with null influenced_property. """ # we make an exception: if isinstance(default, InfluencePath): @@ -71,6 +84,7 @@ def from_flows(cls, default: set[InfluencePath] = None) -> FlowVector: raise ValueError("Please call with set argument") else: default_ = default + # add guards if default_ is None or len(default_) == 0: raise ValueError("Called builder with empty default") @@ -88,18 +102,15 @@ def from_flows(cls, default: set[InfluencePath] = None) -> FlowVector: return FlowVector(property_maps=property_map) - def short_report(self, indent=2) -> str: - """Brief string serialization of the FlowVector (for testing and reporting) - + def short_report(self, indent: int = 2) -> str: + """Generate brief string serialization of the FlowVector. Args: - indent: number of spaces for indentation + indent: Number of spaces for indentation. Defaults to 2. Returns: - summary of defaults and property maps - + String representation of the FlowVector for testing and reporting. """ - str_prop_map = {} for curr_default, val in self.property_maps.items(): def_str = curr_default.short_report(arrows=True) @@ -114,7 +125,6 @@ def short_report(self, indent=2) -> str: str_flowset.sort() tmp[prop] = str_flowset else: - # no property maps for this default tmp = None str_prop_map[def_str] = tmp @@ -122,20 +132,13 @@ def short_report(self, indent=2) -> str: return json.dumps(str_prop_map, indent=indent, sort_keys=True) def report_dict(self) -> dict[str, dict[str, set[str]]]: - """get brief object dict with stringified flows - - flows are replaced with arrow and star notation - in :meth:`DataInfluencePath.short_report` and are - sorted alphabetically in all keys and (non-None) values. + """Get brief object dictionary with stringified flows. 
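+
+        Flows are rendered in the arrow and star notation of
+        :meth:`DataInfluencePath.short_report` and sorted alphabetically in
+        all keys and (non-None) values, so the result has the shape::
+
+            {default flows : {property_name: {flows}} | None }
+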
Returns: - dict with the following schema:: - - {default flows : {property_name: {flows}} | None } - + Dictionary representation with flows converted to strings. """ loaded = json.loads(self.short_report()) - # json notation does not support sets, so turn the list of prop flows into a set + # JSON notation does not support sets, so turn the list of prop flows into a set for default in loaded: if loaded[default] is not None: for prop in loaded[default]: @@ -147,15 +150,13 @@ def report_dict(self) -> dict[str, dict[str, set[str]]]: return loaded def get_flows_by_prop(self, member_name: str | None = None) -> set[InfluencePath]: - """Returns this vector's flows with the requested influenced property name. + """Get this vector's flows with the requested influenced property name. Args: - member_name: If None, returns all flows (including overrides) - Otherwise, returns all flows for the named property. + member_name: Property name to filter by. If None, returns all flows. Returns: - The requested flows associated to *all default indexes* consolidated into a set, - or empty set if no match. + Set of InfluencePath objects matching the property name. """ to_return = set() defaults = set(self.property_maps.keys()) @@ -180,10 +181,7 @@ def get_flows_by_prop(self, member_name: str | None = None) -> set[InfluencePath # requesting no property means all paths are returned to_return.update(defaults) else: - # Now need to add missing flows, for example if a default had no properties - # at all, then action will not find it. So we compare the returned - # defaults from the prop query to make sure we have full coverage - # for all primary paths: + # Now need to add missing flows seen_defaults = {x[0] for x in res} [to_return.add(_restrict(x, member_name)) for x in defaults if x not in seen_defaults] @@ -191,121 +189,56 @@ def get_flows_by_prop(self, member_name: str | None = None) -> set[InfluencePath return to_return def add_vector(self, vector: FlowVector) -> FlowVector: - """Create new vector that adds flows of self and ``vector``. - - Notes: - * Both must be at the same variable name + """Create new vector that combines flows of self and vector. Args: - vector: vector containing new flow information - - Returns: new FlowVector representing the sum - - ============== - Adding Vectors - ============== - - In flows, the concept of "adding" objects or scalars - is overloaded to mean either - - 1) adding an element to a collection (object or scalar), or - 2) combining two scalar values (e.g. string concatenation) - - For dataflow analysis, we are adding sets of influence paths, - for example when exiting a flow. - - This means that all the influencers of - the first are added to the influencers of the second (which - can create a doubling of flow paths) via set-addition. - - Care must be taken when the generic case is the same but overrides - differ: Imagine program execution along two branches, followed - by combining the branches (say a function return). Then we - know that in reality, only one branch can be taken in an execution - run, but there is the possibility of cross-contamination. E.g. - in branch 1, Account.Name can change, and in branch 2, Account.Description, - but when we merge, we need to decide whether to combine both into "account" - and merge the overrides or to keep them as separate vectors with different - overrides. 
This boils down to whether the default vector is a key for the - override map or whether the better data structure is a tuple of a vector - and its overrides, with the addition operation always adding new tuples. - - Presently we opt for first approach - merging the overrides - of the two vectors, because in principle, we still retain all the - information in the override dataflow histories - for example, we can store - the state branch id in each path - and it is the examination of the overrides - where fine-grained exclusion analysis should happen. - - This choice forces us to create dummy (induced) overrides: If `A.Name` has - an override, `foo` in path 1, but not in path 2, then we want to get both - `A.Name` and `foo` when requesting the override of the sum. - This is because the sum is the possibility of taking either path. + vector: FlowVector to add to this one. + Returns: + New FlowVector with combined flows from both vectors. """ if vector is None: - return copy.deepcopy(self) + return FlowVector(property_maps=_copy_property_maps(self.property_maps)) new_property_map = {} - # default in self but not in vector + + # 1. Default in self but not in vector: Copy from self for x in self.property_maps: if x not in vector.property_maps: - new_property_map[x] = copy.deepcopy(self.property_maps[x]) + new_property_map[x] = _copy_property_map_entry(self.property_maps[x]) - # default in self and vector + # 2. Default in self and vector: Merge for other_def in vector.property_maps: if other_def in self.property_maps: - # The merge-override method is where we create induced paths - new_property_map[other_def] = _merge_override(other_def, copy.deepcopy(self.property_maps[other_def]), - copy.deepcopy(vector.property_maps[other_def])) - - # default in vector but not self: + # The merge-override method creates new sets via union, + # so we can safely pass the original maps without pre-copying. + new_property_map[other_def] = _merge_override( + other_def, + self.property_maps[other_def], + vector.property_maps[other_def] + ) + + # 3. Default in vector but not self: Copy from vector for x in vector.property_maps: if x not in self.property_maps: - new_property_map[x] = copy.deepcopy(vector.property_maps)[x] + new_property_map[x] = _copy_property_map_entry(vector.property_maps[x]) return FlowVector(property_maps=new_property_map) def push_via_flow(self, extension_path: InfluencePath, influenced_vec: FlowVector, assign: bool = True, cross_flow: bool = False) -> FlowVector: - """Build new FlowVector with all influence paths in self pushed into ``vec`` via the extension_path. - - For example, if the current vector corresponds to influencers of ``A``, and ``vec`` to ``B``, - then we can push A into B as follows: - - ``1. A.x --> B`` - Then ``B`` must be a scalar and the default of ``B`` is populated with the extended influencers - of ``A``'s x property. If ``assign`` is ``False``, then the existing influencers of ``B``'s x - property are added to those pushed forward from ``A``. - - ``2. A --> B.x`` - Then ``A`` must be a scalar and the defaults of ``A`` are pushed forward into ``B``'s x-property - influencers. If ``assign`` is ``False``, then ``B``'s existing property influencers are also kept. - - ``3. A.x --> B.x`` - Then ``A`` and ``B`` must be Objects, and the x-property influencers of ``B`` are reassigned - to those pushed forward from ``A.x``, and if `assign` is `False`, B also retains its existing - x-property influencers. - - ``4. 
A --> B`` - Then all the influencers of A are pushed forward to B, either replacing ``B`` or - adding to ``B``'s existing influencers. + """Build new FlowVector with all influence paths in self pushed into vec. Args: - extension_path: DataInfluencePath to push forward by - influenced_vec: The target vector that is influenced by the statement - assign: ``True`` for assignment and ``False`` for addition. Note that - object addition corresponds to enlarging a collection. - cross_flow: if ``True``, allows this extension to cross flows, which requires - the flow being extended by to cross flows. + extension_path: InfluencePath to extend through. + influenced_vec: FlowVector being influenced. + assign: Whether this is an assignment operation. Defaults to True. + cross_flow: Whether this crosses flow boundaries. Defaults to False. Returns: - new FlowVector - - Raises: - ValueError if there is a variable name mismatch. - + New FlowVector with extended influence paths. """ if extension_path.influenced_property is None: # the entire vector is pushed @@ -319,13 +252,10 @@ def push_via_flow(self, extension_path: InfluencePath, influenced_vec: FlowVecto return pushed_vec else: - # A.x ---> B.y or - # scalar --> B.y - # we want to use assign_or_add_property_flows() - # and we need to extend the flows of A selected by x to B + # A.x ---> B.y or scalar --> B.y to_extend = self.get_flows_by_prop(extension_path.influencer_property) if to_extend is None or len(to_extend) == 0: - return FlowVector(property_maps=copy.deepcopy(influenced_vec.property_maps)) + return FlowVector(property_maps=_copy_property_maps(influenced_vec.property_maps)) else: accum = set() for flow_ in to_extend: @@ -339,81 +269,25 @@ def push_via_flow(self, extension_path: InfluencePath, influenced_vec: FlowVecto return influenced_vec._assign_or_add_property_flows(accum, assign=assign) - # # # End of FlowVector Public API # - # def _extend_by_path(self, flow: InfluencePath, cross_flow: bool = False) -> FlowVector: - """Creates a new flow vector by *pushing forward* this vector's flows. - - =========================== - Pushing FlowVectors Forward - =========================== - - Consider the vector ``A`` with a simple influencer vector:: - - A: default: B --> A (path 1) - A.x: C.y --> A.x (path 2) - A.y: t ----> A.y (path 3) - - When we apply the influence map: ``A --f--> D``, this generates a - new vector at D:: - - fA default: B ---> D (combine path 1 with f) - D.x: C.y ---> D.x (combine path 2 with f restricted to x) - D.y: t ---> D.y (combine path 3 with f restricted to y) - - Suppose the influence vector has an influencer_property, so it is - ``A.x --g--> Z`` (Note that Z must be a scalar variable). The new flows - will be:: - - gA default: C.y ---> Z (combine path 2 with g) - Z.x: None - Z.y: None - - Because ``property_maps`` *always override* the default. Notice that - if there are N DataInfluencePaths influencing ``Z.x``, then the pushed - forward vector will have N defaults. So neither the size ``defaults`` - field nor ``property_maps`` is preserved by the - push-forward operation when overrides are picked up (but it is otherwise - in the generic, e.g. non-singular, case). - - As a last example, suppose the ``influencer_property`` of the pushing flow is - not in the property map: ``A.v --h--> Z``. 
- - Then the pushed vector is:: - - hA default: B.v ---> Z (combine path 1 with g) - Z.x: None - Z.y: None - - Notes: - In order to use this function: - - 1) `flow.influenced_property` must be None, otherwise we are in - the singular case and the image is not a full FlowVector - 2) `flow.influenced_name` of the flow must match the flow.influenced_name - properties of all the flows in this vector. + """Create a new flow vector by pushing forward this vector's flows. Args: - flow: The DataInfluencePath to extend by - - cross_flow: True if the new vector is in a different flow than - the current one (default to False). + flow: InfluencePath to extend through. + cross_flow: Whether this crosses flow boundaries. Defaults to False. Returns: - Flow Vector pushed forward by the influence path. + New FlowVector with extended paths. + Raises: + ValueError: If flow has a non-null influenced_property. """ if flow.influenced_property is not None: - raise ValueError(f"called with flow {flow} that has a non-null influencer." - "This means the flow only inserts into a portion of " - "a vector and so cannot be used to generate a new vector. " - "To push this vector into part of an existing vector, " - " please use the combine_via_path method and provide " - "the target vector.") + raise ValueError(f"called with flow {flow} that has a non-null influencer.") new_property_maps = dict() tgt_prop = flow.influencer_property @@ -427,9 +301,7 @@ def _extend_by_path(self, flow: InfluencePath, cross_flow: bool = False) -> Flow start_flow=curr_default, end_flow=flow, cross_flow=cross_flow) # and push all property maps forward *if they exist* - # otherwise the method will return None if self.property_maps[curr_default] is not None: - # initialize: new_property_maps[pushed_default] = {} # take *all* property_overrides and push them forward @@ -447,19 +319,10 @@ def _extend_by_path(self, flow: InfluencePath, cross_flow: bool = False) -> Flow new_property_maps[pushed_default] = None else: # tgt_prop is not None, so the flow is A.x --> B - # Therefore the target is a scalar and will have - # null overrides and more defaults. This - # is tracked with new counter: for curr_default in self.property_maps: if self.property_maps[curr_default] is None or tgt_prop not in self.property_maps[curr_default]: - - # there is no override for tgt_prop, but the flow wants it, so - # induce a property from defaults via restriction, e.g. 
- # old flow: A->B->C - # map: C.x->D - # - # we restrict: A.x->B.x->C.x, and then combine C.x->D + # induce a property from defaults via restriction pushed_default = InfluencePath.combine( start_flow=_restrict(curr_default, tgt_prop), end_flow=flow, @@ -469,7 +332,7 @@ def _extend_by_path(self, flow: InfluencePath, cross_flow: bool = False) -> Flow new_property_maps[pushed_default] = None else: - # There is an override for target prop, so push all its flows into the property_maps + # There is an override for target prop, so push all its flows pushed_defaults = [InfluencePath.combine( start_flow=x, end_flow=flow, @@ -479,7 +342,6 @@ def _extend_by_path(self, flow: InfluencePath, cross_flow: bool = False) -> Flow assert x not in new_property_maps new_property_maps[x] = None - # end of if-statement return FlowVector(property_maps=new_property_maps) def _search_props(self, defaults_matcher: Callable[[InfluencePath], bool] = is_non_null, @@ -487,78 +349,22 @@ def _search_props(self, defaults_matcher: Callable[[InfluencePath], bool] = is_n flow_matcher: Callable[[InfluencePath | None], bool] = is_non_null, action: Callable[[InfluencePath, str, InfluencePath], typing.Any] = id_ ) -> typing.Any: - """Searches through FlowVector based on match conditions. - - .. WARNING:: Be careful when removing flows from vectors, - as override relationships are lost. - - The intention of this module is to simplify bookkeeping logic - associated to FlowVectors, which are primarily accounting containers - to track property overrides. - - For example: - - * return all overrides for a specific property in all defaults - * return all defaults and all overrides - * return all defaults that have no override for a specific property - - As our data structure is a recursive dictionary, repeatedly nesting - for-loops with additional handling of null cases - is error-prone and creates logic that is difficult to maintain. - - All such queries should be replaced with appropriate match callables and passed - into this function. - - Notes: - * All callables are optional, as is the action. - - * All matching tuples are returned regardless of the action callable - - * The action callable is passed matching tuples in flattened form, - just as the return values. - - E.g.:: - - (default_name, prop_name, function1) - (default_name, prop_name, function2) - - and the result of action for each tuple is returned by the function. - - * The defaults matcher will never match to None. - - * The default ``action`` callable is the identity. - - * If a prop matcher or override matcher matches None, - then None will be passed into the match tuples for the action - callable. - - * If no callable is provided for an entry, one that matches - any non-null will be used. Thus, calling this - method with no arguments returns a flattened property map with - all non-null entries. - - * matches short circuit, so matchers in the next level are only - invoked on matches in the previous level. The action is invoked - on the flattened matches when the search is complete. - - * Callables should be pulled from the util module. + """Search through FlowVector based on match conditions. Args: - defaults_matcher: callable to match on :attr:`FlowVector.defaults` - prop_matcher: callable to match on property names - flow_matcher: callable to match on DataInfluencePaths in overrides - action: function that accepts a matched values (DataInfluencePath, str, DataInfluencePath) + defaults_matcher: Function to match default paths. 
+ prop_matcher: Function to match property names. + flow_matcher: Function to match flows. + action: Function to apply to matched items. Returns: - results of applying ``action`` to matches. None values from action are not returned. - + Set of results from applying action to matched items. """ assert action is not None assert prop_matcher is not None assert flow_matcher is not None assert defaults_matcher is not None - # TODO: clean this up with iters, but it's good enough for now accum = set() for current_default, prop_map in self.property_maps.items(): if defaults_matcher(current_default): @@ -593,42 +399,22 @@ def _search_props(self, defaults_matcher: Callable[[InfluencePath], bool] = is_n def _assign_or_add_property_flows(self, flows: set[InfluencePath], assign: bool = True ) -> FlowVector: - """Injects DataInfluencePaths into vector. - - .. WARNING:: Expert use only as FlowVector can be corrupted by adding the wrong flows. - - Flows are unstructured, so where the flow is placed depends on - the :attr:`DataInfluencePath.influenced_property` attribute of each flow. - All flows must have a non-null influencer property, otherwise - the entire vector is being pushed and the :meth:`FlowVector.extend_by_path` method - should be used instead of this (injective) method. - - Notes: - * Use this method to model the injection of different property flows, for example:: - A.x --f-> B.y - - * Would cause the y-member flows of B to change by the path ``f``. - - * When paths are 'added', induced maps need to be created when target - overrides are missing, otherwise the added override will always take - over, and we lost the ability to keep both added and original resolutions. + """Inject DataInfluencePaths into vector. Args: - flows: A set of flows each of which should have a non-null influenced_property - attribute. - assign: if (True), the existing flows are replaced, otherwise they are added to the - existing flows. + flows: Set of InfluencePath objects to inject. + assign: Whether to assign (replace) or add flows. Defaults to True. + Returns: - new FlowVector + New FlowVector with injected flows. Raises: - ValueError if a flow is passed with a null influenced_property. + ValueError: If flows contain paths with null influenced_property. """ - if flows is None or len(flows) == 0: return self - new_property_maps = copy.deepcopy(self.property_maps) + new_property_maps = _copy_property_maps(self.property_maps) for flow in flows: prop = flow.influenced_property if prop is None: @@ -642,27 +428,35 @@ def _assign_or_add_property_flows(self, flows: set[InfluencePath], assign: bool new_property_maps[default_][prop] = {flow} else: - # property maps index has this property and we are adding new_property_maps[default_][prop].update({flow}) return FlowVector(property_maps=new_property_maps) """ - -simple lambda for sorting + + Helper Functions """ -def _sort_key(x): +def _sort_key(x: InfluencePath) -> str: + """Generate sort key for an InfluencePath. + + Args: + x: InfluencePath to generate key for. + + Returns: + String representation for sorting. + """ return x.short_report(arrows=True) def _merge_override(default: InfluencePath, - first: dict[str, set[InfluencePath]], - second: dict[str, set[InfluencePath]]) -> dict[str, set[InfluencePath]] | None: - """Take the property map for a specific default and combine it with another + first: PropertyOverrides | None, + second: PropertyOverrides | None) -> PropertyOverrides | None: + """Take the property map for a specific default and combine it with another. 
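+
+    If a property is overridden under one default's map but not the other,
+    an induced override (the default restricted to that property) stands in
+    for the missing side, so the merged map still reflects both execution
+    paths.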
+ Args: default: default flow for this map first: map from properties to sets of flows @@ -672,59 +466,59 @@ def _merge_override(default: InfluencePath, New map that is the combination of the two or None if both maps are None """ - # Take care of degeneracies if first is None and second is None: return None - keys_to_update = set() - keys_to_update.update(second and second.keys() or set()) - keys_to_update.update(first and first.keys() or set()) + keys1 = first.keys() if first else set() + keys2 = second.keys() if second else set() - if None in keys_to_update: - keys_to_update.remove(None) + # Union of keys, remove None if it accidentally crept in + all_keys = (keys1 | keys2) - {None} accum = {} - for key in keys_to_update: + for key in all_keys: + # Create induced flow set fresh every time to avoid shared reference issues induced_set = {_restrict(default, key)} - first_set = (first and dict.get(first, key, induced_set)) or induced_set - second_set = (second and dict.get(second, key, induced_set)) or induced_set - first_set.update(second_set) - accum[key] = first_set - return accum + # 1. Retrieve the value (set or None) + # .get(key) returns None if key is missing, or if key exists and value is None. + val1 = first.get(key) if first else None + val2 = second.get(key) if second else None + # 2. Resolve to set + # If the value is None (missing or explicit None), we use the induced set. + set1 = val1 if val1 is not None else induced_set + set2 = val2 if val2 is not None else induced_set -""" + # 3. Create new set via union (non-mutating) + accum[key] = set1 | set2 -Callable builders - -""" + return accum def _build_action_restrict_if_no_prop(wanted_prop: str) -> Callable: + """Build an action function that restricts flows if no property override exists. + + Args: + wanted_prop: Property name to restrict to, or None for all. + + Returns: + Callable function that takes (default, curr_prop, flow) and returns + tuple of (default, flow) or None. + """ def action(default: InfluencePath, curr_prop: str | None, flow: InfluencePath) -> tuple[InfluencePath, InfluencePath] | None: - # The matchers will ensure we have a prop-wanted prop match, - # but we still need the wanted prop variable because a wanted prop - # of 'None' may be passed, in which case everything is wanted. - if wanted_prop is None: - # When the caller does not specify a desired property, - # then all requested, so return all flows if they exist + # return all flows if they exist if flow is not None: return default, flow if wanted_prop is not None and flow is None: # we don't have an override, so we return the restricted default - # Both endpoints are restricted, since we have an object map: - # e.g. Account_var1 --> Account_var2, so the request for - # Account_var.Name induces the flow - # from Account_var1.Name -> Account_var2.Name return default, _restrict(default, wanted_prop) if wanted_prop is not None and flow is not None: - # sanity check to make sure the filters are working assert curr_prop == wanted_prop return default, flow @@ -733,30 +527,16 @@ def action(default: InfluencePath, curr_prop: str | None, return action -""" - - Property Map manipulation functions - -""" - - -def _safe_add(my_prop_map: dict[InfluencePath, dict[str, set[InfluencePath]]], +def _safe_add(my_prop_map: VectorPropertyMap, my_default: InfluencePath, flow: InfluencePath, assign: bool = True) -> None: - """add function that provides the induced flow if needed - - Need to add the induced flow from the default - as well as the flow to the corresponding key. 
+ """Add flow to property map, providing induced flow if needed. Args: - my_prop_map: full property map - my_default: default being updated - flow: flow being added - assign: True if elements are being assigned, False if added - - Returns: - None, the passed in map is updated. - + my_prop_map: Property map to modify. + my_default: Default InfluencePath. + flow: InfluencePath to add. + assign: Whether to assign (replace) or add. Defaults to True. """ prop = flow.influenced_property @@ -765,6 +545,7 @@ def _safe_add(my_prop_map: dict[InfluencePath, dict[str, set[InfluencePath]]], else: induced_flow = _restrict(my_default, prop) to_add = {flow, induced_flow} + if my_prop_map[my_default] is None: my_prop_map[my_default] = dict() my_prop_map[my_default][prop] = to_add @@ -777,40 +558,20 @@ def _safe_add(my_prop_map: dict[InfluencePath, dict[str, set[InfluencePath]]], my_prop_map[my_default][prop].update({flow}) -def _safe_update(prop: str, x: set, old_map: dict[str, set]) -> None: - """Merges a set into a map at the specified property - - Args: - prop: string (not null) - x: must not be None - old_map: must not be None, but can be none on any property - - Returns: - new map that merges both - - """ - assert x is not None - - if prop not in old_map or old_map[prop] is None: - old_map[prop] = x - else: - old_map[prop].update(x) - - def _restrict(dataflow: InfluencePath, prop: str) -> InfluencePath: - """Restricts path to a member property + """Restrict path to a member property. Args: - dataflow: path - prop: restriction - - Returns: - restricted path + dataflow: InfluencePath to restrict. + prop: Property name to restrict to. + Returns: + New InfluencePath with both influencer_property and influenced_property + set to prop, or original path if prop is None. """ if prop is None: return dataflow return replace(dataflow, influencer_property=prop, - influenced_property=prop) + influenced_property=prop) \ No newline at end of file diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py index 42be36d2..bf98583b 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py @@ -16,13 +16,12 @@ import queries.default_query import queries.optional_query - from flow_parser.parse import Parser -from flow_scanner.util import case_insensitive_match from flow_scanner.control_flow import Crawler from flow_scanner.flow_result import ResultsProcessor +from flow_scanner.util import case_insensitive_match from public.contracts import State, AbstractCrawler, Query, LexicalQuery, FlowParser -from public.data_obj import Preset, PresetEncoder, QueryDescription +from public.data_obj import Preset, PresetEncoder from public.enums import QueryAction from queries import debug_query from queries.debug_query import Detect @@ -66,14 +65,15 @@ ] class QueryManager: - """ - Lifecycle: QueryManager is instantiated once per invocation of flow_scanner. - That means that if an argument is set to None due to error, it will not be - attempted again in the next flow. + """Manages query execution during flow scanning. - At the end of a full flow parse (including subflows), the queries are reloaded, e.g. - re-instantiated. But query instances persist across subflows. They persist until reload - is called. + Lifecycle: QueryManager is instantiated once per invocation of flow_scanner. 
+ That means that if an argument is set to None due to error, it will not be + attempted again in the next flow. + + At the end of a full flow parse (including subflows), the queries are reloaded, + e.g. re-instantiated. But query instances persist across subflows. They persist + until reload is called. """ # which built-in queries were requested, combining preset and any optional requested_query_ids: list[str] | None = None @@ -95,7 +95,7 @@ class QueryManager: results: ResultsProcessor = None # current parser associated to flow-file - parser: Parser = None + parser: FlowParser = None # which preset to request preset: str = None @@ -108,7 +108,7 @@ class QueryManager: # external_class_names: list[str] | None = None - # json object that will be passed to the debug query + # JSON object that will be passed to the debug query debug_arg: Any | None = None @classmethod @@ -119,7 +119,24 @@ def build(cls, external_module_path: str | None = None, external_class_names: str | None = None, debug_arg_str: str | None = None) -> QueryManager: - """Only call this once to build Query Manager at scan start + """Build QueryManager instance at scan start. + + Only call this once to build Query Manager at scan start. + + Args: + parser: FlowParser instance for the current flow. + requested_preset: Name of preset to use, or None. + requested_queries: List of specific query IDs to run, or None. + external_module_path: Path to external query module, or None. + external_class_names: Comma-separated class names from external module, or None. + debug_arg_str: JSON string for debug query arguments, or None. + + Returns: + Configured QueryManager instance. + + Raises: + ValueError: If debug_arg_str cannot be parsed. + ImportError: If external module cannot be loaded. """ qm = QueryManager() qm.parser = parser @@ -175,13 +192,15 @@ def build(cls, return qm - def generate_effective_preset(self)-> Preset: - """ + def generate_effective_preset(self) -> Preset: + """Generate the effective preset that will actually be run. - Returns: The list of query descriptions that will actually be run, combining - the preset field selected by the caller and any additional queries selected - by the caller + Combines the preset field selected by the caller and any additional + queries selected by the caller. + Returns: + Preset object containing the list of query descriptions that will + actually be run. """ q = [] if self.queries: @@ -201,7 +220,13 @@ def generate_effective_preset(self)-> Preset: - def lexical_query(self, parser: Parser, crawler: AbstractCrawler=None) -> None: + def lexical_query(self, parser: Parser, crawler: AbstractCrawler = None) -> None: + """Execute all lexical queries on the parser. + + Args: + parser: Parser instance for the flow. + crawler: Optional crawler instance (not used for lexical queries). + """ if self.queries is None or QueryAction.lexical not in self.action2queries: return None @@ -216,25 +241,21 @@ def lexical_query(self, parser: Parser, crawler: AbstractCrawler=None) -> None: f"{parser.flow_path} {traceback.format_exc()}") return None - def static_accept(self, query_id, **kwargs) -> None: - """Calls the (static) 'accept' method of this query. The query must - override the static 'accept' method of the abstract class. Expert - use only. + def static_accept(self, query_id: str, **kwargs) -> None: + """Call the (static) 'accept' method of a query. + + The query must override the static 'accept' method of the abstract class. + Expert use only. 
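+
+        Illustrative call (the query id and keyword argument here are
+        hypothetical)::
+
+            query_manager.static_accept('SomeAcceptingQuery', parser=parser)
+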
The purpose of accept methods is to record issues found in the course of normal scanning and parsing, and not as a result of running queries. - Because of this, we are accepting issues found and merely reformatting them into the appropriate query result. But if this query is not requested, then it will not override the parent accept which is a null op. Args: - query_id (str): Name of class that has the static accept method - **kwargs (Any): Keyword args to pass - - Returns: - Query Description - + query_id: Name of class that has the static accept method. + **kwargs: Keyword arguments to pass to the accept method. """ if self.queries is not None and query_id in self.queries: mod_name = self.query_id2module_name[query_id] @@ -250,16 +271,12 @@ def static_accept(self, query_id, **kwargs) -> None: logger.info(f"The query id {query_id} is not recognized as a requested lexical query id") def query(self, action: QueryAction, state: State, crawler: Crawler = None) -> None: - """Invokes QueryProcessor to execute query and stores results + """Invoke QueryProcessor to execute query and store results. Args: - action: type of invocation (flow entrance or element entrance) - state: current state - crawler: flow crawler object which has crawl schedule and cfg - - Returns: - None - + action: Type of invocation (flow entrance or element entrance). + state: Current execution state. + crawler: Flow crawler object which has crawl schedule and CFG. """ # when we first enter a state, there is a start elem which is not assigned and so curr elem is None. # don't look for sinks into these start states. @@ -272,7 +289,15 @@ def query(self, action: QueryAction, state: State, crawler: Crawler = None) -> N return None - def final_query(self, all_states: tuple[State]=None) -> None: + def final_query(self, all_states: tuple[State] = None) -> None: + """Run final queries and reload for next flow. + + Executes scan_exit queries and then reloads query instances + for the next flow to process. + + Args: + all_states: Tuple of all execution states, or None. + """ self.run_queries(action=QueryAction.scan_exit, all_states=all_states) @@ -280,6 +305,12 @@ def final_query(self, all_states: tuple[State]=None) -> None: self.reload() def accept(self, query_id: str, **kwargs) -> None: + """Call the accept method of a query instance. + + Args: + query_id: Query ID to call accept on. + **kwargs: Keyword arguments to pass to accept method. + """ if query_id not in self.queries: return None qry = self.queries[query_id] @@ -292,11 +323,24 @@ def accept(self, query_id: str, **kwargs) -> None: return None - def debug_query(self, msg: str): + def debug_query(self, msg: str) -> None: + """Set debug argument for debug query. + + Args: + msg: Debug message string. + """ self.debug_arg = msg - def run_queries(self, action: QueryAction, state: State=None, - crawler: AbstractCrawler=None, all_states: tuple[State]=None) -> None: + def run_queries(self, action: QueryAction, state: State = None, + crawler: AbstractCrawler = None, all_states: tuple[State] = None) -> None: + """Run all queries for a specific action. + + Args: + action: QueryAction type to run queries for. + state: Current execution state, or None. + crawler: Flow crawler instance, or None. + all_states: Tuple of all execution states, or None. 
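+
+        Example (this mirrors the scan_exit invocation in final_query)::
+
+            self.run_queries(action=QueryAction.scan_exit, all_states=all_states)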
+ """ if self.action2queries is None: return None if action not in self.action2queries: @@ -314,11 +358,11 @@ def run_queries(self, action: QueryAction, state: State=None, return None - def reload(self): - """Make a new instance of the queries after completing one flow + def reload(self) -> None: + """Make new instances of queries after completing one flow. - Returns: - None + Deletes old query instances, modules, and reloads for the next flow + to process. """ # reload internal modules for mod_ in QUERY_MODULES: @@ -336,18 +380,17 @@ def reload(self): ) def create_module(module_path: str) -> Any: - """Loads and Instantiates QueryProcessor + """Load and instantiate a query module. - Args: - module_path: location of module to load - - Returns: - QueryProcessor module + Args: + module_path: Location of module file to load. - Raises: - ValueError if module name cannot be parsed or preset not accepted - ImportError if the module cannot be loaded + Returns: + Loaded module object. + Raises: + ValueError: If module name cannot be parsed or file doesn't end in .py. + ImportError: If the module cannot be loaded. """ if module_path is None: # we'll build default @@ -376,30 +419,30 @@ def create_module(module_path: str) -> Any: raise e def build_query_maps( - requested_queries: list[str] | None=None, + requested_queries: list[str] | None = None, external_module: Any | None = None, external_classnames: list[str] | None = None, - debug_arg: Any | None=None + debug_arg: Any | None = None ) -> tuple[ dict[str, Query | LexicalQuery] | None, dict[str, Query | LexicalQuery] | None, dict[QueryAction, list[Query | LexicalQuery]] | None, - dict[str,str] + dict[str, str] ]: - """Instantiates queries and places them into convenient map structures + """Instantiate queries and place them into convenient map structures. Args: - requested_queries: list of validated built in queries - external_module: (loaded) external module reference - external_classnames: list of classnames in external module - debug_arg: json obj corresponding to argument + requested_queries: List of validated built-in query IDs. + external_module: Loaded external module reference, or None. + external_classnames: List of class names in external module, or None. + debug_arg: JSON object for debug query arguments, or None. Returns: - queries (id -> instance), - custom_queries (id -> instance), - action2queries (actionType -> List[QueryInstance] - query_id2module_name (str -> str) - + Tuple of: + - queries: Dictionary mapping query_id to built-in query instance. + - custom_queries: Dictionary mapping query_id to custom query instance. + - action2queries: Dictionary mapping QueryAction to list of query instances. + - query_id2module_name: Dictionary mapping query_id to module name. """ built_in_id2instance = {} # only for builtin @@ -437,18 +480,32 @@ def build_query_maps( return built_in_id2instance, custom_id2instance, action2queries, id2module -def populate_maps_from_instance(qry_id, my_module, - id2instance, - id2module, - action2queries)-> None: +def populate_maps_from_instance(qry_id: str, my_module: Any, + id2instance: dict, id2module: dict, + action2queries: dict) -> None: + """Populate query maps from a query instance. + + Args: + qry_id: Query ID/class name. + my_module: Module containing the query class. + id2instance: Dictionary to add query instance to. + id2module: Dictionary to add module mapping to. + action2queries: Dictionary to add action mappings to. 
+ """ qry_instance = getattr(my_module, qry_id)() id2instance[qry_id] = qry_instance id2module[qry_id] = my_module populate_action2queries(action2queries, qry_instance) -def populate_action2queries(action2queries: dict[QueryAction,list[LexicalQuery | Query]], - instance: Query|LexicalQuery|Detect) -> None: +def populate_action2queries(action2queries: dict[QueryAction, list[LexicalQuery | Query]], + instance: Query | LexicalQuery | Detect) -> None: + """Add query instance to action2queries map based on when_to_run. + + Args: + action2queries: Dictionary mapping QueryAction to list of queries. + instance: Query instance to add to the map. + """ for action in instance.when_to_run(): if action not in action2queries: action2queries[action] = [instance] @@ -456,11 +513,11 @@ def populate_action2queries(action2queries: dict[QueryAction,list[LexicalQuery | action2queries[action].append(instance) -def get_query_descriptions()-> str: - """ - - Returns: All descriptions for builtin queries +def get_query_descriptions() -> str: + """Get all descriptions for built-in queries. + Returns: + JSON string containing all built-in query descriptions. """ descriptions = [] for (my_module, qry_map) in QUERY_MODULES: @@ -472,16 +529,17 @@ def get_query_descriptions()-> str: def validate_qry_list(qry_list: list[str]) -> tuple[bool, list[str] | None, list[str] | None, list[str] | None]: - """Verifies that the passed in list of strings is a case-insensitive match of legal - query names and returns the matching de-duped legal query names along with a boolean - that is False if there are any queries requested that are illegal, or if there are any duplicates + """Verify that query list contains valid, case-insensitive query names. Args: - qry_list: list of user provided query_ids to run + qry_list: List of user-provided query IDs to validate. Returns: - boolean (is valid), found, missed, duplicates - + Tuple of (is_valid, found_list, missed_list, duplicates_list). + - is_valid: True if all queries are valid and no duplicates. + - found_list: List of matching legal query names (de-duplicated). + - missed_list: List of unrecognized query names. + - duplicates_list: List of duplicate query names in input. """ query_keys = [x[1].keys() for x in QUERY_MODULES] found_tkns = [] @@ -508,28 +566,36 @@ def validate_qry_list(qry_list: list[str]) -> tuple[bool, list[str] | None, list return valid, found_tkns, missed_tkns, duplicates def build_preset_for_name(preset_name: str) -> Preset | None: - """This is used by the CLI to describe an internal preset + """Build a Preset object for an internal preset name. + + Used by the CLI to describe an internal preset. Args: - preset_name (str): + preset_name: Name of the preset to build. Returns: - Preset corresponding to this name + Preset object corresponding to this name, or None if not found. """ - queries = dict.get(PRESETS, preset_name, []) - accum = [] + queries_ = dict.get(PRESETS, preset_name, []) + accum = set() if not queries: return None - for (mod, query) in queries: + for (mod, query) in queries_: class_ = getattr(mod, query) - accum.append(class_.get_query_description()) + accum.add(class_.get_query_description()) + return Preset(preset_name=preset_name, preset_owner="Salesforce", queries=accum) def get_all_queries() -> list[str]: - """Does not return debug queries + """Get list of all built-in query IDs. + + Does not return debug queries. + + Returns: + List of all built-in query ID strings. 
""" accum = [] for x in QUERY_MODULES: diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py index ded109a9..b8bf83f1 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py @@ -12,8 +12,8 @@ from collections.abc import Callable from dataclasses import fields from pathlib import Path +from typing import Any from typing import TYPE_CHECKING -from typing import Any, Sequence from public.data_obj import VariableType from public.enums import RunMode @@ -38,13 +38,13 @@ def get_flows_in_dirs(root_dirs: str) -> list[str]: - """Searches recursively through for flows + """Search recursively for flow files in directories. Args: - root_dirs: csv list of directories in which to search + root_dirs: Comma-separated list of directories in which to search. Returns: - list of all flows (recursively) + List of all flow file paths found recursively. """ flow_paths = [] for root_dir in root_dirs.split(','): @@ -58,6 +58,14 @@ def get_flows_in_dirs(root_dirs: str) -> list[str]: def get_local_label(filename: str) -> str: + """Extract local label from flow filename. + + Args: + filename: Flow filename to extract label from. + + Returns: + Local label (name before first '-' character). + """ if filename.endswith(PACKAGE_FLOW_EXTENSION): short = filename[:-5] elif filename.endswith(FLOW_EXTENSION): @@ -75,23 +83,23 @@ def get_local_label(filename: str) -> str: def propagate(src_type: VariableType, dest_type: VariableType, **replacements) -> VariableType: - """Propagate attributes across flows. + """Propagate type attributes across flows. - For example, if we know that a variable - of type 'Account' is passed into loop, then we want to remember - that the object type of this loop is Account. This works if we leave - all properties none unless we are certain of their values and then - adopt this simple method. Longer term, we may need to put conditional - logic, but now add a replacement field for manual override. + For example, if we know that a variable of type 'Account' is passed + into a loop, then we want to remember that the object type of this + loop is Account. This works if we leave all properties None unless + we are certain of their values and then adopt this simple method. + Longer term, we may need to put conditional logic, but now add a + replacement field for manual override. Args: - src_type: start Variable Type - dest_type: end Variable Type - replacements: property overrides + src_type: Source VariableType to propagate from. + dest_type: Destination VariableType to propagate to. + **replacements: Property overrides as keyword arguments. Returns: - Variable Type, modified with sources populating empty dest entries. - + VariableType modified with source properties populating empty + destination entries. """ prop_names = [x.name for x in fields(VariableType) if x is not None] @@ -112,16 +120,31 @@ def propagate(src_type: VariableType, dest_type: VariableType, **replacements) - def make_id() -> str: - """Generates unique id strings + """Generate unique ID strings. Returns: - 8 digit unique id as str - + 8-character unique ID as string (first 8 chars of UUID). """ return str(uuid.uuid4())[:8] def get_effective_run_mode(parent_sharing: RunMode | None, current_sharing: RunMode) -> RunMode: + """Get effective run mode based on parent and current sharing settings. 
+ + A master Flow running in system context will cause actions run in the + SubFlow to be run in system context as well, regardless of whether the + SubFlow was originally created and configured to run in user context. + A master Flow running in user context that has a SubFlow running in + system context will proceed to run the actions in the SubFlow in system + context. + + Args: + parent_sharing: Run mode of parent flow, or None. + current_sharing: Run mode of current flow. + + Returns: + Effective run mode to use. + """ if (parent_sharing is None or current_sharing is RunMode.SystemModeWithoutSharing or current_sharing is RunMode.SystemModeWithSharing): return current_sharing @@ -129,7 +152,16 @@ def get_effective_run_mode(parent_sharing: RunMode | None, current_sharing: RunM return parent_sharing -def sane_index(my_tuple: tuple, to_match): +def sane_index(my_tuple: tuple, to_match) -> int: + """Find index of item in tuple, returning -1 if not found. + + Args: + my_tuple: Tuple to search. + to_match: Item to find in tuple. + + Returns: + Index of item if found, -1 otherwise. + """ try: index = my_tuple.index(to_match) except ValueError: @@ -144,19 +176,51 @@ def sane_index(my_tuple: tuple, to_match): def is_non_null(entry) -> bool: + """Check if entry is not None. + + Args: + entry: Value to check. + + Returns: + True if entry is not None, False otherwise. + """ return entry is not None def is_null(entry) -> bool: + """Check if entry is None. + + Args: + entry: Value to check. + + Returns: + True if entry is None, False otherwise. + """ return entry is None def id_(*entry) -> typing.Any: + """Identity function that returns its arguments as a tuple. + + Args: + *entry: Variable number of arguments. + + Returns: + Tuple of all arguments. + """ return entry -def build_match_on_null(prop: str = None) -> Callable: +def build_match_on_null(prop: str | None = None) -> Callable: + """Build a function that matches properties, optionally on a specific property. + + Args: + prop: Property name to match, or None to match all. + + Returns: + Callable function that takes a property name and returns True if it matches. + """ def prop_match(prop_to_match: str): if prop is None: return True @@ -169,6 +233,17 @@ def prop_match(prop_to_match: str): def build_action_filter(include_default: bool = True, include_prop: bool = True, include_flow: bool = True) -> Callable: + """Build a filter function for action matching. + + Args: + include_default: Whether to include default in result. + include_prop: Whether to include property in result. + include_flow: Whether to include flow in result. + + Returns: + Callable function that takes (default, prop, flow) and returns + a tuple of the included values. + """ def action(default, prop, flow): accum = [] if include_default: @@ -183,32 +258,81 @@ def action(default, prop, flow): def build_equality_match(to_match) -> Callable: + """Build a function that checks equality with a specific value. + + Args: + to_match: Value to match against. + + Returns: + Callable function that takes an object and returns True if it equals to_match. + """ def equ_match(obj_to_match): return obj_to_match == to_match return equ_match -def match_all(x) -> bool: +def match_all(x) -> bool: # noqa + """Match all values (always returns True). + + Args: + x: Any value (ignored). + + Returns: + Always True. 
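The precedence described in the get_effective_run_mode docstring above, as a runnable sketch. The enum values mirror the names used in this diff; DefaultMode is a stand-in for whatever non-system mode the real public.enums.RunMode defines:

from enum import Enum, auto

class ToyRunMode(Enum):
    DefaultMode = auto()                 # assumed name for the non-system mode
    SystemModeWithSharing = auto()
    SystemModeWithoutSharing = auto()

def effective(parent: ToyRunMode | None, current: ToyRunMode) -> ToyRunMode:
    # A system-mode declaration on the current flow always wins; otherwise
    # the subflow inherits its parent's mode.
    if parent is None or current in (ToyRunMode.SystemModeWithSharing,
                                     ToyRunMode.SystemModeWithoutSharing):
        return current
    return parent

assert effective(ToyRunMode.SystemModeWithoutSharing,
                 ToyRunMode.DefaultMode) is ToyRunMode.SystemModeWithoutSharing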
+ """ return True -def safe_dict_list_append(a_dict: dict[Any, Any], key:Any, val: Any) -> dict[Any, list[Any]]: - if key not in dict: +def safe_dict_list_append(a_dict: dict[Any, Any], key: Any, val: Any) -> dict[Any, list[Any]]: + """Safely append a value to a list in a dictionary. + + Creates the list if the key doesn't exist. + + Args: + a_dict: Dictionary to modify. + key: Key to append to. + val: Value to append. + + Returns: + Modified dictionary. + """ + if key not in a_dict: a_dict[key] = [val] else: a_dict[key].append(val) return a_dict -def safe_dict_list_add(a_dict: dict[Any, Any], key:Any, list_val: list[Any]) -> dict[Any, list[Any]]: - if key not in dict: +def safe_dict_list_add(a_dict: dict[Any, Any], key: Any, list_val: list[Any]) -> dict[Any, list[Any]]: + """Safely add a list of values to a list in a dictionary. + + Creates the list if the key doesn't exist, otherwise concatenates. + + Args: + a_dict: Dictionary to modify. + key: Key to add to. + list_val: List of values to add. + + Returns: + Modified dictionary. + """ + if key not in a_dict: a_dict[key] = list_val else: a_dict[key] = a_dict[key] + list_val return a_dict -def safe_list_add(a_list, b_list) -> list | None: - # this should be in standard library +def safe_list_add(a_list: list | None, b_list: list | None) -> list | None: + """Safely add two lists, handling None values. + + Args: + a_list: First list, or None. + b_list: Second list, or None. + + Returns: + Concatenated list, or None if both are None, or the non-None list + if one is None. + """ if a_list is None and b_list is None: return None elif a_list is None: @@ -220,7 +344,20 @@ def safe_list_add(a_list, b_list) -> list | None: class Resolver(object): + """Resolves subflow paths by namespace and label. + + Attributes: + all_flow_paths: List of all flow file paths. + resolver_map: Dictionary mapping scope to (namespace, label) -> flow_path. + cached_namespace_lookups: Dictionary mapping folder path to namespace. + """ + def __init__(self, all_flow_paths: list[str]) -> None: + """Initialize resolver with flow paths. + + Args: + all_flow_paths: List of all flow file paths to resolve from. + """ self.all_flow_paths = all_flow_paths #: folder path -> namespace @@ -256,6 +393,19 @@ def __init__(self, all_flow_paths: list[str]) -> None: self.cached_namespace_lookups = cached_namespace_lookups def get_subflow_path(self, sub_name: str, flow_path: str) -> str | None: + """Get the file path for a subflow by name. + + Checks if there is a namespace in the sub_name. If so, searches all + scopes for that namespace. Otherwise, checks in the local scope and + local namespace for a full name match. + + Args: + sub_name: Name of the subflow to find. + flow_path: Path of the current flow (for local scope resolution). + + Returns: + File path of the subflow if found, None otherwise. + """ # check if there is a namespace in the sub_name to_match = sub_name.lower() splits = to_match.split("__") @@ -281,7 +431,19 @@ def get_subflow_path(self, sub_name: str, flow_path: str) -> str | None: else: return None -def get_scope_ns_label(f_path, cached_namespace_lookups): +def get_scope_ns_label(f_path: str, cached_namespace_lookups: dict[str, str]) -> tuple[str, str, str]: + """Get scope, namespace, and label from a flow file path. + + Args: + f_path: Flow file path. + cached_namespace_lookups: Dictionary mapping folder paths to namespaces. + + Returns: + Tuple of (scope, namespace, label). + + Raises: + RuntimeError: If path is not absolute or not in a folder. 
+ """ ns = next((cached_namespace_lookups[x] for x in cached_namespace_lookups.keys() if f_path.startswith(x)), None) # then check for sfdx-project.json or package.xml files @@ -306,38 +468,41 @@ def get_scope_ns_label(f_path, cached_namespace_lookups): def update_folder_ns(f_path: str, cached_namespace_lookups: dict[str, str]) -> tuple[str | None, dict[str, str]]: - """looks at the filepath and tries to find the namespace definition from either - the package manifest or project-json file. Stores the results in a cache + """Find namespace definition from package manifest or project-json file. + Looks at the filepath and tries to find the namespace definition from either + the package manifest or project-json file. Stores the results in a cache. We do not infer from folder structure yet, just from the config files. - package manifest assumes a folder structure like this: + Package manifest assumes a folder structure like:: + + top -> /flows/file + package.xml + -- XML and can load and look at PT1 + under the XML root, with xmlns: "http://soap.sforce.com/2006/04/metadata" - top -> /flows/file - package.xml - -- xml and can load and look at PT1 - under the xml root, with xmlns: "http://soap.sforce.com/2006/04/metadata" + but we also support:: - but we also support - top -> second -> flows/file + top -> second -> flows/file - project-json assumes - sfdx-project.json at the top of the project next to force-app - -- can load and look at loaded["namespace"] + Project-json assumes:: - and assumes a project structure of: - force-app -> first -> second -> flows/file + sfdx-project.json at the top of the project next to force-app + -- can load and look at loaded["namespace"] + + and assumes a project structure of:: + + force-app -> first -> second -> flows/file Args: - f_path (): - cached_namespace_lookups (): + f_path: Flow file path to find namespace for. + cached_namespace_lookups: Dictionary mapping folder paths to namespaces. Returns: - updated dict string (parent directory containing json/xml file) -> namespace - the parent directory is an absolute path normalized so that - we can tell whether a child flow is in this namespace by looking at - path_of_child.startswith(path_in_dict) and then assign the corresponding namespace to it. - + Tuple of (namespace, updated_cache_dict). The parent directory is an + absolute path normalized so that we can tell whether a child flow is + in this namespace by looking at path_of_child.startswith(path_in_dict) + and then assign the corresponding namespace to it. """ f = Path(f_path) # cached_namespace_lookups @@ -347,16 +512,16 @@ def update_folder_ns(f_path: str, cached_namespace_lookups: dict[str, str]) -> t in code we look for sfdx-project.json at root. layout can be: - 1) root -> force-app -> main -> default -> flows/myflow.flow + 1) root -> force-app -> main -> default -> flows/my_flow.flow 'main' is for production code, and also replace with 'test' for test code - 2) root -> pkg_dir -> main -> default -> flows/myflow.flow + 2) root -> pkg_dir -> main -> default -> flows/my_flow.flow 'pkg_dir' can live alongside force-app for multiple packages 3) 'default' represents where code is pulled from, but packages can work with other directories. so in general we want to look for 'flows' as the immediate directory containing - the code and then look up to 4 levels above where myflow.flow lives. + the code and then look up to 4 levels above where my_flow.flow lives. if no package-json is found, we return with no defined namespace. 
""" @@ -397,14 +562,13 @@ def update_folder_ns(f_path: str, cached_namespace_lookups: dict[str, str]) -> t def get_ns_from_package_xml(package_path: str) -> str | None: - """returns namespace or None + """Extract namespace from package.xml file. Args: - package_path (str): path to 'package.xml' + package_path: Path to 'package.xml' file. Returns: - namespace or None (if no namespace prefix in package xml) - + Namespace prefix if found, None if no namespace prefix in package XML. """ try: with open(package_path, 'r') as package_xml: @@ -424,6 +588,14 @@ def get_ns_from_package_xml(package_path: str) -> str | None: def get_ns_from_package_json(package_path: str) -> str | None: + """Extract namespace from sfdx-project.json file. + + Args: + package_path: Path to 'sfdx-project.json' file. + + Returns: + Namespace prefix if found, None if no namespace or on error. + """ try: with open(package_path, 'r') as p: json_data = json.load(p) @@ -438,6 +610,15 @@ def get_ns_from_package_json(package_path: str) -> str | None: def case_insensitive_match(list_a: list[str], to_match: str) -> str | None: + """Find a case-insensitive match in a list. + + Args: + list_a: List of strings to search. + to_match: String to match (case-insensitive). + + Returns: + Matching string from list if found, None otherwise. + """ for item in list_a: if item.lower() == to_match.lower(): return item @@ -445,19 +626,26 @@ def case_insensitive_match(list_a: list[str], to_match: str) -> str | None: def find_cycles(target, history: tuple) -> tuple[int, tuple | None]: - """Look for cycles in the history ending with target and return (# of cycles, cycle) + """Look for cycles in the history ending with target. + + The idea is to detect cycles. Say the history is:: - The idea is to detect cycles. Say the history is history = [A B X Y Z X Y] - and we are thinking of adding the target Z. - But we don't want to add it if it will - create a repeating pattern, as this corresponds to looping needlessly. - So we look for the previous occurrence of Z in the history, and then look - at history[right_index(Z):] = [Z X Y] - Now we want to check whether the portion [X Y] also precedes Z. If so, - we found a cycle ending at target, and we don't jump to that target. + and we are thinking of adding the target Z. + + But we don't want to add it if it will create a repeating pattern, as + this corresponds to looping needlessly. So we look for the previous + occurrence of Z in the history, and then look at history[right_index(Z):] = [Z X Y]. + Now we want to check whether the portion [X Y] also precedes Z. If so, + we found a cycle ending at target, and we don't jump to that target. + Args: + target: Target value to check for cycles. + history: Tuple of history values. + + Returns: + Tuple of (number_of_cycles, cycle_tuple). Returns (0, None) if no cycle found. """ if not history: return 0, None diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py index df9459c3..41bc70ac 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py @@ -1,33 +1,30 @@ -"""performs dataflow wiring for flow elements +"""Performs dataflow wiring for flow elements. - ------------- - Wiring Policy - ------------- +Wiring Policy +------------- - Wiring policy for flow elements +1. 
When we encounter any variable defined or initialized in an element, + we add that variable to the influence map. - 1. When we encounter any variable defined or initialized in an element, - we add that variable to the influence map +2. When dataflows _out_ of an element to another element, we wire the flow. - 2. When dataflows _out_ of an element to another element, we wire the flow. + We do not presently wire flows _into_ the current element, as we don't support + second order dataflow analysis. - We do not presently wire flows _into_ the current element, as we don't support - second order dataflow analysis. - - For example, user data may flow into the filter field - of a Get Records, and the return value of the function may be assigned to another variable. - Only the second dataflow is wired. If both flows were wired, we would have a second order flow, - e.g. assuming that the inputs to a function are part of the same dataflow as the return values, - which is rarely useful for dataflow analysis and generates misleading flows. - - If we are searching for dangerous flows *into* elements, this is done by the query processor, - which does not wire anything, it only searches for the flows. This is why our dataflow results - generally contain one missing step, which must be added by the QueryProcessor. - - This allows us to know, at any point in program execution which variables have been - initialized and also what the dataflow history of each variable is. + For example, user data may flow into the filter field of a Get Records, + and the return value of the function may be assigned to another variable. + Only the second dataflow is wired. If both flows were wired, we would have + a second order flow, e.g. assuming that the inputs to a function are part + of the same dataflow as the return values, which is rarely useful for + dataflow analysis and generates misleading flows. + If we are searching for dangerous flows *into* elements, this is done by + the query processor, which does not wire anything, it only searches for + the flows. This is why our dataflow results generally contain one missing + step, which must be added by the QueryProcessor. +This allows us to know, at any point in program execution, which variables +have been initialized and also what the dataflow history of each variable is. """ import logging @@ -53,18 +50,17 @@ class QueryResult(Enum): def initialize(state: BranchState, elem: El, elem_name: str) -> dict[QueryResult, bool | str | El]: - """Add this element name to influence map if it represents its own output data + """Add element name to influence map if it represents its own output data. - (Element name is passed in so we don't need to keep looking it up) + Element name is passed in so we don't need to keep looking it up. Args: - state: current branch state - elem: current xml elem - elem_name: element name + state: Current branch state. + elem: Current XML element. + elem_name: Element name. Returns: - None - + Dictionary mapping QueryResult enum values to initialization results. 
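A toy version of the wiring policy stated in this module docstring: only the outbound assignment is recorded in the influence map, and the inputs feeding an element are deliberately not wired into it, leaving that final step to the QueryProcessor. Names are illustrative.

influence: dict[str, set[str]] = {}

def wire_out(influencer: str, influenced: str) -> None:
    # Record only the outbound dataflow: element output -> destination var.
    influence.setdefault(influenced, set()).add(influencer)

# The output of a Get Records element is assigned to `acct`; the user-supplied
# filter input is intentionally NOT wired into the element.
wire_out("Get_Records", "acct")
assert influence == {"acct": {"Get_Records"}}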
""" tag = parse_utils.get_tag(elem) auto_store = parse_utils.is_auto_store(elem) @@ -118,25 +114,27 @@ def wire(state: BranchState, elem: El) -> None: wire_apex_plugin_calls(state, elem, el_name, stored) elif el_type == 'assignments': - wire_assignment(state, elem, el_name, stored) + wire_assignment(state, elem, el_name) # loops and collection processors work with collection references elif el_type == 'collectionProcessors': - wire_collection_processor(state, elem, el_name, stored) + wire_collection_processor(state, elem, el_name) elif el_type == 'dynamicChoiceSets': - wire_dynamic_choice_sets(state, elem, el_name, stored) + pass + # Todo: audit if this is necessary + # wire_dynamic_choice_sets(state, elem, el_name, stored) elif el_type == 'loops': - wire_loop(state, elem, el_name, stored) + wire_loop(state, elem, el_name) elif el_type == 'orchestratedStages': # Inside Orchestrated stages, stageSteps are wired as step_name.Outputs.var - wire_orchestrated_stages(state, elem, el_name, stored) + wire_orchestrated_stages(state, elem) elif el_type == 'recordCreates': # look for passing id to variable in create - wire_record_creates(state, elem, el_name, stored) + wire_record_creates(state, elem, el_name) elif el_type == 'recordDeletes': # these only auto-wired as the standard boolean (e.g. true if success) @@ -154,7 +152,7 @@ def wire(state: BranchState, elem: El) -> None: elif el_type == 'screens': # TODO: need to handle output from screen to action or vice-versa # This should wait until we crawl actions separately - wire_screens(state, elem, el_name, stored) + wire_screens(state, elem, el_name) elif el_type == 'subflows': # subflow wiring is done in the executor @@ -164,15 +162,21 @@ def wire(state: BranchState, elem: El) -> None: pass elif el_type == 'transforms': - wire_transforms(state, elem, el_name, stored) + wire_transforms(state, elem, el_name) elif el_type == 'waits': - wire_waits(state, elem, el_name, stored) + wire_waits(state, elem) return None -def wire_waits(state: BranchState, elem: El, el_name: str, stored): - """Wait events can fire events on exit which is handled via output ref +def wire_waits(state: BranchState, elem: El) -> None: + """Wire wait element events. + + Wait events can fire events on exit which is handled via output reference. + + Args: + state: Current branch state. + elem: Wait element to wire. """ wait_events = parse_utils.get_by_tag(elem, 'waitEvents') for event in wait_events: @@ -191,17 +195,13 @@ def wire_waits(state: BranchState, elem: El, el_name: str, stored): -def wire_assignment(state: BranchState, elem: El, elem_name: str, stored): - """Wires assignment statements to influence map in `state` +def wire_assignment(state: BranchState, elem: El, elem_name: str) -> None: + """Wire assignment statements to influence map in state. Args: - state: current Branch State - elem: assignment element to be wired - elem_name: element name passed in for convenience - - Returns: - None - + state: Current branch state. + elem: Assignment element to be wired. + elem_name: Element name passed in for convenience. """ res = parse_utils.get_assignment_statement_dicts(elem) if res is None: @@ -240,7 +240,14 @@ def wire_assignment(state: BranchState, elem: El, elem_name: str, stored): logger.debug(f"Propagated flow for {elem_name}: {entry}") -def wire_transforms(state, elem, el_name, stored): +def wire_transforms(state: BranchState, elem: El, el_name: str) -> None: + """Wire transform element influencers to outputs. + + Args: + state: Current branch state. 
+ elem: Transform element to wire. + el_name: Element name. + """ res = parse_utils.get_transform_influencers(elem) if res is None: return @@ -263,14 +270,29 @@ def wire_transforms(state, elem, el_name, stored): elem=elem, el_name=el_name, comment='influence via transform element') -def wire_record_creates(state, elem, el_name, stored): +def wire_record_creates(state: BranchState, elem: El, el_name: str) -> None: + """Wire record create element to assign record ID. + + Args: + state: Current branch state. + elem: Record create element to wire. + el_name: Element name. + """ influenced = parse_utils.get_text_of_tag(elem, 'assignRecordIdToReference') if influenced is not None: wire_and_store(state=state, influencer=el_name, influenced=influenced, el_name=el_name, elem=elem, comment='id from record Create') -def wire_record_lookups(state, elem, el_name, stored): +def wire_record_lookups(state: BranchState, elem: El, el_name: str, stored: dict) -> None: + """Wire record lookup element outputs. + + Args: + state: Current branch state. + elem: Record lookup element to wire. + el_name: Element name. + stored: Dictionary of stored query results from initialize(). + """ assignments = stored[QueryResult.OutputAssignmentsEls] for assignment in assignments: influenced = parse_utils.get_text_of_tag(assignment, 'assignToReference') @@ -286,14 +308,30 @@ def wire_record_lookups(state, elem, el_name, stored): el_name=el_name, elem=elem, comment='output of record Lookup') -def wire_action_calls(state, elem, el_name, stored): +def wire_action_calls(state: BranchState, elem: El, el_name: str, stored: dict) -> None: + """Wire action call element outputs. + + Args: + state: Current branch state. + elem: Action call element to wire. + el_name: Element name. + stored: Dictionary of stored query results from initialize(). + """ if not stored[QueryResult.IsAutoStore]: # If auto-stored, then this will have already been autowired when initialized wire_apex_plugin_calls(state, elem, el_name, stored) -def wire_apex_plugin_calls(state, elem, el_name, stored): +def wire_apex_plugin_calls(state: BranchState, elem: El, el_name: str, stored: dict) -> None: + """Wire Apex plugin call element outputs. + + Args: + state: Current branch state. + elem: Apex plugin call element to wire. + el_name: Element name. + stored: Dictionary of stored query results from initialize(). + """ output_params = stored[QueryResult.OutputParametersEls] for output in output_params: influenced = parse_utils.get_text_of_tag(output, 'assignToReference') @@ -304,12 +342,19 @@ def wire_apex_plugin_calls(state, elem, el_name, stored): el_name=el_name, elem=elem, comment="action output value") +""" def wire_dynamic_choice_sets(state, elem, el_name, stored): #TODO: audit these pass +""" +def wire_orchestrated_stages(state: BranchState, elem: El) -> None: + """Wire orchestrated stage element outputs. -def wire_orchestrated_stages(state, elem, el_name, stored): + Args: + state: Current branch state. + elem: Orchestrated stage element to wire. + """ # Todo: update this with additional wiring after audit steps = parse_utils.get_by_tag(elem, 'stageSteps') for step in steps: @@ -319,17 +364,13 @@ def wire_orchestrated_stages(state, elem, el_name, stored): state.get_or_make_vector(name=fixed_name, store=True) -def wire_loop(state: BranchState, elem: El, elem_name: str, stored): - """Wires collection loop is over to loop variable. +def wire_loop(state: BranchState, elem: El, elem_name: str) -> None: + """Wire collection loop reference to loop variable. 
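These wiring helpers lean on parse_utils.get_text_of_tag / get_by_tag for child lookups. A namespace-tolerant stand-in, assuming only that the helpers resolve a child by local tag name (the real parse_utils is more careful about nesting and the flow namespace):

import xml.etree.ElementTree as ET

def text_of_tag(elem: ET.Element, tag: str) -> str | None:
    # Match on the local name so a namespace prefix does not matter.
    child = next((c for c in elem.iter()
                  if c.tag.split('}')[-1] == tag), None)
    return child.text if child is not None else None

elem = ET.fromstring(
    "<recordCreates><assignRecordIdToReference>acctId"
    "</assignRecordIdToReference></recordCreates>")
assert text_of_tag(elem, "assignRecordIdToReference") == "acctId"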
Args: - state: current Branch State - elem: assignment element to be wired - elem_name: element name passed in for convenience - - Returns: - None - + state: Current branch state. + elem: Loop element to be wired. + elem_name: Element name passed in for convenience. """ collection_ref_els = parse.get_by_tag(elem, tag_name='collectionReference') if len(collection_ref_els) != 1: @@ -344,17 +385,13 @@ def wire_loop(state: BranchState, elem: El, elem_name: str, stored): el_name=elem_name, elem=elem,comment='assign to loop variable') -def wire_collection_processor(state: BranchState, elem: El, elem_name: str, stored): - """Wires collection reference in collection processor to collection elem. +def wire_collection_processor(state: BranchState, elem: El, elem_name: str) -> None: + """Wire collection reference in collection processor to collection element. Args: - state: current Branch State - elem: assignment element to be wired - elem_name: element name passed in for convenience - - Returns: - None - + state: Current branch state. + elem: Collection processor element to be wired. + elem_name: Element name passed in for convenience. """ # every collectionProcessor must have a single collection ref subtype = parse.get_by_tag(elem, tag_name='elementSubtype') @@ -372,7 +409,14 @@ def wire_collection_processor(state: BranchState, elem: El, elem_name: str, stor elem=collection_el) -def wire_screens(state, elem, el_name, stored): +def wire_screens(state: BranchState, elem: El, el_name: str) -> None: + """Wire screen element fields and actions. + + Args: + state: Current branch state. + elem: Screen element to wire. + el_name: Element name. + """ parser = state.get_parser() stored_els = [] @@ -434,14 +478,24 @@ def wire_screens(state, elem, el_name, stored): continue -def wire_and_store(state: BranchState, influencer:str, influenced: str, +def wire_and_store(state: BranchState, influencer: str, influenced: str, el_name: str, elem: El, comment: str) -> None: + """Create an influence statement and propagate flows. + + Args: + state: Current branch state. + influencer: Variable or element doing the influencing. + influenced: Variable or element being influenced. + el_name: Element name where influence occurs. + elem: XML element where influence occurs. + comment: Human-readable comment explaining the influence. + """ stmt = InfluenceStatement( influenced_var=influenced, influencer_var=influencer, element_name=el_name, source_text=parse_utils.get_elem_string(elem), - line_no=elem.sourceline, + line_no=elem.sourceline, # noqa comment=comment, flow_path=state.flow_path, source_path=state.flow_path diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py b/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py index 6f997f31..208be000 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py @@ -35,72 +35,107 @@ """ class AbstractCrawler(ABC): + """Abstract base class for flow crawlers. + + Crawlers are responsible for traversing flow elements and managing + the crawl state during flow execution. + """ @abstractmethod def get_crawl_schedule(self) -> tuple[CrawlStep]: + """Get the crawl schedule for this crawler. + + Returns: + Tuple of CrawlStep instances representing the crawl schedule. + """ pass @abstractmethod def get_flow_path(self) -> str | None: + """Get the path to the flow file being crawled. + + Returns: + Path to the flow file, or None if not available. 
+ """ pass @abstractmethod def get_subflow_parents(self) -> list[tuple[ET.Element, str]]: - """READ ONLY. Do not perform any crawlstep loads with these crawlers! + """Get the history of parent subflow/action elements. + + .. warning:: + READ ONLY. Do not perform any crawlstep loads with these crawlers! Returns: - history of previous subflow/action elements, flow_paths that are ancestors - of the current crawler, crawler. E.g. current_frame <-- [(elem0, path0), (elem1, path1) - .. where the frame that spawned the current frame is at history[0] and the very first frame - is at history[-1] + List of tuples containing (element, flow_path) for ancestors of the + current crawler. The frame that spawned the current frame is at + index 0, and the very first frame is at the last index. + Example: current_frame <-- [(elem0, path0), (elem1, path1), ...] """ pass @abstractmethod def get_cfg(self) -> AbstractControlFlowGraph: + """Get the control flow graph for the current flow. + + Returns: + The AbstractControlFlowGraph instance for this flow. + """ pass @abstractmethod - def get_current_step_index(self)->int: + def get_current_step_index(self) -> int: + """Get the index of the current crawl step. + + Returns: + Integer index of the current step in the crawl schedule. + """ pass @abstractmethod def load_crawl_step(self) -> CrawlStep | None: + """Load the next crawl step. + + Returns: + The next CrawlStep instance, or None if no more steps available. + """ pass @abstractmethod - def get_last_ancestor(self, crawl_step) -> CrawlStep | None: - """Get latest ancestor branch that was last visited at :obj:`CrawlStep` + def get_last_ancestor(self, crawl_step: CrawlStep) -> CrawlStep | None: + """Get the latest ancestor branch that was last visited at the given step. - Useful for knowing which influence map to clone + Useful for knowing which influence map to clone. Args: - crawl_step: step whose history is sought + crawl_step: Step whose history is sought. Returns: - CrawlStep instance or None - + CrawlStep instance of the last ancestor, or None if not found. """ pass @abstractmethod def get_elem_to_crawl_step(self, elem_name: str) -> list[CrawlStep]: - """returns a list of all :obj:`CrawlStep` in which this element has been visited - during the crawl of this flow. If not visited, the empty list is returned. + """Get all crawl steps in which an element has been visited. Args: - elem_name (str): element name (use '*' for the start element) + elem_name: Element name (use '*' for the start element). Returns: - list of :obj:`CrawlStep` instances that visit this element - + List of CrawlStep instances that visit this element. + Returns empty list if the element has not been visited. """ pass @abstractmethod def get_crawlable_elem_tuples(self) -> list[tuple[str, str]] | None: - """Returns all traversable element name, tag tuples that are connected to the start element + """Get all traversable element name and tag tuples. + + Returns: + List of (element_name, tag) tuples that are connected to the + start element, or None if none found. """ pass @@ -109,74 +144,133 @@ def get_call_chain(self, source_el: ET.Element, source_path: str, sink_el: ET.Element, source_parser: FlowParser) -> list[tuple[ET.Element, str]] | None: - """sink_el must be in the current flow. source_el can be in an ancestor - flow. Only returns paths currently crawled, so this must be called - every time a specific frame is loaded. + """Get the call chain from source to sink element. + + The sink element must be in the current flow. 
The source element can + be in an ancestor flow. Only returns paths currently crawled, so this + must be called every time a specific frame is loaded. Args: - source_parser: must be parser in the source flow path - Returns: - A list starting with the source and ending with the sink in which the each is an - ancestor caller of the succeeding element. - [(element, element flow path)] + source_el: Source element in the call chain. + source_path: Path to the flow containing the source element. + sink_el: Sink element in the current flow. + source_parser: Parser for the source flow path. + Returns: + List of (element, element_flow_path) tuples starting with the + source and ending with the sink, where each element is an ancestor + caller of the succeeding element. Returns None if no chain found. """ pass class AbstractControlFlowGraph(ABC): - # where to start + """Abstract base class for control flow graphs. + + Represents the control flow structure of a flow, including segments + and jumps between them. + """ + @property @abstractmethod def start_label(self) -> str: + """Get the label of the starting segment. + + Returns: + Label string of the starting segment. + """ pass - # map from segment label -> inbound jumps @property @abstractmethod def inbound(self) -> dict[str, list[Jump]]: + """Get the map from segment label to inbound jumps. + + Returns: + Dictionary mapping segment labels to lists of inbound Jump objects. + """ pass @property @abstractmethod def segment_map(self) -> dict[str, AbstractSegment]: + """Get the map of all segments in the control flow graph. + + Returns: + Dictionary mapping segment labels to AbstractSegment instances. + """ pass class AbstractSegment(ABC): - # name of element at the start of the segment (jump target) + """Abstract base class for control flow graph segments. + + A segment represents a linear sequence of flow elements between + control flow jumps. + """ + @property @abstractmethod def label(self) -> str: + """Get the label of the element at the start of the segment. + + This is the jump target for this segment. + + Returns: + Label string of the starting element. + """ pass - # list of (element names, element tags) (including label) in this segment (in order) @property @abstractmethod def traversed(self) -> list[tuple[str, str]]: + """Get the list of elements traversed in this segment. + + Returns: + List of (element_name, element_tag) tuples in traversal order, + including the label element. + """ pass - # list of traversal indexes that are subflow elements @property @abstractmethod def subflows(self) -> list[int]: + """Get the traversal indexes that are subflow elements. + + Returns: + List of integer indexes into the traversed list that correspond + to subflow elements. + """ pass @property @abstractmethod def jumps(self) -> list[Jump]: + """Get the list of jumps from this segment. + + Returns: + List of Jump objects representing control flow transitions. + """ pass - # whether this segment may end execution @property @abstractmethod def is_terminal(self) -> bool: + """Check if this segment may end execution. + + Returns: + True if this segment may terminate flow execution, False otherwise. + """ pass - # for tracking whether it has been visited @property @abstractmethod def seen_tokens(self) -> list[tuple[tuple[str], ...]]: + """Get tokens for tracking whether this segment has been visited. + + Returns: + List of token tuples used for visit tracking. 
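The segment/jump contract being documented here, collapsed into plain data for orientation. ToyJump and ToySegment are stubs of the real Jump and AbstractSegment types, and the flow shape is invented:

from dataclasses import dataclass, field

@dataclass(frozen=True)
class ToyJump:
    src: str                             # element the jump leaves from
    dst: str                             # label of the target segment

@dataclass
class ToySegment:
    label: str                           # jump target / first element
    traversed: list[tuple[str, str]]     # (element_name, tag) in order
    jumps: list[ToyJump] = field(default_factory=list)
    is_terminal: bool = False

start = ToySegment("*", [("*", "start"), ("Get_Accounts", "recordLookups")],
                   jumps=[ToyJump("Get_Accounts", "Check_Access")])
check = ToySegment("Check_Access", [("Check_Access", "decisions")],
                   is_terminal=True)
segment_map = {s.label: s for s in (start, check)}
inbound = {"Check_Access": list(start.jumps)}    # reverse index of the jumps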
+ """ pass class AbstractQuery(ABC): @@ -191,257 +285,541 @@ class AbstractQuery(ABC): @classmethod def accept(cls, **kwargs) -> list[QueryResult] | None: - """ + """Accept and report issues discovered in-line during flow processing. + The accept method is designed for directly reporting issues discovered - in-line during regular flow processing and not run as the result of running - a query. + in-line during regular flow processing and not run as the result of + running a query. - For example, the executor needs to check if a subflow creates a circular reference - in order to ensure that symbolic execution terminates, but the user may also be - looking for this information as a query result. + For example, the executor needs to check if a subflow creates a circular + reference in order to ensure that symbolic execution terminates, but + the user may also be looking for this information as a query result. - Therefore, we ensure that every query has an accept method that takes no - action and in case there is a query (e.g. that checks for subflow circular references) - then this query can override the accept method to handle it. + Therefore, we ensure that every query has an accept method that takes + no action by default. Queries that need to handle in-line issues (e.g., + checking for subflow circular references) can override this method. The query manager ensures that only instantiated queries can have their accept method called. + Args: + **kwargs: Variable keyword arguments for issue-specific data. + + Returns: + List of QueryResult instances if issues are found, None otherwise. """ return None @classmethod @abstractmethod def get_query_description(cls) -> QueryDescription: + """Get the description metadata for this query. + + Returns: + QueryDescription instance containing query metadata. + """ pass @abstractmethod def when_to_run(self) -> list[QueryAction]: + """Get the list of query actions that trigger this query. + + Returns: + List of QueryAction enums indicating when this query should run. + """ pass @abstractmethod def execute(self) -> list[QueryResult] | None: + """Execute the query and return results. + + Returns: + List of QueryResult instances if issues are found, None otherwise. + """ pass class Query(AbstractQuery, ABC): + """Base class for stateful queries that operate during flow execution. + + Stateful queries can access the current execution state, crawler, + and all states during flow processing. + """ @classmethod @abstractmethod def get_query_description(cls) -> QueryDescription: + """Get the description metadata for this query. + + Returns: + QueryDescription instance containing query metadata. + """ pass @abstractmethod def when_to_run(self) -> list[QueryAction]: + """Get the list of query actions that trigger this query. + + Returns: + List of QueryAction enums indicating when this query should run. + """ pass @abstractmethod def execute(self, - state: State=None, # the state has the flow_path variable + state: State = None, crawler: AbstractCrawler = None, - all_states = None) -> list[QueryResult] | None: + all_states: list[State] | None = None) -> list[QueryResult] | None: + """Execute the query with access to execution state. + + Args: + state: Current execution state (contains the flow_path variable). + crawler: Current crawler instance for flow traversal. + all_states: List of all execution states if available. + + Returns: + List of QueryResult instances if issues are found, None otherwise. 
+ """ pass class LexicalQuery(AbstractQuery, ABC): + """Base class for lexical queries that operate on flow structure. + + Lexical queries analyze the static structure of flows without requiring + execution state or crawling. + """ @abstractmethod def get_query_description(self) -> QueryDescription: + """Get the description metadata for this query. + + Returns: + QueryDescription instance containing query metadata. + """ pass @abstractmethod def when_to_run(self) -> list[QueryAction]: + """Get the list of query actions that trigger this query. + + Returns: + List of QueryAction enums indicating when this query should run. + """ pass @abstractmethod def execute(self, parser: FlowParser = None, - **kwargs - ) -> list[QueryResult] | None: + **kwargs) -> list[QueryResult] | None: + """Execute the lexical query on the flow parser. + + Args: + parser: FlowParser instance for the flow being analyzed. + **kwargs: Additional keyword arguments for query-specific data. + + Returns: + List of QueryResult instances if issues are found, None otherwise. + """ pass class AbstractFlowVector(ABC): + """Abstract base class for flow vectors tracking data influence paths. + + Flow vectors track how data flows through flow elements via influence + paths and property maps. + """ @property @abstractmethod def property_maps(self) -> dict[InfluencePath, dict[str, set[InfluencePath]]]: + """Get the property maps for this flow vector. + + Returns: + Dictionary mapping InfluencePath to property name to set of + InfluencePath instances. + """ pass @classmethod @abstractmethod def from_flows(cls, default: set[InfluencePath] = None) -> AbstractFlowVector: + """Create a flow vector from a set of influence paths. + + Args: + default: Optional set of InfluencePath instances to initialize + the vector with. + + Returns: + New AbstractFlowVector instance. + """ pass @abstractmethod def get_flows_by_prop(self, member_name: str | None = None) -> set[InfluencePath]: + """Get all influence paths for a specific property. + + Args: + member_name: Name of the property to get flows for. + If None, returns all flows. + + Returns: + Set of InfluencePath instances for the specified property. + """ pass @abstractmethod def add_vector(self, vector: AbstractFlowVector) -> AbstractFlowVector: - pass + """Add another flow vector to this one. + + Args: + vector: Flow vector to add to this one. + Returns: + New AbstractFlowVector instance with combined flows. + """ + pass @abstractmethod def push_via_flow(self, extension_path: InfluencePath, influenced_vec: AbstractFlowVector, assign: bool = True, cross_flow: bool = False) -> AbstractFlowVector: + """Push influence via a flow extension path. + + Args: + extension_path: InfluencePath to extend through. + influenced_vec: Flow vector being influenced. + assign: Whether this is an assignment operation. + cross_flow: Whether this crosses flow boundaries. + + Returns: + New AbstractFlowVector instance with extended influence paths. + """ pass class State(ABC): - """Stores DataInfluencePaths in the current execution step + """Stores DataInfluencePaths in the current execution step. + State objects track the current execution context including the current + element, parser, and data influence information. """ + @abstractmethod def get_parser(self) -> FlowParser: + """Get the flow parser for the current flow. + + Returns: + FlowParser instance for the current flow. + """ pass @abstractmethod def get_current_elem(self) -> ET.Element: + """Get the current flow element being executed. 
+ + Returns: + XML Element representing the current flow element. + """ pass @abstractmethod def get_current_elem_name(self) -> str: + """Get the name of the current flow element. + + Returns: + Name string of the current element. + """ pass @abstractmethod def get_flows_from_sources(self, influenced_var: str, source_vars: set[tuple[str, str]], restrict: str | None = None) -> set[InfluencePath] | None: + """Get influence paths from source variables to an influenced variable. + + Args: + influenced_var: Name of the variable being influenced. + source_vars: Set of (filename, element_name) tuples for source variables. + restrict: Optional restriction string to filter paths. + + Returns: + Set of InfluencePath instances from sources to the influenced variable, + or None if no paths found. + """ pass @abstractmethod def is_in_map(self, var_name: str) -> bool: + """Check if a variable is in the influence map. + + Args: + var_name: Name of the variable to check. + + Returns: + True if the variable is in the map, False otherwise. + """ pass class FlowParser(ABC): - """Exposes global information about the current flow - """ + """Exposes global information about the current flow. + FlowParser provides access to flow metadata, structure, and elements + for analysis and query execution. + """ @abstractmethod def get_all_named_elems(self) -> frozenset[ET.Element] | None: - pass + """Get all named elements in the flow. + Returns: + Frozenset of all named XML elements, or None if none found. + """ + pass @abstractmethod - def get_all_names(self) -> tuple[str,] | None: + def get_all_names(self) -> tuple[str, ...] | None: + """Get all element names in the flow. + + Returns: + Tuple of all element name strings, or None if none found. + """ pass @abstractmethod def get_effective_run_mode(self) -> RunMode: + """Get the effective run mode of the flow. + + Returns: + RunMode enum value indicating how the flow actually runs. + """ pass @abstractmethod def get_declared_run_mode(self) -> RunMode: + """Get the declared run mode of the flow. + + Returns: + RunMode enum value as declared in the flow definition. + """ pass @abstractmethod def get_api_version(self) -> str: + """Get the API version of the flow. + + Returns: + API version string. + """ pass @abstractmethod def get_all_traversable_flow_elements(self) -> list[ET.Element]: + """Get all traversable flow elements. + + Returns: + List of XML elements that can be traversed during execution. + """ pass @abstractmethod def get_all_variable_elems(self) -> list[ET.Element] | None: + """Get all variable elements in the flow. + + Returns: + List of variable XML elements, or None if none found. + """ pass @abstractmethod def get_start_elem(self) -> ET.Element: + """Get the starting element of the flow. + + Returns: + XML Element representing the flow start element. + """ pass @abstractmethod - def get_traversable_descendents_of_elem(self, elem_name: str) -> list[str]: - """Gets elements that are called (connected to) from elem_name. - Includes the original elem_name""" + def get_traversable_descendants_of_elem(self, elem_name: str) -> list[str]: + """Get elements that are called (connected to) from the given element. + + Args: + elem_name: Name of the element to get descendants for. + + Returns: + List of element names that are descendants, including the + original element name. + """ pass @abstractmethod def get_filename(self) -> str: + """Get the filename of the flow. + + Returns: + Filename string of the flow file. 
+ """ pass @abstractmethod def get_flow_name(self) -> str: + """Get the name of the flow. + + Returns: + Flow name string. + """ pass @abstractmethod - def get_flow_type(self)-> public.enums.FlowType: + def get_flow_type(self) -> public.enums.FlowType: + """Get the type of the flow. + + Returns: + FlowType enum value. + """ pass @abstractmethod def get_trigger_object(self) -> str | None: + """Get the trigger object for the flow. + + Returns: + Trigger object name string, or None if not applicable. + """ pass @abstractmethod - def get_trigger_type(self)-> public.enums.TriggerType: + def get_trigger_type(self) -> public.enums.TriggerType: + """Get the trigger type of the flow. + + Returns: + TriggerType enum value. + """ pass @abstractmethod def get_root(self) -> ET.Element: + """Get the root XML element of the flow. + + Returns: + Root XML Element of the flow document. + """ pass @abstractmethod def get_literal_var(self) -> VariableType: + """Get the literal variable type. + + Returns: + VariableType instance for literals. + """ pass @abstractmethod def get_traversable_inbound(self) -> dict[str, list[str]]: - """Returns dict from element name to list of all inbound element names - will be empty list if no inbound. + """Get inbound connections for all traversable elements. + + Returns: + Dictionary mapping element names to lists of inbound element names. + Returns empty list for elements with no inbound connections. """ pass @abstractmethod def get_action_call_map(self) -> dict[str, list[tuple[El, str]]] | None: - """Gets all actionCalls in the flow element - Returns: actionCall type -> (element, action name) + """Get all action calls in the flow. + + Returns: + Dictionary mapping action call types to lists of + (element, action_name) tuples, or None if none found. """ pass @abstractmethod def get_async_scheduled_paths(self) -> list[str]: + """Get paths for asynchronously scheduled flows. + + Returns: + List of flow paths that are scheduled asynchronously. + """ pass @abstractmethod - def resolve_by_name(self, name: str, path: str | None = None) -> Optional[(str, str, VariableType)]: + def resolve_by_name(self, name: str, path: str | None = None) -> Optional[tuple[str, str, VariableType]]: + """Resolve a variable or element by name. + + Args: + name: Name of the variable or element to resolve. + path: Optional path to scope the resolution. + + Returns: + Tuple of (filename, element_name, VariableType) if found, + None otherwise. + """ pass @abstractmethod def get_output_variables(self, path: str | None = None) -> set[tuple[str, str]]: + """Get output variables for the flow. + + Args: + path: Optional path to scope the search. + + Returns: + Set of (filename, element_name) tuples for output variables. + """ pass @abstractmethod def get_input_variables(self, path: str | None = None) -> set[tuple[str, str]]: - """Get Flow variables available for input + """Get flow variables available for input. - Returns: (filename, element_name) corresponding to all variables available for input - or None if none found. + Args: + path: Optional path to scope the search. + Returns: + Set of (filename, element_name) tuples for all variables + available for input, or None if none found. """ pass @abstractmethod def get_input_field_elems(self) -> set[ET.Element] | None: - """Named XML elements that are children of Screen Flow Input Text Elements + """Get named XML elements that are children of Screen Flow Input Text Elements. - .. 
Note:: Only returns variables from current flow - - Returns: None if none present in flow + .. note:: + Only returns variables from the current flow. + Returns: + Set of XML elements for input fields, or None if none present. """ pass @abstractmethod def get_by_name(self, name_to_match: str, scope: ET.Element | None = None) -> ET.Element | None: + """Get an element by name within an optional scope. + + Args: + name_to_match: Name of the element to find. + scope: Optional XML Element to scope the search within. + + Returns: + XML Element if found, None otherwise. + """ pass @abstractmethod - def get_tainted_inputs(self) -> set[tuple[str, str]] | None: + def get_tainted_inputs(self) -> set[tuple[str, str]] | None: + """Get tainted input variables. + + Returns: + Set of (filename, element_name) tuples for tainted inputs, + or None if none found. + """ pass \ No newline at end of file diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py b/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py index 4228c184..5cef0ddf 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py @@ -1,6 +1,7 @@ -""" -Custom xml parser +"""Custom XML parser for flow files. +This module provides custom XML parsing functionality with line number tracking +for flow metadata files. """ import sys @@ -9,36 +10,70 @@ def get_root(path: str) -> ET.Element: + """Parse an XML file and return the root element. + + Args: + path: Path to the XML file to parse. + + Returns: + Root XML Element with line number tracking. + """ return ET.parse(path, parser=LineNumberingParser()).getroot() def get_parent_map(root: ET.Element) -> dict[ET.Element, ET.Element]: + """Build a mapping from child elements to their parent elements. + + Args: + root: Root XML element to build the parent map for. + + Returns: + Dictionary mapping each child element to its parent element. + """ parent = {} for elem in root.iter(): for child in elem: parent[child] = elem return parent -def get_root_from_string(byte_str) -> ET.Element: +def get_root_from_string(byte_str: str | bytes) -> ET.Element: + """Parse XML from a string or bytes and return the root element. + + Args: + byte_str: XML string or bytes to parse. + + Returns: + Root XML Element with line number tracking. + """ return ET.fromstring(byte_str, parser=LineNumberingParser()) def to_string(elem: ET.Element) -> str: + """Convert an XML element to a cleaned string representation. + + Args: + elem: XML Element to convert. + + Returns: + Cleaned string representation of the element. + """ return clean_string(ET.tostring(elem, encoding='unicode').strip()) def clean_string(msg: str) -> str: - """ - removes namespace declarations from xml - 1) --> - 2) --> - 3) --> - 4) --> - 5) replace <ns0: with < and </ns0: with < + """Remove namespace declarations from XML strings. + + Performs the following transformations: + 1) ```` --> ```` + 2) ```` --> ```` + 3) ```` --> ```` + 4) ```` --> ```` + 5) Replace ``<ns0:`` with ``<`` and ``</ns0:`` with ``</`` + Args: - msg: string to clean + msg: String to clean. Returns: - cleaned string + Cleaned string with namespaces removed. Returns "start" if input is '*'. """ if not isinstance(msg, str): return msg @@ -54,7 +89,22 @@ def clean_string(msg: str) -> str: class LineNumberingParser(ET.XMLParser): + """XML parser that tracks line numbers and positions for elements. 
+ + Extends the standard XMLParser to add source location information + (line number, column number, byte index) to each parsed element. + """ + def _start(self, *args, **kwargs): + """Handle element start events and add position tracking. + + Args: + *args: Positional arguments passed to parent parser. + **kwargs: Keyword arguments passed to parent parser. + + Returns: + XML Element with source position attributes added. + """ # Here we assume the default XML parser which is expat # and copy its element position attributes into output Elements element = super(self.__class__, self)._start(*args, **kwargs) @@ -64,6 +114,15 @@ def _start(self, *args, **kwargs): return element def _end(self, *args, **kwargs): + """Handle element end events and add position tracking. + + Args: + *args: Positional arguments passed to parent parser. + **kwargs: Keyword arguments passed to parent parser. + + Returns: + XML Element with end position attributes added. + """ element = super(self.__class__, self)._end(*args, **kwargs) element._end_line_number = self.parser.CurrentLineNumber element._end_column_number = self.parser.CurrentColumnNumber diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/data_obj.py b/packages/code-analyzer-flow-engine/FlowScanner/public/data_obj.py index 8f413355..9f2b5f04 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/data_obj.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/data_obj.py @@ -1,5 +1,7 @@ -"""Definitions of data classes used for querying and reporting +"""Definitions of data classes used for querying and reporting. +This module contains the core data structures used throughout the flow scanner +for representing query results, influence paths, and flow metadata. """ from __future__ import annotations @@ -17,17 +19,28 @@ @dataclass(frozen=True) class JSONSerializable(ABC): + """Abstract base class for objects that can be serialized to JSON. - def to_dict(self): + Provides a default implementation of to_dict() that converts all + slot attributes to a dictionary. + """ + + def to_dict(self) -> dict: + """Convert the object to a dictionary representation. + + Returns: + Dictionary mapping slot attribute names to their values. + """ return {s: getattr(self, s) for s in self.__slots__} @dataclass(frozen=True, eq=True, slots=True) class InfluenceStatement: - """Represents a statement in which one variable influences - another, usually as the result of an assignment, - formula or template field, or builtin function. - These statement are the basic building blocks of dataflows. + """Represents a statement in which one variable influences another. + + Influence statements are usually the result of an assignment, formula, + template field, or builtin function. These statements are the basic + building blocks of dataflows. """ # Variable being influenced. @@ -81,13 +94,22 @@ class InfluenceStatement: # elements such as subflows, action calls, etc source_path: str - def to_dict(self): + def to_dict(self) -> dict: + """Convert the influence statement to a dictionary. + + Returns: + Dictionary representation with all string values cleaned. + """ return {s: clean_string(getattr(self, s)) for s in self.__slots__} @dataclass(frozen=True, eq=True, slots=True) class VariableType: - """This class contains type information for a variable""" + """Contains type information for a variable. + + Tracks metadata about variable types including data type, reference type, + collection status, and object/field information. 
+ """ # the tag (type of this object in metadata spec) tag: str @@ -133,54 +155,63 @@ class VariableType: @dataclass(frozen=True, eq=True, slots=True) class Preset: - # Publicly displayed in report file. - preset_name: str + """Represents a preset collection of queries to run. - # Publicly displayed in report file - leave none if you do not want this - preset_owner: str | None + Presets define which queries are executed during a scan. It's important + to report when a query was run and had no findings (e.g., for security reviews), + not just queries that found issues. - # The list of query names that are run. Not the list of queries which have findings, - # as it's important [e.g. for the security review] to report when a query was run and had no findings. + Attributes: + preset_name: Publicly displayed name in report file. + preset_owner: Publicly displayed owner in report file. None if not specified. + queries: Set of QueryDescription objects specifying which queries to run. + """ - # specify which dataflow queries are run on each Flow Element - # with enough information for users to understand the significance of not finding any issues - # for that query + preset_name: str + preset_owner: str | None queries: set[QueryDescription] - def to_dict(self): + def to_dict(self) -> dict: + """Convert the preset to a dictionary. + + Returns: + Dictionary representation of the preset. + """ return {s: str(getattr(self, s)) for s in self.__slots__} @dataclass(frozen=True, eq=True, slots=True) class QueryDescription: - # this is the id that occurs in the preset and is not displayed to the user. - query_id: str + """Metadata describing a query for reporting purposes. + + Attributes: + query_id: Internal ID used in presets, not displayed to users. + query_name: Prominently displayed name in table of contents and headings. + Must be unique for each preset. + severity: Severity level of issues found by this query. + query_description: Plaintext description appearing at the beginning of + results. One or two sentences recommended. Markup will be encoded. + help_url: Optional URL to documentation for secure patterns, remediation, + and false positive diagnosis. + query_version: Version string appearing in XML/HTML fields. Defaults to "0". + is_security: Whether this query detects security issues (True) or + code quality issues (False). Defaults to True. + """ - # This will be prominently displayed in the table of contents and as a heading - # Must be unique for each preset. See default query for examples. + query_id: str query_name: str - - # see description of severity enum severity: Severity - - # This will appear at the beginning of the list of results, under the query name. - # This must be plaintext (any markup will be encoded). - # One or two sentences should be sufficient - provide links if more detailed discussions - # are needed. query_description: str - - # Often developers will need assistance with secure patterns and remediation options, as - # well as false positive diagnosis. If this material is available online in published - # best practices (which it should be) then place a url link here. Optional. help_url: str | None = None - - # This will appear only in small xml/html fields query_version: str = "0" - - # Whether this query is for a security or code quality issue is_security: bool = True - def to_dict(self): + def to_dict(self) -> dict: + """Convert the query description to a dictionary. + + Returns: + Dictionary representation of the query description. 
+ """ return {s: str(getattr(self, s)) for s in self.__slots__} @@ -321,10 +352,11 @@ class InfluencePath: influenced_type_info: VariableType | None def report_influence_tuples(self) -> list[tuple[str, str]]: - """Returns simple chain of variables for high level analysis + """Get a simple chain of variables for high-level analysis. Returns: - list of (flow_filename, influenced_var_name) + List of (flow_filename, influenced_var_name) tuples representing + the influence chain. """ (df_start, df_end) = _get_end_vars(self) @@ -345,15 +377,14 @@ def report_influence_tuples(self) -> list[tuple[str, str]]: return start def short_report(self, arrows: bool = False, filenames: bool = False) -> str: - """Prints a short report of influence chain + """Generate a short text report of the influence chain. Args: - arrows: whether the report should use '->' (True) or commas - (False) for statement separators - filenames: whether the report should include filenames in the report + arrows: Whether to use '->' (True) or commas (False) for separators. + filenames: Whether to include filenames in the report. Returns: - string containing summary report. + String containing a summary report of the influence chain. """ if arrows: joiner = "->" @@ -370,22 +401,24 @@ def short_report(self, arrows: bool = False, filenames: bool = False) -> str: def combine(cls, start_flow: InfluencePath, end_flow: InfluencePath, cross_flow: bool = False, type_override: VariableType | None = None) -> InfluencePath: - """Combine two paths + """Combine two influence paths into a single path. + + Creates a new path where A influences C if start_flow is "A influences B" + and end_flow is "B influences C". Args: - start_flow: the new path starts with this influencer - end_flow: the new path ends with this flow's influenced - cross_flow: whether the end dataflow is in a different flow - type_override: specify type directly, otherwise we keep the - end_flow's type unchanged + start_flow: Path that provides the starting influencer. + end_flow: Path that provides the ending influenced variable. + cross_flow: Whether the end dataflow is in a different flow. + type_override: Optional type to use for the combined path. + If None, uses the end_flow's type. Returns: - A influences C if start_flow is A influences B, and end_flow is B - influences C + New InfluencePath combining both input paths. Raises: - ValueError if the influencers don't match up and crossflow is False as cross-flow - dataflows will have different names and filenames. + ValueError: If the influencers don't match up and cross_flow is False. + Cross-flow dataflows will have different names and filenames. """ if not cross_flow: @@ -414,55 +447,85 @@ def combine(cls, start_flow: InfluencePath, end_flow: InfluencePath, @dataclass(frozen=True, eq=True, slots=True) class BranchVisitor: + """Tracks state during control flow graph traversal. + + Attributes: + current_label: Current segment label being visited. + previous_label: Previous segment label, or None if at start. + loop_context: Tuple of (label, ConnType) pairs for loop context. + history: Previously visited segment labels in order. + token: List of (previous_label, current_label) tuples when visitor + was spawned, or None. + """ + current_label: str previous_label: str | None - loop_context: tuple[tuple[str, ConnType],...] = field(default_factory=tuple) - - #: previously visited segment labels (label1, label2, ..) + loop_context: tuple[tuple[str, ConnType], ...] = field(default_factory=tuple) history: tuple[str, ...] 
= field(default_factory=tuple) + token: tuple[tuple[str, str], ...] | None = None - #: list of (previous label, curr_label) when visitor was spawned - token: tuple[tuple[str,str], ...] | None = None + def to_dict(self) -> dict: + """Convert the branch visitor to a dictionary. - def to_dict(self): + Returns: + Dictionary representation of the branch visitor. + """ return {s: str(getattr(self, s)) for s in self.__slots__} @dataclass(frozen=True, eq=True, slots=True) class CrawlStep: + """Represents a single step in the flow crawl process. + + Attributes: + step: Step number in the crawl sequence. + visitor: BranchVisitor tracking traversal state. + element_name: Name of the flow element. + element_tag: XML tag of the flow element. + local_index: Position within the current segment. Defaults to 0. + """ + step: int visitor: BranchVisitor element_name: str element_tag: str - local_index: int = 0 # position within segment + local_index: int = 0 + + def to_dict(self) -> dict: + """Convert the crawl step to a dictionary. - def to_dict(self): + Returns: + Dictionary representation of the crawl step. + """ return {s: getattr(self, s) for s in self.__slots__} @dataclass(frozen=True, eq=True, slots=True) class Jump(JSONSerializable): - """Class representing a connector - + """Represents a connector (jump) in the control flow graph. + + Attributes: + src_name: Name of the element where the jump is located. + target: Name of the element the connector points to. + is_goto: True if this is a goto connector. + is_loop: True if this is a next-value (loop) connector. + is_no_more_values: True if this is a no-more-values connector. + is_fault: True if this is a fault connector. """ - # name of element where jump is located - src_name: str - # where connector points to + src_name: str target: str - - # true if goto connector is_goto: bool - - # true if next-value is_loop: bool - - # true if no more values connector is_no_more_values: bool - - # true if fault connector is_fault: bool def priority(self) -> int: - # lower is higher priority + """Get the priority of this jump for traversal. + + Lower numbers indicate higher priority. + + Returns: + Priority value (0 for loops, 1 for others). + """ if self.is_loop: return 0 else: @@ -470,7 +533,22 @@ def priority(self) -> int: class InfluenceStatementEncoder(json.JSONEncoder): + """JSON encoder for InfluenceStatement objects. + + For public display, replaces flow_path with source_path to correctly + display transmission elements. + """ + def default(self, obj): + """Encode an object to JSON. + + Args: + obj: Object to encode. + + Returns: + Dictionary representation for InfluenceStatement objects, + otherwise falls back to default JSON encoding. + """ if isinstance(obj, InfluenceStatement): raw_dict = obj.to_dict() # For public display, we replace flow_path with source_path @@ -485,7 +563,18 @@ def default(self, obj): class PresetEncoder(json.JSONEncoder): + """JSON encoder for Preset and QueryDescription objects.""" + def default(self, obj): + """Encode an object to JSON. + + Args: + obj: Object to encode. + + Returns: + Dictionary representation for Preset/QueryDescription objects, + otherwise falls back to default JSON encoding. + """ if isinstance(obj, Preset) or isinstance(obj, QueryDescription): return obj.to_dict() else: @@ -493,11 +582,28 @@ def default(self, obj): def _get_end_vars(df: InfluencePath) -> tuple[str, str]: + """Get the start and end variable names from an influence path. + + Args: + df: InfluencePath to extract variables from. 
+ + Returns: + Tuple of (influencer_var, influenced_var) with property names included. + """ return (_recover_var(df.influencer_name, df.influencer_property), _recover_var(df.influenced_name, df.influenced_property)) -def _recover_var(name: str, prop: str) -> str: +def _recover_var(name: str, prop: str | None) -> str: + """Recover a full variable name from name and optional property. + + Args: + name: Base variable name. + prop: Optional property name. + + Returns: + Full variable name (e.g., "Account.Name" if prop is "Name"). + """ if prop is None: return name else: diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/enums.py b/packages/code-analyzer-flow-engine/FlowScanner/public/enums.py index 7b4c3a97..1e0505f9 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/enums.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/enums.py @@ -1,10 +1,22 @@ -"""Public Enum types +"""Public Enum types. +This module defines all enumeration types used throughout the flow scanner +for representing flow metadata, query actions, severity levels, and more. """ from enum import Enum, EnumMeta class MetaEnum(EnumMeta): + """Metaclass for enums that support case-insensitive string membership checks.""" + def __contains__(cls, item): + """Check if an item is in the enum, with case-insensitive string support. + + Args: + item: Item to check for membership. + + Returns: + True if item is a valid enum value (case-insensitive for strings). + """ if isinstance(item, str): try: cls(item.lower()) @@ -14,6 +26,7 @@ def __contains__(cls, item): return super.__contains__(cls, item) class BaseEnum(Enum, metaclass=MetaEnum): + """Base enum class with case-insensitive string membership support.""" pass class TransformType(BaseEnum): @@ -23,48 +36,26 @@ class TransformType(BaseEnum): Count = "count" class ComplexValueType(BaseEnum): - """Class the identifies JSON structure schema of complexValue + """Identifies JSON structure schema of complexValue elements. + Complex values in flows can have various structures. This enum identifies + the different types of complex value schemas. 
""" - """ - "resourceTemplate": text_template (with merge-fields) - """ + ResourceDescriptor = "resourcedescriptor" + """Resource template with merge-fields: "resourceTemplate": text_template""" - """ - "name": "myAccVar.Name", - "resourceType": "SObjectField", - "resourceName": "Account", - "resourceField": "Name", - "collection": false - """ ResourceAnnotationMap = "resourceannotationmap" + """Resource annotation map with name, resourceType, resourceName, etc.""" - """ - "dataType": "SObject", - "objectType": "MyObj__c", - "fieldReferences": ["FieldA__c", "FieldB__c"], - "elementReference": "Get_Override_Time_Entries" - """ FieldReference = "fieldreference" + """Field reference with dataType, objectType, fieldReferences, elementReference""" - """ - "dataType": "SObject", - "objectType": "MyObj__c", - "fieldReferences": ["FieldA__c", "FieldB__c"], - "elementReference": "Get_Override_Time_Entries" - """ ComplexObjectFieldDetails = "complexobjectfielddetails" + """Complex object field details with dataType, objectType, fieldReferences""" - """ - "leftElementReference":"ContentVersions", - "leftJoinKeys":["Id"], - "leftSelectedFields":["ContentDocumentId"], - "rightElementReference":"Deserialize_File_Upload.fileUpload.files", - "rightJoinKeys":["contentVersionId"], - "rightSelectedFields":["name"] - """ JoinDefinition = "joindefinition" + """Join definition with left/right element references, join keys, and selected fields""" class FlowType(Enum): diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/flow_scanner_exceptions.py b/packages/code-analyzer-flow-engine/FlowScanner/public/flow_scanner_exceptions.py index b17778be..c488af6b 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/flow_scanner_exceptions.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/flow_scanner_exceptions.py @@ -1,18 +1,47 @@ +"""Exception classes for flow scanner errors.""" + class FlowScannerException(Exception): - """base class for all exceptions raised by Flowtest""" + """Base class for all exceptions raised by Flow Scanner. + + All custom exceptions in the flow scanner inherit from this class. + """ + def __init__(self, *args): + """Initialize the exception. + + Args: + *args: Exception message arguments. + """ super().__init__(*args) class InvalidFlowException(FlowScannerException): - """Raised when there is something wrong with the flow file, - such as invalid xml, invalid filename, missing structure, etc.""" + """Raised when there is something wrong with a flow file. + + This exception is raised for issues such as invalid XML, invalid filename, + missing structure, or other flow file problems. + + Attributes: + flow_path: Path to the flow file that caused the error. + """ + def __init__(self, *args, **kwargs): + """Initialize the exception. + + Args: + *args: Exception message arguments. + **kwargs: Keyword arguments, may include 'flow_path'. + """ super().__init__(*args) self.flow_path = kwargs.get('flow_path') - def __str__(self): + def __str__(self) -> str: + """Get string representation of the exception. + + Returns: + Exception message with flow path appended. 
+ """ return f"{super().__str__()} flow path: {self.flow_path}" diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/parse_utils.py b/packages/code-analyzer-flow-engine/FlowScanner/public/parse_utils.py index 0756aac4..23d18d63 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/parse_utils.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/parse_utils.py @@ -1,6 +1,6 @@ -"""Public Utility module for xml queries on flows. +"""Public Utility module for XML queries on flows. - The goal is to move all xml queries into this module, + The goal is to move all XML queries into this module, so they can be shared by other modules and made available to third parties in custom query development. @@ -9,7 +9,7 @@ has access to the flow root and does global analysis on flows, whereas the utility functions here are stateless. - If you find yourself doing manual xml queries, look in this module + If you find yourself doing manual XML queries, look in this module first and add a method if one isn't already present. """ @@ -187,12 +187,15 @@ def parse_expression(txt: str) -> list[str]: - """ + """Parse merge-fields from an expression or template string. + + Extracts variable references in the format {!variableName} from the input string. + Args: - txt: expression or template definition string in which merge-fields are present + txt: Expression or template definition string containing merge-fields. Returns: - List of elementRef names (empty list if no matches) + List of element reference names (empty list if no matches found). """ accum = [] res = re.finditer(reg, txt) @@ -202,6 +205,14 @@ def parse_expression(txt: str) -> list[str]: def get_tag(elem: El) -> str: + """Get the tag name of an element without the namespace prefix. + + Args: + elem: XML Element to get the tag from. + + Returns: + Tag name without namespace, or empty string if not an Element. + """ if isinstance(elem, El): return elem.tag[NS_LEN:] # elif isinstance(elem, ET._Comment): @@ -210,8 +221,18 @@ def get_tag(elem: El) -> str: return '' def get_text_of_tag(elem: El, tag_name: str) -> str | None: - """look for a single child elem (does not recurse) with the specified tag_name and return the text. - returns None if there is not exactly one child elem with the specified tag_name or if it has no text.""" + """Get text content from a single child element with the specified tag. + + Does not recurse - only looks at direct children. + + Args: + elem: Parent element to search. + tag_name: Tag name to find (without namespace). + + Returns: + Text content of the child element, or None if there is not exactly + one child with the tag or if it has no text. + """ res = get_by_tag(elem, tag_name) if len(res) == 1 and res[0] is not None: r = res[0].text @@ -223,6 +244,14 @@ def get_text_of_tag(elem: El, tag_name: str) -> str | None: def is_subflow(elem: El) -> bool: + """Check if an element is a subflow element. + + Args: + elem: XML Element to check. + + Returns: + True if the element is a subflow, False otherwise. + """ if elem is None: return False return get_tag(elem) == 'subflows' @@ -232,7 +261,7 @@ def is_loop(elem: El) -> bool: """Is this a Loop Flow Element? Args: - elem: xml element + elem: XML element Returns: True if this is a loop element @@ -243,14 +272,14 @@ def is_loop(elem: El) -> bool: def is_goto_connector(elem: El) -> bool | None: - """Is this element a goto? + """Check if an element is a goto connector. Args: - elem: connector element + elem: Connector element to check. 
Returns: - whether this is a goto element, - None if child has no tag or no children + True if this is a goto connector, False if not, None if element + has no tag or no children. """ for child in elem: if get_tag(child) == 'isGoTo': @@ -261,49 +290,58 @@ def is_goto_connector(elem: El) -> bool | None: def is_decision(elem: El) -> bool: - """True if this is a decision Flow Element + """Check if an element is a decision flow element. Args: - elem: Flow Element + elem: Flow element to check. Returns: - True if decision + True if this is a decision element, False otherwise. """ return get_tag(elem) == 'decisions' def get_by_tag(elem: El, tag_name: str) -> list[El]: - """Get list of all elem with the tag (ignoring ns). + """Get list of all child elements with the specified tag (ignoring namespace). - Convenience method as manually dealing with namespaces is clumsy. + Convenience method for dealing with namespaced XML. Does not recurse into + nested elements. - Notes: - WARNING: does not recurse. Use this for top level flow - elements -- e.g. screens, variables + .. warning:: + Does not recurse. Use this for top-level flow elements (e.g., screens, variables). - Returns: - XML Elements else [] if no matches + Args: + elem: Parent element to search. + tag_name: Tag name to find (without namespace). + Returns: + List of XML Elements with the tag, or empty list if no matches. """ return elem.findall(f'./{ns}{tag_name}') -def get_named_elems(elem: El) -> list[El]: - """Get all descendents (recursive) of elem that have a ``name`` tag +def get_named_elems(elem: El) -> frozenset[El]: + """Get all descendants (recursive) of elem that have a name tag. Args: - elem: base element whose children to search + elem: Base element whose descendants to search. Returns: - [elem] or [] if None found - + Frozenset of named elements, excluding processMetadataValues. """ named = elem.findall(f'.//{ns}name/..') to_return = [x for x in named if get_tag(x) != 'processMetadataValues'] - return to_return + return frozenset(to_return) def get_name(elem: El | None) -> str | None: - """returns the string name of elem or None if no name or '*'""" + """Get the string name of an element. + + Args: + elem: XML Element to get the name from. + + Returns: + Element name string, '*' for start elements, or None if no name found. + """ if elem is None: return None name = elem.find(f'{ns}name') @@ -316,6 +354,14 @@ def get_name(elem: El | None) -> str | None: def get_elem_string(elem: El) -> str | None: + """Get the string representation of an XML element. + + Args: + elem: XML Element to convert. + + Returns: + String representation of the element, or empty string if None. + """ if elem is None: return '' else: @@ -323,11 +369,27 @@ def get_elem_string(elem: El) -> str | None: def get_line_no(elem: El) -> int: + """Get the source line number of an element. + + Args: + elem: XML Element with source line information. + + Returns: + Source line number where the element appears. + """ # noinspection PyUnresolvedReferences return elem.sourceline def get_start_element(root: El) -> El | None: + """Get the start element from a flow root. + + Args: + root: Root XML element of the flow. + + Returns: + Start element if found, None otherwise. + """ start_elements = START_ELEMS start_res = {x: get_by_tag(root, x) for x in start_elements} @@ -337,7 +399,15 @@ def get_start_element(root: El) -> El | None: return None -def get_subflow_name(subflow): +def get_subflow_name(subflow: El) -> str | None: + """Get the name of a subflow element. 
+
+    Args:
+        subflow: Subflow XML element.
+
+    Returns:
+        Subflow name string, or None if not found.
+    """
     sub_name_el = get_by_tag(subflow, "flowName")
     if sub_name_el is None or len(sub_name_el) == 0:
         sub_name_el = get_by_tag(subflow, "subflowName")
@@ -349,13 +419,15 @@

 def get_assignment_statement_dicts(elem: El) -> list[tuple[str, dict[str, str]]] | None:
-    """Returns assignment statement keywords in 'assignments' elems
+    """Extract assignment statement data from an assignments element.
+
     Args:
-        elem: elem to parse, should have a tag of "assignments"
+        elem: Element to parse, should have a tag of "assignments".
 
     Returns:
-        [(operator, dict)] where dict is suitable for constructing
-        DataInfluenceStatements via args unpack passed to the constructor.
+        List of (operator, dict) tuples where dict is suitable for constructing
+        DataInfluenceStatements via argument unpacking. Returns None if no
+        assignments found.
     """
     if get_tag(elem) == "assignments":
         elem_name = get_name(elem)
@@ -373,28 +445,28 @@ def get_assignment_statement_dicts(elem: El) -> list[tuple[str, dict[str, str]]]


 def get_filters(elem: El) -> list[El]:
-    """Find all filter elements
+    """Find all filter elements recursively.
 
     Searches recursively to find all <filters> elements that are children
-    of the current elem
+    of the current element.
 
     Args:
-        elem: element to search
+        elem: Element to search.
 
     Returns:
-        list of xml elements
-
+        List of filter XML elements.
     """
     return elem.findall(f'.//{ns}filters')


-def get_transform_influencers(transform_elem: El) -> list[tuple[TransformType,str|None,tuple[str, ...]]] | None:
-    """Converts transform elem to a list of tuples [(transform_type, outputAPI field (or None), tuple(influencer_names)]
+def get_transform_influencers(transform_elem: El) -> list[tuple[TransformType, str | None, tuple[str, ...]]] | None:
+    """Convert transform element to a list of influencer tuples.
+
     Args:
-        transform_elem: top level elem to process
+        transform_elem: Top-level transform element to process.
 
     Returns:
-        [(transform_type, influenced_name, tuple(influencer_names, ...))]
-
+        List of (transform_type, outputAPI_field, tuple(influencer_names)) tuples,
+        or None if no transform values found.
     """
     if transform_elem is None:
         logger.error("called get_transform_influencers will null element")
@@ -502,31 +574,25 @@ def get_transform_influencers(transform_elem: El) -> list[tuple[TransformType,st
     return None


 def get_vars_from_value(elem: El,
-                        expr_parser :Callable[[str], list[str]]=parse_expression) -> dict[str, list[str]] | None:
-    """accepts , , or element and returns a list
-    of variables that influence this element.
-    * The variables are not normalized, e.g. "foo.Name" will appear.
-    * In the case of inner join complex values, further processing
-      is needed to resolve the join tables
+                        expr_parser: Callable[[str], list[str]] = parse_expression) -> dict[str, list[str]] | None:
+    """Extract variables that influence a value element.
+
+    Accepts , , or elements and returns
+    variables that influence them. Variables are not normalized (e.g., "foo.Name"
+    will appear). For inner join complex values, further processing is needed
+    to resolve join tables.
 
     Args:
-        expr_parser (callable): method to parse expressions (default regexp is provided)
-        elem: (El): element
+        elem: element or similar value element.
+        expr_parser: Callable method to parse expressions (default regexp provided).
 
     Returns:
-        a dict tag_name: list[variable names]
-        where tag_name is the tag of the child element of value holding the reference unless
-        this is a complexValue, in which case the tag_name contains refined information:
-            'ComplexValueType.FieldReference': ['var1', 'var2']
-            'ComplexValueType.FieldReference': ['var1', 'var2']
-            'ComplexValueType.JoinDefinition.leftJoinKeys: ['var1', 'var2']
-            'ComplexValueType.JoinDefinition.rightJoinKeys: ['var1', 'var2']
-            'ComplexValueType.JoinDefinition.leftElementReference': ['var1']
-            'ComplexValueType.JoinDefinition.rightElementReference': ['var1']
-            'ComplexValueType.JoinDefinition.leftSelectedFields': ['var1']
-            'ComplexValueType.JoinDefinition.rightSelectedFields': ['var1']
-
-        If there are no variable influencers, the None is returned.
+        Dictionary mapping tag_name to list of variable names. For complex values,
+        tag_name contains refined information like:
+        - 'ComplexValueType.FieldReference': ['var1', 'var2']
+        - 'ComplexValueType.JoinDefinition.leftJoinKeys': ['var1', 'var2']
+        - 'ComplexValueType.JoinDefinition.leftElementReference': ['var1']
+        Returns None if no variable influencers found.
     """
     if elem is None:
         logger.error("called 'get_vars_from_value' with null input")
@@ -551,8 +617,22 @@ def get_vars_from_value(elem: El,
     return None


 def _process_val_child(elem: El, el_tag: str, parent_el: El,
-                       expr_parser :Callable[[str], list[str]]=parse_expression) -> dict[str, list[str]] | None:
+                       expr_parser: Callable[[str], list[str]] = parse_expression) -> dict[str, list[str]] | None:
+    """Process a child element of a value element to extract variable references.
+
+    Handles various value types including element references, string values,
+    complex values, and transform value references.
+
+    Args:
+        elem: Child element to process.
+        el_tag: Tag name of the child element (without namespace).
+        parent_el: Parent element containing the child.
+        expr_parser: Callable to parse expressions (default: parse_expression).
+
+    Returns:
+        Dictionary mapping tag names to lists of variable names, or None if
+        no variables found or element has no text.
+    """
     raw_data = elem.text
     if raw_data is None or len(raw_data) == 0:
         return None
@@ -672,25 +752,24 @@ def _process_val_child(elem: El, el_tag: str, parent_el: El,


 def get_input_assignments(elem: El) -> list[El]:
-    """Find all input assignments
+    """Find all input assignment elements recursively.
 
-    Searches recursively to find all <inputAssignments> elements that are children
-    of the current elem
+    Searches recursively to find all <inputAssignments> elements that are
+    children of the current element.
 
     Args:
-        elem: element to search
+        elem: Element to search.
 
     Returns:
-        list of xml elements
-
+        List of input assignment XML elements.
     """
     return elem.findall(f'.//{ns}inputAssignments')


 def get_sinks_from_field_values(elems: list[El]) -> list[tuple[str, str | None, str]]:
-    """Find variables that flow into field/value pairs
+    """Find variables that flow into field/value pairs.
 
-    E.g.if a recordLookup field has a filter::
+    For example, if a recordLookup field has a filter::
 
         <filters>
            <field>Name</field>
            <operator>Contains</operator>
            <value>
               <elementReference>var3</elementReference>
            </value>
        </filters>
 
-    then this would return [('Name', 'Contains', 'var3')]
+    then this would return [('Name', 'Contains', 'var3')].
 
     This strategy also works for inputAssignments::
 
         <inputAssignments>
            <field>Company</field>
            <value>
               <elementReference>Company</elementReference>
            </value>
        </inputAssignments>
 
-    then this would return [('Company', None, 'Company')]
+    then this would return [('Company', None, 'Company')].
 
-    Notes:
-        This should be added later.
 
     Args:
-        elems: inputAssignment or field selection criteria xml elements.
+        elems: Input assignment or field selection criteria XML elements.
 
     Returns:
-        ``list[(field_name, op, influencer_name)]`` (an empty list if no sinks are found)
-
+        List of (field_name, operator, influencer_name) tuples.
+        Returns empty list if no sinks are found.
     """
     accum = []
     for a_filter in elems:
@@ -751,19 +828,23 @@ def get_sinks_from_field_values(elems: list[El]) -> list[tuple[str, str | None,
     return accum


 def process_output_assignments(elem: El) -> list[tuple[str, str]]:
-    """Searches elem recursively and pulls out doubles of the form:
+    """Extract output assignments from an element recursively.
+
+    Searches element recursively and extracts pairs of the form::
+
         <outputAssignments>
            <assignToReference>WorkItemID</assignToReference>
            <field>Id</field>
        </outputAssignments>
 
-    returning a list of doubles [('Id', 'WorkItemID')]
-
-    if none found, it returns the empty list []
+    returning a list of tuples [('Id', 'WorkItemID')].
 
-    :param elem: to search (recursively)
-    :return: list of triples (influencer field, (influenced) assignTo field)
+    Args:
+        elem: Element to search recursively.
 
+    Returns:
+        List of (influencer_field, assignTo_field) tuples.
+        Returns empty list if none found.
     """
     elems = elem.findall(f'.//{ns}outputAssignments')
     accum = []
@@ -780,37 +861,43 @@ def process_output_assignments(elem: El) -> list[tuple[str, str]]:
     return accum


 def get_field_op_values_from_elem(elem: El, tag: str) -> list[tuple[str, str | None, str]]:
-    """
-    Searches elem recursively for tag, and the pull-out triples of the form:
-
-        <field>foo</field>
-        <operator>Contains</operator>
-        <value>
-            <elementReference>bar</elementReference>
-        </value>
+    """Extract field/operator/value triples from elements with a specific tag.
 
-    returning a list of triples [('foo', 'Contains', 'bar')]
+    Searches element recursively for the tag and extracts triples of the form::
 
-    if none found, it returns the empty list
+
+        <field>foo</field>
+        <operator>Contains</operator>
+        <value>
+            <elementReference>bar</elementReference>
+        </value>
+
 
-    :param elem: to search (recursively)
-    :param tag: tag that must be a descendent of elem
-    :return: list of triples (field_name, operator, influencer_name)
+    returning a list of triples [('foo', 'Contains', 'bar')].
+
+    Args:
+        elem: Element to search recursively.
+        tag: Tag name that must be a descendant of elem.
+
+    Returns:
+        List of (field_name, operator, influencer_name) triples.
+        Returns empty list if none found.
     """
     elems = elem.findall(f'.//{ns}{tag}')
     return get_sinks_from_field_values(elems)


 def get_conn_target_map(elem: El) -> dict[El, tuple[str, ConnType, bool]] | None:
-    """Get a connector map that also works for all possible start elements
+    """Get a connector map that works for all possible start elements.
 
     Args:
-        elem: element to search for connectors
+        elem: Element to search for connectors.
 
     Returns:
-        connector map (connector elem: name of target, type of connector, is_optional)
-
-        optional connectors are ones that need not be followed, e.g. in a decision.
-        If an element contains only optional connectors, then it may be a terminal element
+        Dictionary mapping connector elements to (target_name, connector_type, is_optional).
+        Optional connectors are ones that need not be followed (e.g., in a decision).
+        If an element contains only optional connectors, it may be a terminal element.
+        Returns None if element is None.
""" if elem is None: return None @@ -848,13 +935,14 @@ def get_conn_target_map(elem: El) -> dict[El, tuple[str, ConnType, bool]] | None def _get_conn_target_map(elem: El) -> dict[El, tuple[str, ConnType, bool]]: - """returns map from connectors at elem to where they point + """Get map from connectors at element to where they point. Args: - elem: base element containing connectors (Flow Element) + elem: Base element containing connectors (Flow Element). Returns: - connector element -> target reference (string), connector type, is_optional (True if connector is optional) + Dictionary mapping connector elements to (target_reference, connector_type, is_optional). + Returns empty dict if element is None. """ if elem is None: return {} @@ -925,6 +1013,15 @@ def _get_conn_target_map(elem: El) -> dict[El, tuple[str, ConnType, bool]]: # def is_assign_null(elem: El) -> bool | None: + """Check if an element has assignNullValuesIfNoRecordsFound set. + + Args: + elem: XML Element to check. + + Returns: + True if assignNullValuesIfNoRecordsFound is 'true', False if 'false', + None if the field is missing. + """ res = elem.find(f'{ns}assignNullValuesIfNoRecordsFound') if res is None: return None @@ -932,8 +1029,15 @@ def is_assign_null(elem: El) -> bool | None: def is_auto_store(elem: El) -> bool | None: - # None if the field is missing or can't be parsed - # otherwise true or false + """Check if an element has storeOutputAutomatically set. + + Args: + elem: XML Element to check. + + Returns: + True if storeOutputAutomatically is 'true', False if 'false', + None if the field is missing or can't be parsed. + """ res = elem.find(f'{ns}storeOutputAutomatically') if res is None: return None @@ -941,8 +1045,15 @@ def is_auto_store(elem: El) -> bool | None: def is_collection(elem: El) -> bool | None: - # None if the field is missing or can't be parsed - # otherwise true or false + """Check if an element represents a collection. + + Args: + elem: XML Element to check. + + Returns: + True if isCollection is 'true', False if 'false', + None if the field is missing or can't be parsed. + """ res = elem.find(f'{ns}isCollection') if res is None: return None @@ -950,6 +1061,14 @@ def is_collection(elem: El) -> bool | None: def get_input_fields(elem: El) -> set[El] | None: + """Get all input field elements from a flow element. + + Args: + elem: Element to search for input fields. + + Returns: + Set of input field XML elements, or None if none found. + """ accum = set() elems = elem.findall(f'.//{ns}fields') for el in elems: @@ -964,6 +1083,14 @@ def get_input_fields(elem: El) -> set[El] | None: def get_obj_name(elem: El) -> str | None: + """Get the object name from an element. + + Args: + elem: XML Element to extract object name from. + + Returns: + Object name string, or None if not found. + """ object_name = elem.find(f'{ns}object') if object_name is None: return None @@ -971,6 +1098,14 @@ def get_obj_name(elem: El) -> str | None: def get_output_reference(elem: El) -> str | None: + """Get the output reference from an element. + + Args: + elem: XML Element to extract output reference from. + + Returns: + Output reference string, or None if not found. + """ object_name = elem.find(f'{ns}outputReference') if object_name is None: return None @@ -978,6 +1113,14 @@ def get_output_reference(elem: El) -> str | None: def get_datatype(elem: El) -> DataType | None: + """Get the data type from an element. + + Args: + elem: XML Element to extract data type from. 
+ + Returns: + DataType enum value, or None if not found or unrecognized. + """ obj_ = elem.find(f'{ns}dataType') if obj_ is None: return None @@ -996,6 +1139,15 @@ def get_datatype(elem: El) -> DataType | None: def is_get_first_record_only(elem: El) -> bool | None: + """Check if an element has getFirstRecordOnly set. + + Args: + elem: XML Element to check. + + Returns: + True if getFirstRecordOnly is 'true', False if 'false', + None if the field is missing. + """ res = elem.find(f'{ns}getFirstRecordOnly') if res is None: return None @@ -1003,11 +1155,27 @@ def is_get_first_record_only(elem: El) -> bool | None: def is_input(elem: El) -> bool: + """Check if an element is marked as an input. + + Args: + elem: XML Element to check. + + Returns: + True if isInput is 'true', False otherwise. + """ res = get_by_tag(elem, 'isInput') return len(res) > 0 and res[0].text == 'true' def is_output(elem: El) -> bool: + """Check if an element is marked as an output. + + Args: + elem: XML Element to check. + + Returns: + True if isOutput is 'true', False otherwise. + """ res = get_by_tag(elem, 'isOutput') return len(res) > 0 and res[0].text == 'true' @@ -1020,18 +1188,15 @@ def is_output(elem: El) -> bool: def _process_assignment_item(elem: El) -> tuple[str, dict[str, str]] | None: - """Returns assignment item dict from assignment element + """Extract assignment item data from an assignmentItem element. Args: - elem: (not a top Flow element) but an assignmentItem elem + elem: AssignmentItem element (not a top-level Flow element). Returns: - ::{ 'influenced_var': var_name, 'influencer_var': var_name or - STRING_LITERAL_TOKEN, 'line_no': int, 'source_text': str - assignmentItem code , 'comment': "Variable Assignment", - } - which is all keywords needed to construct DataInfluenceStatement - except for 'element_name' + Tuple of (operator, dict) where dict contains all keywords needed to + construct DataInfluenceStatement except 'element_name'. Returns None + if processing fails. """ # This must match DataInfluenceStatement constructor entry = { @@ -1066,6 +1231,15 @@ def _process_assignment_item(elem: El) -> tuple[str, dict[str, str]] | None: def _get_value(el: El) -> str | None: + """Extract value from a value element. + + Args: + el: Value element to extract from. + + Returns: + Element reference text if found, STRING_LITERAL_TOKEN if other value type, + None if no children. + """ for child in el: if get_tag(child) == 'elementReference': return child.text @@ -1074,10 +1248,16 @@ def _get_value(el: El) -> str | None: return None -def get_subflow_output_map(subflow: El) -> tuple[bool, dict[str,str]]: - """returns a tuple (bool:, map: child name --> parent name) - where the first return value is true if outputs are automatically assigned - in which case they are flow_name.flow_var +def get_subflow_output_map(subflow: El) -> tuple[bool, dict[str, str]]: + """Get the output mapping for a subflow. + + Args: + subflow: Subflow XML element. + + Returns: + Tuple of (auto_assign, mapping) where: + - auto_assign: True if outputs are automatically assigned (flow_name.flow_var format) + - mapping: Dictionary mapping child variable names to parent variable names """ auto = False mappings = {} @@ -1098,27 +1278,28 @@ def get_subflow_output_map(subflow: El) -> tuple[bool, dict[str,str]]: def get_subflow_input_map(subflow: El) -> dict[str, str]: - """Returns a map from caller variable to variable in called flow + """Get the input mapping for a subflow. + + Returns a map from caller variable to variable in called flow. 
-    E.g. in this example::
-
-        <inputAssignments>
-            <name>input_var1</name>
-            <value>
-                <elementReference>parent_input_var</elementReference>
-            </value>
-        </inputAssignments>
-
+    For example, in this input assignment::
+
+        <inputAssignments>
+            <name>input_var1</name>
+            <value>
+                <elementReference>parent_input_var</elementReference>
+            </value>
+        </inputAssignments>
+
-    we return::
+    we return::
 
-        'parent_input_var' (name in parent) -> 'input_var1' (name in child)
+        'parent_input_var' (name in parent) -> 'input_var1' (name in child)
 
     Args:
-        subflow: XML Element
+        subflow: Subflow XML Element.
 
     Returns:
-        map from parent output_variable name to child input variable
-        name
+        Dictionary mapping parent variable names to child input variable names.
     """
     accum = dict()
     inputs = get_by_tag(subflow, "inputAssignments")
@@ -1136,6 +1317,15 @@ def get_subflow_input_map(subflow: El) -> dict[str, str]:
     return accum


 def _get_tags(root: El, tags: list[str]) -> list[str]:
+    """Extract text content from elements with specified tags.
+
+    Args:
+        root: Root element to search recursively.
+        tags: List of tag names (without namespace) to find.
+
+    Returns:
+        List of non-empty text content from matching elements.
+    """
     accum = []
     for tag in tags:
         res = root.findall(f'.//{ns}{tag}')
@@ -1145,6 +1335,17 @@ def _get_tags(root: El, tags: list[str]) -> list[str]:
     return accum


 def get_all_flow_refs(root: El) -> list[str]:
+    """Get all flow variable references from a flow root element.
+
+    Extracts references from both direct reference holders and expression
+    reference holders (which may contain merge-fields).
+
+    Args:
+        root: Root XML element of the flow.
+
+    Returns:
+        List of unique variable reference names.
+    """
     accum = _get_tags(root, tags=DIRECT_REF_HOLDERS)
     expressions = _get_tags(root, tags=EXPRESSION_REF_HOLDERS)
     for expr in expressions:
@@ -1154,19 +1355,30 @@ def get_all_flow_refs(root: El) -> list[str]:


 def rid_item(msg: str) -> str:
+    """Remove [$EachItem] token from a string.
+
+    Args:
+        msg: String to process.
+
+    Returns:
+        String with [$EachItem] removed.
+    """
     return msg.replace('[$EachItem]', '')


-def recursive_parse(my_obj, parse_callable=parse_expression, accum=None) -> None:
-    """walks through json objs and applies the parse_callable to values
+def recursive_parse(my_obj: dict | list | str, parse_callable: Callable[[str], list[str]] = parse_expression, accum: list[str] | None = None) -> None:
+    """Walk through JSON objects and apply parse_callable to string values.
+
+    Recursively traverses dictionaries, lists, and strings, applying the
+    parse_callable to all string values found.
 
     Args:
-        my_obj (obj): JSON object
-        parse_callable (Callable): callable to parse strings
-        accum (list[str]): list of strings that values are added to
+        my_obj: JSON object (dict, list, or str) to parse.
+        parse_callable: Callable to parse strings (default: parse_expression).
+        accum: List to accumulate results in. If None, creates a new list.
+            Modified in place.
 
     Returns:
-        None (accum is changed in place)
-
+        None (results are added to accum in place).
     """
     if accum is None:
         my_accum = []
@@ -1188,7 +1400,18 @@ def recursive_parse(my_obj, parse_callable=parse_expression, accum=None) -> None
     return None


-def quick_validate(flow_path: str) -> bool:
+def quick_validate(flow_path: str) -> bool | None:
+    """Quickly validate a flow file by checking for required and banned tags.
+
+    Performs a fast string-based check without full XML parsing.
+
+    Args:
+        flow_path: Path to the flow file to validate.
+
+    Returns:
+        True if flow has a start element and no banned elements,
+        False if validation fails, None if file not found.
+ """ has_start = False has_banned = False try: @@ -1214,15 +1437,17 @@ def quick_validate(flow_path: str) -> bool: return False def validate_flow(flow_path: str) -> bool: - """There are many legacy versions of flows that contain grammars we cannot parse. - This tool only processes modern flows that can be built in flow builder. + """Validate that a flow file can be parsed and processed. + + There are many legacy versions of flows that contain grammars we cannot parse. + This tool only processes modern flows that can be built in flow builder. Args: - flow_path (str): path of flow + flow_path: Path to the flow file to validate. Returns: - True if the flow is valid, False otherwise - + True if the flow is valid (parseable, has start element, no banned elements), + False otherwise. """ # 1. Flows must be parseable # 2. Flows must have a start element diff --git a/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py b/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py index 4a9d723c..8c5eab73 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py @@ -758,10 +758,12 @@ def when_to_run(self) -> list[QueryAction]: return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: - all_named = list(parser.get_all_named_elems()) + need_description = [el for el in list(parser.get_all_named_elems()) if + parse_utils.get_tag(el) != 'rules'] + accum = [] - for el in all_named: + for el in need_description: desc = parse_utils.get_by_tag(el,'description') if len(desc) == 0: accum.append(el) diff --git a/packages/code-analyzer-flow-engine/package.json b/packages/code-analyzer-flow-engine/package.json index 247286bd..9596dd98 100644 --- a/packages/code-analyzer-flow-engine/package.json +++ b/packages/code-analyzer-flow-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-flow-engine", "description": "Plugin package that adds 'Flow Scanner' as an engine into Salesforce Code Analyzer", - "version": "0.31.1", + "version": "0.32.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -13,20 +13,20 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", "@types/semver": "^7.7.1", - "semver": "^7.7.2" + "semver": "^7.7.3" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-pmd-engine/gradle/libs.versions.toml b/packages/code-analyzer-pmd-engine/gradle/libs.versions.toml index 76bc54e8..5a94df4b 100644 --- a/packages/code-analyzer-pmd-engine/gradle/libs.versions.toml +++ b/packages/code-analyzer-pmd-engine/gradle/libs.versions.toml @@ -8,7 +8,7 @@ [versions] hamcrest = "3.0" junit-jupiter = "5.13.4" -pmd = "7.18.0" # !!! IMPORTANT !!! KEEP THIS IN SYNC WITH PMD_VERSION INSIDE OF: src/constants.ts +pmd = "7.19.0" # !!! IMPORTANT !!! 
KEEP THIS IN SYNC WITH PMD_VERSION INSIDE OF: src/constants.ts # For the following: Keep in sync with whatever pmd-core pulls in. Basically, we don't want duplicates in our java-lib folder. # To see pmd-core's dependencies, go to https://mvnrepository.com/artifact/net.sourceforge.pmd/pmd-core diff --git a/packages/code-analyzer-pmd-engine/package.json b/packages/code-analyzer-pmd-engine/package.json index 4bfc28e0..a3418d36 100644 --- a/packages/code-analyzer-pmd-engine/package.json +++ b/packages/code-analyzer-pmd-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-pmd-engine", "description": "Plugin package that adds 'pmd' and 'cpd' as engines into Salesforce Code Analyzer", - "version": "0.33.0", + "version": "0.34.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -13,20 +13,20 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", "@types/semver": "^7.7.1", - "semver": "^7.7.2" + "semver": "^7.7.3" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-pmd-engine/src/constants.ts b/packages/code-analyzer-pmd-engine/src/constants.ts index a7336319..1f9a7ca8 100644 --- a/packages/code-analyzer-pmd-engine/src/constants.ts +++ b/packages/code-analyzer-pmd-engine/src/constants.ts @@ -1,5 +1,5 @@ // !!! IMPORTANT !!! 
KEEP THIS IN SYNC WITH gradle/libs.versions.toml -export const PMD_VERSION: string = '7.18.0'; +export const PMD_VERSION: string = '7.19.0'; export const PMD_ENGINE_NAME: string = "pmd"; export const CPD_ENGINE_NAME: string = "cpd"; diff --git a/packages/code-analyzer-pmd-engine/src/pmd-rule-mappings.ts b/packages/code-analyzer-pmd-engine/src/pmd-rule-mappings.ts index 9226905c..f34a2355 100644 --- a/packages/code-analyzer-pmd-engine/src/pmd-rule-mappings.ts +++ b/packages/code-analyzer-pmd-engine/src/pmd-rule-mappings.ts @@ -96,6 +96,10 @@ export const RULE_MAPPINGS: Record=20.0.0" diff --git a/packages/code-analyzer-retirejs-engine/package.json b/packages/code-analyzer-retirejs-engine/package.json index 0eec4f9c..14f12341 100644 --- a/packages/code-analyzer-retirejs-engine/package.json +++ b/packages/code-analyzer-retirejs-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-retirejs-engine", "description": "Plugin package that adds 'retire-js' as an engine into Salesforce Code Analyzer", - "version": "0.29.0", + "version": "0.30.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -13,21 +13,21 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", - "isbinaryfile": "^5.0.7", + "isbinaryfile": "^4.0.10", "node-stream-zip": "^1.15.0", - "retire": "^5.3.0" + "retire": "^5.4.0" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-sfge-engine/package.json b/packages/code-analyzer-sfge-engine/package.json index 9dc82a47..16173662 100644 --- a/packages/code-analyzer-sfge-engine/package.json +++ b/packages/code-analyzer-sfge-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-sfge-engine", "description": "Plugin package that adds 'Salesforce Graph Engine' as an engine into Salesforce Code Analyzer", - "version": "0.15.0", + "version": "0.16.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", @@ -13,20 +13,20 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "dependencies": { - "@salesforce/code-analyzer-engine-api": "0.32.0", + "@salesforce/code-analyzer-engine-api": "0.33.0-SNAPSHOT", "@types/node": "^20.0.0", - "semver": "^7.7.2" + "semver": "^7.7.3" }, "devDependencies": { - "@eslint/js": "^9.39.1", + "@eslint/js": "^9.39.2", "@types/jest": "^30.0.0", "@types/semver": "^7.7.1", - "eslint": "^9.39.1", + "eslint": "^9.39.2", "jest": "^30.2.0", "rimraf": "^6.1.2", - "ts-jest": "^29.4.5", + "ts-jest": "^29.4.6", "typescript": "^5.9.3", - "typescript-eslint": "^8.47.0" + "typescript-eslint": "^8.50.0" }, "engines": { "node": ">=20.0.0" diff --git a/packages/code-analyzer-sfge-engine/test/sfge-wrapper.test.ts b/packages/code-analyzer-sfge-engine/test/sfge-wrapper.test.ts new file mode 100644 index 00000000..8aaa50eb --- /dev/null +++ b/packages/code-analyzer-sfge-engine/test/sfge-wrapper.test.ts @@ 
-0,0 +1,47 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import os from 'node:os';
+import {LogLevel, TelemetryData} from '@salesforce/code-analyzer-engine-api';
+import {RuntimeSfgeWrapper, SfgeRuleInfo} from '../src/sfge-wrapper';
+
+class FakeJavaExec {
+    public async exec(_args: string[], _cp: string[], onStdOut?: (msg: string) => void): Promise<void> {
+        if (onStdOut) {
+            // Build a mixed message payload with LOG and PROGRESS entries
+            const payload = JSON.stringify([
+                { messageKey: 'debug_sfgeInfoLog', args: ['hello'], internalLog: '', messageSeverity: 'DEBUG' },
+                { messageKey: 'progress_sfgeFinishedCompilingFiles', args: ['1'], internalLog: '', progressPercent: 50 }
+            ]);
+            onStdOut(`SFCA-REALTIME-START${payload}SFCA-REALTIME-END`);
+        }
+        return;
+    }
+}
+
+describe('sfge-wrapper handleRunStdOut coverage', () => {
+    it('processes realtime LOG and PROGRESS messages', async () => {
+        const tmp = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'sfge-wrapper-'));
+        const logDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'sfge-logs-'));
+        const resultsFile = path.join(tmp, 'resultsFile.json');
+        await fs.promises.writeFile(resultsFile, '[]', 'utf-8');
+
+        const logs: { level: LogLevel; msg: string }[] = [];
+        const telemetry: TelemetryData[] = [];
+        const wrapper = new RuntimeSfgeWrapper(
+            new FakeJavaExec() as any,
+            { now: () => new Date(), formatToDateTimeString: () => '' } as any,
+            (lvl, msg) => logs.push({ level: lvl, msg }),
+            (_name, data) => telemetry.push(data)
+        );
+
+        const rules: SfgeRuleInfo[] = [{ name: 'ApexFlsViolation', description: '', category: '', severity: 3, url: '', isPilot: false }];
+        const progress: number[] = [];
+        const res = await wrapper.invokeRunCommand(rules, [], [], { logFolder: logDir, disableLimitReachedViolations: false, threadCount: 1, threadTimeout: 1000 }, tmp, p => progress.push(p));
+
+        expect(Array.isArray(res)).toBe(true);
+        expect(logs.find(l => l.msg.includes('hello'))).toBeTruthy();
+        expect(progress.some(p => p > 10)).toBe(true); // progressed via PROGRESS message branch
+    });
+});
+
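A minimal usage sketch of the public helpers documented above, for anyone who wants to exercise them by hand. It assumes the FlowScanner/public package layout shown in the diff paths is importable, that the flow namespace is the standard http://soap.sforce.com/2006/04/metadata, and that parse_expression returns bare merge-field names as its docstring states; the XML fragment and variable names are hypothetical::

    from FlowScanner.public import custom_parser, parse_utils

    # parse_expression pulls {!mergeField} references out of template text.
    refs = parse_utils.parse_expression("Hi {!firstName}, case {!caseNumber} is closed.")
    print(refs)  # expected per the docstring: ['firstName', 'caseNumber']

    # get_root_from_string parses flow XML with line-number tracking enabled;
    # to_string/clean_string strip the namespace noise on the way back out.
    flow_xml = b'<Flow xmlns="http://soap.sforce.com/2006/04/metadata"><apiVersion>60.0</apiVersion></Flow>'
    root = custom_parser.get_root_from_string(flow_xml)
    print(parse_utils.get_tag(root))      # expected: 'Flow' (namespace prefix stripped)
    print(custom_parser.to_string(root))  # expected: serialized XML without xmlns declarations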