From 554da736d4a882e9e921e65d42d93aaa1810c434 Mon Sep 17 00:00:00 2001
From: Saoud Rizwan <7799382+saoudrizwan@users.noreply.github.com>
Date: Wed, 18 Sep 2024 22:06:27 -0400
Subject: [PATCH] Show error message when url scraping fails; update mention
regex to allow trailing punctuation
---
src/shared/context-mentions.ts | 51 ++++++++++++++++++++++++++++++----
src/utils/context-mentions.ts | 1 +
2 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/src/shared/context-mentions.ts b/src/shared/context-mentions.ts
index dfb9e5c..3912868 100644
--- a/src/shared/context-mentions.ts
+++ b/src/shared/context-mentions.ts
@@ -1,9 +1,48 @@
/*
-Mention regex
-- File and folder paths (starting with '/')
-- URLs (containing '://')
-- The 'problems' keyword
-- Word boundary after 'problems' to avoid partial matches
+Mention regex:
+- **Purpose**:
+ - To identify and highlight specific mentions in text that start with '@'.
+ - These mentions can be file paths, URLs, or the exact word 'problems'.
+ - Ensures that a single trailing punctuation mark (one of `.,;:!?`) is not included in the match, allowing punctuation to follow the mention without being part of it.
+
+- **Regex Breakdown**:
+ - `/@`:
+ - **@**: The mention must start with the '@' symbol.
+
+ - `((?:\/|\w+:\/\/)[^\s]+?|problems\b)`:
+ - **Capturing Group (`(...)`)**: Captures the part of the string that matches one of the specified patterns.
+ - `(?:\/|\w+:\/\/)`:
+ - **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them for back-referencing.
+ - `\/`:
+ - **Slash (`/`)**: Indicates that the mention is a file or folder path starting with a '/'.
+ - `|`: Logical OR.
+ - `\w+:\/\/`:
+ - **Protocol (`\w+://`)**: Matches URLs that start with a word character sequence followed by '://', such as 'http://', 'https://', 'ftp://', etc.
+ - `[^\s]+?`:
+ - **Non-Whitespace Characters (`[^\s]+`)**: Matches one or more characters that are not whitespace.
+ - **Non-Greedy (`+?`)**: Ensures the smallest possible match, preventing the inclusion of trailing punctuation.
+ - `|`: Logical OR.
+ - `problems\b`:
+ - **Exact Word ('problems')**: Matches the exact word 'problems'.
+ - **Word Boundary (`\b`)**: Ensures that 'problems' is matched as a whole word and not as the prefix of a longer word (e.g., 'problemset').
+
+ - `(?=[.,;:!?]?(?=[\s\r\n]|$))`:
+ - **Positive Lookahead (`(?=...)`)**: Ensures that the match is followed by specific patterns without including them in the match.
+ - `[.,;:!?]?`:
+ - **Optional Punctuation (`[.,;:!?]?`)**: Matches zero or one of the specified punctuation marks.
+ - `(?=[\s\r\n]|$)`:
+ - **Nested Positive Lookahead (`(?=[\s\r\n]|$)`)**: Ensures that the punctuation (if present) is followed by a whitespace character or the end of the string. Line breaks are already covered by `\s`, so the explicit `\r\n` is redundant but harmless.
+
+- **Summary**:
+ - The regex effectively matches:
+ - Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters (including periods within the path).
+ - URLs that start with a protocol (like 'http://') followed by any non-whitespace characters (including query parameters).
+ - The exact word 'problems'.
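+ - It ensures that a single trailing punctuation mark (one of ',', '.', ';', ':', '!', '?') is not included in the matched mention, allowing the punctuation to follow the mention naturally in the text. Only the last mark is excluded: for a path or URL followed by several marks (e.g. '...'), all but the final one remain part of the match.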
+
+- **Global Regex**:
+ - `mentionRegexGlobal`: Creates a global version of the `mentionRegex` to find all matches within a given string.
+
*/
-export const mentionRegex = /@((?:\/|\w+:\/\/)[^\s]+|problems\b)/
+export const mentionRegex = /@((?:\/|\w+:\/\/)[^\s]+?|problems\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/
export const mentionRegexGlobal = new RegExp(mentionRegex.source, "g")
diff --git a/src/utils/context-mentions.ts b/src/utils/context-mentions.ts
index 9598cbd..64b289b 100644
--- a/src/utils/context-mentions.ts
+++ b/src/utils/context-mentions.ts
@@ -53,6 +53,7 @@ export async function parseMentions(text: string, cwd: string, urlScraper?: UrlS
const markdown = await urlScraper.urlToMarkdown(mention)
parsedText += `\n\n\n${markdown}\n`
} catch (error) {
+ vscode.window.showErrorMessage(`Error fetching content for ${mention}: ${JSON.stringify(error)}`)
parsedText += `\n\n\nError fetching content: ${error.message}\n`
}
} else if (mention.startsWith("/")) {