Javascript Regex That Ignores Matches Nested Inside Parentheses

December 27, 2023 Post a Comment

How would I use JavaScript to create a regular expression that finds all text in between comma delimiters, but ignores commas found inside nested parentheses? For example, in the

Solution 1:

You could create your own parser, and keep track of a "stack" to detect whether a parenthesis was opened before. The example below works with (), [], {}, or anything you want. And they can be nested inside each other.

You can use it like so:

// Build a comma splitter that leaves anything inside (), {} or [] untouched.
const mySplit = customSplitFactory({
  escapedPairs: { '(': ')', '{': '}', '[': ']' },
  delimiter: ',',
});

// The comma inside the parentheses is not treated as a separator.
mySplit('one, two, start (a, b) end'); // ["one"," two"," start (a, b) end"]

Code & demo:

/**
 * Generic factory: builds a function that splits a string on `delimiter`,
 * except where the delimiter appears inside one of the configured bracket
 * pairs. Pairs may be nested inside each other.
 *
 * @param {Object} options
 * @param {string} options.delimiter - Single character to split on.
 * @param {Object<string, string>} [options.escapedPairs={}] - Maps each opening
 *   character to its closing character, e.g. `{ '(': ')' }`. A pair whose
 *   opening and closing characters are identical (e.g. `{ '"': '"' }`) is
 *   supported.
 * @returns {function(string): string[]} Splitter. Empty groups (produced by
 *   leading, trailing or consecutive delimiters) are omitted from the result.
 */
function customSplitFactory({ delimiter, escapedPairs = {} }) {
  // Opening char -> closing char, built once per factory call.
  const closerByOpener = new Map(Object.entries(escapedPairs));

  return (str) => {
    const groups = [];
    // Closing characters we are still waiting for; innermost is last.
    const escapeStack = [];
    let currentGroup = '';

    // Push the group collected so far (if any) and start a new one.
    const flushGroup = () => {
      if (currentGroup.length) {
        groups.push(currentGroup);
      }
      currentGroup = '';
    };

    for (const char of str) {
      const awaitedCloser = escapeStack[escapeStack.length - 1];

      if (escapeStack.length && char === awaitedCloser) {
        // The closing char we were waiting for `)`, `]`, ...
        // Checked before the opener test so that symmetric pairs (quotes)
        // can actually close instead of nesting forever.
        escapeStack.pop();
        currentGroup += char;
      } else if (closerByOpener.has(char)) {
        // An opening char `(`, `[`, ...: remember which closer ends it.
        escapeStack.push(closerByOpener.get(char));
        currentGroup += char;
      } else if (char === delimiter && !escapeStack.length) {
        // A delimiter outside of any bracket pair ends the current group.
        flushGroup();
      } else {
        // Anything else (including delimiters inside brackets) is plain text.
        currentGroup += char;
      }
    }

    // The last group has no trailing delimiter to flush it.
    flushGroup();

    return groups;
  };
}

// Usage
const mySplit = customSplitFactory({
  delimiter: ',',
  escapedPairs: {
    '(': ')',
    '{': '}',
    '[': ']'
  }
});

/**
 * Just for this demo: logs the input string and the split result side by
 * side, both JSON-encoded and separated by " // ".
 *
 * @param {string} s - String to split with `mySplit`.
 */
function demo(s) {
  const res = mySplit(s);
  // Wrap JSON.stringify so map's index/array arguments are not passed on as
  // its `replacer` / `space` parameters.
  console.log([s, res].map((value) => JSON.stringify(value)).join(' // '));
}

demo('one, two, start (a, b) end,');   // ["one"," two"," start (a, b) end"]
demo('one, two, start {a, b} end');    // ["one"," two"," start {a, b} end"]
demo('one, two, start [{a, b}] end,'); // ["one"," two"," start [{a, b}] end"]
demo('one, two, start ((a, b)) end,'); // ["one"," two"," start ((a, b)) end"]

Solution 2:

You need to think about the special case first, which is the parentheses, and handle it before anything else:

const str = "one, two, start (a, b) end, hello";
// First alternative: a segment that contains one "(...)" group, so commas
// inside the parentheses stay in the segment. It needs at least one character
// between the parentheses and at least one non-comma character after ")".
// Second alternative: any plain run of non-comma characters.
const mtc = str.match(/[^,]*\([^\)]+\)[^,]+|[^,]+/g);
console.log(mtc);
// Expected output: ["one", " two", " start (a, b) end", " hello"]
// (the space after each comma is kept as part of the following segment)

First thing, handle parentheses:

patt =/[^,]*\([^\)]+\)[^,]+/g
// This matches zero or more non-comma characters (the text after a comma),
// then a "(", then one or more characters that are not ")", then the closing ")",
// and finally one or more non-comma characters after it.

// Now the easy part: match any run of characters that contains no comma.
patt =/[^,]+/g

Solution 3:

If unmatched braces don't need to be handled, this could be simplified to a naive balanced-brace counter. Currently, unmatched braces are handled on a best-effort basis by defaulting to normal text:

If a closing brace is detected, it will try to find the matching starting brace and close the group at it, treating the enclosed segment as text.
If no starting brace is found, the closing brace is treated like normal text.

// Opening brace -> closing brace for every bracket type that protects commas.
const braces = {'{': '}', '[': ']', '(': ')'};
// Object map of ending braces to starting braces.
const inv_braces = Object.fromEntries(
  Object.entries(braces).map(([open, close]) => [close, open])
);
// Pre-built break-point scanning regex.
// group 1: comma detection, group 2: start braces, group 3: end braces.
// Every brace is backslash-escaped so it is literal inside the character class.
const red = new RegExp(
  `(,)|` +
  `([${Object.keys(braces).map((x) => `\\${x}`).join('')}])|` +
  `([${Object.values(braces).map((x) => `\\${x}`).join('')}])`,
  'g'
);

/**
 * Splits `str` on commas that are not enclosed in braces.
 *
 * Unmatched braces are handled best-effort: a closing brace with no matching
 * opener on the stack is ignored, and a closing brace that matches an earlier
 * opener also discards any unclosed openers stacked above it.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} The segments between top-level commas. Segments are not
 *   trimmed, and the final segment is always included (even when empty).
 */
const element_extract = (str) => {
  const res = [];
  const stack = [];
  // Index of the most recent top-level comma (-1 before the first one).
  let last = -1;

  // matchAll scans with its own copy of the regex, so the shared `red`
  // object's lastIndex is never left in a dirty state between calls.
  for (const match of str.matchAll(red)) {
    const [, comma, begin, end] = match;

    if (begin) {
      // Beginning brace: push to stack.
      stack.push(begin);
    } else if (end) {
      // Ending brace: pop the stack back to its starting brace, if any.
      const start = stack.lastIndexOf(inv_braces[end]);
      if (start !== -1) stack.length = start;
    } else if (comma && !stack.length) {
      // Empty stack and comma: slice the string and push to results.
      res.push(str.slice(last + 1, match.index));
      last = match.index;
    }
  }

  // Final element: `last` is always before the end of the string here.
  res.push(str.slice(last + 1));
  return res;
};

// Sample inputs: simple, nested, and mismatched-brace cases.
// Declared with const; assigning to an undeclared name throws in strict mode
// and in ES modules.
const data = [
  "one, two, start (a, b) end",
  "one, two, start ((a, (b][,c)]) ((d,e),f)) end, two",
  "one, two ((a, (b,c)) ((d,e),f)) three, start (a, (b,c)) ((d,e),f) end, four",
  "(a, (b,c)) ((d,e)],f))"
];
// Print the extracted elements for each sample.
for (const x of data) {
  console.log(element_extract(x));
}

Notes:

Escaping can be added by adding another match group for \ and incrementing index to skip
Regex string sanitizer can be added to allow matching on special characters
Second regex can be added to skip commas for optimization (see edit history)
Support for variable length delimiters can be added by replacing comma matcher and including length of delimiter in calculations. Same goes for braces.
- For example, I could use (\s*,\s*) instead of (,) to strip spaces, or use '{{':'}}' as braces by adjusting the regex builder to use '|' instead of character classes

For simplicity I have left these out

Solution 4:

As some comments suggested, you can use the split function. For example:

const str = "one, two, start (a, b) end,";
// Split on commas that are neither preceded by an opening quote/brace/paren
// plus optional alphanumerics, nor followed by optional alphanumerics and ")",
// or by "}" or a quote.
// The group in the lookbehind is non-capturing: String.prototype.split splices
// capture groups into its result, which would add `undefined` entries between
// the segments. Each segment is trimmed to drop the space after the comma.
const matches = str
  .split(/(?<!(?:\"|\{|\()[a-zA-Z0-9]*),(?![a-zA-Z0-9]*\)|\}|\")/)
  .map((segment) => segment.trim());

matches is gonna be an array containing [ "one", "two", "start (a, b) end", "" ];

docs: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split

hope it helps.

JavaScript Sample

Javascript Regex That Ignores Matches Nested Inside Parentheses

Solution 1:

Solution 2:

Solution 3:

Solution 4:

Post a Comment for "Javascript Regex That Ignores Matches Nested Inside Parentheses"