Skip to content Skip to sidebar Skip to footer

Javascript Merge Array Of Token Objects While Keeping Token Position In Text

Given a sentence like 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.' I have built a json representat

Solution 1:

This appears to be a good use case for Object.assign, which copies properties from a source to a target object.

// Merge each token with the entry in `extras` that covers the same character
// span. Properties present on both objects (e.g. `word`) take the token's
// value; properties found only on the extra are carried over.
const merge = tokens.map(token => {
  const match = extras.find(x =>
    x["characterOffsetBegin"] === token["characterOffsetBegin"] &&
    x["characterOffsetEnd"] === token["characterOffsetEnd"]
  );

  // Object.assign writes into its first argument, so target a fresh object
  // to leave the entries of `extras` unmodified.
  return match ? Object.assign({}, match, token) : token;
});

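Where extras is the second array of objects, this merges each token with the entry in extras that has the same begin and end offsets. For any property the two objects share, the value from the original token wins, and the original token is returned unchanged where no match is found. Keep in mind that Object.assign writes into its first argument, so pass a fresh object as the target if the entries of extras must stay untouched.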

// Tokens of the original sentence, each with its inclusive character span.
const tokens = [
  { "word": "Lorem", "characterOffsetBegin": 0, "characterOffsetEnd": 4 },
  { "word": "ipsum", "characterOffsetBegin": 6, "characterOffsetEnd": 10 },
  { "word": "dolor", "characterOffsetBegin": 12, "characterOffsetEnd": 16 },
  { "word": "sit", "characterOffsetBegin": 18, "characterOffsetEnd": 20 },
  { "word": "amet", "characterOffsetBegin": 22, "characterOffsetEnd": 25 },
  { "word": "consectetur", "characterOffsetBegin": 28, "characterOffsetEnd": 38 },
  { "word": "adipiscing", "characterOffsetBegin": 40, "characterOffsetEnd": 49 },
  { "word": "elit", "characterOffsetBegin": 51, "characterOffsetEnd": 54 },
  { "word": "sed", "characterOffsetBegin": 57, "characterOffsetEnd": 59 },
  { "word": "do", "characterOffsetBegin": 61, "characterOffsetEnd": 62 },
  { "word": "eiusmod", "characterOffsetBegin": 64, "characterOffsetEnd": 70 },
  { "word": "tempor", "characterOffsetBegin": 72, "characterOffsetEnd": 77 },
  { "word": "incididunt", "characterOffsetBegin": 79, "characterOffsetEnd": 88 },
  { "word": "ut", "characterOffsetBegin": 90, "characterOffsetEnd": 91 },
  { "word": "labore", "characterOffsetBegin": 93, "characterOffsetEnd": 98 },
  { "word": "et", "characterOffsetBegin": 100, "characterOffsetEnd": 101 },
  { "word": "dolore", "characterOffsetBegin": 103, "characterOffsetEnd": 108 },
  { "word": "magna", "characterOffsetBegin": 110, "characterOffsetEnd": 114 },
  { "word": "aliqua", "characterOffsetBegin": 116, "characterOffsetEnd": 121 }
];

// Second representation of the same spans: the words may differ from the
// originals and most entries carry an additional `label`.
const extras = [
  { "word": "'orem", "characterOffsetBegin": 0, "characterOffsetEnd": 4, "label": "0" },
  { "word": "ipsum", "characterOffsetBegin": 6, "characterOffsetEnd": 10, "label": "0" },
  { "word": "dolors", "characterOffsetBegin": 12, "characterOffsetEnd": 16, "label": "X" },
  { "word": "st", "characterOffsetBegin": 18, "characterOffsetEnd": 20, "label": "Z" },
  { "word": "amet", "characterOffsetBegin": 22, "characterOffsetEnd": 25, "label": "0" },
  { "word": "consectetur", "characterOffsetBegin": 28, "characterOffsetEnd": 38, "label": "0" },
  { "word": "adipiscing", "characterOffsetBegin": 40, "characterOffsetEnd": 49, "label": "0" },
  { "word": "elt", "characterOffsetBegin": 51, "characterOffsetEnd": 54, "label": "Y" },
  { "word": "sd", "characterOffsetBegin": 57, "characterOffsetEnd": 59 },
  { "word": "do", "characterOffsetBegin": 61, "characterOffsetEnd": 62 },
  { "word": "eiusmod", "characterOffsetBegin": 64, "characterOffsetEnd": 70, "label": "E" },
  { "word": "tempor", "characterOffsetBegin": 72, "characterOffsetEnd": 77 },
  { "word": "inciddunt", "characterOffsetBegin": 79, "characterOffsetEnd": 88, "label": "K" },
  { "word": "ut", "characterOffsetBegin": 90, "characterOffsetEnd": 91, "label": "O" },
  { "word": "labore", "characterOffsetBegin": 93, "characterOffsetEnd": 98, "label": "O" },
  { "word": "et", "characterOffsetBegin": 100, "characterOffsetEnd": 101, "label": "O" },
  { "word": "dolore", "characterOffsetBegin": 103, "characterOffsetEnd": 108, "label": "O" },
  { "word": "magna", "characterOffsetBegin": 110, "characterOffsetEnd": 114, "label": "O" },
  { "word": "aliqua", "characterOffsetBegin": 116, "characterOffsetEnd": 121, "label": "K" }
];

// Merge each token with the entry in `extras` that covers the same character
// span. Properties present on both objects (e.g. `word`) take the token's
// value; properties found only on the extra (e.g. `label`) are carried over.
const merge = tokens.map(token => {
  const match = extras.find(x =>
    x["characterOffsetBegin"] === token["characterOffsetBegin"] &&
    x["characterOffsetEnd"] === token["characterOffsetEnd"]
  );

  // Object.assign writes into its first argument, so target a fresh object
  // to leave the entries of `extras` unmodified.
  return match ? Object.assign({}, match, token) : token;
});

console.log(merge);

Solution 2:

You could use map and find to look up the label for each token:

// Look up the label of the entry in tokens2 that covers the same character
// span as `token`. Yields undefined when no entry matches or the matching
// entry has no label.
const labelFor = (token) => {
  const match = tokens2.find(
    (x) =>
      x.characterOffsetBegin === token.characterOffsetBegin &&
      x.characterOffsetEnd === token.characterOffsetEnd
  );

  // find() returns undefined when nothing matches, so guard before reading
  // `.label` instead of throwing a TypeError.
  return match ? match.label : undefined;
};

// Copy every token from tokens1 and attach the label found for its span.
const tokens = tokens1.map((token) => ({ ...token, label: labelFor(token) }));
      

Run the snippet below:

// Tokens of the original sentence, each with its inclusive character span.
const tokens1 = [
  { "word": "Lorem", "characterOffsetBegin": 0, "characterOffsetEnd": 4 },
  { "word": "ipsum", "characterOffsetBegin": 6, "characterOffsetEnd": 10 },
  { "word": "dolor", "characterOffsetBegin": 12, "characterOffsetEnd": 16 },
  { "word": "sit", "characterOffsetBegin": 18, "characterOffsetEnd": 20 },
  { "word": "amet", "characterOffsetBegin": 22, "characterOffsetEnd": 25 },
  { "word": "consectetur", "characterOffsetBegin": 28, "characterOffsetEnd": 38 },
  { "word": "adipiscing", "characterOffsetBegin": 40, "characterOffsetEnd": 49 },
  { "word": "elit", "characterOffsetBegin": 51, "characterOffsetEnd": 54 },
  { "word": "sed", "characterOffsetBegin": 57, "characterOffsetEnd": 59 },
  { "word": "do", "characterOffsetBegin": 61, "characterOffsetEnd": 62 },
  { "word": "eiusmod", "characterOffsetBegin": 64, "characterOffsetEnd": 70 },
  { "word": "tempor", "characterOffsetBegin": 72, "characterOffsetEnd": 77 },
  { "word": "incididunt", "characterOffsetBegin": 79, "characterOffsetEnd": 88 },
  { "word": "ut", "characterOffsetBegin": 90, "characterOffsetEnd": 91 },
  { "word": "labore", "characterOffsetBegin": 93, "characterOffsetEnd": 98 },
  { "word": "et", "characterOffsetBegin": 100, "characterOffsetEnd": 101 },
  { "word": "dolore", "characterOffsetBegin": 103, "characterOffsetEnd": 108 },
  { "word": "magna", "characterOffsetBegin": 110, "characterOffsetEnd": 114 },
  { "word": "aliqua", "characterOffsetBegin": 116, "characterOffsetEnd": 121 }
];

// Second representation of the same spans: the words may differ from the
// originals and most entries carry an additional `label`.
const tokens2 = [
  { "word": "'orem", "characterOffsetBegin": 0, "characterOffsetEnd": 4, "label": "0" },
  { "word": "ipsum", "characterOffsetBegin": 6, "characterOffsetEnd": 10, "label": "0" },
  { "word": "dolors", "characterOffsetBegin": 12, "characterOffsetEnd": 16, "label": "X" },
  { "word": "st", "characterOffsetBegin": 18, "characterOffsetEnd": 20, "label": "Z" },
  { "word": "amet", "characterOffsetBegin": 22, "characterOffsetEnd": 25, "label": "0" },
  { "word": "consectetur", "characterOffsetBegin": 28, "characterOffsetEnd": 38, "label": "0" },
  { "word": "adipiscing", "characterOffsetBegin": 40, "characterOffsetEnd": 49, "label": "0" },
  { "word": "elt", "characterOffsetBegin": 51, "characterOffsetEnd": 54, "label": "Y" },
  { "word": "sd", "characterOffsetBegin": 57, "characterOffsetEnd": 59 },
  { "word": "do", "characterOffsetBegin": 61, "characterOffsetEnd": 62 },
  { "word": "eiusmod", "characterOffsetBegin": 64, "characterOffsetEnd": 70, "label": "E" },
  { "word": "tempor", "characterOffsetBegin": 72, "characterOffsetEnd": 77 },
  { "word": "inciddunt", "characterOffsetBegin": 79, "characterOffsetEnd": 88, "label": "K" },
  { "word": "ut", "characterOffsetBegin": 90, "characterOffsetEnd": 91, "label": "O" },
  { "word": "labore", "characterOffsetBegin": 93, "characterOffsetEnd": 98, "label": "O" },
  { "word": "et", "characterOffsetBegin": 100, "characterOffsetEnd": 101, "label": "O" },
  { "word": "dolore", "characterOffsetBegin": 103, "characterOffsetEnd": 108, "label": "O" },
  { "word": "magna", "characterOffsetBegin": 110, "characterOffsetEnd": 114, "label": "O" },
  { "word": "aliqua", "characterOffsetBegin": 116, "characterOffsetEnd": 121, "label": "K" }
];

// Look up the label of the entry in tokens2 that covers the same character
// span as `token`. Yields undefined when no entry matches or the matching
// entry has no label.
const labelFor = (token) => {
  const match = tokens2.find(
    (x) =>
      x.characterOffsetBegin === token.characterOffsetBegin &&
      x.characterOffsetEnd === token.characterOffsetEnd
  );

  // find() returns undefined when nothing matches, so guard before reading
  // `.label` instead of throwing a TypeError.
  return match ? match.label : undefined;
};

// Copy every token from tokens1 and attach the label found for its span.
const tokens = tokens1.map((token) => ({ ...token, label: labelFor(token) }));

console.log(tokens);

Post a Comment for "Javascript Merge Array Of Token Objects While Keeping Token Position In Text"