PDA

View Full Version : PQ - Distinct keywords from text



sandy666
01-04-2021, 08:46 AM
KeywordsText

catThe dog chased the cat.

dogThe cat ate the bird.

birdThe cat is a cat.

The dog, bird, and cat.

consumer watchdog

catwoman and bird

Dog and Bird



let
Keywords = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
Text = Excel.CurrentWorkbook(){[Name="Table2"]}[Content],
Index = Table.AddIndexColumn(Text, "Index", 1, 1),
Split = Table.ExpandListColumn(Table.TransformColumns(Inde x, {{"Text", Splitter.SplitTextByAnyDelimiter({" ",",","."}, QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Text"),
Join = Table.AddColumn(Split, "Custom", each Keywords),
Expand = Table.ExpandTableColumn(Join, "Custom", {"Keywords"}, {"Keywords"}),
IF = Table.AddColumn(Expand, "Custom", each if Text.Contains([Text], [Keywords]) then [Keywords] else null),
Group = Table.Group(IF, {"Index"}, {{"Count", each _, type table}}),
List = Table.AddColumn(Group, "List", each List.Distinct([Count][Custom])),
Extract = Table.TransformColumns(List, {"List", each Text.Combine(List.Transform(_, Text.From), ", "), type text}),
TSC = Table.SelectColumns(Extract,{"List"})
in
TSC
Case sensitive and words contained keywords

List

dog, cat

cat, bird

cat

dog, bird, cat

dog

cat, bird




IF = Table.AddColumn(Expand, "Custom", each if Text.Contains([Text], [Keywords], Comparer.OrdinalIgnoreCase) then [Keywords] else null),
Case insenstive and words contained keywords

List

dog, cat

cat, bird

cat

dog, bird, cat

dog

cat, bird

dog, bird


IF = Table.AddColumn(Expand, "Custom", each if Text.Contains([Text], [Keywords], Comparer.OrdinalIgnoreCase) and Text.Length([Text]) = Text.Length([Keywords]) then [Keywords] else null),
Case insensitive without words contained keywords


List

dog, cat

cat, bird

cat

dog, bird, cat



bird

dog, bird


IF = Table.AddColumn(Expand, "Custom", each if Text.Contains([Text], [Keywords]) and Text.Length([Text]) = Text.Length([Keywords]) then [Keywords] else null),
Case sensitive without words contained keywords

List

dog, cat

cat, bird

cat

dog, bird, cat



bird