BloGroonga

2018-05-29

Groonga 8.0.3 has been released

Groonga 8.0.3 has been released!

How to install: Install

Changes

Here are important changes in this release:

  • [highlight_html] Supported highlighting of search results when NormalizerNFKC100 or TokenNgram is used.
  • [normalizers] Added a new option, unify_middle_dot, for NormalizerNFKC100.
  • [normalizers] Added a new option, unify_katakana_v_sounds, for NormalizerNFKC100.
  • [normalizers] Added a new option, unify_katakana_bu_sound, for NormalizerNFKC100.
  • [sub_filter] Supported sub_filter optimization for the case where records are already narrowed down enough before sub_filter is executed.
  • [delete] Added a new option, limit.
  • [normalizers] Fixed a bug where FULLWIDTH LATIN CAPITAL LETTERs such as U+FF21 FULLWIDTH LATIN CAPITAL LETTER A weren't normalized to LATIN SMALL LETTERs such as U+0061 LATIN SMALL LETTER A. If you have been using NormalizerNFKC100, you must recreate your indexes.

[highlight_html] Supported highlighting of search results when NormalizerNFKC100 or TokenNgram is used

You can highlight keywords matched by a search that uses NormalizerNFKC100 or TokenNgram, as in the example below.

table_create Entries TABLE_NO_KEY
column_create Entries body COLUMN_SCALAR ShortText
table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer 'TokenNgram("report_source_location", true)'   --normalizer 'NormalizerNFKC100'
column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body
load --table Entries
[
{"body": "ア㌕Az"}
]
[[0,0.0,0.0],1]
select Entries   --match_columns body   --query 'グラム'   --output_columns 'highlight_html(body, Terms)'
[
  [
    0,
    0.0,
    0.0
  ],
  [
    [
      [
        1
      ],
      [
        [
          "highlight_html",
          null
        ]
      ],
      [
        "ア<span class=\"keyword\">㌕</span>Az"
      ]
    ]
  ]
]

[normalizers] Added a new option, unify_middle_dot, for NormalizerNFKC100

This option normalizes the various middle dot characters to a single middle dot, as in the example below.

normalize   'NormalizerNFKC100("unify_middle_dot", true)'   "·ᐧ•∙⋅⸱・・"   WITH_TYPES
[
  [
    0,
    0.0,
    0.0
  ],
  {
    "normalized": "········",
    "types": [
      "symbol",
      "symbol",
      "symbol",
      "symbol",
      "symbol",
      "symbol",
      "symbol",
      "symbol"
    ],
    "checks": [

    ]
  }
]

With this option, you can search with or without ・ (middle dot) and regardless of the position of ・ (middle dot).

[normalizers] Added a new option, unify_katakana_v_sounds, for NormalizerNFKC100

This option normalizes ヴァヴィヴヴェヴォ (katakana) to バビブベボ (katakana), as in the example below.

normalize   'NormalizerNFKC100("unify_katakana_v_sounds", true)'   "ヴァヴィヴヴェヴォヴ"   WITH_TYPES
[
  [
    0,
    0.0,
    0.0
  ],
  {
    "normalized": "バビブベボブ",
    "types": [
      "katakana",
      "katakana",
      "katakana",
      "katakana",
      "katakana",
      "katakana"
    ],
    "checks": [

    ]
  }
]

For example, a search for バイオリン (violin) also matches ヴァイオリン (violin).

[normalizers] Added a new option, unify_katakana_bu_sound, for NormalizerNFKC100

This option normalizes ヴァヴィヴゥヴェヴォ (katakana) to ブ (katakana), as in the example below.

normalize   'NormalizerNFKC100("unify_katakana_bu_sound", true)'   "ヴァヴィヴヴェヴォヴ"   WITH_TYPES
[
  [
    0,
    0.0,
    0.0
  ],
  {
    "normalized": "ブブブブブブ",
    "types": [
      "katakana",
      "katakana",
      "katakana",
      "katakana",
      "katakana",
      "katakana"
    ],
    "checks": [

    ]
  }
]

For example, a search for セーブル (katakana) or セーヴル (katakana) also matches セーヴェル (katakana).

[sub_filter] Supported sub_filter optimization for the case where records are already narrowed down enough

For example, this optimization is effective when the records have already been narrowed down enough before sub_filter is executed, as in the example below.

table_create Files TABLE_PAT_KEY ShortText
column_create Files revision COLUMN_SCALAR UInt32

table_create Packages TABLE_PAT_KEY ShortText
column_create Packages files COLUMN_VECTOR Files

column_create Files packages_files_index COLUMN_INDEX Packages files

table_create Revisions TABLE_PAT_KEY UInt32
column_create Revisions files_revision COLUMN_INDEX Files revision

load --table Files
[
{"_key": "include/groonga.h", "revision": 100},
{"_key": "src/groonga.c",     "revision": 29},
{"_key": "lib/groonga.rb",    "revision": 12},
{"_key": "README.textile",    "revision": 24},
{"_key": "ha_mroonga.cc",     "revision": 40},
{"_key": "ha_mroonga.hpp",    "revision": 6}
]

load --table Packages
[
{"_key": "groonga", "files": ["include/groonga.h", "src/groonga.c"]},
{"_key": "rroonga", "files": ["lib/groonga.rb", "README.textile"]},
{"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
]

select Packages \
  --filter '_key == "rroonga" && \
            sub_filter(files, "revision >= 10 && revision < 40")' \
  --output_columns '_key, files, files.revision'

[delete] Added a new option, limit

You can limit the number of records to delete with this option, as in the example below.

table_create Users TABLE_PAT_KEY ShortText
[[0,0.0,0.0],true]
load --table Users
[
{"_key": "alice"},
{"_key": "bob"},
{"_key": "bill"},
{"_key": "brian"}
]
[[0,0.0,0.0],4]
delete --table Users --filter '_key @^ "b"' --limit 2
[[0,0.0,0.0],true]
#>delete --filter "_key @^ \"b\"" --limit "2" --table "Users"
#:000000000000000 filter(3)
#:000000000000000 delete(2): [0][2]
#<000000000000000 rc=0
select Users
[
  [
    0,
    0.0,
    0.0
  ],
  [
    [
      [
        2
      ],
      [
        [
          "_id",
          "UInt32"
        ],
        [
          "_key",
          "ShortText"
        ]
      ],
      [
        1,
        "alice"
      ],
      [
        3,
        "bill"
      ]
    ]
  ]
]

Conclusion

See Release 8.0.3 2018-05-29 for detailed changes since 8.0.2.

Let's search by Groonga!