Extract regions from NER annotations (CoNLL format).
conll_get_regions(x)
A data.frame, a data.table, or any other object that can be coerced
to a data.table. The input table is expected to have the columns
"token", "ner", and "cpos".
# Example input: one German sentence, one row per token, with an NER tag per
# token in B-/I-/O notation.
# NOTE(review): the tags live in a column named "ne" here, whereas the argument
# description mentions "ner" -- confirm which name conll_get_regions() reads.
x <- data.frame(
  token = strsplit(
    "Die Bundeskanzlerin Angela Merkel spricht im Bundestag zur Lage der Nation .",
    split = " ",
    fixed = TRUE
  )[[1L]],
  ne = c(
    "O", "O",                # Die Bundeskanzlerin
    "B-PERS", "I-PERS",      # Angela Merkel
    "O", "O",                # spricht im
    "B-ORG",                 # Bundestag
    "O", "O", "O", "O", "O"  # zur Lage der Nation .
  ),
  stringsAsFactors = FALSE
)
# Consecutive integer "cpos" values starting at 100, one per row.
x$cpos <- 99L + seq_len(nrow(x))
tab <- conll_get_regions(x)