Manage Corpus Data and Encode CWB Corpus.
Manage Corpus Data and Encode CWB Corpus.
See the CWB Encoding Tutorial on characters allowed for encoding attributes: "By convention, all attribute names must be lowercase (more precisely, they may only contain the characters a-z, 0-9, -, and _, and may not start with a digit). Therefore, the names of XML elements to be included in the CWB corpus must not contain any non-ASCII or uppercase letters." (section 2)
Import XML files.
chunktable
A data.table
with column "id" (unique values),
columns with metadata, and a column with text chunks.
tokenstream
A data.table
with a column "cpos" (corpus position), and
columns with positional attributes, such as "word", "lemma", "pos", "stem".
metadata
A data.table
with a column "id", to link data with
chunks/tokenstream, columns with document-level metadata, and columns
"cpos_left" and "cpos_right", which can be generated using method
$add_corpus_positions()
.
sentences
A data.table
.
named_entities
A data.table
.
tokenize()
Simple tokenization of text in chunktable.
...
Arguments that are passed into tokenizers::tokenize_words()
.
verbose
A logical value, whether to be verbose.
progress
A logical value, whether to show progress bar.
import_xml()
add_corpus_positions()
Add column 'cpos' to tokenstream and columns 'cpos_left' and 'cpos_right' to metadata.
purge()
Remove patterns from the chunktable that are known to cause problems. This is done most efficiently at the chunk level of data preparation, as the character vector to handle is much shorter than after tokenization/annotation has been performed.
encode()
Encode corpus. If the corpus already exists, it will be removed.
CorpusData$encode(
corpus,
p_attributes = "word",
s_attributes = NULL,
encoding,
registry_dir = Sys.getenv("CORPUS_REGISTRY"),
data_dir = NULL,
method = c("R", "CWB"),
verbose = TRUE,
compress = FALSE,
reload = TRUE,
quietly = TRUE
)
corpus
The name of the CWB corpus.
p_attributes
Positional attributes.
s_attributes
Columns that will be encoded as structural attributes.
encoding
Encoding/charset of the CWB corpus.
registry_dir
Corpus registry, the directory where registry files are stored.
data_dir
Directory where to create directory for indexed corpus files.
method
Either "R" or "CWB".
verbose
A logical value, whether to be verbose.
compress
A logical value, whether to compress corpus.
reload
A logical
value, whether to reload corpus.
quietly
A logical
value passed into RcppCWB::cwb_makeall()
,
RcppCWB::cwb_huffcode()
and RcppCWB::cwb_compress_rdx()
to control
verbosity of these functions.
library(RcppCWB)
library(data.table)
# This example writes the corpus to disk with the "R" method. A second method,
# "CWB", relies on the CWB tools to generate the indexed corpus; the CWB can be
# downloaded and installed within the package by calling cwb_install().

# Set up a temporary registry whose REUTERS entry points to the sample corpus
# data shipped with the RcppCWB package.
registry_tmp <- fs::path(tempdir(), "registry")
if (!dir.exists(registry_tmp)) {
  dir.create(registry_tmp)
}
registry_rcppcwb <- system.file(
  "extdata", "cwb", "registry",
  package = "RcppCWB"
)
r <- registry_file_parse("REUTERS", registry_dir = registry_rcppcwb)
# redirect the home directory to the indexed sample corpus inside RcppCWB
r[["home"]] <- system.file(
  "extdata", "cwb", "indexed_corpora", "reuters",
  package = "RcppCWB"
)
registry_file_write(r, corpus = "REUTERS", registry_dir = registry_tmp)
# Decode the structural attribute 'places' from the data directory of the
# REUTERS sample corpus shipped with RcppCWB.
s_attrs_places <- RcppCWB::s_attribute_decode(
  corpus = "REUTERS",
  data_dir = system.file(package = "RcppCWB", "extdata", "cwb", "indexed_corpora", "reuters"),
  s_attribute = "places", method = "R"
)
# Running id per region; seq_len() stays valid (empty) if no regions were
# decoded, whereas 1L:nrow() would yield c(1, 0).
s_attrs_places[["id"]] <- seq_len(nrow(s_attrs_places))
# name the value column after the s-attribute it holds
setnames(s_attrs_places, old = "value", new = "places")
# Decode positional attribute 'word': one character vector of tokens per region.
# The first two columns of 's_attrs_places' are read as the corpus positions
# that open and close each region (presumably 'cpos_left' and 'cpos_right' --
# verify against RcppCWB::s_attribute_decode()).
# Map() is used instead of apply() over rows because apply() would coerce each
# row to character (the 'places' column is not numeric) and could simplify the
# result to a matrix; Map() keeps the positions as they are and always returns
# a list.
tokens <- Map(
  function(cpos_left, cpos_right) {
    ids <- cl_cpos2id(
      corpus = "REUTERS", cpos = cpos_left:cpos_right,
      p_attribute = "word", registry = registry_tmp
    )
    cl_id2str(corpus = "REUTERS", id = ids, p_attribute = "word", registry = registry_tmp)
  },
  s_attrs_places[[1L]],
  s_attrs_places[[2L]]
)
# Stack the regions into one table, 'id' being the index of the region a token
# belongs to; seq_along() is safe when 'tokens' is empty.
tokenstream <- rbindlist(
  lapply(
    seq_along(tokens),
    function(i) data.table(id = i, word = tokens[[i]])
  )
)
# zero-based corpus positions; seq_len() avoids the c(0, -1) result of
# 0L:(n - 1L) when the table has no rows
tokenstream[["cpos"]] <- seq_len(nrow(tokenstream)) - 1L
# create CorpusData object (see vignette for further explanation)
CD <- CorpusData$new()
CD$tokenstream <- as.data.table(tokenstream)
CD$metadata <- as.data.table(s_attrs_places)
# Remove the temporary registry: its registry file still has a home dir
# pointing to the RcppCWB data dir, and encoding removes a corpus that already
# exists. Deleting the registry first prevents the package data from being
# deleted.
file.remove(fs::path(registry_tmp, "reuters"))
#> [1] TRUE
# unlink() removes the directory on every platform, whereas file.remove() only
# removes (empty) directories on most Unix-alikes; unlink() prints nothing.
unlink(registry_tmp, recursive = TRUE)
# Prepare fresh temporary locations: a registry directory and a directory for
# the indexed corpus files.
data_dir_tmp <- fs::path(tempdir(), "data_dir")
registry_tmp <- fs::path(tempdir(), "registry")
if (!dir.exists(data_dir_tmp)) {
  dir.create(data_dir_tmp)
}
if (!dir.exists(registry_tmp)) {
  dir.create(registry_tmp)
}
# Encode the token stream as corpus REUTERS, with 'word' as positional and
# 'places' as structural attribute, using the pure R method.
CD$encode(
  corpus = "REUTERS",
  p_attributes = "word",
  s_attributes = "places",
  encoding = "utf8",
  registry_dir = registry_tmp,
  data_dir = data_dir_tmp,
  method = "R"
)
#> ── Prepare encoding corpus REUTERS ─────────────────────────────────────────────
#> ℹ registry directory: /tmp/RtmpeQEeXz/registry
#> ℹ data directory: /tmp/RtmpeQEeXz/data_dir
#> ℹ encoding: "utf8"
#> ── encode p-attribute "word" ───────────────────────────────────────────────────
#> ℹ creating indices (in memory)
#> ✔ creating indices (in memory) [107ms]
#>
#> ℹ writing file: word.corpus
#> ✔ writing file: word.corpus [280ms]
#>
#> ℹ writing file: word.lexicon
#> ✔ writing file: word.lexicon [24ms]
#>
#> ℹ writing file: word.lexicon.idx
#> ✔ writing file: word.lexicon.idx [17ms]
#>
#> ℹ creating new registry file: /tmp/RtmpeQEeXz/registry/reuters
#> ℹ run `Rcpp::cwb_makeall()`
#> ✔ run `Rcpp::cwb_makeall()` [7ms]
#>
#> ✔ corpus reloaded: CL success / CQP success
#> ── Encode s-attributes ─────────────────────────────────────────────────────────
#> ℹ encode s-attribute "places"
#> ── Prepare registry file ───────────────────────────────────────────────────────
#> ℹ write registry file
#> ✔ write registry file [5ms]
#>
#> ── Check result ────────────────────────────────────────────────────────────────
#> ✔ corpus reloaded: CL success / CQP success
#> ✔ all p-attributes are available
#> ✔ all s-attributes are available
# Write a registry file for REUTERS with the new data directory as home.
# NOTE(review): only the p-attribute 'word' is declared here although 'places'
# was encoded as s-attribute above -- confirm that this is intended.
reg <- registry_data(
  name = "REUTERS", id = "REUTERS",
  home = data_dir_tmp, p_attributes = "word"
)
registry_file_write(data = reg, corpus = "REUTERS", registry_dir = registry_tmp)
# Check that the encoded corpus can be read: get the token ids for corpus
# positions 0 to 4049.
cl_cpos2id(
  corpus = "REUTERS", p_attribute = "word",
  cpos = seq.int(0L, 4049L), registry = registry_tmp
)
#> [1] 0 1 2 3 4 5 6 7 8 9 10 11 12 13
#> [15] 14 15 16 17 18 19 20 21 22 23 10 24 25 13
#> [29] 26 27 28 29 30 18 19 20 31 32 3 21 25 22
#> [43] 6 33 34 35 31 36 37 38 15 39 12 40 19 41
#> [57] 14 15 42 19 43 44 3 0 45 31 46 35 19 47
#> [71] 37 48 15 49 4 50 9 10 11 51 24 12 52 31
#> [85] 53 54 55 56 41 15 57 58 59 60 61 62 29 63
#> [99] 64 19 65 66 67 29 68 10 69 70 71 72 31 73
#> [113] 74 29 75 31 76 77 35 15 12 15 78 79 3 21
#> [127] 80 29 81 15 12 33 82 29 61 83 84 83 59 85
#> [141] 86 60 87 88 89 90 29 91 92 31 93 3 94 95
#> [155] 96 37 97 98 99 100 101 102 40 15 78 103 3 31
#> [169] 104 59 105 45 106 15 107 35 108 15 57 109 104 45
#> [183] 110 19 25 104 111 19 69 112 40 113 61 114 35 4
#> [197] 115 3 116 117 15 118 119 120 121 122 123 3 31 124
#> [211] 125 126 127 59 40 10 128 29 129 69 130 131 50 132
#> [225] 115 29 19 133 134 4 31 73 113 135 136 72 7 137
#> [239] 29 138 31 139 35 15 12 140 141 142 79 143 144 4
#> [253] 145 88 89 90 146 135 31 104 37 59 69 147 31 148
#> [267] 149 150 151 152 53 153 59 154 29 155 4 35 19 156
#> [281] 42 157 158 50 159 160 161 12 40 152 162 3 31 163
#> [295] 164 13 165 37 31 166 15 49 167 168 169 170 4 171
#> [309] 110 61 172 21 42 45 173 174 29 175 176 4 177 178
#> [323] 171 179 180 181 182 183 37 184 185 186 59 29 63 64
#> [337] 66 187 110 188 189 171 45 110 190 4 59 191 135 10
#> [351] 192 93 86 193 110 63 173 83 194 195 29 196 197 37
#> [365] 31 198 199 29 200 201 15 111 35 202 203 40 204 205
#> [379] 199 206 182 3 140 182 3 4 59 45 207 29 208 209
#> [393] 210 211 212 10 71 29 129 213 214 148 149 150 102 3
#> [407] 4 31 215 54 216 193 61 217 13 109 128 29 218 219
#> [421] 12 40 213 59 113 218 29 10 220 13 31 215 221 29
#> [435] 222 223 224 156 193 225 226 227 31 42 228 3 229 230
#> [449] 37 231 232 233 35 234 235 140 236 237 238 37 239 240
#> [463] 98 40 241 242 243 3 4 31 199 13 59 15 154 244
#> [477] 245 246 31 247 248 40 249 60 50 250 251 35 10 69
#> [491] 252 13 201 59 15 45 253 147 148 149 150 40 45 254
#> [505] 255 29 256 149 150 51 81 173 257 258 259 260 261 262
#> [519] 83 263 45 59 90 249 199 246 76 69 171 264 265 35
#> [533] 19 266 267 58 268 269 3 7 270 31 11 25 7 193
#> [547] 271 13 14 15 272 273 274 19 20 5 6 21 275 23
#> [561] 31 276 24 25 13 31 277 278 279 280 281 282 283 29
#> [575] 284 273 18 19 285 268 269 53 286 10 14 15 287 169
#> [589] 288 289 58 290 231 291 3 7 292 31 11 25 7 193
#> [603] 271 13 293 294 37 14 15 165 295 19 20 5 6 21
#> [617] 275 23 296 24 25 13 297 26 27 28 40 26 27 298
#> [631] 29 299 18 19 285 21 300 301 283 278 37 14 33 292
#> [645] 29 302 18 19 285 21 43 53 286 10 14 287 169 303
#> [659] 304 58 305 306 307 3 4 308 309 310 311 88 312 313
#> [673] 4 314 31 315 316 317 318 319 320 321 322 214 323 149
#> [687] 18 40 10 324 325 326 37 31 322 214 272 149 18 327
#> [701] 169 31 328 31 329 3 330 60 61 331 332 333 13 334
#> [715] 335 29 336 13 31 337 37 31 338 339 3 31 314 340
#> [729] 19 275 37 127 341 342 35 317 343 318 40 344 342 35
#> [743] 324 325 326 345 119 31 313 34 35 346 58 347 348 306
#> [757] 349 35 350 351 6 3 330 143 331 352 13 88 89 59
#> [771] 90 29 353 15 354 355 356 357 35 108 15 12 358 359
#> [785] 360 361 360 362 33 363 16 31 364 365 360 366 83 367
#> [799] 368 37 31 59 369 154 370 13 371 19 90 123 372 347
#> [813] 33 373 147 10 151 37 374 375 37 14 365 150 152 130
#> [827] 376 377 69 378 37 31 379 380 381 382 40 79 35 383
#> [841] 15 57 328 59 45 384 385 29 165 149 150 147 19 386
#> [855] 37 148 149 150 387 35 388 53 153 86 172 347 40 31
#> [869] 389 390 391 392 119 31 393 394 395 396 397 398 384 147
#> [883] 151 347 194 3 33 373 399 149 150 400 401 45 402 339
#> [897] 45 403 169 185 404 3 347 154 31 128 29 405 10 406
#> [911] 86 407 4 408 347 154 31 128 7 193 208 257 31 409
#> [925] 3 358 359 154 3 64 4 347 8 31 128 29 410 385
#> [939] 29 411 149 150 412 259 191 200 210 211 413 151 214 414
#> [953] 12 415 141 416 260 417 418 419 201 406 7 420 259 35
#> [967] 347 260 421 422 171 3 123 33 423 424 29 31 425 426
#> [981] 37 427 404 78 103 3 33 428 429 211 430 150 37 10
#> [995] 431 150 151 408 156 143 432 414 12 433 16 59 53 434
#> [1009] 435 29 19 436 37 437 18 438 20 439 35 234 235 53
#> [1023] 440 441 29 201 442 443 249 338 40 444 445 446 447 19
#> [1037] 445 434 448 37 289 446 19 20 358 359 449 450 19 451
#> [1051] 29 452 383 15 43 4 453 347 454 447 414 12 455 7
#> [1065] 33 456 10 414 25 37 457 18 19 20 7 8 458 459
#> [1079] 171 3 111 460 110 461 189 347 8 462 57 13 10 15
#> [1093] 408 37 10 364 40 383 463 464 40 10 465 466 467 468
#> [1107] 171 179 123 469 4 31 470 90 203 471 37 59 348 162
#> [1121] 472 154 244 473 474 31 475 37 204 214 31 476 37 477
#> [1135] 37 31 478 348 369 396 348 479 480 409 481 482 483 3
#> [1149] 53 484 485 486 487 488 489 59 29 490 19 491 414 25
#> [1163] 13 10 14 40 33 29 492 249 214 493 249 434 37 494
#> [1177] 348 495 472 496 29 497 16 15 49 169 59 369 35 498
#> [1191] 499 29 418 500 16 501 171 3 502 503 371 497 339 193
#> [1205] 504 246 203 40 204 140 171 503 31 505 146 506 507 508
#> [1219] 509 510 29 61 511 19 512 513 52 514 29 515 10 516
#> [1233] 517 31 48 518 519 35 19 520 521 522 523 38 15 318
#> [1247] 31 524 154 525 19 526 37 514 52 31 527 528 216 29
#> [1261] 529 530 531 31 15 532 40 533 520 534 509 31 535 536
#> [1275] 537 37 59 40 19 538 539 540 395 154 244 541 542 16
#> [1289] 53 338 348 543 35 108 15 12 404 62 7 29 544 10
#> [1303] 545 16 546 342 35 547 140 31 48 518 521 519 548 549
#> [1317] 348 524 510 29 61 550 52 258 551 29 552 31 517 339
#> [1331] 510 29 61 511 19 513 119 553 29 554 297 83 7 555
#> [1345] 29 556 40 557 31 521 519 339 558 559 509 348 560 532
#> [1359] 111 449 561 31 488 348 562 563 564 339 519 4 415 565
#> [1373] 524 566 40 567 260 568 569 570 330 260 571 572 573 574
#> [1387] 575 29 75 293 371 576 400 577 578 579 88 580 581 517
#> [1401] 31 521 519 509 348 563 582 50 244 583 16 31 584 585
#> [1415] 40 383 586 83 587 35 31 588 551 589 194 590 591 592
#> [1429] 37 31 517 593 594 40 595 596 597 516 40 598 599 600
#> [1443] 601 37 514 50 244 602 224 53 603 404 604 592 13 605
#> [1457] 534 292 606 607 40 608 557 21 521 519 509 348 563 609
#> [1471] 35 610 611 33 254 127 612 40 31 517 60 145 50 613
#> [1485] 19 614 400 45 31 442 615 37 609 224 31 616 617 31
#> [1499] 521 618 509 31 619 488 35 300 620 621 119 19 622 37
#> [1513] 623 624 45 625 562 626 35 204 140 31 521 218 92 627
#> [1527] 628 13 629 630 35 31 563 134 631 632 633 634 635 29
#> [1541] 19 636 637 609 615 110 638 165 342 639 640 641 29 642
#> [1555] 643 29 31 644 543 35 645 646 319 31 309 78 647 35
#> [1569] 31 648 15 532 45 649 408 37 41 650 199 651 119 652
#> [1583] 653 654 655 656 35 657 40 606 40 19 292 658 37 609
#> [1597] 35 659 31 521 660 661 590 662 37 325 563 663 45 591
#> [1611] 13 31 524 29 664 31 383 665 10 666 667 520 584 585
#> [1625] 668 37 669 149 18 53 434 35 670 37 671 672 33 132
#> [1639] 673 29 674 31 524 675 31 676 37 677 31 585 3 508
#> [1653] 678 679 680 681 143 682 214 683 81 443 35 19 684 42
#> [1667] 382 3 194 143 685 29 196 92 520 686 687 688 52 689
#> [1681] 59 193 690 35 691 31 76 692 35 15 12 306 78 103
#> [1695] 3 693 694 425 390 695 8 8 696 428 15 214 414 59
#> [1709] 12 111 347 154 3 330 260 331 352 13 88 89 90 37
#> [1723] 31 379 537 381 667 697 698 699 35 700 643 29 31 701
#> [1737] 702 449 703 29 31 704 37 42 640 705 215 40 165 440
#> [1751] 706 143 707 214 708 709 710 711 712 710 342 355 713 714
#> [1765] 715 716 221 693 717 445 40 221 434 681 143 363 718 214
#> [1779] 708 711 719 712 719 471 709 719 708 471 719 40 471 712
#> [1793] 719 709 719 342 720 21 721 679 33 722 723 214 724 725
#> [1807] 29 31 726 355 713 37 727 728 693 508 21 425 15 426
#> [1821] 37 501 729 636 319 53 730 692 35 108 15 12 602 10
#> [1835] 247 731 224 633 346 40 732 19 733 37 734 735 736 21
#> [1849] 733 345 119 19 737 37 738 735 736 35 31 53 351 731
#> [1863] 13 346 739 740 19 741 742 31 731 13 31 743 338 632
#> [1877] 323 744 6 745 40 231 349 358 746 747 748 361 360 749
#> [1891] 3 31 524 503 29 750 751 735 736 35 31 752 753 754
#> [1905] 35 31 346 739 731 8 244 755 735 736 358 746 747 3
#> [1919] 524 756 146 61 127 757 735 736 758 16 127 759 342 169
#> [1933] 31 346 739 732 756 37 760 735 21 524 761 29 762 19
#> [1947] 611 763 731 643 29 688 764 15 318 358 746 747 3 4
#> [1961] 765 4 338 31 524 766 29 767 768 754 769 434 29 165
#> [1975] 770 37 31 771 743 730 772 773 774 342 123 775 776 37
#> [1989] 524 777 40 778 779 29 674 31 524 780 754 123 460 110
#> [2003] 590 781 31 632 323 731 737 146 61 782 358 746 747 3
#> [2017] 352 29 767 754 35 611 763 8 244 783 35 784 29 785
#> [2031] 31 786 787 169 31 788 605 322 123 179 35 632 323 141
#> [2045] 789 735 736 8 244 790 13 166 791 792 793 40 778 794
#> [2059] 795 796 797 798 799 40 800 801 40 802 78 40 659 803
#> [2073] 804 33 805 13 754 169 806 40 807 808 33 449 331 809
#> [2087] 13 15 756 501 88 59 537 154 88 213 386 37 431 375
#> [2101] 438 810 358 746 747 3 811 812 37 637 813 814 15 25
#> [2115] 815 816 397 176 109 817 29 818 10 819 40 820 10 821
#> [2129] 50 822 823 824 825 314 13 31 826 730 756 169 31 827
#> [2143] 37 413 828 151 508 678 829 306 349 830 831 832 31 833
#> [2157] 834 29 53 835 59 836 29 529 108 15 12 40 837 31
#> [2171] 42 31 414 678 838 839 840 3 841 16 31 842 127 31
#> [2185] 356 543 35 843 42 15 12 831 3 678 844 45 845 846
#> [2199] 16 31 847 40 7 193 82 200 10 15 214 12 447 31
#> [2213] 848 12 130 452 849 831 363 16 840 3 356 497 169 843
#> [2227] 42 12 60 61 408 37 31 850 37 31 851 852 198 853
#> [2241] 40 31 854 35 31 42 678 844 33 19 855 856 37 31
#> [2255] 153 836 130 404 59 387 29 491 10 320 213 386 16 857
#> [2269] 342 29 148 149 375 438 810 150 40 858 29 161 12 37
#> [2283] 859 437 18 19 20 21 71 860 19 338 37 861 169 15
#> [2297] 57 404 862 12 863 864 29 130 865 18 19 20 35 616
#> [2311] 611 319 127 759 18 35 202 346 866 42 12 260 867 868
#> [2325] 52 869 18 831 33 363 16 31 840 83 367 678 870 871
#> [2339] 29 31 836 33 872 253 35 31 15 42 123 3 873 397
#> [2353] 369 37 59 874 194 293 489 29 875 29 31 836 740 876
#> [2367] 59 548 877 878 167 45 449 879 306 349 3 31 577 470
#> [2381] 29 875 119 31 495 71 502 260 880 37 31 881 882 174
#> [2395] 29 883 31 884 37 31 42 111 259 260 885 4 31 886
#> [2409] 887 13 888 42 889 171 3 890 306 349 358 359 360 361
#> [2423] 360 362 3 35 350 351 35 31 891 365 892 366 330 143
#> [2437] 331 352 13 88 89 59 90 29 353 12 382 40 79 35
#> [2451] 383 15 57 328 59 45 384 385 29 165 149 150 147 31
#> [2465] 148 149 386 86 172 347 40 31 389 390 391 392 119 31
#> [2479] 393 394 395 396 397 398 384 147 151 358 359 372 4 347
#> [2493] 33 52 384 508 678 14 15 213 53 434 893 29 88 894
#> [2507] 37 895 149 375 438 810 150 319 896 149 150 35 897 425
#> [2521] 15 103 3 86 3 530 319 31 898 899 40 900 901 35
#> [2535] 31 425 893 29 88 894 902 149 150 53 434 319 903 149
#> [2549] 35 897 408 37 491 904 16 141 905 140 31 906 33 393
#> [2563] 394 211 503 355 425 530 907 35 31 908 440 37 909 29
#> [2577] 910 149 150 319 399 149 35 31 911 440 31 103 3 21
#> [2591] 69 912 913 914 915 213 111 110 916 319 917 918 404 260
#> [2605] 919 920 921 37 19 788 213 13 922 923 678 844 154 88
#> [2619] 922 151 37 924 149 150 130 19 69 925 926 927 16 31
#> [2633] 379 380 577 53 153 29 226 520 414 15 12 928 437 18
#> [2647] 19 20 21 103 3 31 54 929 930 35 530 53 440 931
#> [2661] 29 61 31 932 37 156 933 29 934 909 935 64 31 434
#> [2675] 850 936 937 448 645 443 931 29 938 939 672 13 414 922
#> [2689] 12 319 678 870 855 14 905 31 940 941 942 37 943 31
#> [2703] 103 3 21 940 944 2 945 946 2 947 268 122 948 40
#> [2717] 949 2 950 951 19 952 953 71 53 434 29 954 678 14
#> [2731] 13 955 18 19 20 189 31 103 3 31 655 956 37 678
#> [2745] 870 128 29 200 14 214 414 12 35 19 41 42 193 225
#> [2759] 249 434 205 199 13 309 957 958 959 960 705 12 50 961
#> [2773] 35 356 223 29 210 211 165 295 447 922 443 678 829 15
#> [2787] 409 830 831 693 832 31 833 834 29 31 153 59 836 40
#> [2801] 3 7 146 82 200 447 414 12 21 103 3 320 678 962
#> [2815] 963 893 636 35 909 29 88 894 964 149 150 319 399 149
#> [2829] 35 897 408 37 965 214 31 966 40 967 645 968 86 707
#> [2843] 14 15 530 246 966 214 969 150 53 434 345 29 612 35
#> [2857] 897 415 963 214 970 962 40 914 915 69 971 682 214 859
#> [2871] 972 150 769 508 973 15 974 319 221 425 390 660 193 63
#> [2885] 35 975 6 29 976 977 37 14 15 419 31 414 391 978
#> [2899] 842 979 980 979 3 31 566 146 61 981 982 37 53 983
#> [2913] 71 35 984 16 425 985 986 987 15 974 29 674 769 142
#> [2927] 42 201 14 15 988 37 31 987 660 678 844 31 389 390
#> [2941] 391 989 347 40 501 260 369 37 31 990 37 231 991 992
#> [2955] 59 40 141 993 994 995 996 29 414 59 12 58 678 829
#> [2969] 306 349 830 831 832 31 833 834 29 53 835 59 836 29
#> [2983] 529 108 15 12 40 997 31 42 31 414 678 838 839 840
#> [2997] 3 841 16 31 842 127 31 356 543 35 843 42 15 12
#> [3011] 831 3 678 844 45 845 846 16 31 836 40 7 193 82
#> [3025] 200 10 15 214 12 447 31 848 12 130 452 849 678 844
#> [3039] 33 19 855 856 37 153 220 130 404 59 387 29 9 10
#> [3053] 320 15 213 386 16 857 342 40 858 29 161 12 37 859
#> [3067] 437 446 19 20 58 890 15 409 3 35 19 998 267 4
#> [3081] 330 143 331 352 13 88 89 59 90 355 31 356 357 35
#> [3095] 108 15 12 358 359 360 361 360 362 33 363 16 31 364
#> [3109] 365 360 366 83 367 4 999 37 31 59 369 154 370 13
#> [3123] 371 19 90 123 449 372 4 347 33 373 147 10 59 151
#> [3137] 37 374 375 37 14 365 150 1000 15 12 893 1001 53 440
#> [3151] 83 383 15 1002 40 79 1003 31 379 380 59 33 373 385
#> [3165] 29 165 624 150 52 10 376 377 378 58 21 1004 37 1005
#> [3179] 33 1006 205 19 1007 15 1008 1009 1010 1011 1012 355 1013 19
#> [3193] 1014 1015 1016 1017 1018 1019 1020 31 1021 19 1022 1023 1024 3
#> [3207] 123 3 330 33 331 15 1025 111 31 1026 45 1027 169 1028
#> [3221] 1029 31 1030 1031 1032 1018 653 35 234 1033 123 3 31 1004
#> [3235] 146 61 1006 474 6 205 194 1034 29 1035 31 1026 169 31
#> [3249] 448 1036 1037 1038 15 29 19 962 35 1039 234 1033 31 1026
#> [3263] 424 458 10 1040 40 542 31 1018 1041 47 1042 1018 319 31
#> [3277] 1032 653 29 31 426 37 1043 58 667 313 577 3 31 389
#> [3291] 1044 1045 1046 10 1047 309 343 29 165 149 375 83 165 115
#> [3305] 29 1048 119 31 325 40 316 1049 37 649 15 12 169 31
#> [3319] 650 15 78 48 1050 173 45 29 1051 31 1047 343 29 1052
#> [3333] 149 375 319 10 325 1053 149 29 674 820 31 517 319 88
#> [3347] 1054 1055 51 19 644 25 1056 21 1057 1058 13 1059 1060 19
#> [3361] 1061 577 449 1062 13 520 1063 13 15 1064 40 1065 1066 339
#> [3375] 1067 12 146 596 214 127 774 437 18 19 20 13 694 1068
#> [3389] 40 228 1056 29 31 616 1069 119 557 214 127 759 342 37
#> [3403] 48 1070 339 3 1071 4 371 576 83 1072 15 322 40 210
#> [3417] 1064 40 1065 1063 146 674 29 1073 1074 51 1075 31 1076 37
#> [3431] 1077 557 58 667 313 577 3 31 389 1044 1045 1046 10 1047
#> [3445] 309 343 29 165 149 375 83 165 115 29 1048 119 31 325
#> [3459] 40 316 1049 37 649 15 12 169 31 650 15 78 48 1050
#> [3473] 173 45 29 1051 31 1047 343 29 1052 149 375 319 10 325
#> [3487] 1053 149 29 674 820 31 517 319 88 1054 1055 51 19 644
#> [3501] 25 1056 21 1057 1058 13 1059 1060 19 1061 577 449 1062 13
#> [3515] 520 1063 13 15 1064 40 1065 1066 339 1067 12 146 596 214
#> [3529] 127 774 437 18 19 20 13 694 1068 40 228 1056 29 31
#> [3543] 616 1069 119 557 214 127 759 342 37 48 1070 21 313 1078
#> [3557] 54 1079 1050 1080 13 31 380 29 820 31 48 78 246 88
#> [3571] 580 1081 51 142 371 1082 51 29 1083 31 1084 563 1085 37
#> [3585] 1086 15 140 31 577 460 110 578 226 1087 1088 367 330 143
#> [3599] 1085 40 1089 29 297 339 3 1071 4 371 576 83 1072 15
#> [3613] 322 40 210 1064 40 1065 1063 146 674 29 1073 1074 51 1075
#> [3627] 31 1076 37 1077 557 58 1090 1091 1092 306 291 3 7 270
#> [3641] 10 24 12 13 14 15 165 29 17 18 19 20 35 31
#> [3655] 1093 1094 37 31 48 5 288 1095 1092 3 19 17 18 9
#> [3669] 23 10 24 25 13 31 48 277 278 26 27 28 29 869
#> [3683] 18 301 283 449 33 270 17 18 29 1096 18 31 43 3
#> [3697] 803 582 143 34 35 1097 24 12 13 26 1022 294 37 14
#> [3711] 15 31 43 3 58 21 234 235 1098 1099 152 204 165 13
#> [3725] 31 1100 37 19 520 1101 35 31 480 1102 4 193 1046 31
#> [3739] 1103 37 480 1104 1105 1106 204 165 1107 193 1108 15 1002 4
#> [3753] 208 110 218 19 1104 1109 29 1110 355 31 1111 1112 19 1113
#> [3767] 4 191 1114 61 1115 35 31 1104 42 1116 29 88 1111 44
#> [3781] 400 193 1117 31 115 15 45 1118 35 31 655 108 3 3
#> [3795] 1119 1120 1120 40 291 1121 1122 1002 193 61 1123 29 1124 1125
#> [3809] 1074 1107 12 64 31 1111 1126 40 1127 12 214 19 1128 29
#> [3823] 1107 12 1120 1129 21 1130 1131 193 1132 31 78 408 31 15
#> [3837] 42 1133 110 1134 205 1107 1133 3 1135 1136 1137 37 1138 1139
#> [3851] 122 21 1140 1117 404 154 1141 783 1142 13 1143 1104 169 1107
#> [3865] 45 503 29 1046 31 1144 1145 40 1146 35 48 480 1104 1116
#> [3879] 29 1002 40 79 1147 214 1148 165 1149 35 249 1113 1062 88
#> [3893] 1111 13 1150 51 1151 113 218 19 1104 1109 64 1152 227 31
#> [3907] 1113 1153 31 520 1154 1155 1156 154 29 218 19 1104 1109 64
#> [3921] 1152 227 88 1151 40 165 51 297 1157 191 1158 201 334 1113
#> [3935] 119 19 1104 11 31 215 810 1116 29 1111 566 1159 1107 602
#> [3949] 10 1160 1140 1117 35 153 1107 548 1161 1162 3 1163 37 31
#> [3963] 1151 1164 193 1165 29 1166 37 31 480 57 16 1167 13 35
#> [3977] 1142 1168 1169 700 21 1170 1171 1172 1173 927 31 1140 1117 35
#> [3991] 909 1116 29 19 1174 44 58 1175 14 15 69 33 758 1176
#> [4005] 342 35 897 632 29 1177 149 375 319 1178 149 375 35 897
#> [4019] 611 1179 1180 1181 3 897 632 1182 1183 213 1184 1185 735 1186
#> [4033] 1187 1188 342 81 211 1189 735 1186 1190 1191 35 897 611 1179
#> [4047] 1180 1181 179 58