summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornicm <nicm>2020-04-01 07:35:10 +0000
committerNicholas Marriott <nicholas.marriott@gmail.com>2020-04-01 10:08:39 +0100
commit0dbf4145788cda92b983037e3a7dcdd9a8997e23 (patch)
tree5c2b78fa63bdc490e61c24c5cfd37e1239865e87
parent8dedccaa205a91a0dd57012150567403c2ac827d (diff)
Performance improvements for regex searching, most notably:
- Use the grid data directly instead of copying it. - Special case the most typical one byte character cells and use memcmp for multiple bytes instead of a handrolled loop. - Hoist regcomp out of the loop into the calling functions. GitHub issue 2143. Also a man page from from jmc@.
-rw-r--r--window-copy.c242
1 files changed, 140 insertions, 102 deletions
diff --git a/window-copy.c b/window-copy.c
index bb38e60e..b2b61dc7 100644
--- a/window-copy.c
+++ b/window-copy.c
@@ -58,10 +58,6 @@ static int window_copy_search_lr(struct grid *, struct grid *, u_int *,
u_int, u_int, u_int, int);
static int window_copy_search_rl(struct grid *, struct grid *, u_int *,
u_int, u_int, u_int, int);
-static int window_copy_search_lr_regex(struct grid *, struct grid *,
- u_int *, u_int *, u_int, u_int, u_int, int);
-static int window_copy_search_rl_regex(struct grid *, struct grid *,
- u_int *, u_int *, u_int, u_int, u_int, int);
static int window_copy_last_regex(struct grid *gd, u_int py, u_int first,
u_int last, u_int len, u_int *ppx, u_int *psx,
const char *buf, const regex_t *preg, int eflags);
@@ -2263,14 +2259,12 @@ window_copy_search_rl(struct grid *gd,
}
static int
-window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
- u_int *ppx, u_int *psx, u_int py, u_int first, u_int last, int cis)
+window_copy_search_lr_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
+ u_int first, u_int last, regex_t *reg)
{
- int cflags = REG_EXTENDED, eflags = 0;
+ int eflags = 0;
u_int endline, foundx, foundy, len, pywrap, size = 1;
- u_int ssize = 1;
- char *buf, *sbuf;
- regex_t reg;
+ char *buf;
regmatch_t regmatch;
struct grid_line *gl;
@@ -2281,19 +2275,7 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
if (first >= last)
return (0);
- sbuf = xmalloc(ssize);
- sbuf[0] = '\0';
- sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
- if (sbuf == NULL)
- return (0);
-
/* Set flags for regex search. */
- if (cis)
- cflags |= REG_ICASE;
- if (regcomp(&reg, sbuf, cflags) != 0) {
- free(sbuf);
- return (0);
- }
if (first != 0)
eflags |= REG_NOTBOL;
@@ -2313,7 +2295,7 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
len += gd->sx;
}
- if (regexec(&reg, buf, 1, &regmatch, eflags) == 0) {
+ if (regexec(reg, buf, 1, &regmatch, eflags) == 0) {
foundx = first;
foundy = py;
window_copy_cstrtocellpos(gd, len, &foundx, &foundy,
@@ -2329,15 +2311,11 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
foundy--;
}
*psx -= *ppx;
- regfree(&reg);
- free(sbuf);
free(buf);
return (1);
}
}
- regfree(&reg);
- free(sbuf);
free(buf);
*ppx = 0;
*psx = 0;
@@ -2345,28 +2323,15 @@ window_copy_search_lr_regex(struct grid *gd, struct grid *sgd,
}
static int
-window_copy_search_rl_regex(struct grid *gd, struct grid *sgd,
- u_int *ppx, u_int *psx, u_int py, u_int first, u_int last, int cis)
+window_copy_search_rl_regex(struct grid *gd, u_int *ppx, u_int *psx, u_int py,
+ u_int first, u_int last, regex_t *reg)
{
- int cflags = REG_EXTENDED, eflags = 0;
- u_int endline, len, pywrap, size = 1, ssize = 1;
- char *buf, *sbuf;
- regex_t reg;
+ int eflags = 0;
+ u_int endline, len, pywrap, size = 1;
+ char *buf;
struct grid_line *gl;
- sbuf = xmalloc(ssize);
- sbuf[0] = '\0';
- sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
- if (sbuf == NULL)
- return (0);
-
/* Set flags for regex search. */
- if (cis)
- cflags |= REG_ICASE;
- if (regcomp(&reg, sbuf, cflags) != 0) {
- free(sbuf);
- return (0);
- }
if (first != 0)
eflags |= REG_NOTBOL;
@@ -2387,22 +2352,38 @@ window_copy_search_rl_regex(struct grid *gd, struct grid *sgd,
}
if (window_copy_last_regex(gd, py, first, last, len, ppx, psx, buf,
- &reg, eflags))
+ reg, eflags))
{
- regfree(&reg);
- free(sbuf);
free(buf);
return (1);
}
- regfree(&reg);
- free(sbuf);
free(buf);
*ppx = 0;
*psx = 0;
return (0);
}
+static const char *
+window_copy_cellstring(const struct grid_line *gl, u_int px, size_t *size)
+{
+ struct grid_cell_entry *gce;
+
+ if (px >= gl->cellsize) {
+ *size = 1;
+ return (" ");
+ }
+
+ gce = &gl->celldata[px];
+ if (~gce->flags & GRID_FLAG_EXTENDED) {
+ *size = 1;
+ return (&gce->data.data);
+ }
+
+ *size = gl->extddata[gce->offset].data.size;
+ return (gl->extddata[gce->offset].data.data);
+}
+
/* Find last match in given range. */
static int
window_copy_last_regex(struct grid *gd, u_int py, u_int first, u_int last,
@@ -2457,20 +2438,33 @@ static char *
window_copy_stringify(struct grid *gd, u_int py, u_int first, u_int last,
char *buf, u_int *size)
{
- u_int ax, bx, newsize;
- struct grid_cell gc;
+ u_int ax, bx, newsize = *size;
+ const struct grid_line *gl;
+ const char *d;
+ size_t bufsize = 1024, dlen;
+ while (bufsize < newsize)
+ bufsize *= 2;
+ buf = xrealloc(buf, bufsize);
+
+ gl = grid_peek_line(gd, py);
bx = *size - 1;
- newsize = *size;
for (ax = first; ax < last; ax++) {
- grid_get_cell(gd, ax, py, &gc);
- newsize += gc.data.size;
- buf = xrealloc(buf, newsize);
- memcpy(buf + bx, gc.data.data, gc.data.size);
- bx += gc.data.size;
+ d = window_copy_cellstring(gl, ax, &dlen);
+ newsize += dlen;
+ while (bufsize < newsize) {
+ bufsize *= 2;
+ buf = xrealloc(buf, bufsize);
+ }
+ if (dlen == 1)
+ buf[bx++] = *d;
+ else {
+ memcpy(buf + bx, d, dlen);
+ bx += dlen;
+ }
}
-
buf[newsize - 1] = '\0';
+
*size = newsize;
return (buf);
}
@@ -2480,57 +2474,64 @@ static void
window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
const char *str)
{
- u_int cell, ccell, px, pywrap;
- int match;
- const char *cstr;
- char *celldata, **cells;
- struct grid_cell gc;
-
- /* Set up staggered array of cell contents. This speeds up search. */
- cells = xreallocarray(NULL, ncells, sizeof cells[0]);
+ u_int cell, ccell, px, pywrap, pos, len;
+ int match;
+ const struct grid_line *gl;
+ const char *d;
+ size_t dlen;
+ struct {
+ const char *d;
+ size_t dlen;
+ } *cells;
/* Populate the array of cell data. */
+ cells = xreallocarray(NULL, ncells, sizeof cells[0]);
cell = 0;
px = *ppx;
pywrap = *ppy;
+ gl = grid_peek_line(gd, pywrap);
while (cell < ncells) {
- grid_get_cell(gd, px, pywrap, &gc);
- celldata = xmalloc(gc.data.size + 1);
- memcpy(celldata, gc.data.data, gc.data.size);
- celldata[gc.data.size] = '\0';
- cells[cell] = celldata;
+ cells[cell].d = window_copy_cellstring(gl, px,
+ &cells[cell].dlen);
cell++;
px = (px + 1) % gd->sx;
- if (px == 0)
+ if (px == 0) {
pywrap++;
+ gl = grid_peek_line(gd, pywrap);
+ }
}
/* Locate starting cell. */
cell = 0;
+ len = strlen(str);
while (cell < ncells) {
ccell = cell;
- cstr = str;
+ pos = 0;
match = 1;
while (ccell < ncells) {
- /* Anchor found to the end. */
- if (*cstr == '\0') {
+ if (str[pos] == '\0') {
match = 0;
break;
}
-
- celldata = cells[ccell];
- while (*celldata != '\0' && *cstr != '\0') {
- if (*celldata++ != *cstr++) {
+ d = cells[ccell].d;
+ dlen = cells[ccell].dlen;
+ if (dlen == 1) {
+ if (str[pos] != *d) {
match = 0;
break;
}
+ pos++;
+ } else {
+ if (dlen > len - pos)
+ dlen = len - pos;
+ if (memcmp(str + pos, d, dlen) != 0) {
+ match = 0;
+ break;
+ }
+ pos += dlen;
}
-
- if (!match)
- break;
ccell++;
}
-
if (match)
break;
cell++;
@@ -2548,8 +2549,6 @@ window_copy_cstrtocellpos(struct grid *gd, u_int ncells, u_int *ppx, u_int *ppy,
*ppy = pywrap;
/* Free cell data. */
- for (cell = 0; cell < ncells; cell++)
- free(cells[cell]);
free(cells);
}
@@ -2609,29 +2608,45 @@ window_copy_search_jump(struct window_mode_entry *wme, struct grid *gd,
struct grid *sgd, u_int fx, u_int fy, u_int endline, int cis, int wrap,
int direction, int regex)
{
- u_int i, px, sx;
- int found = 0;
+ u_int i, px, sx, ssize = 1;
+ int found = 0, cflags = REG_EXTENDED;
+ char *sbuf;
+ regex_t reg;
+
+ if (regex) {
+ sbuf = xmalloc(ssize);
+ sbuf[0] = '\0';
+ sbuf = window_copy_stringify(sgd, 0, 0, sgd->sx, sbuf, &ssize);
+ if (cis)
+ cflags |= REG_ICASE;
+ if (regcomp(&reg, sbuf, cflags) != 0) {
+ free(sbuf);
+ return (0);
+ }
+ }
if (direction) {
for (i = fy; i <= endline; i++) {
- if (regex)
- found = window_copy_search_lr_regex(gd, sgd,
- &px, &sx, i, fx, gd->sx, cis);
- else
+ if (regex) {
+ found = window_copy_search_lr_regex(gd,
+ &px, &sx, i, fx, gd->sx, &reg);
+ } else {
found = window_copy_search_lr(gd, sgd,
&px, i, fx, gd->sx, cis);
+ }
if (found)
break;
fx = 0;
}
} else {
for (i = fy + 1; endline < i; i--) {
- if (regex)
- found = window_copy_search_rl_regex(gd, sgd,
- &px, &sx, i - 1, 0, fx + 1, cis);
- else
+ if (regex) {
+ found = window_copy_search_rl_regex(gd,
+ &px, &sx, i - 1, 0, fx + 1, &reg);
+ } else {
found = window_copy_search_rl(gd, sgd,
&px, i - 1, 0, fx + 1, cis);
+ }
if (found) {
i--;
break;
@@ -2639,6 +2654,10 @@ window_copy_search_jump(struct window_mode_entry *wme, struct grid *gd,
fx = gd->sx - 1;
}
}
+ if (regex) {
+ free(sbuf);
+ regfree(&reg);
+ }
if (found) {
window_copy_scroll_to(wme, px, i);
@@ -2710,7 +2729,11 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
struct screen_write_ctx ctx;
struct grid *gd = s->grid;
int found, cis, which = -1;
+ int cflags = REG_EXTENDED;
u_int px, py, b, nfound = 0, width;
+ u_int ssize = 1;
+ char *sbuf;
+ regex_t reg;
if (ssp == NULL) {
width = screen_write_strlen("%s", data->searchstr);
@@ -2728,25 +2751,36 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
free(data->searchmark);
data->searchmark = bit_alloc((gd->hsize + gd->sy) * gd->sx);
+ if (regex) {
+ sbuf = xmalloc(ssize);
+ sbuf[0] = '\0';
+ sbuf = window_copy_stringify(ssp->grid, 0, 0, ssp->grid->sx,
+ sbuf, &ssize);
+ if (cis)
+ cflags |= REG_ICASE;
+ if (regcomp(&reg, sbuf, cflags) != 0) {
+ free(sbuf);
+ return (0);
+ }
+ }
for (py = 0; py < gd->hsize + gd->sy; py++) {
px = 0;
for (;;) {
if (regex) {
found = window_copy_search_lr_regex(gd,
- ssp->grid, &px, &width, py, px,
- gd->sx, cis);
+ &px, &width, py, px, gd->sx, &reg);
if (!found)
break;
- }
- else {
+ } else {
found = window_copy_search_lr(gd, ssp->grid,
- &px, py, px, gd->sx, cis);
+ &px, py, px, gd->sx, cis);
if (!found)
break;
}
nfound++;
- if (px == data->cx && py == gd->hsize + data->cy - data->oy)
+ if (px == data->cx &&
+ py == gd->hsize + data->cy - data->oy)
which = nfound;
b = (py * gd->sx) + px;
@@ -2755,6 +2789,10 @@ window_copy_search_marks(struct window_mode_entry *wme, struct screen *ssp,
px++;
}
}
+ if (regex) {
+ free(sbuf);
+ regfree(&reg);
+ }
if (which != -1)
data->searchthis = 1 + nfound - which;