--- most-4.10.2.orig/most.1 +++ most-4.10.2/most.1 @@ -134,7 +134,7 @@ Use this switch when you want to view files containing 8 bit characters. .I most -will display the file 16 bytes per line in hexidecimal notation. +will display the file 16 bytes per line in hexadecimal notation. A typical line looks like: .IP .Ds @@ -185,12 +185,13 @@ This option is meaningful only when used with the .B \-v option. +.TP .BI + lineno Start up at .IR lineno . .TP -.B +c -Make search case sensitive. +.B -c +Make searches case sensitive. By default, they are not. .TP .B +d @@ -482,7 +483,7 @@ descriptors that represent the file name and line number, respectively. For example, if JED is your editor, then set .B MOST_EDITOR -to 'jed %s -g %d'. +to 'jed %s -g %d'. This will only work where the %s precedes the %d. .TP .B MOST_HELP This variable may be used to specify an alternate help file. @@ -496,7 +497,7 @@ .I most.rc on other systems. .SH CONFIGURATION FILE SYNTAX -When most starts up, it tries to read a system confiuration file and +When most starts up, it tries to read a system configuration file and then a personal configuration file. These files may be used to specify keybindings and colors. .PP --- most-4.10.2.orig/src/buffer.c +++ most-4.10.2/src/buffer.c @@ -44,10 +44,10 @@ Most_Buffer_Type *Most_Buf; -int Most_Num_Lines; +long long Most_Num_Lines; -unsigned int Most_C_Offset; -int Most_C_Line; +unsigned long long Most_C_Offset; +long long Most_C_Line; static unsigned char *beg_of_line1(void) { @@ -61,24 +61,29 @@ { if (*pos == '\n') { - pos--; - while ((pos > Most_Beg) - && (*pos != '\n')) + pos--; /* Skip back the new-line. */ + /* This block is UTF-8 safe, because it only scans the + buffer for a new-line, and doesn't count + characters.
*/ + while ((pos > Most_Beg) && (*pos != '\n')) pos--; - if (*pos != '\n') return pos; + if (*pos != '\n') return Most_Beg; + /* from here on *pos == '\n' */ if (pos + 1 != cpos) return pos + 1; } } - else pos--; + else + pos = SLutf8_bskip_char(Most_Beg, pos); if (*pos != '\n') { - while ((pos > Most_Beg) - && (*pos != '\n')) + /* This block is UTF-8 safe. See comment above. */ + while ((pos > Most_Beg) && (*pos != '\n')) pos--; if (*pos != '\n') return Most_Beg; + /* from here on *pos == '\n' */ return pos + 1; } @@ -93,55 +98,6 @@ return pos; } - -static unsigned char *forward_columns (unsigned char *b, unsigned char *e, unsigned int num_cols) -{ - unsigned int col = 0; - - while ((b < e) - && (col < num_cols)) - { - unsigned char ch = *b++; - if (((ch >= ' ') && (ch < 0x7F)) - || (ch >= SLsmg_Display_Eight_Bit)) - { - col++; - continue; - } - - if ((ch == '\b') || (ch == '\t') || (ch == '\r')) - switch (ch) - { - case '\b': - if (Most_V_Opt == 0) - { - if (col > 0) col--; - } - else col += 2; - break; - - case '\r': - if (Most_V_Opt == 0) - col = 0; - else - col += 2; - break; - - case '\t': - if (Most_T_Opt == 0) - col = Most_Tab_Width * (col/Most_Tab_Width + 1); - else - col += 2; - break; - } - else if (ch & 0x80) - col += 3; - else - col += 2; - } - return b; -} - /* does not move point */ static unsigned char *end_of_line1(void) { @@ -164,6 +120,9 @@ if (*pos != '\n') { + /* This block is UTF-8 safe, because it only scans the buffer + for a new-line, and doesn't count characters. 
*/ + n = pmax - pos; n2 = n % 8; pmax = pos + (n - 8); @@ -215,7 +174,7 @@ ncols = SLtt_Screen_Cols-1; while (1) { - unsigned char *next_b = forward_columns (b, e, ncols); + unsigned char *next_b = most_forward_columns (b, e, ncols, 1); if ((next_b == e) || (next_b == b)) break; @@ -237,11 +196,12 @@ return e; if (b == NULL) b = most_beg_of_line (); - b = forward_columns (b, e, SLtt_Screen_Cols-1); + b = most_forward_columns (b, e, SLtt_Screen_Cols-1, 1); /* Do not wrap the line if the last character falls on the last column * of the display. */ + /* FIXME potential bug if dealing with multi-byte char. */ if ((b + 1 <= e) && (b + 1 < Most_Eob) && (b[1] == '\n')) @@ -250,10 +210,10 @@ return b; } -int most_forward_line(int save) +long long most_forward_line(long long save) { - int m; - register int n = save; + long long m; + register long long n = save; unsigned char *p; unsigned char *pmax; @@ -346,14 +306,14 @@ } /* Count lines in the region. A half line counts as 1 */ -int most_count_lines(unsigned char *beg, unsigned char *end) +long long most_count_lines(unsigned char *beg, unsigned char *end) { - int save_line, n; + long long save_line, n; unsigned char *save_beg, *save_eob; - unsigned int save_pos; + unsigned long long save_pos; int dn = 1000; - if (Most_B_Opt) return(1 + (int)(end - beg) / 16); + if (Most_B_Opt) return(1 + (long long)(end - beg) / 16); save_line = Most_C_Line; save_beg = Most_Beg; save_eob = Most_Eob; save_pos = Most_C_Offset; @@ -371,9 +331,9 @@ return(n); } -void most_goto_line(int line) +void most_goto_line(long long line) { - int dif_c, dif_b,dif_t; + long long dif_c, dif_b,dif_t; if (line < 1) line = 1; most_read_to_line(line); @@ -420,7 +380,7 @@ } /* return line the point is on without the final '\n's */ -int most_extract_line(unsigned char **beg, unsigned char **end) +long long most_extract_line(unsigned char **beg, unsigned char **end) { *beg = most_beg_of_line(); *end = end_of_line (*beg); @@ -428,12 +388,12 @@ return 0; } -int 
most_what_line(unsigned char *pos) +long long most_what_line(unsigned char *pos) { - unsigned int save_pos; - int save_line, dir; - register int dif_c, dif_b,dif_t; - int ret; + unsigned long long save_pos; + long long save_line, dir; + register long long dif_c, dif_b,dif_t; + long long ret; if (Most_B_Opt) { @@ -517,7 +477,7 @@ } /* given a buffer position, find the line and column */ -void most_find_row_column(unsigned char *pos, int *r, int *c) +void most_find_row_column(unsigned char *pos, long long *r, long long *c) { unsigned char *beg; unsigned int save_offset; @@ -545,7 +505,10 @@ /* Now we have found the line it is on so.... */ beg = most_beg_of_line(); *c = 1; - while (beg++ < pos) *c = *c + 1; + if (Most_UTF8_Mode) + while ((beg = SLutf8_skip_char(beg, pos)) < pos) *c = *c + 1; + else + while (beg++ < pos) *c = *c + 1; Most_C_Line = save_line; Most_C_Offset = save_offset; } --- most-4.10.2.orig/src/buffer.h +++ most-4.10.2/src/buffer.h @@ -23,12 +23,12 @@ extern Most_Buffer_Type *Most_Buf; extern unsigned char *Most_Beg, *Most_Eob; -extern int Most_Num_Lines; +extern long long Most_Num_Lines; /* Offset of current position from beginning of buffer */ -extern unsigned int Most_C_Offset; +extern unsigned long long Most_C_Offset; -extern int Most_C_Line; +extern long long Most_C_Line; /* * Current line number. If at the beginning of the buffer, it is 1. If * we are at the last point of the buffer it is the number of lines. @@ -37,26 +37,26 @@ /* This routine does not move the point */ extern unsigned char *most_beg_of_line(void); -extern int most_forward_line(int); +extern long long most_forward_line(long long); /* This routine moves the point forward n lines. n can be negative. It returns the number moved. 
*/ -extern void most_goto_line(int); +extern void most_goto_line(long long); /* Move the point somewhere on the nth line of the buffer returning C_POS */ -extern int most_what_line(unsigned char *); +extern long long most_what_line(unsigned char *); /* return the line number of position 'argument'. Does not move point */ /* count the number of lines in the region delimited by beg and end. Counts lines from beg up to end but does not count end. Does not move point. */ -extern int most_count_lines(unsigned char *, unsigned char *); -extern int most_extract_line(unsigned char **, unsigned char **); +extern long long most_count_lines(unsigned char *, unsigned char *); +extern long long most_extract_line(unsigned char **, unsigned char **); extern Most_Buffer_Type *most_switch_to_buffer(Most_Buffer_Type *); extern Most_Buffer_Type *most_create_buffer(char *); -extern void most_find_row_column(unsigned char *, int *, int *); +extern void most_find_row_column(unsigned char *, long long *, long long *); #endif --- most-4.10.2.orig/src/keym.c +++ most-4.10.2/src/keym.c @@ -228,7 +228,7 @@ static void goto_percent_cmd(void) { unsigned char *pos; - int n; + long long n; if (Most_Digit_Arg != (int *) NULL) n = *Most_Digit_Arg; else @@ -282,7 +282,7 @@ static void find_next_cmd(void) { - int col, line, n = 1; + long long col, line, n = 1; unsigned long ofs; if (Most_Digit_Arg != NULL) n = *Most_Digit_Arg; @@ -309,7 +309,7 @@ #else "Search: ", #endif - Most_Search_Str, + (char *) Most_Search_Str, MOST_SEARCH_BUF_LEN ) == -1) return; Most_Curs_Offset = Most_C_Offset; @@ -325,7 +325,7 @@ #else "Search Backwards: ", #endif - Most_Search_Str, + (char *) Most_Search_Str, MOST_SEARCH_BUF_LEN) == -1) return; find_next_cmd(); } --- most-4.10.2.orig/src/line.c +++ most-4.10.2/src/line.c @@ -87,8 +87,7 @@ while (b < end) { ch = *b++; - if (((ch >= ' ') && (ch < 0x7F)) - || (ch >= SLsmg_Display_Eight_Bit)) + if (most_isprint(ch)) { *s++ = ch; continue; @@ -108,103 +107,156 @@ if (end > 
Most_Eob) end = Most_Eob; - sprintf (buf, "0x%08X: ", Most_C_Offset); + sprintf (buf, "0x%08llX: ", Most_C_Offset); ascii_format_line (beg, end, buf + 12); SLsmg_write_string (buf); SLsmg_erase_eol (); } -static int most_analyse_line(unsigned char *begg, unsigned char *endd, - char *out, char *attributes) +int most_isprint(unsigned char ch) { - unsigned char *beg, *end; - unsigned int min_col, max_col; - unsigned int i, i_max; + /* Can this be directly replaced with isprint? */ + return (ch >= ' ' && ch < 0x7F) || ch >= SLsmg_Display_Eight_Bit; +} + +static void most_analyse_line(unsigned char *beg, unsigned char *end, + unsigned char *out, char *attributes) +{ + unsigned char *pout; + char* pattributes; + + /* Holds the number of columns for the current character counting + * from the left margin (in contrast to the leftmost visible + * column). + */ + unsigned int col; + + /* Holds the number of the column up to which to apply the + * current/following formatting. Only meaningful when format_rlim > col.
+ */ + unsigned int format_rlim; - beg = begg; - end = endd; - i = i_max = 0; - min_col = Most_Column - 1; - max_col = min_col + SLtt_Screen_Cols; + beg = most_forward_columns(beg, end, Most_Column - 1, 0); + pout = out; + pattributes = attributes; + col = format_rlim = 0; while (beg < end) { char attr = ' '; - unsigned char ch; + unsigned char ch = *beg++; - if ('\n' == (ch = *beg++)) + if ('\n' == ch) break; if ((ch == '\r') && (Most_V_Opt == 0)) { - if (i > i_max) i_max = i; - i = 0; + if (col > format_rlim) format_rlim = col; + col = 0; continue; } if ((ch == '\b') && (Most_V_Opt == 0)) { - if (i > i_max) i_max = i; - if (i > 0) - i--; + if (col > format_rlim) format_rlim = col; + if (col > 0) + { + if (Most_UTF8_Mode) + { + SLwchar_Type wc; + pout = SLutf8_bskip_char(out, pout); + if (SLutf8_decode(pout, pout + SLUTF8_MAX_MBLEN, &wc, NULL)) + { + unsigned int char_len = SLwchar_wcwidth(wc); + if (char_len > 1) + col -= char_len - 1; + } + } + else + pout--; + pattributes--; + col--; + } continue; } - if (i < i_max) /* overstrike */ + if (col < format_rlim) /* overstrike */ { attr = 'b'; - if ((i >= min_col) && (i < max_col)) + if (col < SLtt_Screen_Cols) { - if (out[i-min_col] == '_') + if (*pout == '_') attr = 'u'; else if (ch == '_') { attr = 'u'; - ch = out[i - min_col]; + ch = *pout; } } if (ch == ' ') { - i++; + col++; continue; } /* drop */ } - - if ((ch >= ' ') && (ch < 0x7F)) - { - if ((i >= min_col) && (i < max_col)) + + if (Most_UTF8_Mode) { + unsigned char *prev = --beg; + SLwchar_Type wc; + unsigned int len; + + if (SLutf8_decode(beg, end, &wc, NULL)) + { + unsigned int char_len = SLwchar_wcwidth(wc); + if (char_len > 1) + col += char_len - 1; + } + + beg = SLutf8_skip_char(beg, end); + len = beg - prev; + + if (len > 1) { + /* Non-ASCII char, display it. 
*/ + if (col < SLtt_Screen_Cols) { - out[i-min_col] = ch; - attributes[i-min_col] = attr; + memcpy(pout, prev, len); + pout += len; + *pattributes++ = attr; } - i++; + col++; continue; - } - - if (ch >= SLsmg_Display_Eight_Bit) + } + } + + if (most_isprint(ch)) { - if ((i >= min_col) && (i < max_col)) + if (col < SLtt_Screen_Cols) { - out[i-min_col] = ch; - attributes[i-min_col] = attr; + *pout++ = ch; + *pattributes++ = attr; } - i++; + col++; continue; } if ((ch == '\t') && (Most_T_Opt == 0) && (Most_Tab_Width)) { - - int nspaces = Most_Tab_Width * (i/Most_Tab_Width + 1) - i; + /* Tab expansion must take into consideration the + * leftmost visible column. However, variable col holds the + * number of columns from the left margin and must be + * corrected. + */ + int vis_col = col + Most_Column - 1; + int nspaces = Most_Tab_Width - (vis_col % Most_Tab_Width); while (nspaces > 0) { - if ((i >= min_col) && (i < max_col)) + if (col < SLtt_Screen_Cols) { - out[i-min_col] = ' '; - attributes[i-min_col] = attr; + *pout++ = ' '; + *pattributes++ = attr; } - i++; + col++; nspaces--; } continue; @@ -212,36 +264,36 @@ if (ch & 0x80) { - if ((i >= min_col) && (i < max_col)) + if (col < SLtt_Screen_Cols) { - out[i-min_col] = '~'; - attributes[i-min_col] = attr; + *pout++ = '~'; + *pattributes++ = attr; } - i++; + col++; ch &= 0x7F; /* drop */ } - if ((i >= min_col) && (i < max_col)) + if (col < SLtt_Screen_Cols) { - out[i-min_col] = '^'; - attributes[i-min_col] = attr; + *pout++ = '^'; + *pattributes++ = attr; } - i++; + col++; if (ch == 0x7F) ch = '?'; else ch += '@'; - if ((i >= min_col) && (i < max_col)) + if (col < SLtt_Screen_Cols) { - out[i-min_col] = ch; - attributes[i-min_col] = attr; + *pout++ = ch; + *pattributes++ = attr; } - i++; + col++; } - if (i < i_max) - i = i_max; + if (col < format_rlim) + col = format_rlim; /* Now add "..." if selective display. To do that, the next line needs to * be dealt with to determine whether or not it will be hidden. 
@@ -249,7 +301,7 @@ if (Most_Selective_Display && (Most_W_Opt == 0) && (beg < Most_Eob) - && ((i >= min_col) && (i < max_col))) + && (col < SLtt_Screen_Cols)) { if (*beg == '\n') beg++; @@ -260,37 +312,29 @@ if ((beg >= Most_Eob) || (*beg == '\n') || (most_apparant_distance(beg) >= Most_Selective_Display)) { - i_max = i + 3; - while (i < i_max) + /* Add an ellipsis, if they fit on the screen. */ + int rlimit = col + 3; + while (col < rlimit) { - if (i < max_col) + if (col < SLtt_Screen_Cols) { - out[i] = '.'; - attributes[i] = ' '; + *pout++ = '.'; + *pattributes++ = ' '; } - i++; + col++; } } } - - i_max = i; - if (i < min_col) - i = min_col; - else if (i >= max_col) - i = max_col; - - i -= min_col; - - out[i] = 0; - attributes[i] = 0; - return i_max; + *pout = 0; + *pattributes = 0; } -static void output_with_attr (unsigned char *out, unsigned char *attr) +static void output_with_attr (unsigned char *out, char *attr) { - unsigned char at, ch, lat; + unsigned char at, lat; unsigned char *p = out; + unsigned char *pmax = p + strlen((char *) p); if (Most_V_Opt) { @@ -299,7 +343,7 @@ } lat = ' '; - while ((ch = *p) != 0) + while (p < pmax) { if (lat != *attr) { @@ -321,7 +365,7 @@ else most_tt_normal_video (); lat = at; } - p++; + p = SLutf8_skip_char(p, pmax); attr++; } @@ -340,10 +384,11 @@ void most_display_line (void) { unsigned char *beg, *end; - unsigned int len; +#if 0 unsigned char dollar; +#endif static unsigned char *line; - static unsigned char *attr; + static char *attr; static unsigned int line_len; if (Most_B_Opt) @@ -357,37 +402,43 @@ if (line_len < (unsigned int)(SLtt_Screen_Cols + 1) * SLUTF8_MAX_MBLEN) { SLfree ((char *) line); - SLfree ((char *) attr); + SLfree (attr); line_len = (SLtt_Screen_Cols + 1) * SLUTF8_MAX_MBLEN; if ((NULL == (line = (unsigned char *) SLmalloc (line_len))) - || (NULL == (attr = (unsigned char *) SLmalloc (line_len)))) + || (NULL == (attr = SLmalloc (line_len)))) most_exit_error ("Out of memory"); } (void) most_extract_line 
(&beg, &end); - len = most_analyse_line(beg, end, (char *) line, (char *) attr); + most_analyse_line(beg, end, line, attr); +#if 0 + /* Currently the dollar sign is not always being written at the + rightmost column when displaying multi-byte characters. */ dollar = 0; if (Most_W_Opt) { - if ((end < Most_Eob) - && (*end != '\n')) - dollar = '\\'; + if ((end < Most_Eob) + && (*end != '\n')) + dollar = '\\'; } else if (len > (unsigned int) SLtt_Screen_Cols + (Most_Column - 1)) dollar = '$'; - + if (dollar) { - line[SLtt_Screen_Cols-1] = dollar; - attr[SLtt_Screen_Cols-1] = ' '; - line[SLtt_Screen_Cols] = 0; - attr[SLtt_Screen_Cols] = 0; + unsigned char *pline = + most_forward_columns(line, line + line_len, SLtt_Screen_Cols-1, 1); + *pline = dollar; + *(pline+1) = 0; + attr[SLtt_Screen_Cols-1] = ' '; + attr[SLtt_Screen_Cols] = 0; } - +#endif + output_with_attr (line, attr); SLsmg_erase_eol (); } @@ -398,21 +449,34 @@ int most_apparant_distance (unsigned char *pos) { int i; - unsigned char *save_pos, ch; + unsigned char *save_pos, *beg, ch; unsigned int save_offset; save_offset = Most_C_Offset; save_pos = pos; Most_C_Offset = (unsigned int) (pos - Most_Beg); - pos = most_beg_of_line(); + beg = pos = most_beg_of_line(); Most_C_Offset = save_offset; i = 0; while (pos < save_pos) { - ch = *pos++; - if (((ch >= ' ') && (ch < 0x7F)) - || (ch >= SLsmg_Display_Eight_Bit)) + ch = *pos; + + if (Most_UTF8_Mode) { + unsigned char *prev = pos; + int len; + pos = SLutf8_skip_char(pos, save_pos); + len = pos - prev; + if (len > 1) { + i++; + continue; + } + } else { + pos++; + } + + if (most_isprint(ch)) { i++; continue; @@ -420,7 +484,13 @@ if (!Most_V_Opt && (ch == '\b')) { - if (i > 0) i--; + if (i > 0) + { + if (Most_UTF8_Mode) + i -= pos - SLutf8_bskip_char(beg, pos); + else + i--; + } } else if (!Most_V_Opt && (ch == '\015')) /* ^M */ { @@ -439,3 +509,95 @@ } return i; } + +/* + * Returns a pointer to the num_cols'th character after the one + * pointed at b. 
Invisible character runs are not counted toward this + * limit, i.e. strings that represent attributes, such as "_\b" for + * underlines. + * + * If multi_column is non-zero, characters spanning more than one + * column will add their extra width to the column count. + * + * If the end of the buffer is reached, as delimited by argument + * e, then e is returned. + */ +unsigned char *most_forward_columns (unsigned char *beg, unsigned char *e, unsigned int num_cols, int multi_column) +{ + unsigned int col = 0; + unsigned int prev_width = 1; + unsigned char* b = beg; + + while ((b < e) + && ((col < num_cols) + || (*b == '\b') + || (*b == '\r'))) + { + unsigned char ch = *b; + + if (Most_UTF8_Mode) + { + unsigned char *prev = b; + int len; + b = SLutf8_skip_char(b, e); + len = b - prev; + if (len > 1) + { + if (multi_column) + { + SLwchar_Type wc; + if (SLutf8_decode(prev, e, &wc, NULL)) + col += prev_width = SLwchar_wcwidth(wc); + } + else + col++; + continue; + } + } + else + b++; + + if (most_isprint(ch)) + { + col++; + prev_width = 1; + continue; + } + + if ((ch == '\b') || (ch == '\t') || (ch == '\r')) + switch (ch) + { + case '\b': + if (Most_V_Opt == 0) + { + if (col > 0) col -= prev_width; + } + else col += 2; + break; + + case '\r': + if (Most_V_Opt == 0) + col = 0; + else + col += 2; + break; + + case '\t': + if (Most_T_Opt == 0) + col = Most_Tab_Width * (col/Most_Tab_Width + 1); + else + col += 2; /* ^I is two chars long. */ + break; + } + else if (ch & 0x80) + col += 3; + else + col += 2; + } + + /* Last character was too wide. Backstep it.
*/ + if (col > num_cols) + b = SLutf8_bskip_char(beg, b); + + return b; +} --- most-4.10.2.orig/src/line.h +++ most-4.10.2/src/line.h @@ -7,5 +7,7 @@ extern void most_display_line(void); extern int most_apparant_distance(unsigned char *); +extern int most_isprint(unsigned char); +extern unsigned char *most_forward_columns (unsigned char *, unsigned char *, unsigned int, int); #endif --- most-4.10.2.orig/src/most.c +++ most-4.10.2/src/most.c @@ -125,7 +125,7 @@ ch = *(++str); if ( ch == '/') { - strcpy (Most_Search_Str,++str); + strcpy ((char *) Most_Search_Str,++str); return; } @@ -329,7 +329,8 @@ static void do_most (char *file, int start) { - int piped, row, col; + int piped; + long long row, col; most_get_cdir(Most_C_Dir); @@ -347,7 +348,7 @@ Most_Curs_Offset = Most_C_Offset; if (*Most_Search_Str - && ((row = most_search (Most_Beg + Most_C_Offset, 1, &col)) > 0)) + && ((row = most_search (Most_Beg + Most_C_Offset, 1LL, &col)) > 0)) most_goto_line(row); else { @@ -455,13 +456,7 @@ SLtt_get_terminfo(); #if SLANG_VERSION >= 20000 -#if 0 - Most_UTF8_Mode = SLutf8_enable (1); - if (Most_UTF8_Mode) - { - fprintf (stderr, "UTF-8 Mode is in effect\n"); - } -#endif + Most_UTF8_Mode = SLutf8_enable (-1); #endif SLtt_Ignore_Beep = 1; if (No_Colors) --- most-4.10.2.orig/src/most.h +++ most-4.10.2/src/most.h @@ -1,4 +1,5 @@ #include "config.h" +#define SLANG_REGEXP extern int Most_S_Opt; extern int Most_A_Opt; /* automatically choose -b if necessary */ extern int Most_V_Opt; /* display control chars */ --- most-4.10.2.orig/src/search.c +++ most-4.10.2/src/search.c @@ -20,6 +20,7 @@ */ #include "config.h" +#include <ctype.h> #include <stdio.h> #include <string.h> #include <slang.h> @@ -31,18 +32,18 @@ #include "display.h" #include "search.h" -/* Note!!! The regular expression searches may not work. I have not - * tested them. - * FIXME!!! 
- */ - int Most_Case_Sensitive = 0; -char Most_Search_Str[256]; +unsigned char Most_Search_Str[256]; int Most_Search_Dir = 1; #include "jdmacros.h" -#define UPCASE(ch) ((!Most_Case_Sensitive && (ch <= 'z') && (ch >= 'a')) ? (ch - 32) : ch) +#if SLANG_VERSION < 20000 +# define NORM_CHAR(ch) ((!Most_Case_Sensitive) ? toupper(ch) : ch) +# define UPCASE(ch) NORM_CHAR(ch) +#else +# define NORM_CHAR(ch) (ch) +#endif #if defined(HAVE_V8_REGCOMP) || defined(SLANG_REGEXP) @@ -78,7 +79,7 @@ * This function is called by the V8 regcomp to report * errors in regular expressions. */ -static void regerror(char *s) +static void regerror(const char *s) { char string[256]; @@ -95,13 +96,17 @@ * 0 error * */ -static int do_regcomp(unsigned char *key) +static int do_regcomp(const unsigned char *key) { static int old_Most_Case_Sensitive; - unsigned char UpCaseKey[sizeof(savepattern)]; # ifndef HAVE_V8_REGCOMP int posn; /* reg exp error at this offset */ # endif +# if SLANG_VERSION < 20000 + unsigned char UpCaseKey[sizeof(savepattern)]; +# else + int re_flags = 0; +# endif /* * Only recompile search string if it has changed @@ -124,6 +129,7 @@ old_Most_Case_Sensitive = Most_Case_Sensitive; +# if SLANG_VERSION < 20000 if ( Most_Case_Sensitive == 0 ) { register unsigned char *p; /* ptr to UpCaseKey */ @@ -141,6 +147,7 @@ *p = '\0'; } +# endif strcpy((char *)savepattern, (char *)key); @@ -162,7 +169,12 @@ # else if (Regexp != NULL) SLregexp_free (Regexp); - if (NULL == (Regexp = SLregexp_compile ((char *)key, Most_Case_Sensitive ? 
0 : SLREGEXP_CASELESS))) + + if (!Most_Case_Sensitive) + re_flags |= SLREGEXP_CASELESS; + if (Most_UTF8_Mode) + re_flags |= SLREGEXP_UTF8; + if (NULL == (Regexp = SLregexp_compile ((char *)key, re_flags))) posn = -1; else posn = 0; @@ -187,7 +199,7 @@ * Call the appropriate regular expression execute function */ -static unsigned char *do_regexec(unsigned char *string) +static unsigned char *do_regexec(const unsigned char *string, size_t length) { # ifdef HAVE_V8_REGCOMP if ( regexec(regpattern, (char *)string) ) @@ -196,40 +208,46 @@ return( NULL ); # else # if SLANG_VERSION < 20000 - return ( SLang_regexp_match(string, strlen((char *)string), &regdata) ); + return ( SLang_regexp_match(string, length, &regdata) ); # else - return (unsigned char *)SLregexp_match (Regexp, (char *)string, strlen ((char *)string)); + return (unsigned char *)SLregexp_match (Regexp, (char *)string, length); # endif # endif /* HAVE_V8_REGCOMP */ } /* - * Make a upper case copy of a string. Also changes any "c\b" character - * strings into just "" so that highlighted and underlined characters - * can be searched. + * Changes any "c\b" character strings into just "" so that + * highlighted and underlined characters can be searched. Stores in + * length the new size of the string, after the aforementioned + * changes. + * + * If using a version of S-Lang that does not support case + * insensitive regular expressions, this function upper cases the + * input string, as well. * * Reuses malloced memory, so a copy cannot be retained between calls.
*/ -static unsigned char *StrUpCaseCopy(unsigned char *input) +static const unsigned char *StrNormCopy(const unsigned char *input, + size_t *length) { static unsigned char *uppercase; /* ptr to malloced area */ static size_t bufsize; /* size of malloced area */ - unsigned char *src; /* ptr to source */ + const unsigned char *src; /* ptr to source */ + const unsigned char *end; /* ptr to end of source */ register unsigned char *dest; /* ptr to destination */ register int idx; /* index into uppercase[] */ - register unsigned char c; /* source character */ - size_t length; /* size of string to copy */ src = input; - length = strlen((char *)src) + 1; /* len of line plus terminator */ + end = input + *length; - if ( length > bufsize ) + if ( *length >= bufsize ) { if ( uppercase != (unsigned char *)NULL ) free(uppercase); - bufsize = (length > 256 ) ? length : 256; /* 256 byte default */ + /* len of line plus terminator */ + bufsize = (*length >= 256 ) ? *length + 1 : 256; /* 256 byte default */ uppercase = (unsigned char *)malloc(bufsize); if ( uppercase == (unsigned char *)NULL ) @@ -242,8 +260,9 @@ dest = uppercase; - for ( idx = 0 ; (c = *src) != '\0' ; src++ ) + for ( idx = 0 ; src < end ; src++ ) { + unsigned char c = *src; if ( c == '\b' ) /* backspace */ { if ( idx-- > 0 ) @@ -252,54 +271,79 @@ else { if ( idx++ >= 0 ) - *dest++ = UPCASE(c); + *dest++ = NORM_CHAR(c); } } *dest = '\0'; /* add termination */ + *length = dest - uppercase; return(uppercase); } /* - * Given an offset into a copy made by StrUpCaseCopy() and a pointer to the + * Given an offset into a copy made by StrNormCopy() and a pointer to the * original string, returns a pointer into the original string corresponding * to this offset. 
*/ -static unsigned char *GetOrigPtr(unsigned char *original, int offset) +static const unsigned char * +GetOrigPtr(const unsigned char *original, int offset, + const unsigned char *end) { - register unsigned char *p = original; + const unsigned char *p = original; register int j = offset; /* * Step through, adjusting offset according to backspaces found */ - while ( *p != '\0' ) + while ( p < end ) { + const unsigned char *next; + if (Most_UTF8_Mode) + next = SLutf8_skip_char((unsigned char*) p, (unsigned char*) end); + else + next = p + 1; + size_t length_last = next - p; + if ( *p == '\b' ) - j++; + j += length_last; else - j--; + j -= length_last; if ( j < 0 ) break; else - p++; + p = next; } return(p); } #endif /* HAVE_V8_REGCOMP || SLANG_REGEXP */ +/* Returns a pointer to the first occurrence of '\n' in string beg, or + * end if no '\n' can be found between inclusive beg and exclusive + * end. + */ +static const unsigned char * +find_eol(const unsigned char *beg, const unsigned char *end) +{ + const unsigned char *p; + if ( (p = memchr(beg, '\n', end - beg)) != NULL) + return p; + else + return end; +} + /* This routine returns the 1 + position of first match of key in str. key is modified to match the case of str. */ /* We should try to optimize this routine */ /* searches from beg up to but not including end */ -static unsigned char *forw_search_region(unsigned char *beg, - unsigned char *end, - unsigned char *key) +static const unsigned char * +forw_search_region(const unsigned char *beg, + const unsigned char *end, + const unsigned char *key) { #if defined(HAVE_V8_REGCOMP) || defined(SLANG_REGEXP) /* @@ -307,10 +351,11 @@ * to be broken into lines. 
* */ - unsigned char *p; /* temp pointer */ - unsigned char *linebeg; /* beginning of working line */ - unsigned char *copy; /* ptr to upper case copy */ + const unsigned char *linebeg; /* beginning of working line */ + const unsigned char *lineend; /* end of working line */ + const unsigned char *norm_line; /* ptr to normalized line */ unsigned char *match; /* ptr to matching string */ + int anchored_re; /* * Compile "key" into an executable regular expression @@ -318,58 +363,35 @@ if ( do_regcomp(key) == 0 ) return(Most_Eob); - /* - * For regular expression searches we need to do a line by line - * search, so it is necessary to temporarily replace '\n' with '\0' - * characters. - */ - p = beg; - linebeg = beg; + anchored_re = key[0] == '^'; - while (linebeg < end) + for ( linebeg = beg ; linebeg < end ; linebeg = lineend + 1 ) { - while ((p < end) && (*p != '\n')) p++; - if (p == end) break; - *p = 0; + size_t length; - if ( Most_Case_Sensitive == 0 ) /* i.e. case insensitive */ - { - copy = StrUpCaseCopy(linebeg); - if ( copy == (unsigned char *)NULL ) - return(Most_Eob); - } + lineend = find_eol(linebeg, end); + + length = lineend - linebeg; + if (0 == length) continue; /* Skip empty lines. */ + + norm_line = StrNormCopy(linebeg, &length); + if ( norm_line == NULL ) + return(Most_Eob); /* * Quick sanity check for beginning of line archored tests. - * If 1st char of key is "^", then the character before linebeg (which - * must be beyond the start of the window), must be a "\n", - * otherwise do_regexec() isn't called. + * If 1st char of key is "^", then the character before + * linebeg (which must be within the buffer), must be a "\n". */ - if ( -# if 0 - ((*key != '^') - || (linebeg > Most_Win->beg_pos && linebeg[-1] == '\n')) - && -#endif - (match = do_regexec(Most_Case_Sensitive ? 
linebeg : copy))) + if ( !(anchored_re && (linebeg <= Most_Beg || linebeg[-1] != '\n')) + && (match = do_regexec(norm_line, length)) ) { - *p = '\n'; - if ( Most_Case_Sensitive == 0 ) - { - /* - * Use offset into "copy" as idx to find point in - * real line. - */ - return( GetOrigPtr(linebeg, match - copy) ); - } - else - { - return( match ); - } + /* + * Use offset into "norm_line" as idx to find point in + * real line. + */ + return( GetOrigPtr(linebeg, match - norm_line, lineend) ); } - - *p++ = '\n'; - linebeg = p; } return(Most_Eob); @@ -475,23 +497,18 @@ * pattern "key". */ -static unsigned char *back_search_region(unsigned char *beg, - unsigned char *end, - unsigned char *key) +static const unsigned char * +back_search_region(const unsigned char *beg, + const unsigned char *end, + const unsigned char *key) { #if defined(HAVE_V8_REGCOMP) || defined(SLANG_REGEXP) - register unsigned char *p; - unsigned char *endp, /* end of line */ - *lastmatch, /* last match in line */ + const unsigned char *p; + const unsigned char *endp, /* end of line */ + *lastmatch, /* last match in line */ *endprevline, /* end of line before this one */ *match; /* ptr to matching string */ - unsigned char savec; /* last char on line */ - - /* - * Compile "key" into an executable regular expression - */ - if ( do_regcomp(key) == 0 ) - return(Most_Eob); + int anchored_re; /* * Starting from the end of the buffer, break the buffer into lines @@ -501,84 +518,60 @@ * and isn't that what we want to do in a reverse search. */ endp = end; - lastmatch = Most_Eob; - while ( 1 ) /* forever loop */ - { - if ( (endp < beg) ) - return(Most_Eob); /* Reach start of buffer, no match */ + endprevline = end; + match = Most_Eob; - /* Find the real end of current line */ - if ( (p = (unsigned char *)strchr((char *)endp, '\n')) != NULL ) - endp = p; - - savec = *endp; - *endp = '\0'; /* terminate line with NULL */ + /* Find out whether the regexp attempts to match a line boundary. 
+ * In this case, only a match on the full line should be attempted. + */ + anchored_re = key[strlen((const char *) key)-1] == '$'; + while ( endp > beg ) + { /* Find the beginning of line */ for ( p = endp - 1 ; (p >= beg) && (*p != '\n') ; p-- ) { } + if ( p < beg ) + break; + endprevline = p; - p++; /* point to 1st char after newline */ + /* + * Quick sanity check for end of line anchored tests. If last + * char of key is "$", then the character after endp (which + * must be within the buffer), must be a "\n". + */ + if ( anchored_re && endp < Most_Eob && endp[0] != '\n' ) + { + endp = p; + continue; + } /* * Keep searching forward in this line till no more matches */ - if ( Most_Case_Sensitive == 0 ) /* i.e. case insensitive */ + do { - unsigned char *copy; /* ptr to upper case copy */ - unsigned char *savecopy; /* copy of "copy" */ - - copy = StrUpCaseCopy(p); - if ( copy == (unsigned char *)NULL ) - return(Most_Eob); + lastmatch = match; - savecopy = copy; - - /* - * Quick sanity check for beginning of line archored tests. - * Must be at start of line. - */ - while ( ((*key != '^') || (copy == savecopy)) - && (match = do_regexec(copy)) ) - { - if ( GetOrigPtr(p, match - savecopy) > end ) - break; - lastmatch = match; - if ( *lastmatch == '\0' ) /* key must be "$" or "^" */ - break; - copy = lastmatch + 1; /* character after match */ - } + if (Most_UTF8_Mode) + p = SLutf8_skip_char((unsigned char*) p, (unsigned char*) endp); + else + p++; - if ( lastmatch != Most_Eob ) /* found a match */ - lastmatch = GetOrigPtr(p, lastmatch - savecopy); - } - else - { - /* - * Quick sanity check for beginning of line archored tests.
- * Must be at start of buffer or start of line - */ - while ( ( (*key != '^') || (p == endprevline + 1) ) - && (match = do_regexec(p)) ) - { - if ( match > end ) - break; - lastmatch = match; - if ( *lastmatch == '\0' ) /* key must be "$" or "^" */ - break; - p = lastmatch + 1; /* character after match */ - } + match = forw_search_region(p, endp, key); } + while ( match <= endp ); - *endp = savec; if ( lastmatch != Most_Eob ) /* found a match */ return(lastmatch); endp = endprevline; } + + return(Most_Eob); /* Reached start of buffer, no match */ #else char ch, char1, work[256]; unsigned char *pos; @@ -670,15 +663,15 @@ #endif /* HAVE_V8_REGCOMP || SLANG_REGEXP */ } -int most_search(unsigned char *from, int repeat, int *col) +long long most_search(const unsigned char *from, int repeat, long long *col) { /* return the line match was found as well as line number, * search from i on; assume that line_array match the i so we need * no initial lookup */ - int test, save_line, the_col, row, s_len; + long long test, save_line, the_col, row, s_len; char string[300]; - unsigned char *pos; + const unsigned char *pos; unsigned int save_ofs; unsigned int found_ofs; @@ -687,7 +680,10 @@ save_line = Most_C_Line; found_ofs = Most_Eob - Most_Beg; *col = 0; - s_len = strlen (Most_Search_Str); + if (Most_UTF8_Mode) + s_len = SLutf8_strlen (Most_Search_Str, 0); + else + s_len = strlen ((char *) Most_Search_Str); pos = from; if (*Most_Search_Str) --- most-4.10.2.orig/src/search.h +++ most-4.10.2/src/search.h @@ -6,7 +6,7 @@ extern int Most_Case_Sensitive; extern int Most_Search_Dir; #define MOST_SEARCH_BUF_LEN 256 -extern char Most_Search_Str[MOST_SEARCH_BUF_LEN]; -extern int most_search(unsigned char *, int, int *); +extern unsigned char Most_Search_Str[MOST_SEARCH_BUF_LEN]; +extern long long most_search(const unsigned char *, int, long long *); #endif --- most-4.10.2.orig/src/window.c +++ most-4.10.2/src/window.c @@ -48,10 +48,11 @@ Most_Window_Type *Most_Win; Most_Window_Type 
*Most_Top_Win; -int Most_Top_Line; /* row number of top window */ -int Most_Curs_Row; -int Most_Curs_Col; -int Most_Column = 1; +long long Most_Top_Line; /* row number of top window */ +long long Most_Curs_Row; +long long Most_Curs_Col; +/* The leftmost visible column. */ +long long Most_Column = 1; int Most_Restore_Width_To = 0; char Most_Mini_Buf[256]; unsigned long Most_Curs_Offset; @@ -156,7 +157,7 @@ SLsmg_write_nchars (buf + point, len - point); if (col < SLtt_Screen_Cols) break; - buf++; point--; len--; /* FIXME for UTF-8 */ + buf++; point--; len--; } SLsmg_erase_eol (); SLsmg_gotorc (SLtt_Screen_Rows - 1, col); @@ -227,6 +228,8 @@ { SLang_RLine_Info_Type *rli; unsigned int flags = SL_RLINE_BLINK_MATCH; + if (Most_UTF8_Mode) + flags |= SL_RLINE_UTF8_MODE; if (NULL == (rli = SLrline_open (SLtt_Screen_Cols, flags))) return NULL; @@ -258,7 +261,7 @@ /* do not use default. The up arrow can always get it back. */ if ((what != NULL) - && (*what) && (what != Most_Search_Str)) + && (*what) && (what != (char *) Most_Search_Str)) { if (-1 == SLrline_set_line (Most_RLI, what)) return -1; @@ -593,7 +596,8 @@ unsigned int num_chars; unsigned int field_width, info_len; unsigned char *eob; - int r, x; + int r; + long long x; eob = Most_Eob; #if MOST_HAS_MMAP @@ -601,20 +605,20 @@ eob = Most_Beg + Most_Buf->mmap_size; #endif - if (eob == Most_Beg) x = 100; + if (eob == Most_Beg) x = 100LL; else { - x = Most_C_Offset * 100; + x = Most_C_Offset * 100LL; x = x / (eob - Most_Beg); } if (Most_C_Line + (Most_Win->bot - Most_Win->top + 1) >= Most_Num_Lines) - x = 100; + x = 100LL; /* for files with end of file above the bottom row (due to window manipulations) */ - if (x > 100) x = 100; + if (x > 100LL) x = 100LL; - sprintf (info, "(%d,%d) %d%%", Most_C_Line, Most_Column, x); + sprintf (info, "(%lld,%lld) %d%%", Most_C_Line, Most_Column, (int)x); r = Most_Win->bot + 1; most_goto_rc (r,1); --- most-4.10.2.orig/src/window.h +++ most-4.10.2/src/window.h @@ -5,11 +5,11 @@ typedef 
struct _Most_Window_Type { unsigned long top_offset; /* offset of top line of window from bob */ - int beg_line; /* line number of top */ - int curs_line; /* line number of curs pos */ - int curs_col; /* column number of curs pos */ + long long beg_line; /* line number of top */ + long long curs_line; /* line number of curs pos */ + long long curs_col; /* column number of curs pos */ unsigned long curs_offset; /* pos of cursor from bob*/ - int col; /* column offset */ + long long col; /* column offset */ int n_lines; /* number of lines of buffer (mode dependent) */ int top; /* screen location of top */ int bot; /* screen location of bot */ @@ -29,9 +29,9 @@ extern Most_Window_Type *Most_Win; extern Most_Window_Type *Most_Top_Win; extern unsigned long Most_Curs_Offset; -extern int Most_Column; -extern int Most_Curs_Row; -extern int Most_Curs_Col; +extern long long Most_Column; +extern long long Most_Curs_Row; +extern long long Most_Curs_Col; #define MOST_MINI_BUF_LEN 256 extern char Most_Mini_Buf[MOST_MINI_BUF_LEN]; extern int Most_Selective_Display;