#36318 less: [PATCH] backward scrolling in binary files is very slow

Package:
less
Source:
less
Description:
pager program similar to more
Submitter:
Charles Briscoe-Smith
Date:
2005-07-18 03:17:47 UTC
Severity:
normal
#36318#5
Date:
1999-04-19 12:58:44 UTC
From:
To:
When scrolling backward in files which contain very long lines (such as
binary files), less spends a lot of time reading backwards and forwards
through such lines.

I finally got fed up with this, and fixed it.  Here is my fix, which uses
a cache to keep track of the positions of the start of lines on screen.
I'm including two versions.  The first uses a fixed-size cache, which
means it won't use too much memory, but it can still be slow in the
presence of -excessively- long lines.  The second is a variation which
grows the cache to accomodate the longest line encountered; this could
use lots of memory in pathological cases, but it gives noticeably
smoother reverse scrolling.  I'd suggest using the second version,
with the expanding cache.

I'd be grateful if you would forward this to less's upstream maintainers.

Fixed-size cache:

*******************************************************************************
--- input.c.orig	Wed Oct 30 16:34:17 1996
+++ input.c.fixed	Sun Apr 18 15:34:49 1999
@@ -178,6 +178,9 @@
 	POSITION new_pos, begin_new_pos;
 	int c;
 	int endline;
+	static POSITION cache_back[25];
+	static int cache_back_cap=25;
+	static int cache_back_len=0;

 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero())
 	{
@@ -228,6 +231,44 @@
 	}

 	/*
+	 * Check if the current position is already in the cache.
+	 */
+	for (c=1; c<cache_back_len; c++)
+	{
+		if (cache_back[c] == curr_pos)
+		{
+			/*
+			 * It is -- now we're expected to fetch the line.
+			 */
+			begin_new_pos = new_pos = cache_back[c-1];
+			prewind();
+			plinenum(new_pos);
+			(void) ch_seek(new_pos);
+			for (;;)
+			{
+				c = ch_forw_get();
+				if (c == EOI || ABORT_SIGS())
+				{
+					null_line();
+					return (NULL_POSITION);
+				}
+				new_pos++;
+				if (c == '\n')
+				{
+					pdone(1);
+					return (begin_new_pos);
+				}
+				if (pappend(c, ch_tell()-1)
+				    || new_pos >= curr_pos)
+				{
+					pdone(0);
+					return (begin_new_pos);
+				}
+			}
+		}
+	}
+
+	/*
 	 * Scan backwards until we hit the beginning of the line.
 	 */
 	for (;;)
@@ -260,6 +301,12 @@
 	}

 	/*
+	 * If we got this far, we can discard the contents of the
+	 * back-scroll cache, in preparation for rebuilding it.
+	 */
+	cache_back_len = 0;
+
+	/*
 	 * Now scan forwards from the beginning of this line.
 	 * We keep discarding "printable lines" (based on screen width)
 	 * until we reach the curr_pos.
@@ -274,6 +321,7 @@
 		return (NULL_POSITION);
 	}
 	endline = 0;
+	cache_back[cache_back_len++]=new_pos;
     loop:
 	begin_new_pos = new_pos;
 	prewind();
@@ -309,6 +357,18 @@
 			pdone(0);
 			(void) ch_back_get();
 			new_pos--;
+			/*
+			 * Add the position to the cache; if the cache
+			 * is full, discard all but one position, and
+			 * start again.  (Yes, we could scroll the cache,
+			 * but I doubt it would improve performance.)
+			 */
+			if (cache_back_len >= cache_back_cap)
+			{
+				cache_back[0] = cache_back[cache_back_len-1];
+				cache_back_len = 1;
+			}
+			cache_back[cache_back_len++] = new_pos;
 			goto loop;
 		}
 	} while (new_pos < curr_pos);
*******************************************************************************

Expanding cache:

*******************************************************************************
--- input.c.orig	Wed Oct 30 16:34:17 1996
+++ input.c	Sun Apr 18 15:42:34 1999
@@ -178,6 +178,9 @@
 	POSITION new_pos, begin_new_pos;
 	int c;
 	int endline;
+	static POSITION *cache_back;
+	static int cache_back_cap=0;
+	static int cache_back_len=0;

 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero())
 	{
@@ -228,6 +231,44 @@
 	}

 	/*
+	 * Check if the current position is already in the cache.
+	 */
+	for (c=1; c<cache_back_len; c++)
+	{
+		if (cache_back[c] == curr_pos)
+		{
+			/*
+			 * It is -- now we're expected to fetch the line.
+			 */
+			begin_new_pos = new_pos = cache_back[c-1];
+			prewind();
+			plinenum(new_pos);
+			(void) ch_seek(new_pos);
+			for (;;)
+			{
+				c = ch_forw_get();
+				if (c == EOI || ABORT_SIGS())
+				{
+					null_line();
+					return (NULL_POSITION);
+				}
+				new_pos++;
+				if (c == '\n')
+				{
+					pdone(1);
+					return (begin_new_pos);
+				}
+				if (pappend(c, ch_tell()-1)
+				    || new_pos >= curr_pos)
+				{
+					pdone(0);
+					return (begin_new_pos);
+				}
+			}
+		}
+	}
+
+	/*
 	 * Scan backwards until we hit the beginning of the line.
 	 */
 	for (;;)
@@ -260,6 +301,12 @@
 	}

 	/*
+	 * If we got this far, we can discard the contents of the
+	 * back-scroll cache, in preparation for rebuilding it.
+	 */
+	cache_back_len = 0;
+
+	/*
 	 * Now scan forwards from the beginning of this line.
 	 * We keep discarding "printable lines" (based on screen width)
 	 * until we reach the curr_pos.
@@ -274,6 +321,13 @@
 		return (NULL_POSITION);
 	}
 	endline = 0;
+	if (cache_back_cap == 0)
+	{
+		cache_back_cap = 25;
+		cache_back = (POSITION *) ecalloc(cache_back_cap,
+		                                  sizeof(POSITION));
+	}
+	cache_back[cache_back_len++]=new_pos;
     loop:
 	begin_new_pos = new_pos;
 	prewind();
@@ -309,6 +363,24 @@
 			pdone(0);
 			(void) ch_back_get();
 			new_pos--;
+			/*
+			 * Add this position to the cache; if the cache
+			 * is full, extend it.
+			 */
+			if (cache_back_len >= cache_back_cap)
+			{
+				cache_back_cap *= 2;
+				cache_back = (POSITION *) realloc(cache_back,
+					cache_back_cap*sizeof(POSITION));
+				if (cache_back == NULL)
+				{
+        				error("Cannot allocate memory",
+					      NULL_PARG);
+        				quit(QUIT_ERROR);
+        				/*NOTREACHED*/
+				}
+			}
+			cache_back[cache_back_len++] = new_pos;
 			goto loop;
 		}
 	} while (new_pos < curr_pos);
*******************************************************************************

Cheers,

#36318#8
Date:
1999-09-14 21:28:31 UTC
From:
To:
[Please preserve the CC to nnn-forwarded@bugs when replying]

I'm forwarding this report (mere wishlist than bug) on request of its
submitter, Charles Briscoe-Smith <cpbs@debian.org>:

When scrolling backward in files which contain very long lines (such as
binary files), less spends a lot of time reading backwards and forwards
through such lines.

I finally got fed up with this, and fixed it.  Here is my fix, which uses
a cache to keep track of the positions of the start of lines on screen.
I'm including two versions.  The first uses a fixed-size cache, which
means it won't use too much memory, but it can still be slow in the
presence of -excessively- long lines.  The second is a variation which
grows the cache to accomodate the longest line encountered; this could
use lots of memory in pathological cases, but it gives noticeably
smoother reverse scrolling.  I'd suggest using the second version,
with the expanding cache.

I'd be grateful if you would forward this to less's upstream maintainers.

Fixed-size cache:

*******************************************************************************
--- input.c.orig	Wed Oct 30 16:34:17 1996
+++ input.c.fixed	Sun Apr 18 15:34:49 1999
@@ -178,6 +178,9 @@
 	POSITION new_pos, begin_new_pos;
 	int c;
 	int endline;
+	static POSITION cache_back[25];
+	static int cache_back_cap=25;
+	static int cache_back_len=0;

 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero())
 	{
@@ -228,6 +231,44 @@
 	}

 	/*
+	 * Check if the current position is already in the cache.
+	 */
+	for (c=1; c<cache_back_len; c++)
+	{
+		if (cache_back[c] == curr_pos)
+		{
+			/*
+			 * It is -- now we're expected to fetch the line.
+			 */
+			begin_new_pos = new_pos = cache_back[c-1];
+			prewind();
+			plinenum(new_pos);
+			(void) ch_seek(new_pos);
+			for (;;)
+			{
+				c = ch_forw_get();
+				if (c == EOI || ABORT_SIGS())
+				{
+					null_line();
+					return (NULL_POSITION);
+				}
+				new_pos++;
+				if (c == '\n')
+				{
+					pdone(1);
+					return (begin_new_pos);
+				}
+				if (pappend(c, ch_tell()-1)
+				    || new_pos >= curr_pos)
+				{
+					pdone(0);
+					return (begin_new_pos);
+				}
+			}
+		}
+	}
+
+	/*
 	 * Scan backwards until we hit the beginning of the line.
 	 */
 	for (;;)
@@ -260,6 +301,12 @@
 	}

 	/*
+	 * If we got this far, we can discard the contents of the
+	 * back-scroll cache, in preparation for rebuilding it.
+	 */
+	cache_back_len = 0;
+
+	/*
 	 * Now scan forwards from the beginning of this line.
 	 * We keep discarding "printable lines" (based on screen width)
 	 * until we reach the curr_pos.
@@ -274,6 +321,7 @@
 		return (NULL_POSITION);
 	}
 	endline = 0;
+	cache_back[cache_back_len++]=new_pos;
     loop:
 	begin_new_pos = new_pos;
 	prewind();
@@ -309,6 +357,18 @@
 			pdone(0);
 			(void) ch_back_get();
 			new_pos--;
+			/*
+			 * Add the position to the cache; if the cache
+			 * is full, discard all but one position, and
+			 * start again.  (Yes, we could scroll the cache,
+			 * but I doubt it would improve performance.)
+			 */
+			if (cache_back_len >= cache_back_cap)
+			{
+				cache_back[0] = cache_back[cache_back_len-1];
+				cache_back_len = 1;
+			}
+			cache_back[cache_back_len++] = new_pos;
 			goto loop;
 		}
 	} while (new_pos < curr_pos);
*******************************************************************************

Expanding cache:

*******************************************************************************
--- input.c.orig	Wed Oct 30 16:34:17 1996
+++ input.c	Sun Apr 18 15:42:34 1999
@@ -178,6 +178,9 @@
 	POSITION new_pos, begin_new_pos;
 	int c;
 	int endline;
+	static POSITION *cache_back;
+	static int cache_back_cap=0;
+	static int cache_back_len=0;

 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero())
 	{
@@ -228,6 +231,44 @@
 	}

 	/*
+	 * Check if the current position is already in the cache.
+	 */
+	for (c=1; c<cache_back_len; c++)
+	{
+		if (cache_back[c] == curr_pos)
+		{
+			/*
+			 * It is -- now we're expected to fetch the line.
+			 */
+			begin_new_pos = new_pos = cache_back[c-1];
+			prewind();
+			plinenum(new_pos);
+			(void) ch_seek(new_pos);
+			for (;;)
+			{
+				c = ch_forw_get();
+				if (c == EOI || ABORT_SIGS())
+				{
+					null_line();
+					return (NULL_POSITION);
+				}
+				new_pos++;
+				if (c == '\n')
+				{
+					pdone(1);
+					return (begin_new_pos);
+				}
+				if (pappend(c, ch_tell()-1)
+				    || new_pos >= curr_pos)
+				{
+					pdone(0);
+					return (begin_new_pos);
+				}
+			}
+		}
+	}
+
+	/*
 	 * Scan backwards until we hit the beginning of the line.
 	 */
 	for (;;)
@@ -260,6 +301,12 @@
 	}

 	/*
+	 * If we got this far, we can discard the contents of the
+	 * back-scroll cache, in preparation for rebuilding it.
+	 */
+	cache_back_len = 0;
+
+	/*
 	 * Now scan forwards from the beginning of this line.
 	 * We keep discarding "printable lines"; (based on screen width)
 	 * until we reach the curr_pos.
@@ -274,6 +321,13 @@
 		return (NULL_POSITION);
 	}
 	endline = 0;
+	if (cache_back_cap == 0)
+	{
+		cache_back_cap = 25;
+		cache_back = (POSITION *) ecalloc(cache_back_cap,
+		                                  sizeof(POSITION));
+	}
+	cache_back[cache_back_len++]=new_pos;
     loop:
 	begin_new_pos = new_pos;
 	prewind();
@@ -309,6 +363,24 @@
 			pdone(0);
 			(void) ch_back_get();
 			new_pos--;
+			/*
+			 * Add this position to the cache; if the cache
+			 * is full, extend it.
+			 */
+			if (cache_back_len <= cache_back_cap)
+			{
+				cache_back_cap *= 2;
+				cache_back = (POSITION *) realloc(cache_back,
+					cache_back_cap*sizeof(POSITION));
+				if (cache_back == NULL)
+				{
+        				error("Cannot allocate memory",
+					      NULL_PARG);
+        				quit(QUIT_ERROR);
+        				/*NOTREACHED*/
+				}
+			}
+			cache_back[cache_back_len++] = new_pos;
 			goto loop;
 		}
 	} while (new_pos < curr_pos);
*******************************************************************************

Cheers,

#36318#15
Date:
2002-02-21 22:51:56 UTC
From:
To:
I just tried this patch, and it makes a huge improvement when navigating
files with extremely long lines.  Is there some reason not to apply it?

The patch doesn't quite apply cleanly to version 374, but here is one with a
trivial modification that will.

#36318#20
Date:
2002-02-22 17:21:05 UTC
From:
To:
I've forwarded it upstream quite some time ago, I don't know why Mark
hasn't applied it yet. I'm trying to keep the Debian package in sync
with upstream as much as possible so I did not apply the patch on my
own.

Thomas
--

#36318#25
Date:
2002-02-23 02:00:32 UTC
From:
To:
It may have been lost or overlooked; you may want to ping upstream about it.
Some co-workers recently asked me about this problem; I had seen the patch,
and was surprised to see that it hadn't been applied, since it worked quite
well for me.

#36318#30
Date:
2002-02-25 18:15:14 UTC
From:
To:
Hi Mark,

when scrolling backward in files which contain very long lines (such as
binary files), less spends a lot of time reading backwards and forwards
through such lines. There's a patch that fixes that written by
Charles Briscoe-Smith <cpbs@debian.org>, adapted to less-374 by
Matt Zimmerman <mdz@debian.org>.

I've attached the patch to this mail.

Thanks
Thomas

#36318#31
Date:
2002-05-14 17:07:17 UTC
From:
To:
Hi Charles,
I received your patch to optimize backwards scrolling in less with long
lines.  I haven't evaluated it in detail yet, but I have a question --
it doesn't seem that this deals with the window being resized.  If the
window is resized, the cache entries will no longer be valid, but I
don't see any code that handles this.  Am I missing something?
--Mark
----- Original Message ----- From: "Thomas Schoepf" <schoepf@debian.org> To: <bug-less@gnu.org> Cc: "Charles Briscoe-Smith" <cpbs@debian.org>; <36318-forwarded@bugs.debian.org> as forwards uses screen. maintainers. ************************************************************************ ******* ************************************************************************ ******* ************************************************************************ ******* ************************************************************************ *******
#36318#32
Date:
2002-05-14 21:36:13 UTC
From:
To:
Probably not; I don't remember considering window-size changes.  On the
other hand, I barely remember writing the code at all now, which,
according to the message datestamps, I did well over 2.5 years ago.
I apparently gave up patching it into new versions of less quite some
time ago in favour of just taking new Debian binary packages, and have
just been working around the backward-scrolling problem whenever I ran
into it instead.

From glancing over the code again, it looks as though you're right;
the cache should be invalidated on window-width changes.  I'd suggest
that changing certain flags (at least -[rRsSuUx], but there may well be
others) should also invalidate the cache.

HTH,