List Info

Thread: Parsing whole pages by regex




Parsing whole pages by regex
user name
2006-05-28 14:29:09
On 5/27/06, John Horner <john.hornergmail.com> wrote:
> what's the
> easiest way for me to find the right JavaScript
reference to the
> third nested table inside the second nested table
inside the fourth
> <CENTER> tag (not actually the address)?

You can use xpath to do queries just like this. For this,
it'd be
something like:

//center[4]//table[2]//table[3]

You can see more details here:

http://diveintogreasemonkey.org/patterns/match-attribute.html
http://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#XPathEvaluator-evaluate

Since you only want one element, you might change the
example query to:

var table = document.evaluate(
    '//center[4]//table[2]//table[3]',
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null).singleNodeValue;

You'll have to play with the expression a bit to get what
you want.
Here is a tutorial on xpath syntax:

http://www.zvon.org/xxl/XPathTutorial/General/examples.html

Hope this helps.

- a
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Inserting Row in Table
user name
2006-05-28 15:50:14
I need to insert a row with 22 cells.  I thought I'd make
life a little easier 
for myself, and just created a tr element, and stuck the
rest of the row in 
innerHTML.  For some reason, none of those TD's show up,
but the spans do.

OK, so do I have to create each of the 22 TD elements, and
append them to the 
TR?  What about the text?  Do I have to do the same thing
for the text part of 
the TD's?

Thanks,

A. Alfred Ayache
http://lphs76.ca            
- Reunion community
http://www.rentersPlus.com
   - Apartment Search
http://www.lastbyte.ca 
     - Web Design, eCommerce, PHP/MySQL, Java, Oracle
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Inserting Row in Table
user name
2006-05-28 16:15:43
On 5/28/06, A. Alfred Ayache <alfredlastbyte.ca> wrote:
> I need to insert a row with 22 cells.  I thought I'd
make life a little easier
> for myself, and just created a tr element, and stuck
the rest of the row in
> innerHTML.  For some reason, none of those TD's show
up, but the spans do.

They ought to.  Show us the code.  
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Inserting Row in Table
user name
2006-05-28 16:22:05
 >>>
	var newrow = document.createElement('tr');
	newrow.setAttribute("class", 'bgtbllgreen');
	var shtml =
		'<td
class="tabletitle3">&nbsp;</td>' +
		'<td class="tabletitle3"
align="left" colspan="3">Total IN
+ OG</td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td></td>' +
		'<td class="tabletitle3"
align="right" nowrap>$0.00</td>' +
		'<td class="tabletitle3"><span class="smalltext2">&nbsp;</span></td>' +
		'<td></td>' +
		'<td class="tabletitle3"
align="right" nowrap>$0.00</td>' +
		'<td class="tabletitle3"><span class="smalltext2">&nbsp;</span></td>' +
		'<td></td>' +
		'<td class="tabletitle3"
align="right" nowrap>$0.00</td>' +
		'<td></td>' +
		'<td class="tabletitle3"
align="right" nowrap>$0.00</td>'
	;
	newrow.innerHTML = shtml;
	var lastrow = rows[rows.length - 1];
	lastrow.parentNode.insertBefore(newrow, lastrow);
<<<

A. Alfred Ayache
http://lphs76.ca            
- Reunion community
http://www.rentersPlus.com
   - Apartment Search
http://www.lastbyte.ca 
     - Web Design, eCommerce, PHP/MySQL, Java, Oracle


Jeremy Dunck wrote:
> On 5/28/06, A. Alfred Ayache <alfredlastbyte.ca> wrote:
>> I need to insert a row with 22 cells.  I thought
I'd make life a 
>> little easier
>> for myself, and just created a tr element, and
stuck the rest of the 
>> row in
>> innerHTML.  For some reason, none of those TD's
show up, but the spans 
>> do.
> 
> They ought to.  Show us the code.  
> _______________________________________________
> Greasemonkey mailing list
> Greasemonkeymozdev.org
> http://mozdev.org/mailman/listinfo/greasemonkey
> 
> --------------------------------
> Spam/Virus scanning by CanIt Pro
> 
> For more information see
> http://www.kgbinternet.com/SpamFilter.htm
> 
> To control your spam filter, log in at
> http://filter.kgbinternet.com
> 
> 
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Inserting Row in Table
user name
2006-05-28 22:58:19
OK, here's how I ended up doing it:

 >>>
function makeRow() {
	newrow.setAttribute("class", 'bgtbllgreen');
	var aNew = new Array();
	for (i = 0; i < 18; i++) {
		var newEl = document.createElement('td');
		newEl.setAttribute('class', 'tabletitle3');
		newrow.appendChild(newEl);
		aNew[i] = newEl;
	}
	return(aNew);
}

	var newrow = document.createElement('tr');
	aNew = makeRow();
	aNew[1].setAttribute('colspan', '5');
	aNew[1].setAttribute('align', 'left');
	aNew[1].appendChild(document.createTextNode('Total IN(' +
arr['IN'][ARR_CNT] + 
') + OG(' + arr['OG'][ARR_CNT] + ')'));
	aNew[6].setAttribute('align', 'right');
	aNew[6].appendChild(document.createTextNode(makeMin(arr['IN'][ARR_SUM] + 
arr['OG'][ARR_SUM])));
	
	var lastrow = rows[rows.length - 1];
	lastrow.parentNode.insertBefore(newrow, lastrow);

	var newrow = document.createElement('tr');
	aNew = makeRow();
	aNew[1].setAttribute('colspan', '5');
	aNew[1].setAttribute('align', 'left');
	aNew[1].appendChild(document.createTextNode('Total CF(' +
arr['CF'][ARR_CNT] + 
')'));
	aNew[6].setAttribute('align', 'right');
	aNew[6].appendChild(document.createTextNode(makeMin(arr['CF'][ARR_SUM])));
	
	lastrow.parentNode.insertBefore(newrow, lastrow);
<<<

The makeRow() function needed to produce two outputs, newrow
and aNew.  So I 
compromised by creating newrow outside of the function and
returning aNew.  If 
anyone has a better way of doing this, I'm keen to hear
about it.

Thanks,

A. Alfred Ayache
http://lphs76.ca            
- Reunion community
http://www.rentersPlus.com
   - Apartment Search
http://www.lastbyte.ca 
     - Web Design, eCommerce, PHP/MySQL, Java, Oracle
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Parsing whole pages by regex
user name
2006-05-29 01:16:35
Thanks for that. Very useful. I was just scratching my head
over how
Platypus does it but that makes it easier.

Here's another question -- would it make any difference (to
the
page-loads-then-reloads issue) if I could compile my
regex-rewriting
userscript into an extension?

In other words, can extensions rewrite the HTML on the fly
before display,
or are they just the same as userscripts in that respect?

On 5/29/06, Aaron Boodman <zboogsgmail.com> wrote:
>
> On 5/27/06, John Horner <john.hornergmail.com> wrote:
> > what's the
> > easiest way for me to find the right JavaScript
reference to the
> > third nested table inside the second nested table
inside the fourth
> > <CENTER> tag (not actually the address)?
>
> You can use xpath to do queries just like this. For
this, it'd be
> something like:
>
> //center[4]//table[2]//table[3]
>
> You can see more details here:
>
> http://diveintogreasemonkey.org/patterns/match-attribute.html
> http://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#XPathEvaluator-evaluate
>
> Since you only want one element, you might change the
example query to:
>
> var table = document.evaluate(
>     '//center[4]//table[2]//table[3]',
>     document,
>     null,
>     XPathResult.FIRST_ORDERED_NODE_TYPE,
>     null).singleNodeValue;
>
> You'll have to play with the expression a bit to get
what you want.
> Here is a tutorial on xpath syntax:
>
> http://www.zvon.org/xxl/XPathTutorial/General/examples.html
>
> Hope this helps.
>
> - a
> _______________________________________________
> Greasemonkey mailing list
> Greasemonkeymozdev.org
> http://mozdev.org/mailman/listinfo/greasemonkey
>
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Parsing whole pages by regex
user name
2006-05-29 01:44:15
On 5/28/06, john <john.hornergmail.com> wrote:
> In other words, can extensions rewrite the HTML on the
fly before display,
> or are they just the same as userscripts in that
respect?

Extensions have exactly the same limitation.

- a
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
Inserting Row in Table
user name
2006-06-08 09:22:44
How did you define the "rows" object?
One must be careful not to mix innerHTML="...",
appendChild() and
insertRow().

Consider this:

aRow = document.createElement("TR") // create an HTMLTableRowElement

Then:

aRow.innerHTML="<td>XXX</td><td>YYY</td>"
aRow.cells.length
0
aRow.childNodes.length
1
aRow.innerHTML
XXXYYY

This is because the _string_
"<td>XXX</td><td>YYY</td>" is being inserted
into a place where the row object would expect a Cell
object:

aRow.insertCell(document.createElement("TD"))

aRow.childNodes.length
2
aRow.cells.length
1
aRow.innerHTML
XXXYYY<td></td>

The same happens at the table level when inserting rows. 
Appending rows to
the table through the rows property just adds empty rows
_after_ any
innerHTML you have previously inserted (and, in addition,
not directly into
the table but it creates a TBODY).

I've experimented with appending rows through the rows property
into a table
defined with innerHTML, and I've observed that only empty
rows were
appended, all of them within an automatically created TBODY,
this one coming
_after_ the already available innerHTML code.  Of course,
empty rows were
not visible ;)

Maybe this can shed some light on your problem?

2006/5/29, A. Alfred Ayache <alfredlastbyte.ca>:
>
> OK, here's how I ended up doing it:
>
> >>>
> function makeRow() {
>         newrow.setAttribute("class",
'bgtbllgreen');
>         var aNew = new Array();
>         for (i = 0; i < 18; i++) {
>                 var newEl =
document.createElement('td');
>                 newEl.setAttribute('class',
'tabletitle3');
>                 newrow.appendChild(newEl);
>                 aNew[i] = newEl;
>         }
>         return(aNew);
> }
>
>         var newrow = document.createElement('tr');
>         aNew = makeRow();
>         aNew[1].setAttribute('colspan', '5');
>         aNew[1].setAttribute('align', 'left');
>        
aNew[1].appendChild(document.createTextNode('Total IN(' +
> arr['IN'][ARR_CNT] +
> ') + OG(' + arr['OG'][ARR_CNT] + ')'));
>         aNew[6].setAttribute('align', 'right');
>        
aNew[6].appendChild(document.createTextNode(makeMin(arr['IN'][ARR_SUM]
> +
> arr['OG'][ARR_SUM])));
>
>         var lastrow = rows[rows.length - 1];
>         lastrow.parentNode.insertBefore(newrow,
lastrow);
>
>         var newrow = document.createElement('tr');
>         aNew = makeRow();
>         aNew[1].setAttribute('colspan', '5');
>         aNew[1].setAttribute('align', 'left');
>        
aNew[1].appendChild(document.createTextNode('Total CF(' +
> arr['CF'][ARR_CNT] +
> ')'));
>         aNew[6].setAttribute('align', 'right');
>         aNew[6].appendChild(document.createTextNode
> (makeMin(arr['CF'][ARR_SUM])));
>
>         lastrow.parentNode.insertBefore(newrow,
lastrow);
> <<<
>
> The makeRow() function needed to produce two outputs,
newrow and aNew.  So
> I
> compromised by creating newrow outside of the function
and returning
> aNew.  If
> anyone has a better way of doing this, I'm keen to
hear about it.
>
> Thanks,
>
> A. Alfred Ayache
> http://lphs76.ca        
    - Reunion community
> http://www.rentersPlus.com
   - Apartment Search
> http://www.lastbyte.ca    
  - Web Design, eCommerce, PHP/MySQL, Java,
> Oracle
> _______________________________________________
> Greasemonkey mailing list
> Greasemonkeymozdev.org
> http://mozdev.org/mailman/listinfo/greasemonkey
>
_______________________________________________
Greasemonkey mailing list
Greasemonkeymozdev.org
http://mozdev.org/mailman/listinfo/greasemonkey
[1-8]

about | contact  Other archives ( Real Estate discussion Medical topics )