C# - Using XPath and WebBrowser Control to select multiple nodes
In a C# WinForms sample application I have used the WebBrowser control and javascript-xpath to select a single node and change that node's .innerHTML, using the following code:
// Loads the sample page into the WebBrowser control. The page references the
// javascript-xpath library so that document.evaluate is available to scripts.
private void MainForm_Load(object sender, EventArgs e)
{
    webBrowser1.DocumentText = @"
<html>
<head>
<script src=""http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js""></script>
</head>
<body>
<img alt=""0764547763 product details"" src=""http://ecx.images-amazon.com/images/i/51ak1mrii7l._aa160_.jpg"">
<hr/>
<h2>product details</h2>
<ul>
<li><b>paperback:</b> 648 pages</li>
<li><b>publisher:</b> wiley; unlimited edition edition (october 15, 2001)</li>
<li><b>language:</b> english</li>
<li><b>isbn-10:</b> 0764547763</li>
</ul>
</body>
</html>
";
}

// Selects the first <li> with an XPath query evaluated inside the page and
// rewrites its markup as green, upper-cased text.
private void cmdTest_Click(object sender, EventArgs e)
{
    string xpath = "//li";

    // JavaScript is case-sensitive: XPathResult, FIRST_ORDERED_NODE_TYPE and
    // singleNodeValue must be spelled exactly like this for the script to run.
    string code = string.Format(
        "document.evaluate('{0}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;",
        xpath);

    var li = webBrowser1.Document.InvokeScript("eval", new object[] { code }) as mshtml.IHTMLElement;
    if (li == null)
    {
        // Nothing matched, or the script result is not an HTML element.
        return;
    }

    li.innerHTML = string.Format(
        "<span style='text-transform: uppercase;font-family:verdana;color:green;'>{0}</span>",
        li.innerText);
}
The result of running this code is the following:
Now I'd like to use the same technique to select multiple <li>
nodes under the <ul>
node, so I'm writing:
// Attempt to fetch all nodes under <ul> in one call. The script result is
// requested as an XPath node iterator; the `as` cast to IHTMLElementCollection
// does not match that result, so allli ends up null (as described below).
xpath = "//ul//*";
code = string.Format(
    "document.evaluate('{0}', document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);",
    xpath);
var allli = webBrowser1.Document.InvokeScript("eval", new object[] { code }) as mshtml.IHTMLElementCollection;
But the returned value of the allli
variable is null
.
If I write
// Same query as before, but the script result is kept as a plain object
// (no cast), so whatever the script returns is preserved.
xpath = "//ul//*";
code = string.Format(
    "document.evaluate('{0}', document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);",
    xpath);
var allli = webBrowser1.Document.InvokeScript("eval", new object[] { code });
then the returned allli
variable isn't null, and its value has the type of a COM object.
Which more specific type this COM object
can be cast to is unclear to me.
Is there a way to select multiple nodes using the technique used here?
[edited]
xpath = "ul//*";
to
xpath = "//ul//*";
[addition]
I have added two JavaScript functions to the sample HTML:
<script type=""text/javascript""> function getelementstext (xpath) { var xpathres = document.evaluate ( xpath, document, null, xpathresult.ordered_node_iterator_type, null); var nextelement = xpathres.iteratenext (); var text = """"; while (nextelement) { text += nextelement.innertext; nextelement = xpathres.iteratenext (); } return text; }; function getelements (xpath) { var xpathres = document.evaluate ( xpath, document, null, xpathresult.ordered_node_iterator_type, null); var nextelement = xpathres.iteratenext (); var elements = new object(); var elementindex = 1; while (nextelement) { elements[elementindex++] = nextelement; nextelement = xpathres.iteratenext (); } return elements; }; </script>
Now when I'm running the following C# code line within the cmdtest_click
method:
// Calls the page's getelementstext helper through eval and receives the result of the script.
var text = webBrowser1.Document.InvokeScript("eval", new object[] { "getelementstext('//ul')" });
I'm getting the text of the li
elements:
"paperback: 648 pages \r\npublisher: wiley; unlimited edition edition (october 15, 2001) \r\nlanguage: english \r\nisbn-10: 0764547763 "
And when I'm running the following C# code line within the cmdtest_click
method:
// Calls the page's getelements helper through eval; the result comes back as an untyped object.
var elements = webBrowser1.Document.InvokeScript("eval", new object[] { "getelements('//ul')" });
I'm getting a COM object
, which I cannot cast to IEnumerable<mshtml.IHTMLElement>
.
Is there a way to process, within C# code, the JavaScript collection of HTML nodes returned by
// Calls the page's getelements helper through eval; the result comes back as an untyped object.
var elements = webBrowser1.Document.InvokeScript("eval", new object[] { "getelements('//ul')" });
?
I have found a solution; here is the code:
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Reflection;
using System.Windows.Forms;

namespace MyTest.WinFormsApp
{
    /// <summary>
    /// Sample form hosting a WebBrowser control whose list items are restyled
    /// by selecting them with XPath queries.
    /// </summary>
    public partial class MainForm : Form
    {
        public MainForm()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Loads the sample product-details page into the WebBrowser control.
        /// </summary>
        private void MainForm_Load(object sender, EventArgs e)
        {
            webBrowser1.DocumentText = @"
<html>
<body>
<img alt=""0764547763 product details"" src=""http://ecx.images-amazon.com/images/i/51ak1mrii7l._aa160_.jpg"">
<hr/>
<h2>product details</h2>
<ul>
<li><b>paperback:</b> 648 pages</li>
<li><b>publisher:</b> wiley; unlimited edition edition (october 15, 2001)</li>
<li><b>language:</b> english</li>
<li><b>isbn-10:</b> 0764547763</li>
</ul>
</body>
</html>
";
        }

        /// <summary>
        /// Colors the first list item green and every subsequent list item blue.
        /// </summary>
        private void cmdTest_Click(object sender, EventArgs e)
        {
            var processor = new WebBrowserControlXPathQueriesProcessor(webBrowser1);

            // Change attributes of the first element of the list.
            mshtml.IHTMLElement firstItem = processor.GetHtmlElement("//li");
            if (firstItem != null)
            {
                firstItem.innerHTML = FormatHighlighted(firstItem.innerText, "green");
            }

            // Change attributes of the second and subsequent elements of the list.
            foreach (mshtml.IHTMLElement li in processor.GetHtmlElements("//ul//li").Skip(1))
            {
                li.innerHTML = FormatHighlighted(li.innerText, "blue");
            }
        }

        /// <summary>
        /// Wraps <paramref name="text"/> in an upper-casing, colored span.
        /// </summary>
        private static string FormatHighlighted(string text, string color)
        {
            return string.Format(
                "<span style='text-transform: uppercase;font-family:verdana;color:{0};'>{1}</span>",
                color,
                text);
        }

        /// <summary>
        /// Enables the IE WebBrowser control to evaluate XPath queries by
        /// injecting http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js
        /// and returns the XPath query results to the calling C# code typed as
        /// mshtml.IHTMLElement and IEnumerable&lt;mshtml.IHTMLElement&gt;.
        /// </summary>
        public class WebBrowserControlXPathQueriesProcessor
        {
            private const string XPathLibraryUrl =
                "http://svn.coderepos.org/share/lang/javascript/javascript-xpath/trunk/release/javascript-xpath-latest-cmp.js";

            // Id given to the injected helper <script>, so that a second
            // processor created for the same document does not inject it again.
            private const string HelperScriptId = "xpathQueriesProcessorHelpers";

            // Helper functions called by name through InvokeScript. Passing the
            // XPath as a real argument (instead of splicing it into an eval
            // string) keeps queries that contain quotes working.
            private const string HelperScript = @"
function getElement (xpath)
{
    return document.evaluate (xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
};
function getElements (xpath)
{
    var xpathRes = document.evaluate (xpath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    var nextElement = xpathRes.iterateNext ();
    var elements = new Object();
    var elementIndex = 1;
    while (nextElement)
    {
        elements[elementIndex++] = nextElement;
        nextElement = xpathRes.iterateNext ();
    }
    elements.length = elementIndex - 1;
    return elements;
};
";

            private readonly System.Windows.Forms.WebBrowser _webBrowser;

            /// <summary>
            /// Creates a processor for the document currently loaded in
            /// <paramref name="webBrowser"/> and injects the required scripts.
            /// </summary>
            /// <exception cref="ArgumentNullException"><paramref name="webBrowser"/> is null.</exception>
            /// <exception cref="InvalidOperationException">No document is loaded, or it has no head element.</exception>
            public WebBrowserControlXPathQueriesProcessor(System.Windows.Forms.WebBrowser webBrowser)
            {
                if (webBrowser == null)
                {
                    throw new ArgumentNullException("webBrowser");
                }

                _webBrowser = webBrowser;
                InjectScripts();
            }

            /// <summary>
            /// Appends the javascript-xpath library and the helper functions to
            /// the document's head, unless the helpers are already present.
            /// </summary>
            private void InjectScripts()
            {
                // Credit: http://stackoverflow.com/questions/7998996/how-to-inject-javascript-in-webbrowser-control
                HtmlDocument document = _webBrowser.Document;
                if (document == null)
                {
                    throw new InvalidOperationException("The WebBrowser control has no document loaded.");
                }

                if (document.GetElementById(HelperScriptId) != null)
                {
                    return;
                }

                HtmlElementCollection heads = document.GetElementsByTagName("head");
                if (heads.Count == 0)
                {
                    throw new InvalidOperationException("The loaded document has no head element to inject scripts into.");
                }

                HtmlElement head = heads[0];

                // NOTE(review): the library is referenced by URL, so it may not be
                // available yet when the first query runs - confirm the timing.
                HtmlElement libraryScript = document.CreateElement("script");
                ((mshtml.IHTMLScriptElement)libraryScript.DomElement).src = XPathLibraryUrl;
                head.AppendChild(libraryScript);

                HtmlElement helperScript = document.CreateElement("script");
                helperScript.Id = HelperScriptId;
                ((mshtml.IHTMLScriptElement)helperScript.DomElement).text = HelperScript;
                head.AppendChild(helperScript);
            }

            /// <summary>
            /// Gets the first HTML element matched by an XPath query.
            /// </summary>
            /// <param name="xpathQuery">The XPath expression to evaluate against the document.</param>
            /// <returns>
            /// The matched element, or null when the script result is not an
            /// mshtml.IHTMLElement (for example when nothing matched).
            /// </returns>
            public mshtml.IHTMLElement GetHtmlElement(string xpathQuery)
            {
                return _webBrowser.Document.InvokeScript("getElement", new object[] { xpathQuery }) as mshtml.IHTMLElement;
            }

            /// <summary>
            /// Gets the HTML elements matched by an XPath query. The query runs
            /// when the returned sequence is enumerated.
            /// </summary>
            /// <param name="xpathQuery">The XPath expression to evaluate against the document.</param>
            /// <returns>
            /// The matched nodes that are mshtml.IHTMLElement instances, in the
            /// order the script collected them; empty when the script returned nothing.
            /// </returns>
            public IEnumerable<mshtml.IHTMLElement> GetHtmlElements(string xpathQuery)
            {
                // Credit: http://stackoverflow.com/questions/5278275/accessing-properties-of-javascript-objects-using-type-dynamic-in-c-sharp-4
                object comObject = _webBrowser.Document.InvokeScript("getElements", new object[] { xpathQuery });
                if (comObject == null)
                {
                    yield break;
                }

                Type type = comObject.GetType();

                // The script object is read late-bound: "length" holds the item
                // count and the items are stored under the keys "1".."length".
                int length = Convert.ToInt32(
                    type.InvokeMember("length", BindingFlags.GetProperty, null, comObject, null),
                    CultureInfo.InvariantCulture);

                for (int i = 1; i <= length; i++)
                {
                    var element = type.InvokeMember(
                        i.ToString(CultureInfo.InvariantCulture),
                        BindingFlags.GetProperty,
                        null,
                        comObject,
                        null) as mshtml.IHTMLElement;

                    // Skip matches that are not HTML elements so callers never see null.
                    if (element != null)
                    {
                        yield return element;
                    }
                }
            }
        }
    }
}
And here are the results of running the code:
I have put the credits' references inline in the code. If you find that I have missed any, please point them out to me in the comments and I'll add them.
If you know a better solution - shorter code, more efficient code - please comment and/or post an answer.
Comments
Post a Comment