博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
C#彻底解决Web Browser 跨域读取Iframes内容
阅读量:6519 次
发布时间:2019-06-24

本文共 9154 字,大约阅读时间需要 30 分钟。

用 C# WinForm 的 WebBrowser 控件读取网页内容,分析其中的数据,做一些采集工作。

如果是同一个域名下面还是好办的,基本上用HtmlAgilityPack就完全可以解决问题。 

但是现在遇到跨域问题,比如我需要打开页面上存在的广告联盟的地址,进行保存。 

这就牵扯到跨域了。常见的错误是:拒绝访问。

"Access is denied. (Exception from HRESULT: 0x80070005 (E_ACCESSDENIED))"。 

因为你没有在这个网站去修改另一个网站数据的权利。 

怎么办?很苦恼吧。现在就告诉大家一个好办法。

直接上代码了。 

先看工具类,大家把下面的代码保存成一个类文件即可。项目需要添加对 mshtml(Microsoft.mshtml)的引用。

using System;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using mshtml;

namespace WebBrowserTest
{
    /// <summary>
    /// The COM <c>IServiceProvider</c> interface (IID 6D5140C1-7436-11CE-8034-00AA006009FA).
    /// This is NOT the .NET <c>System.IServiceProvider</c> interface.
    /// </summary>
    [ComImport(), ComVisible(true), Guid("6D5140C1-7436-11CE-8034-00AA006009FA"),
    InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    public interface IServiceProvider
    {
        /// <summary>
        /// Asks the object for the service identified by <paramref name="guidService"/> and
        /// returns the requested interface in <paramref name="ppvObject"/>.
        /// </summary>
        /// <returns>
        /// The raw HRESULT. Because of <c>[PreserveSig]</c> a failure is NOT turned into an
        /// exception, so callers must inspect the return value themselves.
        /// </returns>
        [return: MarshalAs(UnmanagedType.I4)]
        [PreserveSig]
        int QueryService(ref Guid guidService, ref Guid riid, [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
    }

    /// <summary>Command status flags used by <see cref="IWebBrowser2.QueryStatusWB"/>.</summary>
    public enum OLECMDF
    {
        OLECMDF_DEFHIDEONCTXTMENU = 0x20,
        OLECMDF_ENABLED = 2,
        OLECMDF_INVISIBLE = 0x10,
        OLECMDF_LATCHED = 4,
        OLECMDF_NINCHED = 8,
        OLECMDF_SUPPORTED = 1
    }

    /// <summary>Command identifiers accepted by <see cref="IWebBrowser2.ExecWB"/>.</summary>
    public enum OLECMDID
    {
        OLECMDID_PAGESETUP = 8,
        OLECMDID_PRINT = 6,
        OLECMDID_PRINTPREVIEW = 7,
        OLECMDID_PROPERTIES = 10,
        OLECMDID_SAVEAS = 4
    }

    /// <summary>Execution options accepted by <see cref="IWebBrowser2.ExecWB"/>.</summary>
    public enum OLECMDEXECOPT
    {
        OLECMDEXECOPT_DODEFAULT,
        OLECMDEXECOPT_PROMPTUSER,
        OLECMDEXECOPT_DONTPROMPTUSER,
        OLECMDEXECOPT_SHOWHELP
    }

    /// <summary>
    /// Managed declaration of the COM <c>IWebBrowser2</c> interface.
    /// </summary>
    /// <remarks>
    /// This is a COM interop declaration: do not reorder, add or remove members, and do not
    /// change the DispId values, without checking them against the native interface.
    /// </remarks>
    [ComImport, Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E"), TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]
    public interface IWebBrowser2
    {
        [DispId(100)]
        void GoBack();

        [DispId(0x65)]
        void GoForward();

        [DispId(0x66)]
        void GoHome();

        [DispId(0x67)]
        void GoSearch();

        [DispId(0x68)]
        void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

        [DispId(-550)]
        void Refresh();

        [DispId(0x69)]
        void Refresh2([In] ref object level);

        [DispId(0x6a)]
        void Stop();

        [DispId(200)]
        object Application { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xc9)]
        object Parent { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xca)]
        object Container { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xcb)]
        object Document { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xcc)]
        bool TopLevelContainer { get; }

        [DispId(0xcd)]
        string Type { get; }

        [DispId(0xce)]
        int Left { get; set; }

        [DispId(0xcf)]
        int Top { get; set; }

        [DispId(0xd0)]
        int Width { get; set; }

        [DispId(0xd1)]
        int Height { get; set; }

        [DispId(210)]
        string LocationName { get; }

        [DispId(0xd3)]
        string LocationURL { get; }

        [DispId(0xd4)]
        bool Busy { get; }

        [DispId(300)]
        void Quit();

        [DispId(0x12d)]
        void ClientToWindow(out int pcx, out int pcy);

        [DispId(0x12e)]
        void PutProperty([In] string property, [In] object vtValue);

        [DispId(0x12f)]
        object GetProperty([In] string property);

        [DispId(0)]
        string Name { get; }

        [DispId(-515)]
        int HWND { get; }

        [DispId(400)]
        string FullName { get; }

        [DispId(0x191)]
        string Path { get; }

        [DispId(0x192)]
        bool Visible { get; set; }

        [DispId(0x193)]
        bool StatusBar { get; set; }

        [DispId(0x194)]
        string StatusText { get; set; }

        [DispId(0x195)]
        int ToolBar { get; set; }

        [DispId(0x196)]
        bool MenuBar { get; set; }

        [DispId(0x197)]
        bool FullScreen { get; set; }

        [DispId(500)]
        void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

        [DispId(0x1f5)]
        OLECMDF QueryStatusWB([In] OLECMDID cmdID);

        [DispId(0x1f6)]
        void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);

        [DispId(0x1f7)]
        void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);

        [DispId(-525)]
        WebBrowserReadyState ReadyState { get; }

        [DispId(550)]
        bool Offline { get; set; }

        [DispId(0x227)]
        bool Silent { get; set; }

        [DispId(0x228)]
        bool RegisterAsBrowser { get; set; }

        [DispId(0x229)]
        bool RegisterAsDropTarget { get; set; }

        [DispId(0x22a)]
        bool TheaterMode { get; set; }

        [DispId(0x22b)]
        bool AddressBar { get; set; }

        [DispId(0x22c)]
        bool Resizable { get; set; }
    }

    /// <summary>
    /// Helper for reading the document of a frame whose content comes from another domain.
    /// </summary>
    /// <remarks>
    /// The class name is a misspelling of "CrossDomainHelper"; it is kept unchanged so that
    /// existing callers keep compiling.
    /// </remarks>
    class CorssDomainHelper
    {
        // Service id passed to IServiceProvider.QueryService.
        private static readonly Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");

        // Interface id requested from IServiceProvider.QueryService.
        private static readonly Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

        /// <summary>
        /// Utility for IE cross-domain access: returns the document hosted by
        /// <paramref name="htmlWindow"/>, even when it belongs to another domain.
        /// </summary>
        /// <param name="htmlWindow">The frame window to read; may be null.</param>
        /// <returns>
        /// The frame's document, or <c>null</c> if <paramref name="htmlWindow"/> is null or the
        /// document could not be obtained.
        /// </returns>
        public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
        {
            if (htmlWindow == null)
            {
                return null;
            }

            // First try the usual way to get the document.
            try
            {
                IHTMLDocument2 doc = htmlWindow.document;
                return (IHTMLDocument3)doc;
            }
            catch (COMException)
            {
                // Probably never raised here, but treated like the access-denied case:
                // fall through to the IServiceProvider route below.
            }
            catch (UnauthorizedAccessException)
            {
                // Access denied (E_ACCESSDENIED): the frame contains a document from another
                // domain and IE blocks cross-frame scripting. Fall through to the fallback.
            }
            catch (Exception)
            {
                // Any other failure is not a cross-domain problem; give up.
                return null;
            }

            try
            {
                // Convert IHTMLWindow2 to IWebBrowser2 using the COM IServiceProvider.
                IServiceProvider sp = (IServiceProvider)htmlWindow;

                // QueryService takes its GUIDs by ref; pass copies so the static fields can
                // stay readonly.
                Guid serviceId = IID_IWebBrowserApp;
                Guid interfaceId = IID_IWebBrowser2;

                object brws;
                int hr = sp.QueryService(ref serviceId, ref interfaceId, out brws);

                // QueryService is [PreserveSig]: a failure arrives as a negative HRESULT (and
                // a null object), not as an exception, so check it explicitly.
                if (hr < 0 || brws == null)
                {
                    return null;
                }

                // Get the document from IWebBrowser2.
                IWebBrowser2 browser = (IWebBrowser2)brws;
                return (IHTMLDocument3)browser.Document;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            return null;
        }
    }
}
// 调用方法 (usage): the calling example follows.
/// <summary>
/// Usage example: walks every frame of a WebBrowser document and enumerates the links of
/// each frame document obtained through CorssDomainHelper.GetDocumentFromWindow.
/// </summary>
/// <remarks>
/// NOTE(review): this example creates its own WebBrowser and never navigates it, so there is
/// normally no document to inspect. Real code presumably wants to run the loop against an
/// existing, already-loaded browser control instead.
/// </remarks>
public void test()
{
    // WebBrowser is IDisposable; release it when the example is done.
    using (WebBrowser browser = new WebBrowser())
    {
        // Document is null while no page is loaded; bail out instead of throwing
        // NullReferenceException.
        if (browser.Document == null || browser.Document.Window == null)
        {
            return;
        }

        HtmlWindowCollection frames = browser.Document.Window.Frames;
        if (frames == null)
        {
            return;
        }

        for (int i = 0; i < frames.Count; i++)
        {
            IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(frames[i].DomWindow as IHTMLWindow2);

            if (baiduDoc != null && baiduDoc.documentElement != null && baiduDoc.documentElement.document != null)
            {
                IHTMLElementCollection linkss = ((HTMLDocument)(baiduDoc.documentElement.document)).links;

                foreach (mshtml.IHTMLElement element in linkss)
                {
                    // Add your own processing code here.
                }
            }
        }
    }
}

原文出自:http://www.cnblogs.com/Leo_wl/p/3181353.html

 

转载于:https://www.cnblogs.com/changjianblog/p/7456618.html

你可能感兴趣的文章
Wireshark and Tcpdump tips
查看>>
windows2003单域迁移到2008R2服务器
查看>>
我的友情链接
查看>>
浅析:Android--Fragment的懒加载
查看>>
Linux操作系统目录和Linux常用的文件和目录管理命令
查看>>
DIY:自己动手做一个迷你 Linux 系统(二)
查看>>
ethereumjs/merkle-patricia-tree-2-API
查看>>
go标准库的学习-runtime
查看>>
NodeJS学习之文件操作
查看>>
WebSocket 是什么原理?为什么可以实现持久连接
查看>>
Python自学笔记-logging模块详解
查看>>
Money去哪了- 每日站立会议
查看>>
Python数据结构和算法学习笔记1
查看>>
正则之从dom字符串中提取url
查看>>
大数据——基础概念
查看>>
机器学习温和指南
查看>>
Object 类有哪些方法
查看>>
oracle 将一个表复制到另外一个表里 .
查看>>
jQuery清空标签内容--防止内存泄露
查看>>
关于 HandlerMethodArgumentResolver 类 以及 WebArgumentResolver 类 自定义解析参数
查看>>