用 C# WinForms 的 WebBrowser 控件读取网页内容,分析数据,做一些采集工作。
如果是同一个域名下面还是好办的,基本上用HtmlAgilityPack就完全可以解决问题。
但是现在遇到跨域问题,比如我需要打开页面上存在的广告联盟的地址,进行保存。
这就牵扯到跨域了。一般的错误是:拒绝访问。
"Access is denied. (Exception from HRESULT: 0x80070005 (E_ACCESSDENIED))"。
因为你没有在这个网站上访问、修改另一个网站数据的权限。
怎么办?很苦恼吧。现在就告诉大家一个好办法。
直接上代码了。
工具类,大家保存成一个类。需要引用mshtml
using System;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using mshtml;

namespace WebBrowserTest
{
    /// <summary>
    /// COM IServiceProvider interface (IID 6D5140C1-7436-11CE-8034-00AA006009FA).
    /// This is NOT the managed System.IServiceProvider interface.
    /// </summary>
    [ComImport(), ComVisible(true), Guid("6D5140C1-7436-11CE-8034-00AA006009FA"),
    InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    public interface IServiceProvider
    {
        /// <summary>
        /// Asks the object for a service. Because of [PreserveSig] the raw HRESULT is
        /// returned to the caller instead of being converted into an exception.
        /// </summary>
        [return: MarshalAs(UnmanagedType.I4)]
        [PreserveSig]
        int QueryService(ref Guid guidService, ref Guid riid, [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
    }

    /// <summary>Command status flags used by <see cref="IWebBrowser2.QueryStatusWB"/>.</summary>
    public enum OLECMDF
    {
        OLECMDF_DEFHIDEONCTXTMENU = 0x20,
        OLECMDF_ENABLED = 2,
        OLECMDF_INVISIBLE = 0x10,
        OLECMDF_LATCHED = 4,
        OLECMDF_NINCHED = 8,
        OLECMDF_SUPPORTED = 1
    }

    /// <summary>Command identifiers accepted by <see cref="IWebBrowser2.ExecWB"/>.</summary>
    public enum OLECMDID
    {
        OLECMDID_PAGESETUP = 8,
        OLECMDID_PRINT = 6,
        OLECMDID_PRINTPREVIEW = 7,
        OLECMDID_PROPERTIES = 10,
        OLECMDID_SAVEAS = 4
    }

    /// <summary>Execution options accepted by <see cref="IWebBrowser2.ExecWB"/>.</summary>
    public enum OLECMDEXECOPT
    {
        OLECMDEXECOPT_DODEFAULT,
        OLECMDEXECOPT_PROMPTUSER,
        OLECMDEXECOPT_DONTPROMPTUSER,
        OLECMDEXECOPT_SHOWHELP
    }

    /// <summary>
    /// Managed declaration of the COM IWebBrowser2 interface.
    /// NOTE(review): the member order presumably mirrors the native interface layout;
    /// do not reorder, add or remove members without checking the COM definition.
    /// </summary>
    [ComImport, Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E"), TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]
    public interface IWebBrowser2
    {
        [DispId(100)]
        void GoBack();

        [DispId(0x65)]
        void GoForward();

        [DispId(0x66)]
        void GoHome();

        [DispId(0x67)]
        void GoSearch();

        [DispId(0x68)]
        void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

        [DispId(-550)]
        void Refresh();

        [DispId(0x69)]
        void Refresh2([In] ref object level);

        [DispId(0x6a)]
        void Stop();

        [DispId(200)]
        object Application { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xc9)]
        object Parent { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xca)]
        object Container { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xcb)]
        object Document { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xcc)]
        bool TopLevelContainer { get; }

        [DispId(0xcd)]
        string Type { get; }

        [DispId(0xce)]
        int Left { get; set; }

        [DispId(0xcf)]
        int Top { get; set; }

        [DispId(0xd0)]
        int Width { get; set; }

        [DispId(0xd1)]
        int Height { get; set; }

        [DispId(210)]
        string LocationName { get; }

        [DispId(0xd3)]
        string LocationURL { get; }

        [DispId(0xd4)]
        bool Busy { get; }

        [DispId(300)]
        void Quit();

        [DispId(0x12d)]
        void ClientToWindow(out int pcx, out int pcy);

        [DispId(0x12e)]
        void PutProperty([In] string property, [In] object vtValue);

        [DispId(0x12f)]
        object GetProperty([In] string property);

        [DispId(0)]
        string Name { get; }

        [DispId(-515)]
        int HWND { get; }

        [DispId(400)]
        string FullName { get; }

        [DispId(0x191)]
        string Path { get; }

        [DispId(0x192)]
        bool Visible { get; set; }

        [DispId(0x193)]
        bool StatusBar { get; set; }

        [DispId(0x194)]
        string StatusText { get; set; }

        [DispId(0x195)]
        int ToolBar { get; set; }

        [DispId(0x196)]
        bool MenuBar { get; set; }

        [DispId(0x197)]
        bool FullScreen { get; set; }

        [DispId(500)]
        void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

        [DispId(0x1f5)]
        OLECMDF QueryStatusWB([In] OLECMDID cmdID);

        [DispId(0x1f6)]
        void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);

        [DispId(0x1f7)]
        void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);

        [DispId(-525)]
        WebBrowserReadyState ReadyState { get; }

        [DispId(550)]
        bool Offline { get; set; }

        [DispId(0x227)]
        bool Silent { get; set; }

        [DispId(0x228)]
        bool RegisterAsBrowser { get; set; }

        [DispId(0x229)]
        bool RegisterAsDropTarget { get; set; }

        [DispId(0x22a)]
        bool TheaterMode { get; set; }

        [DispId(0x22b)]
        bool AddressBar { get; set; }

        [DispId(0x22c)]
        bool Resizable { get; set; }
    }

    /// <summary>
    /// Helper for reading the document of a frame whose content comes from another domain.
    /// </summary>
    class CorssDomainHelper
    {
        // Service and interface identifiers handed to IServiceProvider.QueryService.
        private static readonly Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");

        private static readonly Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

        /// <summary>
        /// Utility for IE cross-domain access: returns the document hosted by a frame window.
        /// </summary>
        /// <param name="htmlWindow">The frame window to read; may be null.</param>
        /// <returns>
        /// The frame's document, or null when <paramref name="htmlWindow"/> is null or the
        /// document cannot be obtained.
        /// </returns>
        public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
        {
            if (htmlWindow == null)
            {
                return null;
            }

            // First try the usual way to get the document.
            try
            {
                IHTMLDocument2 doc = htmlWindow.document;
                return (IHTMLDocument3)doc;
            }
            catch (COMException)
            {
                // Probably never raised here, but treated like an access-denied failure:
                // fall through to the IWebBrowser2 route below.
            }
            catch (UnauthorizedAccessException)
            {
                // E_ACCESSDENIED: the frame contains a document from another domain and IE
                // blocks direct access to prevent cross-frame scripting. Fall through.
            }
            catch (Exception)
            {
                return null;
            }

            try
            {
                // Convert IHTMLWindow2 to IWebBrowser2 using IServiceProvider.
                IServiceProvider sp = (IServiceProvider)htmlWindow;

                // QueryService takes its GUIDs by ref, so pass local copies of the readonly fields.
                Guid serviceId = IID_IWebBrowserApp;
                Guid interfaceId = IID_IWebBrowser2;

                object brws;
                int hr = sp.QueryService(ref serviceId, ref interfaceId, out brws);
                if (hr < 0)
                {
                    // Negative HRESULT means failure; report it instead of tripping over a null object.
                    Console.WriteLine("QueryService for IWebBrowser2 failed, HRESULT=0x{0:X8}", hr);
                    return null;
                }

                IWebBrowser2 browser = brws as IWebBrowser2;
                if (browser == null)
                {
                    return null;
                }

                // Get the document from IWebBrowser2.
                return (IHTMLDocument3)browser.Document;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            return null;
        }
    }
}
调用方法:
/// <summary>
/// Usage sample: walks every frame of the browser's current document, obtains each
/// frame's document through <c>CorssDomainHelper.GetDocumentFromWindow</c> and
/// enumerates the links it contains.
/// </summary>
public void test()
{
    // NOTE: a WebBrowser that has not navigated anywhere has no document, so this sample
    // does nothing until you substitute your own, already-loaded WebBrowser control.
    using (WebBrowser browser = new WebBrowser())
    {
        HtmlDocument page = browser.Document;
        if (page == null || page.Window == null || page.Window.Frames == null)
        {
            // Nothing loaded yet; dereferencing Document here would throw NullReferenceException.
            return;
        }

        HtmlWindowCollection frames = page.Window.Frames;
        for (int i = 0; i < frames.Count; i++)
        {
            IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(frames[i].DomWindow as IHTMLWindow2);

            if (baiduDoc != null && baiduDoc.documentElement != null && baiduDoc.documentElement.document != null)
            {
                IHTMLElementCollection linkss = ((HTMLDocument)(baiduDoc.documentElement.document)).links;

                foreach (mshtml.IHTMLElement element in linkss)
                {
                    // Add your own processing code here.
                }
            }
        }
    }
}
原文出自:http://www.cnblogs.com/Leo_wl/p/3181353.html