Puppeteer 中如何绕过无头浏览器检测

执行以下代码:

# credits: https://intoli.com/blog/making-chrome-headless-undetectable/

import pyppeteer as pp

from futile2.http import get_random_desktop_ua

HIDE_SCRIPTS = dict(
    hide_webdriver="""
() => {
    Object.defineProperty(navigator, 'webdriver', {
      get: () => false,
    });
  }
""",
    hide_navigator="""
() => {
    // We can mock this in as much depth as we need for the test.
    window.navigator.chrome = {
      app: {
        isInstalled: false,
      },
      webstore: {
        onInstallStageChanged: {},
        onDownloadProgress: {},
      },
      runtime: {
        PlatformOs: {
          MAC: 'mac',
          WIN: 'win',
          ANDROID: 'android',
          CROS: 'cros',
          LINUX: 'linux',
          OPENBSD: 'openbsd',
        },
        PlatformArch: {
          ARM: 'arm',
          X86_32: 'x86-32',
          X86_64: 'x86-64',
        },
        PlatformNaclArch: {
          ARM: 'arm',
          X86_32: 'x86-32',
          X86_64: 'x86-64',
        },
        RequestUpdateCheckStatus: {
          THROTTLED: 'throttled',
          NO_UPDATE: 'no_update',
          UPDATE_AVAILABLE: 'update_available',
        },
        OnInstalledReason: {
          INSTALL: 'install',
          UPDATE: 'update',
          CHROME_UPDATE: 'chrome_update',
          SHARED_MODULE_UPDATE: 'shared_module_update',
        },
        OnRestartRequiredReason: {
          APP_UPDATE: 'app_update',
          OS_UPDATE: 'os_update',
          PERIODIC: 'periodic',
        },
      },
    };
  }
""",
    hide_permission="""
() => {
    const originalQuery = window.navigator.permissions.query;
    return window.navigator.permissions.query = (parameters) => (
      parameters.name === 'notifications' ?
        Promise.resolve({ state: Notification.permission }) :
        originalQuery(parameters)
    );
  }
""",
    hide_plugins_length="""
() => {
    // Overwrite the `plugins` property to use a custom getter.
    Object.defineProperty(navigator, 'plugins', {
      // This just needs to have `length > 0` for the current test,
      // but we could mock the plugins too if necessary.
      get: () => [1, 2, 3, 4, 5],
    });
  }
""",
    hide_language="""
() => {
    // Overwrite the `plugins` property to use a custom getter.
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });
  }
""",
    hide_webgl_renderer="""
() => {
    const getParameter = WebGLRenderingContext.getParameter;
    WebGLRenderingContext.prototype.getParameter = function(parameter) {
      // UNMASKED_VENDOR_WEBGL
      if (parameter === 37445) {
        return 'Intel Open Source Technology Center';
      }
      // UNMASKED_RENDERER_WEBGL
      if (parameter === 37446) {
        return 'Mesa DRI Intel(R) Ivybridge Mobile ';
      }

      return getParameter(parameter);
    };
}
""",
    hide_broken_image="""
() => {
    ['height', 'width'].forEach(property => {
      // store the existing descriptor
      const imageDescriptor = Object.getOwnPropertyDescriptor(HTMLImageElement.prototype, property);

      // redefine the property with a patched descriptor
      Object.defineProperty(HTMLImageElement.prototype, property, {
        ...imageDescriptor,
        get: function() {
          // return an arbitrary non-zero dimension if the image failed to load
          if (this.complete && this.naturalHeight == 0) {
            return 20;
          }
          // otherwise, return the actual dimension
          return imageDescriptor.get.apply(this);
        },
      });
  });
}
""",
    hide_modernizr="""
() => {
    // store the existing descriptor
    const elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'offsetHeight');

    // redefine the property with a patched descriptor
    Object.defineProperty(HTMLDivElement.prototype, 'offsetHeight', {
      ...elementDescriptor,
      get: function() {
        if (this.id === 'modernizr') {
            return 1;
        }
        return elementDescriptor.get.apply(this);
      },
    });
}
""",
)

async def get_headless_page(*args, **kwargs):
    """
    生成一个无法检测的浏览器页面
    """
    browser = await pp.launch(*args, **kwargs)
    page = await browser.newPage()
    await page.setUserAgent(get_random_desktop_ua())
    for script in HIDE_SCRIPTS.values():
        await page.evaluateOnNewDocument(script)

    return page

及时获取更新,请关注公众号“爬虫技术学习(spider-learn)”

公众号“爬虫技术学习(spider-learn)”

About 逸飞

后端工程师

One thought on “Puppeteer 中如何绕过无头浏览器检测

发表评论

电子邮件地址不会被公开。 必填项已用*标注