> ## Documentation Index
> Fetch the complete documentation index at: https://hyperbrowser.ai/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Start a web crawl job

> Starts an asynchronous crawl job that follows links from a starting URL and returns content from each page in the specified formats.



## OpenAPI

````yaml /openapi.json POST /api/web/crawl
# OpenAPI document header. Version-like scalars are quoted so they can never be
# re-typed by a YAML loader.
openapi: '3.0.1'
info:
  title: Hyperbrowser API
  version: '1.0.0'
servers:
  - description: Production server
    url: 'https://api.hyperbrowser.ai'
# No document-wide security requirement is declared at this level.
security: []
paths:
  /api/web/crawl:
    # Authenticated with ApiKeyAuth (overriding the empty document-level
    # `security` list). A successful call returns only the new job's id.
    post:
      summary: Start a web crawl job
      description: >-
        Starts an asynchronous crawl job that follows links from a starting
        URL and returns content from each page in the specified formats.
      security:
        - ApiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StartWebCrawlJobParams'
      responses:
        '200':
          description: Crawl job started successfully
          content:
            application/json:
              schema:
                type: object
                required: [jobId]
                properties:
                  jobId: {type: string}
        # Both failure responses share the ErrorResponse body.
        '400':
          description: Invalid request parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
      # SDK usage samples. Each `source` is a literal block scalar, so the code
      # below is exactly the string a consumer receives (minus the indentation).
      x-codeSamples:
        - lang: javascript
          label: Start a web crawl
          source: |-
            import { Hyperbrowser } from '@hyperbrowser/sdk';

            const client = new Hyperbrowser({ apiKey: 'your-api-key' });

            const response = await client.web.crawl.start({
              url: 'https://example.com',
              outputs: {
                formats: ['markdown']
              },
              crawlOptions: {
                maxPages: 10,
                followLinks: true
              }
            });
        - lang: python
          label: Start a web crawl
          source: |-
            from hyperbrowser import Hyperbrowser
            from hyperbrowser.models import StartWebCrawlJobParams, WebCrawlOptions, FetchOutputOptions

            client = Hyperbrowser(api_key='your-api-key')

            response = client.web.crawl.start(StartWebCrawlJobParams(
              url='https://example.com',
              outputs=FetchOutputOptions(
                formats=['markdown']
              ),
              crawl_options=WebCrawlOptions(
                max_pages=10,
                follow_links=True
              )
            ))
components:
  schemas:
    # Request body of POST /api/web/crawl. Only `url` is mandatory; every other
    # property delegates to a shared schema under components.schemas.
    StartWebCrawlJobParams:
      type: object
      required: [url]
      properties:
        url: {type: string}
        stealth:
          $ref: '#/components/schemas/FetchStealthMode'
        outputs:
          $ref: '#/components/schemas/FetchOutputOptions'
        browser:
          $ref: '#/components/schemas/FetchBrowserOptions'
        navigation:
          $ref: '#/components/schemas/FetchNavigationOptions'
        cache:
          $ref: '#/components/schemas/FetchCacheOptions'
        crawlOptions:
          $ref: '#/components/schemas/WebCrawlOptions'
    # Error payload used by the 400 and 500 responses of the crawl endpoint.
    ErrorResponse:
      type: object
      properties:
        message: {type: string}
    # Closed set of values accepted by StartWebCrawlJobParams.stealth.
    FetchStealthMode:
      type: string
      enum: [none, auto, ultra]
    # Output settings referenced by StartWebCrawlJobParams.outputs. Each entry of
    # `formats` is either an object carrying a `type` marker or a bare format name.
    FetchOutputOptions:
      type: object
      properties:
        formats:
          type: array
          items:
            oneOf:
              # Object forms, each pinned to one format by its `type` enum.
              - $ref: '#/components/schemas/FetchOutputMarkdown'
              - $ref: '#/components/schemas/FetchOutputHtml'
              - $ref: '#/components/schemas/FetchOutputLinks'
              - $ref: '#/components/schemas/FetchOutputScreenshot'
              - $ref: '#/components/schemas/FetchOutputJson'
              - $ref: '#/components/schemas/FetchOutputBranding'
              # String shorthand. NOTE(review): `json` is not listed here although
              # FetchOutputJson exists above; presumably intentional, confirm.
              - type: string
                enum: [markdown, html, links, screenshot, branding]
        sanitize:
          $ref: '#/components/schemas/FetchSanitizeMode'
        includeSelectors:
          type: array
          items: {type: string}
        excludeSelectors:
          type: array
          items: {type: string}
        storageState:
          $ref: '#/components/schemas/FetchStorageStateOptions'
    # Browser settings referenced by StartWebCrawlJobParams.browser.
    FetchBrowserOptions:
      type: object
      properties:
        screen:
          $ref: '#/components/schemas/ScreenConfig'
        profileId: {type: string}
        # NOTE(review): typed as a string rather than a boolean; confirm this
        # matches what the API actually accepts.
        solveCaptchas: {type: string}
        location:
          $ref: '#/components/schemas/FetchBrowserLocationOptions'
    # Navigation settings referenced by StartWebCrawlJobParams.navigation.
    FetchNavigationOptions:
      type: object
      properties:
        waitUntil:
          $ref: '#/components/schemas/FetchWaitUntil'
        # Presumably milliseconds, going by the `Ms` suffix; confirm.
        timeoutMs: {type: integer}
        # NOTE(review): the unit of this integer is not stated in this spec.
        waitFor: {type: integer}
    # Cache settings referenced by StartWebCrawlJobParams.cache.
    FetchCacheOptions:
      type: object
      properties:
        maxAgeSeconds: {type: integer}
    # Crawl settings referenced by StartWebCrawlJobParams.crawlOptions.
    WebCrawlOptions:
      type: object
      properties:
        # Integer between 1 and 100 inclusive; documented default is 10.
        maxPages:
          type: integer
          default: 10
          minimum: 1
          maximum: 100
        followLinks: {type: boolean, default: true}
        ignoreSitemap: {type: boolean, default: false}
        # NOTE(review): the pattern syntax (glob vs. regex) is not specified here.
        excludePatterns:
          type: array
          items: {type: string}
        includePatterns:
          type: array
          items: {type: string}
    # Object form of the `markdown` format; declares only the `type` marker.
    FetchOutputMarkdown:
      type: object
      required: [type]
      properties:
        type:
          type: string
          enum: [markdown]
    # Object form of the `html` format; declares only the `type` marker.
    FetchOutputHtml:
      type: object
      required: [type]
      properties:
        type:
          type: string
          enum: [html]
    # Object form of the `links` format; declares only the `type` marker.
    FetchOutputLinks:
      type: object
      required: [type]
      properties:
        type:
          type: string
          enum: [links]
    # Object form of the `screenshot` format: FetchOutputScreenshotOptions
    # combined (allOf) with a mandatory `type: screenshot` marker.
    FetchOutputScreenshot:
      allOf:
        - $ref: '#/components/schemas/FetchOutputScreenshotOptions'
        - type: object
          required: [type]
          properties:
            type:
              type: string
              enum: [screenshot]
    # Object form of the `json` format: FetchOutputJsonOptions combined (allOf)
    # with a mandatory `type: json` marker.
    FetchOutputJson:
      allOf:
        - $ref: '#/components/schemas/FetchOutputJsonOptions'
        - type: object
          required: [type]
          properties:
            type:
              type: string
              enum: [json]
    # Object form of the `branding` format; declares only the `type` marker.
    FetchOutputBranding:
      type: object
      required: [type]
      properties:
        type:
          type: string
          enum: [branding]
    # Closed set of values accepted by FetchOutputOptions.sanitize.
    FetchSanitizeMode:
      type: string
      enum: [none, basic, advanced]
    # Two free-form maps with string values, keyed `localStorage` and
    # `sessionStorage`; referenced by FetchOutputOptions.storageState.
    FetchStorageStateOptions:
      type: object
      properties:
        localStorage:
          type: object
          additionalProperties: {type: string}
        sessionStorage:
          type: object
          additionalProperties: {type: string}
    # Screen dimensions referenced by FetchBrowserOptions.screen; the documented
    # defaults are 1280 (width) and 720 (height).
    # NOTE(review): both are typed `number`, so fractional values validate;
    # confirm whether `integer` was intended.
    ScreenConfig:
      type: object
      properties:
        width: {type: number, default: 1280}
        height: {type: number, default: 720}
    # Location referenced by FetchBrowserOptions.location; all parts are optional
    # strings. NOTE(review): expected value formats (e.g. codes vs. names) are
    # not specified in this spec.
    FetchBrowserLocationOptions:
      type: object
      properties:
        country: {type: string}
        state: {type: string}
        city: {type: string}
    # Closed set of values accepted by FetchNavigationOptions.waitUntil.
    FetchWaitUntil:
      type: string
      enum: [load, domcontentloaded, networkidle]
    # Optional screenshot settings; folded into FetchOutputScreenshot via allOf.
    FetchOutputScreenshotOptions:
      type: object
      properties:
        fullPage: {type: boolean}
        format:
          $ref: '#/components/schemas/FetchScreenshotFormat'
        cropToContent: {type: boolean}
        # NOTE(review): units of the two height bounds are not stated here.
        cropToContentMaxHeight: {type: integer}
        cropToContentMinHeight: {type: integer}
    # Options for the `json` output format; folded into FetchOutputJson via
    # allOf. Both properties are optional at the schema level.
    FetchOutputJsonOptions:
      type: object
      properties:
        schema:
          type: object
          description: >-
            Schema that defines the structure of the extracted output. May be
            omitted when a prompt is provided, in which case a schema is
            auto-generated from the prompt.
        prompt:
          type: string
          description: >-
            Natural language prompt describing what data to extract. If only
            prompt is provided, a schema is auto-generated from it. If both
            prompt and schema are provided, the schema defines the output
            structure while the prompt provides additional guidance for the
            extraction.
    # Closed set of values accepted by FetchOutputScreenshotOptions.format.
    FetchScreenshotFormat:
      type: string
      enum: [jpeg, png, webp]
  # API-key scheme: the key is sent in the `x-api-key` request header.
  securitySchemes:
    ApiKeyAuth:
      type: apiKey
      name: x-api-key
      in: header

````